- tekton tasks for eks mng, fargate, workaround for irsa, eks pipelines with MNG and fargate (#203)

- add sleep time for MNG to work around VAS cool-off time
- add PD (prefix delegation) support

Co-authored-by: Harish Kuna <[email protected]>
hakuna-matatah and Harish Kuna authored May 11, 2022
1 parent 3ff5c48 commit ede4336
Showing 14 changed files with 457 additions and 42 deletions.
@@ -47,7 +47,7 @@ spec:
   kubeletConfiguration:
     clusterDNS:
       - "10.96.0.10"
-  ttlSecondsAfterEmpty: 30
+  ttlSecondsAfterEmpty: 7200
   provider:
     instanceProfile: %[1]s-tenant-controlplane-node-role
     tags:
26 changes: 14 additions & 12 deletions substrate/pkg/controller/substrate/cluster/config.go
@@ -48,18 +48,20 @@ import (
 )

 const (
-	ClusterCertsBasePath   = "/tmp/"
-	kubeconfigPath         = "/etc/kubernetes"
-	kubeconfigFile         = "etc/kubernetes/admin.conf"
-	certPKIPath            = "/etc/kubernetes/pki"
-	clusterManifestPath    = "/etc/kubernetes/manifests"
-	kubeletSystemdPath     = "/etc/systemd/system"
-	kubeletConfigPath      = "/var/lib/kubelet/"
-	authenticatorConfigDir = "/etc/aws-iam-authenticator"
-	kubernetesVersionTag   = "v1.21.2-eks-1-21-4"
-	imageRepository        = "public.ecr.aws/eks-distro/kubernetes"
-	etcdVersionTag         = "v3.4.16-eks-1-21-7"
-	etcdImageRepository    = "public.ecr.aws/eks-distro/etcd-io"
+	ClusterCertsBasePath       = "/tmp/"
+	kubeconfigPath             = "/etc/kubernetes"
+	kubeconfigFile             = "etc/kubernetes/admin.conf"
+	certPKIPath                = "/etc/kubernetes/pki"
+	clusterManifestPath        = "/etc/kubernetes/manifests"
+	kubeletSystemdPath         = "/etc/systemd/system"
+	kubeletConfigPath          = "/var/lib/kubelet/"
+	authenticatorConfigDir     = "/etc/aws-iam-authenticator"
+	kubernetesVersionTag       = "v1.21.2-eks-1-21-4"
+	imageRepository            = "public.ecr.aws/eks-distro/kubernetes"
+	etcdVersionTag             = "v3.4.16-eks-1-21-7"
+	etcdImageRepository        = "public.ecr.aws/eks-distro/etcd-io"
+	// TODO: until we have IRSA support (https://github.com/awslabs/kubernetes-iteration-toolkit/issues/186),
+	// this role name is tightly coupled with the Tekton pipelines and tasks; if you change it, update tasks/ accordingly.
+	TenantControlPlaneNodeRole = "tenant-controlplane-node-role"
 )
15 changes: 13 additions & 2 deletions substrate/pkg/controller/substrate/cluster/instanceprofile.go
@@ -48,6 +48,7 @@ func (i *InstanceProfile) Create(ctx context.Context, substrate *v1alpha1.Substr
 }

 func (i *InstanceProfile) create(ctx context.Context, resourceName, policy *string, managedPolicies []string) (reconcile.Result, error) {
+	// TODO: remove the Fargate service principal once https://github.com/awslabs/kubernetes-iteration-toolkit/issues/186 is in place
 	// Role
 	if _, err := i.IAM.CreateRole(&iam.CreateRoleInput{RoleName: resourceName, AssumeRolePolicyDocument: aws.String(`{
 		"Version": "2012-10-17",
@@ -56,7 +57,10 @@ func (i *InstanceProfile) create(ctx context.Context, resourceName, policy *stri
 			"Effect": "Allow",
 			"Action": "sts:AssumeRole",
 			"Principal": {
-				"Service": "ec2.amazonaws.com"
+				"Service": [
+					"ec2.amazonaws.com",
+					"eks-fargate-pods.amazonaws.com"
+				]
 			}
 		}
 	]}`)}); err != nil {
@@ -289,14 +293,21 @@ func desiredRolesFor(substrate *v1alpha1.Substrate) []role {
 		},
 	}, {
 		// Roles and policies attached to the nodes provisioned by Karpenter
+		// TODO: remove the eks and iam actions once we have support for this - https://github.com/awslabs/kubernetes-iteration-toolkit/issues/186
 		name: discovery.Name(substrate, TenantControlPlaneNodeRole), policy: aws.String(`{
 			"Version": "2012-10-17",
 			"Statement": [
 				{
 					"Effect": "Allow",
 					"Action": [
+						"iam:GetRole",
+						"iam:PassRole",
+						"iam:CreateServiceLinkedRole",
+						"iam:ListAttachedRolePolicies",
 						"kms:Encrypt",
-						"kms:Decrypt"
+						"kms:Decrypt",
+						"eks:*",
+						"s3:*"
 					],
 					"Resource": ["*"]
 				}
74 changes: 74 additions & 0 deletions tests/pipelines/eks/awscli-cl2-load.yaml
@@ -0,0 +1,74 @@
apiVersion: tekton.dev/v1beta1
kind: Pipeline
metadata:
  name: awscli-eks-cl2loadtest
  namespace: tekton-pipelines
spec:
  params:
    - name: cluster-name
    - name: endpoint
    - name: servicerole
    - name: desired-nodes
    - name: host-cluster-node-role-arn
    - name: pods-per-node
    - name: nodes-per-namespace
    - name: cl2-load-test-throughput
    - name: results-bucket
  tasks:
    - name: create-eks-cluster
      params:
        - name: cluster-name
          value: $(params.cluster-name)
        - name: servicerole
          value: $(params.servicerole)
        - name: endpoint
          value: $(params.endpoint)
      taskRef:
        kind: Task
        name: awscli-eks-cluster-create
      workspaces:
        - name: config
          workspace: config
    - name: create-mng-nodes
      params:
        - name: cluster-name
          value: $(params.cluster-name)
        - name: desired-nodes
          value: $(params.desired-nodes)
        - name: host-cluster-node-role-arn
          value: $(params.host-cluster-node-role-arn)
        - name: endpoint
          value: $(params.endpoint)
      runAfter:
        - create-eks-cluster
      taskRef:
        kind: Task
        name: awscli-eks-nodegroup-create
    - name: generate
      params:
        - name: pods-per-node
          value: $(params.pods-per-node)
        - name: nodes-per-namespace
          value: $(params.nodes-per-namespace)
        - name: cl2-load-test-throughput
          value: $(params.cl2-load-test-throughput)
        - name: results-bucket
          value: $(params.results-bucket)
        - name: nodes
          value: $(params.desired-nodes)
      runAfter:
        - create-mng-nodes
      taskRef:
        kind: Task
        name: load
      workspaces:
        - name: source
          workspace: source
        - name: config
          workspace: config
        - name: results
          workspace: results
  workspaces:
    - name: config
    - name: source
    - name: results
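
For illustration, a PipelineRun along these lines could kick off this pipeline. Every parameter value below (cluster name, account ID, role ARNs, bucket path) is a placeholder assumption rather than a value from this commit, and the workspace sizing is likewise illustrative:

apiVersion: tekton.dev/v1beta1
kind: PipelineRun
metadata:
  generateName: awscli-eks-cl2loadtest-
  namespace: tekton-pipelines
spec:
  pipelineRef:
    name: awscli-eks-cl2loadtest
  params:
    - name: cluster-name
      value: cl2-loadtest                                            # placeholder
    - name: endpoint
      value: ""                                                      # empty string targets the default EKS endpoint
    - name: servicerole
      value: arn:aws:iam::111122223333:role/example-eks-service-role # placeholder
    - name: desired-nodes
      value: "1000"
    - name: host-cluster-node-role-arn
      value: arn:aws:iam::111122223333:role/example-node-role        # placeholder
    - name: pods-per-node
      value: "10"
    - name: nodes-per-namespace
      value: "100"
    - name: cl2-load-test-throughput
      value: "15"
    - name: results-bucket
      value: s3://example-bucket/cl2-results                         # placeholder
  workspaces:
    - name: config
      volumeClaimTemplate:
        spec:
          accessModes: ["ReadWriteOnce"]
          resources:
            requests:
              storage: 1Gi
    - name: source
      volumeClaimTemplate:
        spec:
          accessModes: ["ReadWriteOnce"]
          resources:
            requests:
              storage: 1Gi
    - name: results
      volumeClaimTemplate:
        spec:
          accessModes: ["ReadWriteOnce"]
          resources:
            requests:
              storage: 1Gi

volumeClaimTemplate bindings are used here because the source, config, and results workspaces are shared across tasks, which an emptyDir workspace cannot provide.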
34 changes: 34 additions & 0 deletions tests/pipelines/eks/awscli-eks-fargate.yaml
@@ -0,0 +1,34 @@
---
apiVersion: tekton.dev/v1beta1
kind: Pipeline
metadata:
  name: awscli-eks-fargate
  namespace: tekton-pipelines
spec:
  workspaces:
    - name: config
  params:
    - name: cluster-name
    - name: desired-nodes
    - name: host-cluster-node-role-arn
  tasks:
    - name: setup-control-plane
      taskRef:
        name: awscli-eks-cluster-create
      params:
        - name: cluster-name
          value: '$(params.cluster-name)'
      workspaces:
        - name: config
          workspace: config
    - name: setup-data-plane
      runAfter: [setup-control-plane]
      taskRef:
        name: awscli-eks-fargate-create
      params:
        - name: cluster-name
          value: '$(params.cluster-name)'
        - name: desired-nodes
          value: '$(params.desired-nodes)'
        - name: host-cluster-node-role-arn
          value: '$(params.host-cluster-node-role-arn)'
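
Similarly, a hypothetical PipelineRun sketch for the Fargate pipeline; all values are placeholders, not values from this commit:

apiVersion: tekton.dev/v1beta1
kind: PipelineRun
metadata:
  generateName: awscli-eks-fargate-
  namespace: tekton-pipelines
spec:
  pipelineRef:
    name: awscli-eks-fargate
  params:
    - name: cluster-name
      value: fargate-test                                      # placeholder
    - name: desired-nodes
      value: "10"                                              # placeholder
    - name: host-cluster-node-role-arn
      value: arn:aws:iam::111122223333:role/example-node-role  # placeholder
  workspaces:
    - name: config
      volumeClaimTemplate:
        spec:
          accessModes: ["ReadWriteOnce"]
          resources:
            requests:
              storage: 1Gi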
38 changes: 12 additions & 26 deletions tests/tasks/generators/clusterloader/load.yaml
@@ -3,6 +3,7 @@ apiVersion: tekton.dev/v1beta1
 kind: Task
 metadata:
   name: load
+  namespace: tekton-pipelines
 spec:
   description: "clusterloader2 task to run various types of cl2 tests on a given cluster."
   params:
@@ -11,12 +12,16 @@ spec:
       default: https://github.com/kubernetes/perf-tests.git
     - name: nodes-per-namespace
       description: "nodes per namespace to be created for the load test"
+      default: "100"
     - name: cl2-load-test-throughput
       description: "throughput used for mutate operations"
+      default: "15"
     - name: pods-per-node
       description: "pod density"
+      default: "10"
     - name: nodes
       description: "number of dataplane nodes to run the load test against"
+      default: "1000"
     - name: results-bucket
       description: "Results bucket with path of s3 to upload results"
   workspaces:
@@ -29,53 +34,34 @@ spec:
       workingDir: $(workspaces.source.path)
       args: ["clone", "$(params.giturl)"]
     - name: prepare-loadtest
-      image: amazon/aws-cli
+      image: alpine/k8s:1.22.6
       workingDir: $(workspaces.source.path)
       script: |
         cat > "$(workspaces.source.path)/overrides.yaml" <<EOL
         NODES_PER_NAMESPACE: $(params.nodes-per-namespace)
         CL2_LOAD_TEST_THROUGHPUT: $(params.cl2-load-test-throughput)
+        CL2_SCHEDULER_THROUGHPUT_THRESHOLD: 20
         PODS_PER_NODE: $(params.pods-per-node)
+        CL2_USE_HOST_NETWORK_PODS: false
         # we are not testing statefulsets at this point
         SMALL_STATEFUL_SETS_PER_NAMESPACE: 0
         MEDIUM_STATEFUL_SETS_PER_NAMESPACE: 0
         # we are not testing PVs at this point
         CL2_ENABLE_PVS: false
         PROMETHEUS_SCRAPE_APISERVER_ONLY: true
-        PROMETHEUS_SCRAPE_KUBE_PROXY: false
+        PROMETHEUS_SCRAPE_KUBE_PROXY: true
         ENABLE_SYSTEM_POD_METRICS: false
         NODE_MODE: master
         EOL
         cat $(workspaces.source.path)/overrides.yaml
+        cp $(workspaces.source.path)/overrides.yaml $(workspaces.results.path)/overrides.yaml
-    - name: validate-cluster
-      image: amazon/aws-cli
-      workingDir: $(workspaces.config.path)
-      script: |
-        mkdir -p /root/.kube/
-        cp $(workspaces.config.path)/kubeconfig /root/.kube/config
-        # TODO: Move to a separate task and chain it to this task through the pipeline if we need more checks than just these before kicking off the test.
-        # kubectl commands are purely for knowing the state of the cluster before kicking off the test.
-        curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
-        install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
-        kubectl version
-        kubectl config current-context
-        kubectl get nodes
-        kubectl get ns
-        # end
     - name: run-loadtest
-      image: 197575167141.dkr.ecr.us-west-2.amazonaws.com/clusterloader2:76e3fd7
+      image: public.ecr.aws/kit/clusterloader2:v0.0.1
+      onError: continue
       script: |
-        ENABLE_EXEC_SERVICE=false ./clusterloader --kubeconfig=$(workspaces.config.path)/kubeconfig --testconfig=$(workspaces.source.path)/perf-tests/clusterloader2/testing/load/config.yaml --testoverrides=$(workspaces.source.path)/overrides.yaml --nodes=$(params.nodes) --provider=eks --report-dir=$(workspaces.results.path) --alsologtostderr
-        # To run the next step regardless of success/failure of the test
-        TEST_EXIT_CODE=$?
-        if [ $TEST_EXIT_CODE != 0 ]; then
-          exit 0
-        fi
+        ENABLE_EXEC_SERVICE=false /clusterloader --kubeconfig=$(workspaces.config.path)/kubeconfig --testconfig=$(workspaces.source.path)/perf-tests/clusterloader2/testing/load/config.yaml --testoverrides=$(workspaces.source.path)/overrides.yaml --nodes=$(params.nodes) --provider=eks --report-dir=$(workspaces.results.path) --alsologtostderr --v=6
       timeout: 30000s
     - name: upload-results
-      image: amazon/aws-cli
+      image: alpine/k8s:1.22.6
       workingDir: $(workspaces.results.path)
      script: |
        aws sts get-caller-identity
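
With the parameter defaults above, the prepare-loadtest step renders an overrides.yaml equivalent to the following (the defaults are substituted here purely for illustration):

NODES_PER_NAMESPACE: 100
CL2_LOAD_TEST_THROUGHPUT: 15
CL2_SCHEDULER_THROUGHPUT_THRESHOLD: 20
PODS_PER_NODE: 10
CL2_USE_HOST_NETWORK_PODS: false
# we are not testing statefulsets at this point
SMALL_STATEFUL_SETS_PER_NAMESPACE: 0
MEDIUM_STATEFUL_SETS_PER_NAMESPACE: 0
# we are not testing PVs at this point
CL2_ENABLE_PVS: false
PROMETHEUS_SCRAPE_APISERVER_ONLY: true
PROMETHEUS_SCRAPE_KUBE_PROXY: true
ENABLE_SYSTEM_POD_METRICS: false
NODE_MODE: master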
55 changes: 55 additions & 0 deletions tests/tasks/setup/eks/awscli-cp.yaml
@@ -0,0 +1,55 @@
---
apiVersion: tekton.dev/v1beta1
kind: Task
metadata:
  name: awscli-eks-cluster-create
  namespace: tekton-pipelines
spec:
  description: |
    Create an EKS cluster.
    This Task can be used to create an EKS cluster for a given service role in an AWS account and write a kubeconfig to a desired location that
    can be used by other tasks (in a context with kubectl) to make requests to the cluster.
  params:
    - name: cluster-name
      description: The name of the EKS cluster you want to spin up.
    - name: kubernetes-version
      default: "1.21"
      description: The EKS version to install.
    - name: region
      default: "us-west-2"
      description: The region to create the cluster in.
    - name: endpoint
      default: ""
      description: "AWS EKS endpoint to create clusters against"
    - name: servicerole
      description: Service role ARN used by the EKS cluster to perform operations in the customer account to set up the cluster.
  workspaces:
    - name: config
      description: |
        A workspace into which a kubeconfig file called `kubeconfig` will be written that will contain the information required to access the cluster. The `kubeconfig` will expect to use [aws-iam-authenticator](https://github.com/kubernetes-sigs/aws-iam-authenticator/) to authenticate, so in order for it to be used it must be run in a container which contains both `kubectl` and `aws-iam-authenticator`.
  steps:
    - name: write-kubeconfig
      image: alpine/k8s:1.22.6
      script: |
        echo "Approving KCM requests"
        kubectl certificate approve $(kubectl get csr | grep "Pending" | awk '{print $1}') 2>/dev/null || true
        ENDPOINT_FLAG=""
        if [ -n "$(params.endpoint)" ]; then
          ENDPOINT_FLAG="--endpoint $(params.endpoint)"
        fi
        CREATED_CLUSTER=$(aws eks $ENDPOINT_FLAG list-clusters --region $(params.region) --query 'clusters[?@==`'$(params.cluster-name)'`]' --output text)
        echo "CREATED_CLUSTER=$CREATED_CLUSTER"
        TAG=$(kubectl get provisioner -oyaml | grep karpenter.sh/discovery | awk 'NR==1{ print $2}')
        subnets=$(aws ec2 describe-subnets --region $(params.region) --filters Name=tag:kit.aws/substrate,Values=$TAG --query 'Subnets[].SubnetId' | jq -r ' [.[]] | join(",")')
        echo "subnets=$subnets"
        sg=$(aws ec2 describe-security-groups --region $(params.region) --filters Name=tag:kit.aws/substrate,Values=$TAG --query 'SecurityGroups[].GroupId' | jq -r ' .[0] ')
        echo "securitygroup=$sg"
        if [ "$CREATED_CLUSTER" == "" ]; then
          aws eks create-cluster --name $(params.cluster-name) --region $(params.region) --kubernetes-version $(params.kubernetes-version) --role-arn $(params.servicerole) --resources-vpc-config subnetIds=$subnets,securityGroupIds=$sg $ENDPOINT_FLAG
        fi
        aws eks $ENDPOINT_FLAG --region $(params.region) wait cluster-active --name $(params.cluster-name)
        aws eks $ENDPOINT_FLAG update-kubeconfig --name $(params.cluster-name) --region $(params.region)
        cp /root/.kube/config $(workspaces.config.path)/kubeconfig
        # enable prefix delegation (PD) on the cluster
        kubectl set env ds aws-node -n kube-system ENABLE_PREFIX_DELEGATION=true
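
As a usage sketch, a downstream Task could mount the same config workspace and query the new cluster with the kubeconfig written by the step above. The Task below is hypothetical; it reuses the alpine/k8s:1.22.6 image this commit already uses for kubectl access:

apiVersion: tekton.dev/v1beta1
kind: Task
metadata:
  name: eks-cluster-smoke-check   # hypothetical consumer task
  namespace: tekton-pipelines
spec:
  workspaces:
    - name: config
  steps:
    - name: check-nodes
      image: alpine/k8s:1.22.6
      script: |
        # point kubectl at the kubeconfig produced by awscli-eks-cluster-create
        export KUBECONFIG=$(workspaces.config.path)/kubeconfig
        kubectl get nodes
        kubectl get ns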