From 96d577f720bf4fd53d7ec1cf026d3c57c7bc7942 Mon Sep 17 00:00:00 2001
From: Cristiano Saggin <86964015+RH-csaggin@users.noreply.github.com>
Date: Fri, 27 Sep 2024 13:59:56 +0200
Subject: [PATCH 1/2] adding zookeeper deployment for ai-telemetry on obs cluster

---
 ai-telemetry/base/kustomization.yaml          |   4 +
 ai-telemetry/base/zookeeper/configmap.yaml    |  30 +++
 .../base/zookeeper/kustomization.yaml         |   9 +
 .../base/zookeeper/networkpolicy.yaml         |  30 +++
 .../base/zookeeper/service-headless.yaml      |  26 +++
 ai-telemetry/base/zookeeper/service.yaml      |  19 ++
 .../base/zookeeper/serviceaccount.yaml        |  10 +
 ai-telemetry/base/zookeeper/statefulset.yaml  | 172 ++++++++++++++++++
 .../overlays/nerc-ocp-obs/kustomization.yaml  |   7 +
 .../nerc-ocp-obs/pvc/kustomization.yaml       |   4 +
 .../nerc-ocp-obs/pvc/zookeeper-data.yaml      |  11 ++
 11 files changed, 322 insertions(+)
 create mode 100644 ai-telemetry/base/kustomization.yaml
 create mode 100644 ai-telemetry/base/zookeeper/configmap.yaml
 create mode 100644 ai-telemetry/base/zookeeper/kustomization.yaml
 create mode 100644 ai-telemetry/base/zookeeper/networkpolicy.yaml
 create mode 100644 ai-telemetry/base/zookeeper/service-headless.yaml
 create mode 100644 ai-telemetry/base/zookeeper/service.yaml
 create mode 100644 ai-telemetry/base/zookeeper/serviceaccount.yaml
 create mode 100644 ai-telemetry/base/zookeeper/statefulset.yaml
 create mode 100644 ai-telemetry/overlays/nerc-ocp-obs/kustomization.yaml
 create mode 100644 ai-telemetry/overlays/nerc-ocp-obs/pvc/kustomization.yaml
 create mode 100644 ai-telemetry/overlays/nerc-ocp-obs/pvc/zookeeper-data.yaml

diff --git a/ai-telemetry/base/kustomization.yaml b/ai-telemetry/base/kustomization.yaml
new file mode 100644
index 00000000..22d69fe9
--- /dev/null
+++ b/ai-telemetry/base/kustomization.yaml
@@ -0,0 +1,4 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+- zookeeper
diff --git a/ai-telemetry/base/zookeeper/configmap.yaml b/ai-telemetry/base/zookeeper/configmap.yaml
new file mode 100644
index 00000000..ea1e0f2f
--- /dev/null
+++ b/ai-telemetry/base/zookeeper/configmap.yaml
@@ -0,0 +1,30 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: zookeeper-scripts
+  labels:
+    app.kubernetes.io/instance: zookeeper
+    app.kubernetes.io/name: zookeeper
+    app.kubernetes.io/component: zookeeper
+data:
+  init-certs.sh: |-
+    #!/bin/bash
+  setup.sh: |-
+    #!/bin/bash
+
+    # Execute entrypoint as usual after obtaining ZOO_SERVER_ID
+    # check ZOO_SERVER_ID in persistent volume via myid
+    # if not present, set based on POD hostname
+    if [[ -f "/bitnami/zookeeper/data/myid" ]]; then
+        export ZOO_SERVER_ID="$(cat /bitnami/zookeeper/data/myid)"
+    else
+        HOSTNAME="$(hostname -s)"
+        if [[ $HOSTNAME =~ (.*)-([0-9]+)$ ]]; then
+            ORD=${BASH_REMATCH[2]}
+            export ZOO_SERVER_ID="$((ORD + 1 ))"
+        else
+            echo "Failed to get index from hostname $HOSTNAME"
+            exit 1
+        fi
+    fi
+    exec /entrypoint.sh /run.sh
diff --git a/ai-telemetry/base/zookeeper/kustomization.yaml b/ai-telemetry/base/zookeeper/kustomization.yaml
new file mode 100644
index 00000000..f8186c46
--- /dev/null
+++ b/ai-telemetry/base/zookeeper/kustomization.yaml
@@ -0,0 +1,9 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+- networkpolicy.yaml
+- configmap.yaml
+- service-headless.yaml
+- service.yaml
+- serviceaccount.yaml
+- statefulset.yaml
diff --git a/ai-telemetry/base/zookeeper/networkpolicy.yaml b/ai-telemetry/base/zookeeper/networkpolicy.yaml
new file mode 100644
index 00000000..a7d480a5
--- /dev/null
+++ b/ai-telemetry/base/zookeeper/networkpolicy.yaml
@@ -0,0 +1,30 @@
+kind: NetworkPolicy
+apiVersion: networking.k8s.io/v1
+metadata:
+  name: zookeeper
+  labels:
+    app.kubernetes.io/instance: zookeeper
+    app.kubernetes.io/name: zookeeper
+spec:
+  podSelector:
+    matchLabels:
+      app.kubernetes.io/instance: zookeeper
+      app.kubernetes.io/name: zookeeper
+  policyTypes:
+    - Ingress
+    - Egress
+  egress:
+    - {}
+  ingress:
+    # Allow inbound connections to ZooKeeper
+    - ports:
+        - port: 2181
+    # Allow internal communications between nodes
+    - ports:
+        - port: 2888
+        - port: 3888
+      from:
+        - podSelector:
+            matchLabels:
+              app.kubernetes.io/instance: zookeeper
+              app.kubernetes.io/name: zookeeper
diff --git a/ai-telemetry/base/zookeeper/service-headless.yaml b/ai-telemetry/base/zookeeper/service-headless.yaml
new file mode 100644
index 00000000..cf62bdfc
--- /dev/null
+++ b/ai-telemetry/base/zookeeper/service-headless.yaml
@@ -0,0 +1,26 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: zookeeper-headless
+  labels:
+    app.kubernetes.io/instance: zookeeper
+    app.kubernetes.io/name: zookeeper
+    app.kubernetes.io/component: zookeeper
+spec:
+  type: ClusterIP
+  clusterIP: None
+  publishNotReadyAddresses: true
+  ports:
+    - name: tcp-client
+      port: 2181
+      targetPort: client
+    - name: tcp-follower
+      port: 2888
+      targetPort: follower
+    - name: tcp-election
+      port: 3888
+      targetPort: election
+  selector:
+    app.kubernetes.io/instance: zookeeper
+    app.kubernetes.io/name: zookeeper
+    app.kubernetes.io/component: zookeeper
diff --git a/ai-telemetry/base/zookeeper/service.yaml b/ai-telemetry/base/zookeeper/service.yaml
new file mode 100644
index 00000000..c4e7d691
--- /dev/null
+++ b/ai-telemetry/base/zookeeper/service.yaml
@@ -0,0 +1,19 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: zookeeper
+  labels:
+    app.kubernetes.io/instance: zookeeper
+    app.kubernetes.io/name: zookeeper
+    app.kubernetes.io/component: zookeeper
+spec:
+  type: ClusterIP
+  sessionAffinity: None
+  ports:
+    - name: tcp-client
+      port: 2181
+      targetPort: client
+  selector:
+    app.kubernetes.io/instance: zookeeper
+    app.kubernetes.io/name: zookeeper
+    app.kubernetes.io/component: zookeeper
diff --git a/ai-telemetry/base/zookeeper/serviceaccount.yaml b/ai-telemetry/base/zookeeper/serviceaccount.yaml
new file mode 100644
index 00000000..85c84e37
--- /dev/null
+++ b/ai-telemetry/base/zookeeper/serviceaccount.yaml
@@ -0,0 +1,10 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: zookeeper
+  labels:
+    app.kubernetes.io/instance: zookeeper
+    app.kubernetes.io/name: zookeeper
+    app.kubernetes.io/component: zookeeper
+    role: zookeeper
+automountServiceAccountToken: false
diff --git a/ai-telemetry/base/zookeeper/statefulset.yaml b/ai-telemetry/base/zookeeper/statefulset.yaml
new file mode 100644
index 00000000..3112e070
--- /dev/null
+++ b/ai-telemetry/base/zookeeper/statefulset.yaml
@@ -0,0 +1,172 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: zookeeper
+  annotations:
+    template.alpha.openshift.io/wait-for-ready: "true"
+  labels:
+    app.kubernetes.io/instance: zookeeper
+    app.kubernetes.io/name: zookeeper
+    app.kubernetes.io/component: zookeeper
+    role: zookeeper
+spec:
+  replicas: 1
+  revisionHistoryLimit: 10
+  podManagementPolicy: Parallel
+  selector:
+    matchLabels:
+      app.kubernetes.io/instance: zookeeper
+      app.kubernetes.io/name: zookeeper
+      app.kubernetes.io/component: zookeeper
+  serviceName: zookeeper-headless
+  updateStrategy:
+    rollingUpdate: {}
+    type: RollingUpdate
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/instance: zookeeper
+        app.kubernetes.io/name: zookeeper
+        app.kubernetes.io/component: zookeeper
+    spec:
+      enableServiceLinks: true
+      serviceAccountName: zookeeper
+      automountServiceAccountToken: false
+      affinity:
+        podAntiAffinity:
+          preferredDuringSchedulingIgnoredDuringExecution:
+            - podAffinityTerm:
+                labelSelector:
+                  matchLabels:
+                    app.kubernetes.io/instance: zookeeper
+                    app.kubernetes.io/name: zookeeper
+                    app.kubernetes.io/component: zookeeper
+                topologyKey: kubernetes.io/hostname
+              weight: 1
+      securityContext:
+        fsGroupChangePolicy: Always
+      containers:
+        - name: zookeeper
+          image: docker.io/bitnami/zookeeper:3.9.2-debian-12-r12
+          imagePullPolicy: "IfNotPresent"
+          securityContext:
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop:
+                - ALL
+            privileged: false
+            readOnlyRootFilesystem: true
+            runAsNonRoot: true
+            seccompProfile:
+              type: RuntimeDefault
+          command:
+            - /scripts/setup.sh
+          resources:
+            limits:
+              cpu: 375m
+              ephemeral-storage: 2Gi
+              memory: 384Mi
+            requests:
+              cpu: 250m
+              ephemeral-storage: 50Mi
+              memory: 256Mi
+          env:
+            - name: BITNAMI_DEBUG
+              value: "false"
+            - name: ZOO_DATA_LOG_DIR
+              value: ""
+            - name: ZOO_PORT_NUMBER
+              value: "2181"
+            - name: ZOO_TICK_TIME
+              value: "2000"
+            - name: ZOO_INIT_LIMIT
+              value: "10"
+            - name: ZOO_SYNC_LIMIT
+              value: "5"
+            - name: ZOO_PRE_ALLOC_SIZE
+              value: "65536"
+            - name: ZOO_SNAPCOUNT
+              value: "100000"
+            - name: ZOO_MAX_CLIENT_CNXNS
+              value: "60"
+            - name: ZOO_4LW_COMMANDS_WHITELIST
+              value: "srvr, mntr, ruok"
+            - name: ZOO_LISTEN_ALLIPS_ENABLED
+              value: "no"
+            - name: ZOO_AUTOPURGE_INTERVAL
+              value: "1"
+            - name: ZOO_AUTOPURGE_RETAIN_COUNT
+              value: "10"
+            - name: ZOO_MAX_SESSION_TIMEOUT
+              value: "40000"
+            - name: ZOO_SERVERS
+              value: zookeeper-0.zookeeper-headless.ai-telemetry.svc.cluster.local:2888:3888::1
+            - name: ZOO_ENABLE_AUTH
+              value: "no"
+            - name: ZOO_ENABLE_QUORUM_AUTH
+              value: "no"
+            - name: ZOO_HEAP_SIZE
+              value: "1024"
+            - name: ZOO_LOG_LEVEL
+              value: "ERROR"
+            - name: ALLOW_ANONYMOUS_LOGIN
+              value: "yes"
+            - name: POD_NAME
+              valueFrom:
+                fieldRef:
+                  apiVersion: v1
+                  fieldPath: metadata.name
+            - name: ZOO_ADMIN_SERVER_PORT_NUMBER
+              value: "8080"
+          ports:
+            - name: client
+              containerPort: 2181
+            - name: http-admin
+              containerPort: 8080
+          livenessProbe:
+            failureThreshold: 6
+            initialDelaySeconds: 30
+            periodSeconds: 10
+            successThreshold: 1
+            timeoutSeconds: 5
+            exec:
+              command:
+                - /bin/bash
+                - -ec
+                - ZOO_HC_TIMEOUT=3 /opt/bitnami/scripts/zookeeper/healthcheck.sh
+          readinessProbe:
+            failureThreshold: 6
+            initialDelaySeconds: 5
+            periodSeconds: 10
+            successThreshold: 1
+            timeoutSeconds: 5
+            exec:
+              command:
+                - /bin/bash
+                - -ec
+                - ZOO_HC_TIMEOUT=2 /opt/bitnami/scripts/zookeeper/healthcheck.sh
+          volumeMounts:
+            - name: empty-dir
+              mountPath: /tmp
+              subPath: tmp-dir
+            - name: empty-dir
+              mountPath: /opt/bitnami/zookeeper/conf
+              subPath: app-conf-dir
+            - name: empty-dir
+              mountPath: /opt/bitnami/zookeeper/logs
+              subPath: app-logs-dir
+            - name: scripts
+              mountPath: /scripts/setup.sh
+              subPath: setup.sh
+            - name: data
+              mountPath: /bitnami/zookeeper
+      volumes:
+        - name: empty-dir
+          emptyDir: {}
+        - name: scripts
+          configMap:
+            name: zookeeper-scripts
+            defaultMode: 493
+        - name: data
+          persistentVolumeClaim:
+            claimName: zookeeper-data
diff --git a/ai-telemetry/overlays/nerc-ocp-obs/kustomization.yaml b/ai-telemetry/overlays/nerc-ocp-obs/kustomization.yaml
new file mode 100644
index 00000000..499ae3bf
--- /dev/null
+++ b/ai-telemetry/overlays/nerc-ocp-obs/kustomization.yaml
@@ -0,0 +1,7 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+- ../../base
+- pvc
+
+namePrefix: obs-
diff --git a/ai-telemetry/overlays/nerc-ocp-obs/pvc/kustomization.yaml b/ai-telemetry/overlays/nerc-ocp-obs/pvc/kustomization.yaml
new file mode 100644
index 00000000..8c8d984e
--- /dev/null
+++ b/ai-telemetry/overlays/nerc-ocp-obs/pvc/kustomization.yaml
@@ -0,0 +1,4 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+- zookeeper-data.yaml
diff --git a/ai-telemetry/overlays/nerc-ocp-obs/pvc/zookeeper-data.yaml b/ai-telemetry/overlays/nerc-ocp-obs/pvc/zookeeper-data.yaml
new file mode 100644
index 00000000..b6a4c276
--- /dev/null
+++ b/ai-telemetry/overlays/nerc-ocp-obs/pvc/zookeeper-data.yaml
@@ -0,0 +1,11 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: zookeeper-data
+spec:
+  resources:
+    requests:
+      storage: "10Gi"
+  volumeMode: Filesystem
+  accessModes:
+    - ReadWriteOnce

From 908737600d4dace2daee2bdc7bb7dc504022f147 Mon Sep 17 00:00:00 2001
From: Cristiano Saggin <86964015+RH-csaggin@users.noreply.github.com>
Date: Wed, 2 Oct 2024 12:47:37 +0200
Subject: [PATCH 2/2] fixing computate comments and testing namespace creation

- added back the volumeClaimTemplates and removed pvc to allow scaling.
- removed namePrefix to have it more agnostic.
- add namespace to base/zookeeper/kustomization to apply to all the resources.
- rectify limits preventing slowing the application.
- set replica to 3 for production ready HA.
- list all three replicas in ZOO_SERVERS, taking the namespace from the pod,
  so the pods form a single ensemble in whatever namespace they run in.
- declare the follower/election container ports that the headless service
  targets by name.
- lower ZOO_HEAP_SIZE so the JVM heap fits inside the memory limit.
---
 ai-telemetry/base/zookeeper/kustomization.yaml |  1 +
 ai-telemetry/base/zookeeper/statefulset.yaml   | 41 +++++++++++++++++++++++++++++++++--------
 .../overlays/nerc-ocp-obs/kustomization.yaml   |  3 ---
 .../nerc-ocp-obs/pvc/kustomization.yaml        |  4 ----
 .../nerc-ocp-obs/pvc/zookeeper-data.yaml       | 11 -----------
 5 files changed, 34 insertions(+), 26 deletions(-)
 delete mode 100644 ai-telemetry/overlays/nerc-ocp-obs/pvc/kustomization.yaml
 delete mode 100644 ai-telemetry/overlays/nerc-ocp-obs/pvc/zookeeper-data.yaml

diff --git a/ai-telemetry/base/zookeeper/kustomization.yaml b/ai-telemetry/base/zookeeper/kustomization.yaml
index f8186c46..25b5e34b 100644
--- a/ai-telemetry/base/zookeeper/kustomization.yaml
+++ b/ai-telemetry/base/zookeeper/kustomization.yaml
@@ -1,5 +1,6 @@
 apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
+namespace: zookeeper
 resources:
 - networkpolicy.yaml
 - configmap.yaml
diff --git a/ai-telemetry/base/zookeeper/statefulset.yaml b/ai-telemetry/base/zookeeper/statefulset.yaml
index 3112e070..887b0676 100644
--- a/ai-telemetry/base/zookeeper/statefulset.yaml
+++ b/ai-telemetry/base/zookeeper/statefulset.yaml
@@ -10,7 +10,7 @@ metadata:
     app.kubernetes.io/component: zookeeper
     role: zookeeper
 spec:
-  replicas: 1
+  replicas: 3
   revisionHistoryLimit: 10
   podManagementPolicy: Parallel
   selector:
@@ -63,9 +63,9 @@ spec:
             - /scripts/setup.sh
           resources:
             limits:
-              cpu: 375m
+              cpu: '1'
               ephemeral-storage: 2Gi
-              memory: 384Mi
+              memory: 1Gi
             requests:
               cpu: 250m
               ephemeral-storage: 50Mi
@@ -99,14 +99,28 @@ spec:
               value: "10"
             - name: ZOO_MAX_SESSION_TIMEOUT
               value: "40000"
+            # Namespace the pod runs in. Referenced as $(POD_NAMESPACE) by
+            # ZOO_SERVERS below, so it has to be declared before that entry.
+            - name: POD_NAMESPACE
+              valueFrom:
+                fieldRef:
+                  apiVersion: v1
+                  fieldPath: metadata.namespace
             - name: ZOO_SERVERS
-              value: zookeeper-0.zookeeper-headless.ai-telemetry.svc.cluster.local:2888:3888::1
+              # One entry per replica; the id after "::" is ordinal + 1, the
+              # same rule setup.sh uses. Keep in sync with spec.replicas.
+              value: >-
+                zookeeper-0.zookeeper-headless.$(POD_NAMESPACE).svc.cluster.local:2888:3888::1
+                zookeeper-1.zookeeper-headless.$(POD_NAMESPACE).svc.cluster.local:2888:3888::2
+                zookeeper-2.zookeeper-headless.$(POD_NAMESPACE).svc.cluster.local:2888:3888::3
             - name: ZOO_ENABLE_AUTH
               value: "no"
             - name: ZOO_ENABLE_QUORUM_AUTH
               value: "no"
             - name: ZOO_HEAP_SIZE
-              value: "1024"
+              # Kept below the 1Gi container memory limit so the JVM has
+              # headroom for non-heap memory.
+              value: "512"
             - name: ZOO_LOG_LEVEL
               value: "ERROR"
             - name: ALLOW_ANONYMOUS_LOGIN
@@ -121,6 +135,11 @@ spec:
           ports:
             - name: client
               containerPort: 2181
+            # Named ports targeted by the zookeeper-headless Service.
+            - name: follower
+              containerPort: 2888
+            - name: election
+              containerPort: 3888
             - name: http-admin
               containerPort: 8080
           livenessProbe:
@@ -167,6 +186,12 @@ spec:
           configMap:
             name: zookeeper-scripts
             defaultMode: 493
-        - name: data
-          persistentVolumeClaim:
-            claimName: zookeeper-data
+  volumeClaimTemplates:
+    - metadata:
+        name: data
+      spec:
+        accessModes:
+          - "ReadWriteOnce"
+        resources:
+          requests:
+            storage: "10Gi"
diff --git a/ai-telemetry/overlays/nerc-ocp-obs/kustomization.yaml b/ai-telemetry/overlays/nerc-ocp-obs/kustomization.yaml
index 499ae3bf..4e4f1976 100644
--- a/ai-telemetry/overlays/nerc-ocp-obs/kustomization.yaml
+++ b/ai-telemetry/overlays/nerc-ocp-obs/kustomization.yaml
@@ -2,6 +2,3 @@ apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 resources:
 - ../../base
-- pvc
-
-namePrefix: obs-
diff --git a/ai-telemetry/overlays/nerc-ocp-obs/pvc/kustomization.yaml b/ai-telemetry/overlays/nerc-ocp-obs/pvc/kustomization.yaml
deleted file mode 100644
index 8c8d984e..00000000
--- a/ai-telemetry/overlays/nerc-ocp-obs/pvc/kustomization.yaml
+++ /dev/null
@@ -1,4 +0,0 @@
-apiVersion: kustomize.config.k8s.io/v1beta1
-kind: Kustomization
-resources:
-- zookeeper-data.yaml
diff --git a/ai-telemetry/overlays/nerc-ocp-obs/pvc/zookeeper-data.yaml b/ai-telemetry/overlays/nerc-ocp-obs/pvc/zookeeper-data.yaml
deleted file mode 100644
index b6a4c276..00000000
--- a/ai-telemetry/overlays/nerc-ocp-obs/pvc/zookeeper-data.yaml
+++ /dev/null
@@ -1,11 +0,0 @@
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: zookeeper-data
-spec:
-  resources:
-    requests:
-      storage: "10Gi"
-  volumeMode: Filesystem
-  accessModes:
-    - ReadWriteOnce