yk8s-Cluster Configuration

The environment variables affect the action scripts. The config/config.toml however is the main configuration file and can be adjusted to customize the yk8s cluster to fit your needs. It also contains operational flags which can trigger operational tasks. After initializing a cluster repository, the config/config.toml contains necessary (default) values to create a cluster. However, you'll still need to adjust some of them before triggering a cluster creation.

The config/config.toml configuration file

The config.toml configuration file is created during the cluster repository initialization from the templates/config.template.toml file. You can (and must) adjust some of its values.

Before triggering an action script, the inventory updater automatically reads the configuration file, processes it, and puts variables into the inventory/. The inventory/ is automatically included. Following the concept of separation of concerns, variables are only available to stages/layers which need them.

Configuring Terraform

You can overwrite all Terraform related variables (see below for a complete list) in the Terraform section of your config.toml.

By default 3 control plane nodes and 4 workers will get created. You'll need to adjust these values if you e.g. want to enable rook.

Note: Right now there is a variable masters to configure the k8s controller server count and workers for the k8s node count. However there is no explicit variable for the gateway node count! This is implicitly defined by the number of elements in the azs array.

Please note that with the introduction of for_each in our terraform module, you can delete individual nodes. Consider the following example:

[terraform]
workers = 3
worker_names = ["0", "1", "2"]

In order to delete any of the nodes, decrease the workers count and remove the suffix of the worker from the list. After removing, i.e., "1", your config would look like this:

[terraform]
workers = 2
worker_names = ["0", "2"]
All Terraform variables and their defaults
variable "cluster_name" {
  type = string
  default = "managed-k8s"
}

variable "haproxy_ports" {
  type = list(number)
  default = [30000, 30060]
}

variable "subnet_cidr" {
  type = string
  default = "172.30.154.0/24"
}

variable "subnet_v6_cidr" {
  type = string
  default = "fd00::/120"  # a default value is needed so
                          # terraform recognizes this variable as optional
}

variable "dualstack_support" {
  description = "If set to true, dualstack support related resources will be (re-)created"
  type = bool
}

variable "ssh_cidrs" {
  type    = list(string)
  default = ["0.0.0.0/0"]
}

variable "public_network" {
  type    = string
  default = "shared-public-IPv4"
}

variable "keypair" {
  type = string
}

variable "default_master_image_name" {
  type    = string
  default = "Ubuntu 20.04 LTS x64"
}

variable "default_worker_image_name" {
  type    = string
  default = "Ubuntu 20.04 LTS x64"
}

variable "gateway_image_name" {
  type    = string
  default = "Debian 11 (bullseye)"
}

variable "gateway_flavor" {
  type    = string
  default = "XS"
}

variable "default_master_flavor" {
  type    = string
  default = "M"
}

variable "default_worker_flavor" {
  type    = string
  default = "M"
}

variable "azs" {
  type    = list(string)
  default = ["AZ1", "AZ2", "AZ3"]
  description = "If 'enable_az_management=true', defines which availability zones of your cloud to use to distribute the spawned servers for better HA. Additionally, the length of the array defines how many gateway servers will be spawned. The naming of the elements doesn't matter if 'enable_az_management=false'. It is also used for unique naming of gateways."
}

variable "masters" {
  type    = number
  default = 3
}

variable "workers" {
  type    = number
  default = 4
}

variable "worker_flavors" {
  type    = list(string)
  default = []
}

variable "worker_images" {
  type    = list(string)
  default = []
}

variable "worker_azs" {
  type    = list(string)
  default = []
}

// It can be used to uniquely identify workers
variable "worker_names" {
  type    = list(string)
  default = []
}

variable "master_flavors" {
  type    = list(string)
  default = []
}

variable "master_images" {
  type    = list(string)
  default = []
}

variable "master_azs" {
  type    = list(string)
  default = []
}

// It can be used to uniquely identify masters
variable "master_names" {
  type    = list(string)
  default = []
}

variable "thanos_delete_container" {
  type    = bool
  default = false
}

// If set to false, the availability zone of instances will not be managed.
// This is useful in CI environments if the Cloud Is Full.
variable "enable_az_management" {
  type    = bool
  default = true
}

variable "create_root_disk_on_volume" {
  type = bool
  default = false
}

variable "timeout_time" {
  type = string
  default = "30m"
}

variable "root_disk_volume_type" {
  type = string
  default = "three_times_replicated"
}

variable "worker_join_anti_affinity_group" {
  type = list(bool)
  default = []
}

variable "worker_anti_affinity_group_name" {
  type = string
  default = "cah-anti-affinity"
}

variable "master_root_disk_sizes" {
  type = list(number)
  default = []
  description = "If 'create_root_disk_on_volume=true' and the master flavor does not specify a disk size, the root disk volume of this particular instance will have this size."
}

variable "worker_root_disk_sizes" {
  type = list(number)
  default = []
  description = "If 'create_root_disk_on_volume=true' and the worker flavor does not specify a disk size, the root disk volume of this particular instance will have this size."
}

variable "gateway_root_disk_volume_size" {
  type = number
  default = 10
  description = "If 'create_root_disk_on_volume=true' and the gateway flavor does not specify a disk size, the root disk volume will have this size."
}

variable "default_master_root_disk_size" {
  type = number
  default = 50
  description = "If 'create_root_disk_on_volume=true', the master flavor does not specify a disk size and no specific value has been given, the root disk volume will have this size."
}

variable "default_worker_root_disk_size" {
  type = number
  default = 50
  description = "If 'create_root_disk_on_volume=true', the worker flavor does not specify a disk size and no specific value has been given, the root disk volume will have this size."
}

variable "network_mtu" {
  type = number
  default = 1450
  description = "MTU for the network used for the cluster."
}

variable "monitoring_manage_thanos_bucket" {
  type = bool
  default = false
  description = "Create an object storage container for thanos."
}

config.toml: Terraform configuration
# --- TERRAFORM ---
# ansible prefix: /
[terraform]
#subnet_cidr = "172.30.154.0/24"
#masters = 3
#workers = 3
#worker_flavors = ["L", "M", "L"]

# if set to true one must set proper values for the "azs" array according to the cloud in use
#enable_az_management = false

# Enable DualStack support
# WARNING: DualStack support is not stable yet, see https://gitlab.com/yaook/k8s/-/issues/502
dualstack_support = false # "•ᴗ•"
# If you enabled DualStack-support you may want to adjust the IPv6 subnet
#subnet_v6_cidr = "fd00::/120"

# If true, create block volume for each instance and boot from there.
# Equivalent to `openstack server create --boot-from-volume […]`.
#create_root_disk_on_volume = false

# Volume type that is used if `create_root_disk_on_volume` is true.
#root_disk_volume_type = "three_times_replicated"

Configuring Load-Balancing

By default, if you're deploying on top of OpenStack, the self-developed load-balancing solution ch-k8s-lbaas will be used to avoid the aches of using OpenStack Octavia. Nonetheless, you are not forced to use it and can easily disable it.

The following section contains legacy load-balancing options which will probably be removed in the foreseeable future.

config.toml: Historic load-balancing configuration
# --- LOAD-BALANCING ---
# ansible prefix: /
[load-balancing]
# lb_ports is a list of ports that are exposed by HAProxy on the gateway nodes and forwarded
# to NodePorts in the k8s cluster. This poor man's load-balancing / exposing of services
# has been superseded by ch-k8s-lbaas. For legacy reasons and because it's useful under
# certain circumstances it is kept inside the repository.
# The NodePorts are either literally exposed by HAProxy or can be mapped to other ports.
# The `layer` attribute can either be `tcp` (L4) or `http` (L7). For `http`, `option forwardfor`
# is added implicitly to the backend servers in the haproxy configuration.
# If `use_proxy_protocol` is set to `true`, HAProxy will use the proxy protocol to convey information
# about the connection initiator to the backend. NOTE: the backend has to accept the proxy
# protocol, otherwise your traffic will be discarded.
# Short form:
#lb_ports = [30060]
# Explicit form:
#lb_ports = [{external=80,nodeport=30080, layer=tcp, use_proxy_protocol=true}]

# A list of priorities to assign to the gateway/frontend nodes. The priorities
# will be assigned based on the sorted list of matching nodes.
#
# If more nodes exist than there are entries in this list, the rollout will
# fail.
#
# Please note the keepalived.conf manpage for choosing priority values.
#vrrp_priorities = [150, 100, 50]

# Enable/Disable OpenStack-based load-balancing.
# openstack_lbaas = false

# Port for HAProxy statistics
#haproxy_stats_port = 48981

Kubernetes Cluster Configuration

This section contains generic information about the Kubernetes cluster configuration.

Basic Cluster Configuration

config.toml: Kubernetes basic cluster configuration
# --- KUBERNETES: BASIC CLUSTER CONFIGURATION ---
# ansible prefix: "k8s_"
[kubernetes]
# Kubernetes version. Currently, we support from 1.17.* to 1.23.*.
version = "1.23.9" # •ᴗ•

# Uncomment if this cluster contains a worker with GPU access so that the driver
# and surrounding framework is deployed.
# You must set the container_runtime to "docker" for GPU clusters.
is_gpu_cluster = false # •ᴗ•

# Set this variable to virtualize Nvidia GPUs on worker nodes.
# It will install a VGPU manager on the worker node and split the GPU according to chosen vgpu type.
# Note: This will not install Nvidia drivers to utilize vGPU guest VMs!!
# If set to true, please set further variables in the [miscellaneous] section.
#virtualize_gpu = false

[kubernetes.apiserver]
frontend_port = 8888 # •ᴗ•

Storage Configuration

config.toml: Kubernetes - Basic Storage Configuration
# --- KUBERNETES: STORAGE CONFIGURATION ---
# ansible prefix: "k8s_storage"
[kubernetes.storage]
# Many clusters will want to use rook, so you should enable
# or disable it here if you want. It requires extra options
# which need to be chosen with care.
rook_enabled = false # •ᴗ•

# Setting this to true will cause the storage plugins
# to run on all nodes (ignoring all taints). This is often desirable.
nodeplugin_toleration = false # •ᴗ•

# This flag enables the topology feature gate of the cinder controller plugin.
# Its purpose is to allocate volumes from cinder which are in the same AZ as
# the worker node to which the volume should be attached.
# Important: Cinder must support AZs and the AZs must match the AZs used by nova!
#cinder_enable_topology=true

config.toml: Kubernetes - Static Local Storage Configuration
# --- KUBERNETES: STATIC LOCAL STORAGE CONFIGURATION ---
# ansible prefix: "k8s_local_storage"
[kubernetes.local_storage.static]
# Enable static provisioning of local storage. This provisions a single local
# storage volume per worker node.
#
# It is recommended to use the dynamic local storage instead.
enabled = false # •ᴗ•

# Name of the storage class to create.
#
# NOTE: the static and dynamic provisioner must have distinct storage class
# names if both are enabled!
#storageclass_name = "local-storage"

# Namespace to deploy the components in
#namespace = "kube-system"

# Directory where the volume will be placed on the worker node
#data_directory = "/mnt/data"

# Synchronization directory where the provisioner will pick up the volume from
#discovery_directory = "/mnt/mk8s-disks"

# Version of the provisioner to use
#version = "v2.3.4"

# Toleration for the plugin. Defaults to `kubernetes.storage.nodeplugin_toleration`
#nodeplugin_toleration = ...

config.toml: Kubernetes - Dynamic Local Storage Configuration
# --- KUBERNETES: DYNAMIC LOCAL STORAGE CONFIGURATION ---
# ansible prefix: "k8s_local_storage"
[kubernetes.local_storage.dynamic]
# Enable dynamic local storage provisioning. This provides a storage class which
# can be used with PVCs to allocate local storage on a node.
enabled = false # •ᴗ•

# Name of the storage class to create.
#
# NOTE: the static and dynamic provisioner must have distinct storage class
# names if both are enabled!
#storageclass_name = "local-storage"

# Namespace to deploy the components in
#namespace = "kube-system"

# Directory where the volumes will be placed on the worker node
#data_directory = "/mnt/dynamic-data"

# Version of the local path controller to deploy
#version = "v0.0.20"

# Toleration for the plugin. Defaults to `kubernetes.storage.nodeplugin_toleration`
#nodeplugin_toleration = ...

Monitoring Configuration

config.toml: Kubernetes - Monitoring Configuration
# --- KUBERNETES: MONITORING CONFIGURATION ---
# ansible prefix: "k8s_monitoring"
[kubernetes.monitoring]
# Enable Prometheus-based monitoring.
# For prometheus-specific configurations take a look at the
# k8s-service-layer.prometheus section.
enabled = false # •ᴗ•

Global Monitoring Configuration

It is possible to connect the monitoring stack of your yk8s-cluster to an external endpoint like e.g. a monitoring-cluster. The following section can be used to enable and configure that.

Note: This requires changes and therefore the (re-)appliance of all layers.

config.toml: Kubernetes - Global Monitoring Configuration
# --- KUBERNETES: GLOBAL MONITORING CONFIGURATION ---
# ansible prefix: "k8s_global_monitoring"
[kubernetes.global_monitoring]
# This section contains global monitoring related
# information which needs to be known to stage3
# and higher layers.

# Enable/Disable global monitoring
enabled       = false                      # •ᴗ•
#nodeport      = 31911
#nodeport_name = "ch-k8s-global-monitoring"

Network Configuration

Note: To enable the calico network plugin, kubernetes.network.plugin needs to be set to calico.

config.toml: Kubernetes - Network Configuration
# --- KUBERNETES: NETWORK CONFIGURATION ---
# ansible prefix: "k8s_network"
[kubernetes.network]
# This is the subnet used by Kubernetes for Pods. Subnets will be delegated
# automatically to each node.
pod_subnet = "10.244.0.0/16" # •ᴗ•

# This is the subnet used by Kubernetes for Services.
service_subnet = "10.96.0.0/12" # •ᴗ•

# Pick a networking plugin:
# - kube-router: High-performance, low-overhead implementation with support
#   for NetworkPolicy objects (DEPRECATED: Support will be dropped in the near future)
# - calico: High-performance, pure IP networking, policy engine. Calico provides
#   layer 3 networking capabilities and associates a virtual router with each node.
#   Allows the establishment of zone boundaries through BGP
plugin = "calico" # •ᴗ•

# Pick a Calico version:
# Be aware that not all combinations of Kubernetes and Calico versions are recommended:
# https://projectcalico.docs.tigera.io/getting-started/kubernetes/requirements
# We provide Calico in version: 3.17.1, 3.19.0, 3.21.6.
# If not specified here, a predefined Calico version will be matched against
# the above specified Kubernetes version.
# calico_custom_version = "3.21.6"

# Define if the IP-in-IP encapsulation of calico should be activated
# https://projectcalico.docs.tigera.io/networking/vxlan-ipip
# calico_ipipmode = "Never"

# Make the auto detection method variable as one downside of
# using can-reach mechanism is that it produces additional logs about
# other interfaces i.e. tap interfaces. Also a simpler way will be to
# use an interface to detect ip settings i.e. interface=bond0
# calico_ip_autodetection_method = "can-reach=www.cloudandheat.com"
# calico_ipv6_autodetection_method = "can-reach=www.cloudandheat.com"

kubelet Configuration

The LCM supports the customization of certain variables of kubelet for (meta-)worker nodes.

Note: Applying changes requires to enable disruptive actions.

config.toml: Kubernetes - kubelet Configuration
# --- KUBERNETES: KUBELET CONFIGURATION (WORKERS) ---
# ansible prefix: "k8s_kubelet"
[kubernetes.kubelet]
# This section enables you to customize kubelet on the k8s workers (sic!)
# Changes will be rolled out only during k8s upgrades or if you explicitly
# allow disruptions.

# Maximum number of Pods per worker
# Increasing this value may also decrease performance,
# as more Pods can be packed into a single node.
#pod_limit = 110

Continuous Join Key Configuration

Currently, this is only needed for yk8s clusters created via the yaook/metal-controller on bare metal.

config.toml: Kubernetes - Continuous Join Key Configuration
# --- KUBERNETES: CONTINUOUS JOIN KEY ---
# ansible prefix: "k8s_continuous_join_key"
[kubernetes.continuous_join_key]
# This section controls a systemd timer which periodically publishes a fresh
# Kubernetes join key into a HashiCorp Vault server. At the time of writing,
# the primary use case is integration with the scripts provided to every node
# deployed via the Yaook Metal Controller.
enabled = false # •ᴗ•

# Absolute path to an executable which prints a fresh, revokable vault token
# to stdout. This token must be privileged enough to write the given vault path
# below.
#vault_token_script =

# Path to a Vault key/value object where the join key data should be written
# to.
#vault_path =

# Path to a file which should be loaded as additional environment to the token
# renewal script.
#env_path = "/dev/null"

KSL - Kubernetes Service Layer

Rook Configuration

The used rook setup is explained in more detail here.

Note: To enable rook in a cluster on top of OpenStack, you need to set both k8s-service-layer.rook.nosds and k8s-service-layer.rook.osd_volume_size, as well as enable kubernetes.storage.rook_enabled and either kubernetes.local_storage.dynamic.enabled or kubernetes.local_storage.static.enabled local storage (or both).

config.toml: KSL - Rook Configuration
# --- KUBERNETES SERVICE LAYER : ROOK (STORAGE) ---
# ansible prefix: "rook"
[k8s-service-layer.rook]
# If kubernetes.storage.rook_enabled is enabled, rook will be installed.
# In this section you can customize and configure rook.

namespace    = "rook-ceph" # •ᴗ•
cluster_name = "rook-ceph" # •ᴗ•

# Currently we support the following rook versions:
# v1.2.3, v1.3.11, v1.4.9, v1.5.12, v1.6.7, v1.7.11
#version = "v1.7.11"

#nodeplugin_toleration = true

# Storage class name. SHOULD be compliant with one storage class you
# have configured in the kubernetes.local_storage section (or you should
# know what you are doing).
#mon_volume_storage_class = "local-storage"

# Enables rook to use the host network.
#use_host_networking = false

# If set to true Rook won’t perform any upgrade checks on Ceph daemons
# during an upgrade. Use this at YOUR OWN RISK, only if you know what
# you’re doing.
# https://rook.github.io/docs/rook/v1.3/ceph-cluster-crd.html#cluster-settings
#skip_upgrade_checks = false

# Scheduling keys control where services may run. A scheduling key corresponds
# to both a node label and to a taint. In order for a service to run on a node,
# it needs to have that label key.
# If no scheduling key is defined for a service, it will run on any untainted
# node.
#scheduling_key = null
# If you're using a general scheduling key prefix,
# you can reference it here directly:
#scheduling_key = "{{ scheduling_key_prefix }}/storage"

# Set to false to disable CSI plugins, if they are not needed in the rook cluster.
# (For example if the ceph cluster is used for an OpenStack cluster)
#csi_plugins=true

# Additionally it is possible to schedule mons and mgrs pods specifically.
# NOTE: Rook does not merge scheduling rules set in 'all' and the ones in 'mon' and 'mgr',
# but will use the most specific one for scheduling.
#mon_scheduling_key = "{{ scheduling_key_prefix }}/rook-mon"
#mgr_scheduling_key = "{{ scheduling_key_prefix }}/rook-mgr"

# Number of mons to run.
# Default is 3 and is the minimum to ensure high-availability!
# The number of mons has to be uneven.
#nmons = 3

# Number of mgrs to run. Default is 1 and can be extended to 2
# and achieve high-availability.
# The count of mgrs is adjustable since rook v1.6 and does not work with older versions.
#nmgrs = 1

# Number of OSDs to run. This should be equal to the number of storage meta
# workers you use.
#nosds = 3

# The size of the storage backing each OSD.
#osd_volume_size = "90Gi"

# Enable the rook toolbox, which is a pod with ceph tools installed to
# introspect the cluster state.
#toolbox = true

# Enable the CephFS shared filesystem.
#ceph_fs = false
#ceph_fs_name = "ceph-fs"
#ceph_fs_replicated = 1
#ceph_fs_preserve_pools_on_delete = false

# Enable the encryption of OSDs
#encrypt_osds = false

# ROOK POD RESOURCE LIMITS
# The default values are the *absolute minimum* values required by rook. Going
# below these numbers will make rook refuse to even create the pods. See also:
# https://rook.io/docs/rook/v1.2/ceph-cluster-crd.html#cluster-wide-resources-configuration-settings

# Memory limit for mon Pods
#mon_memory_limit = "1Gi"
#mon_memory_request = "{{ rook_mon_memory_limit }}"
#mon_cpu_limit = "500m"
#mon_cpu_request = "100m"

# Resource limits for OSD pods
# Note that these are chosen so that the OSD pods end up in the
# Guaranteed QoS class.
#osd_memory_limit = "2Gi"
#osd_memory_request = "{{ rook_osd_memory_limit }}"
#osd_cpu_limit = "500m"
#osd_cpu_request = "{{ rook_osd_cpu_limit }}"

# Memory limit for mgr Pods
#mgr_memory_limit = "512Mi"
#mgr_memory_request = "{{ rook_mgr_memory_limit }}"
#mgr_cpu_limit = "500m"
#mgr_cpu_request = "100m"

# Memory limit for MDS / CephFS Pods
#mds_memory_limit = "4Gi"
#mds_memory_request = "{{ rook_mds_memory_limit }}"
#mds_cpu_limit = "1"
#mds_cpu_request = "{{ rook_mds_cpu_limit }}"

# Rook-ceph operator limits
#operator_memory_limit = "512Mi"
#operator_memory_request = "{{ rook_operator_memory_limit }}"
#operator_cpu_limit = "1"
#operator_cpu_request = "{{ rook_operator_cpu_limit }}"

#[[k8s-service-layer.rook.pools]]
#name = "data"
#create_storage_class = "block"
#replicated = 1

Prometheus-based Monitoring Configuration

The used prometheus-based monitoring setup will be explained in more detail soon :)

Note: To enable prometheus, k8s-service-layer.prometheus.install and kubernetes.monitoring.enabled need to be set to true.

config.toml: KSL - Prometheus Configuration
# --- KUBERNETES SERVICE LAYER : MONITORING(PROMETHEUS) ---
# ansible prefix: "monitoring_"
[k8s-service-layer.prometheus]
# If kubernetes.monitoring.enabled is true, choose whether to install or uninstall
# Prometheus. IF SET TO FALSE, PROMETHEUS WILL BE DELETED WITHOUT CHECKING FOR
# DISRUPTION (sic!).
#install = true

#namespace = "monitoring"

# helm chart version of the prometheus stack
# https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack
# If you set this empty (not unset), the latest version is used
# Note that upgrades require additional steps and maybe even LCM changes are needed:
# https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack#upgrading-chart
#prometheus_stack_version = "10.3.5"

# Enable grafana
#use_grafana = true

# If this variable is defined, Grafana will store its data in a PersistentVolume
# in the defined StorageClass. Otherwise, persistence is disabled for Grafana.
# The value has to be a valid StorageClass available in your cluster.
#grafana_persistent_storage_class=""

# Enable use of Thanos
# Important: Currently Thanos will not work with OpenStack application credentials [0]. If you use OpenStack
# application credentials then you have to set `use_thanos = false`. If you don't listen to me, then the
# LCM will kindly throw an error message into your face in stage 4 and abort :).
#
# [0] https://gitlab.com/yaook/k8s/-/issues/436#note_873556688
#use_thanos = false

# Let terraform create an object storage container / bucket for you if `true`.
# If set to `false` one must provide a value for `thanos_objectstorage_config_file` to configure an external backend.
# NOTE: If `thanos_objectstorage_config_file` is set, then it (`thanos_objectstorage_config_file`) will take precedence, i.e.,
#       thanos will use the external config although terraform creates the bucket. This has historic reasons [0].
# [0] https://gitlab.com/yaook/k8s/-/merge_requests/635
#manage_thanos_bucket = true

# Thanos uses local storage to keep a copy of the metadata from the object store
# for faster access. The size and storage class for that volume can be
# configured:
#thanos_metadata_volume_size="10Gi"
#thanos_metadata_volume_storage_class="rook-ceph-data"

# By default, the monitoring will capture all namespaces. If this is not
# desired, the following switch can be turned off. In that case, only the
# kube-system, monitoring and rook namespaces are scraped by Prometheus.
#prometheus_monitor_all_namespaces=true

# Scheduling keys control where services may run. A scheduling key corresponds
# to both a node label and to a taint. In order for a service to run on a node,
# it needs to have that label key.
# If no scheduling key is defined for service, it will run on any untainted
# node.
#scheduling_key = "node-restriction.kubernetes.io/cah-managed-k8s-monitoring"
# If you're using a general scheduling key prefix
# you can reference it here directly
#scheduling_key = "{{ scheduling_key_prefix }}/monitoring"

# Monitoring pod resource limits
# PROMETHEUS POD RESOURCE LIMITS
# The following limits are applied to the respective pods.
# Note that the Prometheus limits are chosen fairly conservatively and may need
# tuning for larger and smaller clusters.
# By default, we prefer to set limits in such a way that the Pods end up in the
# Guaranteed QoS class (i.e. both CPU and Memory limits and requests set to the
# same value).

#alertmanager_memory_limit = "256Mi"
#alertmanager_memory_request = "{{ monitoring_alertmanager_memory_limit }}"
#alertmanager_cpu_limit = "100m"
#alertmanager_cpu_request = "{{ monitoring_alertmanager_cpu_limit }}"

#prometheus_memory_limit = "3Gi"
#prometheus_memory_request = "{{ monitoring_prometheus_memory_limit }}"
#prometheus_cpu_limit = "1"
#prometheus_cpu_request = "{{ monitoring_prometheus_cpu_limit }}"

#grafana_memory_limit = "512Mi"
#grafana_memory_request = "256Mi"
#grafana_cpu_limit = "500m"
#grafana_cpu_request = "100m"

#kube_state_metrics_memory_limit = "128Mi"
#kube_state_metrics_memory_request = "50Mi"
#kube_state_metrics_cpu_limit = "50m"
#kube_state_metrics_cpu_request = "20m"

#thanos_sidecar_memory_limit = "256Mi"
#thanos_sidecar_memory_request = "{{ monitoring_thanos_sidecar_memory_limit }}"
#thanos_sidecar_cpu_limit = "500m"
#thanos_sidecar_cpu_request = "{{ monitoring_thanos_sidecar_cpu_limit }}"

#thanos_query_memory_limit = "786Mi"
#thanos_query_memory_request = "128Mi"
#thanos_query_cpu_limit = "1"
#thanos_query_cpu_request = "100m"

#thanos_store_memory_limit = "2Gi"
#thanos_store_memory_request = "256Mi"
#thanos_store_cpu_limit = "500m"
#thanos_store_cpu_request = "100m"

# WARNING: If you have set terraform.cluster_name, you must set this
# variable to "${terraform.cluster_name}-monitoring-thanos-data".
# The default terraform.cluster_name is "managed-k8s" which is why the
# default object store container name is set to the following.
#thanos_objectstorage_container_name = "managed-k8s-monitoring-thanos-data"

# The following two variables are needed if you want thanos to use an object storage
# backend which is not managed by the LCM.
# Configuration file name which contains the credentials to access the (external) object storage
# as backend for thanos. You can find valid configuration formats here [0]
# NOTE: The configuration file is read relative to `thanos_objectstorage_config_path`.
# NOTE: This variable takes precedence over `manage_thanos_bucket`. If it's set, then thanos will always use the credentials from this file.
#       See the discussions in [1] for more details.
# [0] https://thanos.io/tip/thanos/storage.md/#supported-clients
# [1] https://gitlab.com/yaook/k8s/-/merge_requests/635
#thanos_objectstorage_config_file=""

# Path in which the LCM will look for your config file. Defaults to "./config/"
# NOTE: you probably don't have to override this variable.
#thanos_objectstorage_config_path = "{{ ksl_vars_directory }}/../../config/"

# Use legacy jsonnet-based setup (monitoring_v1).
# The jsonnet-based monitoring setup will get removed soon™
# https://github.com/prometheus-operator/kube-prometheus
#use_jsonnet_setup = false

# Include migration tasks
#migrate_from_v1 = false

# Scrape external targets via blackbox exporter
# https://github.com/helm/charts/tree/master/stable/prometheus-blackbox-exporter
#internet_probe = false

# Provide a list of DNS endpoints for additional thanos store endpoints.
# The endpoint will be extended to `dnssrv+_grpc._tcp.{{ endpoint }}.monitoring.svc.cluster.local`.
#thanos_query_additional_store_endpoints = []

# Deploy a specific blackbox exporter version
# https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-blackbox-exporter
#blackbox_version = "7.0.0"

#[[k8s-service-layer.prometheus.internet_probe_targets]]
# name="example"                    # Human readable URL that will appear in Prometheus / AlertManager
# url="http://example.com/healthz"  # The URL that blackbox will scrape
# interval="60s"                    # Scraping interval. Overrides value set in `defaults`
# scrapeTimeout="60s"               # Scrape timeout. Overrides value set in `defaults`
# module = "http_2xx"               # module to be used. Can be "http_2xx" (default), "icmp" or "tcp_connect".

# If at least one common_label is defined, Prometheus will be created with selectors
# matching these labels and only ServiceMonitors that meet the criteria of the selector,
# i.e. are labeled accordingly, are included by Prometheus.
# The LCM takes care that all ServiceMonitor created by itself are labeled accordingly.
# The key can not be "release" as that one is already used by the Prometheus helm chart.
#
#[k8s-service-layer.prometheus.common_labels]
#managed-by = "yaook-k8s-monitoring"

NGINX Ingress Controller Configuration

The used NGINX ingress controller setup will be explained in more detail soon :)

Note: To enable an ingress controller, k8s-service-layer.ingress.enabled needs to be set to true.

config.toml: KSL - NGINX Ingress Configuration
# --- KUBERNETES SERVICE LAYER : INGRESS ---
# ansible prefix: "k8s_ingress_"
[k8s-service-layer.ingress]
# Enable nginx-ingress management.
enabled = false # •ᴗ•

# Namespace to deploy the ingress in (will be created if it does not exist, but
# never deleted).
#namespace = "k8s-svc-ingress"

# If enabled, choose whether to install or uninstall the ingress. IF SET TO
# FALSE, THE INGRESS CONTROLLER WILL BE DELETED WITHOUT CHECKING FOR
# DISRUPTION.
#install = true

# Scheduling key for the cert manager instance and its resources. Has no
# default.
#scheduling_key =

# Service type for the frontend Kubernetes service.
#service_type = "LoadBalancer"

# Node port for the HTTP endpoint
#nodeport_http = 32080

# Node port for the HTTPS endpoint
#nodeport_https = 32443

# Enable SSL passthrough in the controller
#enable_ssl_passthrough = true

Cert-Manager Configuration

The used Cert-Manager controller setup will be explained in more detail soon :)

Note: To enable cert-manager, k8s-service-layer.cert-manager.enabled needs to be set to true.

config.toml: KSL - Cert-Manager Configuration
# --- KUBERNETES SERVICE LAYER : CERT MANAGER ---
# ansible prefix: "k8s_cert_manager_"
[k8s-service-layer.cert-manager]
# Enable management of a cert-manager.io instance
enabled = false # •ᴗ•

# Configure in which namespace the cert-manager is run. The namespace is
# created automatically, but never deleted automatically.
#namespace = "k8s-svc-cert-manager"

# Install or uninstall cert manager. If set to false, the cert-manager will be
# uninstalled WITHOUT CHECK FOR DISRUPTION!
#install = true

# Scheduling key for the cert manager instance and its resources. Has no
# default.
#scheduling_key =

# If given, a *cluster wide* Let's Encrypt issuer with that email address will
# be generated. Requires an ingress to work correctly.
# DO NOT ENABLE THIS IN CUSTOMER CLUSTERS, BECAUSE THEY SHOULD NOT CREATE
# CERTIFICATES UNDER OUR NAME. Customers are supposed to deploy their own
# ACME/Let's Encrypt issuer.
#letsencrypt_email = "..."

# By default, the ACME issuer will let the server choose the certificate chain
# to use for the certificate. This can be used to override it.
#letsencrypt_preferred_chain = "..."

# The ingress class to use for responding to the ACME challenge.
# The default value works for the default k8s-service-layer.ingress
# configuration and may need to be adapted in case a different ingress is to be
# used.
#letsencrypt_ingress = "nginx"

etcd-backup Configuration

Automated etcd backups can be configured in this section. When enabled, it periodically creates snapshots of the etcd database and stores them in an object storage via S3. It uses the helm chart etcdbackup present in the yaook operator helm chart repository. The object storage retains the data for 30 days and then deletes it.

The usage of it is disabled by default but can be enabled (and configured) in the following section. The S3 config YAML file name MUST be set when etcd backups are enabled. The file should be kept under the config/ directory and should be protected.

Note: To enable etcd-backup, k8s-service-layer.etcd-backup.enabled needs to be set to true.

config.toml: KSL - Etcd-backup Configuration
# --- KUBERNETES SERVICE LAYER : ETCD-BACKUP ---
# ansible prefix: "etcd_backup_"
[k8s-service-layer.etcd-backup]
#enabled = false

# Configure value for the cron job schedule for etcd backups. If not set, it will be
# set to the default value of 21 */12 * * *
#schedule = "21 * * * *"

# Name of the s3 bucket to store the backups. It defaults to `etcd-backup`
#bucket_name = "etcd-backup"

# Name of the folder to keep the backup files. It defaults to `etcd-backup`
#file_prefix = "backup"

# Configure s3 config yaml name. The file MUST be kept under the `config` dir
# and should be protected via some mechanism. This is a mandatory
# variable if etcd_backup_enabled is set to true. A template for such a file can be found at
# managed-k8s/templates/etcd_backup_s3_config.yaml
# **IMPORTANT:** This file contains credentials in plain text, so make sure it's appropriately secured (git-crypt, etc.)
#s3_config_name = "etcd_backup_s3_config.yaml"

# Number of days after which individual items in the bucket are dropped. Enforced by S3 lifecycle rules which
# are also implemented by Ceph's RGW.
#days_of_retention = 30

# etcdbackup chart version to install.
# If this is not specified, the latest version is installed.
#chart_version=""

# Metrics port on which the backup-shifter Pod will provide metrics.
# Please note that the etcd-backup deployment runs in host network mode
# for easier access to the etcd cluster.
#metrics_port = 19100

The following values need to be set:

Variable        Description
access_key      Identifier for your S3 endpoint
secret_key      Credential for your S3 endpoint
endpoint_url    URL of your S3 endpoint
endpoint_cacrt  Certificate bundle of the endpoint
etcd-backup configuration template
access_key: REPLACEME
secret_key: REPLACEME
endpoint_url: REPLACEME
endpoint_cacrt: |
  -----BEGIN CERTIFICATE-----
  REPLACEME
  -----END CERTIFICATE-----
Generate/Figure out etcd-backup configuration values
# Generate access and secret key on OpenStack
openstack ec2 credentials create

# Get certificate bundle of url
openssl s_client -connect ENDPOINT_URL:PORT -showcerts 2>&1 < /dev/null | sed -n '/-----BEGIN/,/-----END/p'

Node-Scheduling: Labels and Taints Configuration

More details about the labels and taints configuration can be found here.

config.toml: KSL - Node-Scheduling: Labels and Taints Configuration
# --- NODE SCHEDULING ---
# ansible prefix: /
[node-scheduling]
# Scheduling keys control where services may run. A scheduling key corresponds
# to both a node label and to a taint. In order for a service to run on a node,
# it needs to have that label key. The following defines a prefix for these keys
scheduling_key_prefix = "scheduling.mk8s.cloudandheat.com"

# --- NODE SCHEDULING: LABELS (sent to ansible as k8s_node_labels!) ---
[node-scheduling.labels]
# The following fields are commented out because they make assumptions on the existence
# and naming scheme of nodes. Use them for inspiration :)
#managed-k8s-worker-0 = ["{{ scheduling_key_prefix }}/storage=true"]
#managed-k8s-worker-1 = ["{{ scheduling_key_prefix }}/monitoring=true"]
#managed-k8s-worker-2 = ["{{ scheduling_key_prefix }}/storage=true"]
#managed-k8s-worker-3 = ["{{ scheduling_key_prefix }}/monitoring=true"]
#managed-k8s-worker-4 = ["{{ scheduling_key_prefix }}/storage=true"]
#managed-k8s-worker-5 = ["{{ scheduling_key_prefix }}/monitoring=true"]
#
# --- NODE SCHEDULING: TAINTS (sent to ansible as k8s_node_taints!) ---
[node-scheduling.taints]
# The following fields are commented out because they make assumptions on the existence
# and naming scheme of nodes. Use them for inspiration :)
#managed-k8s-worker-0 = ["{{ scheduling_key_prefix }}/storage=true:NoSchedule"]
#managed-k8s-worker-2 = ["{{ scheduling_key_prefix }}/storage=true:NoSchedule"]
#managed-k8s-worker-4 = ["{{ scheduling_key_prefix }}/storage=true:NoSchedule"]

Wireguard Configuration

You MUST add yourself to the wireguard peers.

You can do so either in the following section of the config file or by using and configuring a git submodule. This submodule would then refer to another repository, holding the wireguard public keys of everybody that should have access to the cluster by default. This is the recommended approach for companies and organizations.

config.toml: Wireguard Configuration
# --- WIREGUARD ---
# ansible prefix: "wg_"
[wireguard]
# Set the environment variable "WG_COMPANY_USERS" or this field to 'false' if C&H company members
# should not be rolled out as wireguard peers.
#rollout_company_users = false

# IP address range to use for WireGuard clients. Must be set to a CIDR and must
# not conflict with the terraform.subnet_cidr.
# Should be chosen uniquely for all clusters of a customer at the very least
# so that they can use all of their clusters at the same time without having
# to tear down tunnels.
ip_cidr = "172.30.153.64/26"
ip_gw   = "172.30.153.65/26"

# Same for IPv6
#ipv6_cidr = "fd01::/120"
#ipv6_gw = "fd01::1/120"

port = 7777 # •ᴗ•

# To add WireGuard keys, create blocks like the following
# You can add as many of them as you want. Inventory updater will auto-allocate IP
# addresses from the configured ip_cidr.
#[[wireguard.peers]]
#pub_key = "test1"
#ident = "testkunde1"

## Wireguard-based site-to-site tunnel
# If enabled, configure site to site tunnel
#s2s_enabled = false

# Subnet of the wireguard "transfer net" between the two endpoints
#s2s_transfer_subnet = "172.30.18.2/31"

# IP which is assigned to our VRRP master in transfer network
#s2s_ip = "172.30.18.2"

# IP which is assigned to the peer's endpoint in the transfer network
#s2s_peer_ip = "172.30.18.3"

# Port on which wireguard listens
#s2s_port = "16000"

# Public wireguard key of the peer
#s2s_peer_pub_key = "7CuC/cSw1US+nilx0ihoA1qb2DsQI0QV2RBuLE8cnhk="

# Endpoint under which the peer can be reached
#s2s_peer_public_endpoint = "<public-IP-of-your-peer>:16000"

# BGP AS IDs for both parties (should differ, unless iBGP is wanted)
#s2s_bgp_as = "65010"
#s2s_peer_bgp_as = "65009"

IPsec Configuration

More details about the IPsec setup can be found here.

config.toml: IPsec Configuration
# --- IPSEC ---
# ansible prefix: "ipsec_"
[ipsec]
# enabled = false

# Flag to enable the test suite.
# Must make sure a remote endpoint, with ipsec enabled, is running and open for connections.
# test_enabled = false

# Must be a list of parent SA proposals to offer to the client.
# Must be explicitly set if ipsec_enabled is set to true.
#proposals =

# Must be a list of ESP proposals to offer to the client.
#esp_proposals = "{{ ipsec_proposals }}"

# List of CIDRs to route to the peer. If not set, only dynamic IP
# assignments will be routed.
#peer_networks = []

# List of CIDRs to offer to the peer.
#local_networks = ["{{ subnet_cidr }}"]

# Pool to source virtual IP addresses from. Those are the IP addresses assigned
# to clients which do not have remote networks. (e.g.: "10.3.0.0/24")
#virtual_subnet_pool = null

# List of addresses to accept as remote. When initiating, the first single IP
# address is used.
#remote_addrs = false

# Private address of remote endpoint.
# only used when test_enabled is True
#remote_private_addrs = ""

# The PSK for EAP. Must be set.
#eap_psk =

Passwordstore Configuration

You MUST add yourself to the passwordstore users.

You can do so either by adding yourself to passwordstore.additional_users in the config file below or by using and configuring a git submodule. This submodule would then refer to another repository, holding the GPG IDs of everybody that should have access to the cluster by default. This is the recommended approach for companies and organizations.

config.toml: Passwordstore Configuration
# --- PASSWORDSTORE ---
# ansible prefix: "passwordstore_"
[passwordstore]
# Set this field to `true` if the "company" users should be rolled out.
#rollout_company_users = false

# Configure Additional GPG-IDs that should have access to the cluster-repo specific passwordstore.
# If you're not member of the "company-wide" list, e.g., because you're a student you must add yourself here.
# yannic.ahrens@cloudandheat.com serves as an example here because the author is incredibly vain and likes
# to see his name written down everywhere.
# Parameters:
#   - 'ident': easy to read identification string, not used anywhere yet
#   - 'gpg_id': ID of your public GPG key, ideally in long-form

# --- PASSWORDSTORE: ADDITIONAL USERS ---
#[[passwordstore.additional_users]]
#ident = "yannic.ahrens@cloudandheat.com"
#gpg_id = "68AA582E81AD111C127F01273370EBE296354805"

Cloud&Heat: ch-role-users Configuration

This section refers to the configuration of the ch-role-users git submodule which is an internally used repository of Cloud&Heat. The usage of it is disabled by default but can be enabled (and configured) in the following section or via an environment variable.

config.toml: ch-role-users Configuration
# --- C&H USERS ---
# ansible prefix: "cah_users_"
[cah-users]
rollout = false # •ᴗ•

# Include and exclude C&H users from rollout
# C&H users refers to items in the ch-users-databag repository
# For additional information refer the ch-role-users repository
# The users have to be provided as List<String>
# Possible Configurations:
# - roll_out_users_from
#   - include specific user group
#   - Default: ["opsi", "it-operations", "head"]
# - exclude_users_from
#   - exclude specific user group
#   - Default: ["students", "service"]
# - include_users
#   - include specific user(s)
#   - Default: []
# - exclude_users
#   - exclude specific user(s)
#   - Default: ["deployer"]
# Example Usage:
# include_users = ["<user>", "<user>"]

Testing

Testing Nodes

The following configuration section can be used to ensure that smoke tests and checks are executed from different nodes. This is disabled by default as it requires some prethinking.

config.toml: Testing Nodes Configuration
# --- TESTING: TEST NODES ---
[testing.test-nodes]
# The following fields are commented out because they make assumptions on the existence
# and naming scheme of nodes. Use them for inspiration :)
#"managed-k8s-worker-1" = "worker0"
#"managed-k8s-worker-3" = "worker1"
#"managed-k8s-worker-5" = "worker2"

Custom Configuration

Since yaook/k8s allows to execute custom playbook(s), the following section allows you to specify your own custom variables to be used in these.

config.toml: Custom Configuration
# --- CUSTOM ---
# ansible prefix: /
# Specify variables to be used in the custom stage here. See below for examples.

##[custom]
#my_var_foo = "" # makes the variable `my_var_foo = ""`

#[custom.my_custom_section_prefix]
#my_var = "" # produces the var `my_custom_section_prefix_my_var = ""`

Miscellaneous Configuration

This section contains various configuration options for special use cases. You won't need to enable and adjust any of these under normal circumstances.

Miscellaneous configuration
# --- MISCELLANEOUS ---
# ansible prefix: /
[miscellaneous]
# Install wireguard on all workers (without setting up any server-side stuff)
# so that it can be used from within Pods.
wireguard_on_workers = false

# Configure which container runtime to use.
# Supported values are: "docker" and "containerd".
# For now migrating the container runtime of existing clusters is not
# supported.
container_runtime = "containerd"  # "•ᴗ•"

# Configuration details if the cluster will be placed behind a HTTP proxy.
# If unconfigured images will be used to setup the cluster, the updates of
# package sources, the download of docker images and the initial cluster setup will fail.
# NOTE: These changes are currently only tested for Debian-based operating systems and not for RHEL-based!
#cluster_behind_proxy = false
# Set the approriate HTTP proxy settings for your cluster here. E.g. the address of the proxy or
# internal docker repositories can be added to the no_proxy config entry
# Important note: Settings for the yaook-k8s cluster itself (like the service subnet or the pod subnet)
# will be set automagically and do not have to set manually here.
#http_proxy = "http://proxy.example.com:8889"
#https_proxy = "http://proxy.example.com:8889"
#no_proxy = "localhost,127.0.0.0/8"

# Name of the internal OpenStack network. This field becomes important if a VM is
# attached to two networks but the controller-manager should only pick up one. If
# you don't understand the purpose of this field, there's a very high chance you
# won't need to touch it/uncomment it.
# Note: This network name isn't fetched automagically (by terraform) on purpose
# because there might be situations where the CCM should not pick the managed network.
#openstack_network_name = "managed-k8s-network"

# Value for the kernel parameter `vm.max_map_count` on k8s worker nodes. Modifications
# might be required depending on the software running on the nodes (e.g., ElasticSearch).
# If you leave the value commented out you're fine and the system's default will be kept.
#vm_max_map_count = 262144

# Custom Docker Configuration
# A list of registry mirrors can be configured as a pull through cache to reduce
# external network traffic and the amount of docker pulls from dockerhub.
#docker_registry_mirrors = [ "https://0.docker-mirror.example.org", "https://1.docker-mirror.example.org" ]

# A list of insecure registries that can be accessed without TLS verification.
#docker_insecure_registries = [ "0.docker-registry.example.org", "1.docker-registry.example.org" ]

# Custom Chrony Configuration
# The ntp servers used by chrony can be customized if it should be necessary or wanted.
# A list of pools and/or servers can be specified.
# Chrony treats both similarly, but it expects that a pool will resolve to several ntp servers.
#custom_chrony_configuration = false
#custom_ntp_pools = [ "0.pool.ntp.example.org", "1.pool.ntp.example.org"]
#custom_ntp_servers = [ "0.server.ntp.example.org", "1.server.ntp.example.org"]

# OpenStack credential checks
# Terrible things will happen when certain tasks are run and OpenStack credentials are not sourced.
# Okay, maybe not so terrible after all, but the templates do not check if certain values exist.
# Hence config files with empty credentials are written. The LCM will execute a simple check to see
# if you provided valid credentials as a sanity check, if (and only if) you're on OpenStack and the
# flag below is set to true.
#check_openstack_credentials = true

[nvidia.vgpu]
# vGPU Support
# If virtualize_gpu in the [kubernetes] section is set to true, please also set these variables:
# driver_blob_url should point to a object store or otherwise web server, where the vGPU Manager installation file is available.
# driver_blob_url= "..."
# manager_filename should hold the name of the vGPU Manager installation file.
# manager_filename = "..."

Ansible Configuration

The Ansible configuration file can be found in the ansible/ directory. It is used across all stages and layers.

Default Ansible configuration
# Ansible configuration

[defaults]
action_plugins = plugins/action
filter_plugins = plugins/filter
stdout_callback = yaml
bin_ansible_callbacks = True
host_key_checking = False
force_valid_group_names = never

# Give certain events, e.g., escalation prompt (become) more time to avoid premature cancellations
timeout = 60

retry_files_enabled = False # Do not create .retry files

#callback_whitelist = profile_tasks
forks = 42

[inventory]
enable_plugins = host_list,script,yaml,ini,openstack

# Fail, not warn if any inventory source could not be parsed
unparsed_is_failed = true

[ssh_connection]
# https://stackoverflow.com/questions/40340761/is-it-possible-to-have-ansible-retry-on-connection-failure
retries=10

[connection]
# https://docs.ansible.com/ansible/latest/reference_appendices/config.html#ansible-pipelining
pipelining=true