DNS: run separate CoreDNS instance for each PCloud env.

Previously, a shared CoreDNS instance was used to handle all domains. This has multiple downsides, the most important of which is security. For example, the DNSSEC keys of all domains were persisted on the same shared volume. The keys themselves were also generated by the PCloud env-manager as part of bootstrapping a new env. This runs counter to one of the main aspirations of PCloud: that an environment's internal private data must not leak outside of the environment.

With the new approach implemented in this change, each environment starts up its own CoreDNS and DNS record manager servers. The manager generates DNSSEC keys internally and only exposes public information to the outside world. PCloud infrastructure runs another instance of CoreDNS, which acts as a proxy service forwarding requests to individual environments based on the requested domain.

This simplifies DNS-based TLS challenge solvers, as the private certificate issuer of each env will point directly to the DNS record manager of the same environment.

Change-Id: Ifb0f36d2a133e3b53da22030cc7d6b9099136b3d
diff --git a/core/installer/values-tmpl/cert-manager.cue b/core/installer/values-tmpl/cert-manager.cue
index fdede37..9f9b5d1 100644
--- a/core/installer/values-tmpl/cert-manager.cue
+++ b/core/installer/values-tmpl/cert-manager.cue
@@ -57,7 +57,7 @@
 		chart: charts.certManager
 		dependsOn: [{
 			name: "ingress-public"
-			namespace: _ingressPublic
+			namespace: ingressPublic
 		}]
 		values: {
 			fullnameOverride: "\(global.pcloudEnvName)-cert-manager"
diff --git a/core/installer/values-tmpl/certificate-issuer-private.cue b/core/installer/values-tmpl/certificate-issuer-private.cue
index fc490a3..ee50b49 100644
--- a/core/installer/values-tmpl/certificate-issuer-private.cue
+++ b/core/installer/values-tmpl/certificate-issuer-private.cue
@@ -1,9 +1,4 @@
-input: {
-	apiConfigMap: {
-		name: string
-		namespace: string
-	}
-}
+input: {}
 
 name: "certificate-issuer-private"
 namespace: "ingress-private"
@@ -30,15 +25,15 @@
 		}]
 		values: {
 			issuer: {
-				name: _issuerPrivate
+				name: issuerPrivate
 				server: "https://acme-v02.api.letsencrypt.org/directory"
 				// server: "https://acme-staging-v02.api.letsencrypt.org/directory"
 				domain: global.privateDomain
 				contactEmail: global.contactEmail
 			}
-			apiConfigMap: {
-				name: input.apiConfigMap.name
-				namespace: input.apiConfigMap.namespace
+			config: {
+				createTXTAddr: "http://dns-api.\(global.id)-dns.svc.cluster.local/create-txt-record"
+				deleteTXTAddr: "http://dns-api.\(global.id)-dns.svc.cluster.local/delete-txt-record"
 			}
 		}
 	}
diff --git a/core/installer/values-tmpl/certificate-issuer-public.cue b/core/installer/values-tmpl/certificate-issuer-public.cue
index 58a4bfd..7a5d3ba 100644
--- a/core/installer/values-tmpl/certificate-issuer-public.cue
+++ b/core/installer/values-tmpl/certificate-issuer-public.cue
@@ -25,12 +25,12 @@
 		}]
 		values: {
 			issuer: {
-				name: _issuerPublic
+				name: issuerPublic
 				server: "https://acme-v02.api.letsencrypt.org/directory"
 				// server: "https://acme-staging-v02.api.letsencrypt.org/directory"
 				domain: global.domain
 				contactEmail: global.contactEmail
-				ingressClass: _ingressPublic
+				ingressClass: ingressPublic
 			}
 		}
 	}
diff --git a/core/installer/values-tmpl/core-auth.cue b/core/installer/values-tmpl/core-auth.cue
index 0e9f26f..eb19493 100644
--- a/core/installer/values-tmpl/core-auth.cue
+++ b/core/installer/values-tmpl/core-auth.cue
@@ -160,7 +160,7 @@
 				ingress: {
 					admin: {
 						enabled: true
-						className: _ingressPrivate
+						className: ingressPrivate
 						hosts: [{
 							host: "kratos.\(global.privateDomain)"
 							paths: [{
@@ -176,10 +176,10 @@
 					}
 					public: {
 						enabled: true
-						className: _ingressPublic
+						className: ingressPublic
 						annotations: {
 							"acme.cert-manager.io/http01-edit-in-place": "true"
-							"cert-manager.io/cluster-issuer": _issuerPublic
+							"cert-manager.io/cluster-issuer": issuerPublic
 						}
 						hosts: [{
 							host: "accounts.\(global.domain)"
@@ -342,7 +342,7 @@
 				ingress: {
 					admin: {
 						enabled: true
-						className: _ingressPrivate
+						className: ingressPrivate
 						hosts: [{
 							host: "hydra.\(global.privateDomain)"
 							paths: [{
@@ -356,10 +356,10 @@
 					}
 					public: {
 						enabled: true
-						className: _ingressPublic
+						className: ingressPublic
 						annotations: {
 							"acme.cert-manager.io/http01-edit-in-place": "true"
-							"cert-manager.io/cluster-issuer": _issuerPublic
+							"cert-manager.io/cluster-issuer": issuerPublic
 						}
 						hosts: [{
 							host: "hydra.\(global.domain)"
@@ -455,8 +455,8 @@
 				}
 			}
 			ui: {
-				certificateIssuer: _issuerPublic
-				ingressClassName: _ingressPublic
+				certificateIssuer: issuerPublic
+				ingressClassName: ingressPublic
 				domain: global.domain
 				internalDomain: global.privateDomain
 				hydra: "hydra-admin.\(global.namespacePrefix)core-auth.svc.cluster.local"
diff --git a/core/installer/values-tmpl/dns-gateway.cue b/core/installer/values-tmpl/dns-gateway.cue
new file mode 100644
index 0000000..31b729c
--- /dev/null
+++ b/core/installer/values-tmpl/dns-gateway.cue
@@ -0,0 +1,120 @@
+input: {
+	servers: [...#Server]
+}
+
+#Server: {
+	zone: string
+	address: string
+}
+
+name: "dns-gateway"
+namespace: "dns-gateway"
+
+images: {
+	coredns: {
+		repository: "coredns"
+		name: "coredns"
+		tag: "1.11.1"
+		pullPolicy: "IfNotPresent"
+	}
+}
+
+charts: {
+	coredns: {
+		chart: "charts/coredns"
+		sourceRef: {
+			kind: "GitRepository"
+			name: "pcloud"
+			namespace: global.pcloudEnvName
+		}
+	}
+}
+
+helm: {
+	coredns: {
+		chart: charts.coredns
+		values: {
+			image: {
+				repository: images.coredns.fullName
+				tag: images.coredns.tag
+				pullPolicy: images.coredns.pullPolicy
+			}
+			replicaCount: 1
+			resources: {
+				limits: {
+					cpu: "100m"
+					memory: "128Mi"
+				}
+				requests: {
+					cpu: "100m"
+					memory: "128Mi"
+				}
+			}
+			rollingUpdate: {
+				maxUnavailable: 1
+				maxSurge: "25%"
+			}
+			terminationGracePeriodSeconds: 30
+			serviceType: "ClusterIP"
+			service: name: "coredns"
+			serviceAccount: create: false
+			rbac: {
+				create: false
+				pspEnable: false
+			}
+			isClusterService: false
+			if len(input.servers) > 0 { // one CoreDNS server block per entry in input.servers
+				servers: [
+					for s in input.servers {
+						zones: [{
+							zone: s.zone
+						}]
+						port: 53
+						plugins: [{
+							name: "log"
+						}, {
+							name: "forward"
+							parameters: ". \(s.address)" // forward every query for this zone to s.address
+						}, {
+							name: "health" // NOTE(review): CoreDNS docs describe health as process-wide; confirm it may repeat in every server block
+							configBlock: "lameduck 5s"
+						}, {
+							name: "ready"
+						}]
+					}
+				]
+			}
+			if len(input.servers) == 0 { // nothing to forward to yet: placeholder server for "."
+				servers: [{
+					zones: [{
+						zone: "."
+					}]
+					port: 53
+					plugins: [{name: "health", configBlock: "lameduck 5s"}, { // livenessProbe is enabled below; presumably the chart probes the health endpoint, so keep it served here too
+						name: "ready"
+					}]
+				}]
+			}
+			livenessProbe: {
+				enabled: true
+				initialDelaySeconds: 60
+				periodSeconds: 10
+				timeoutSeconds: 5
+				failureThreshold: 5
+				successThreshold: 1
+			}
+			readinessProbe: {
+				enabled: true
+				initialDelaySeconds: 30
+				periodSeconds: 10
+				timeoutSeconds: 5
+				failureThreshold: 5
+				successThreshold: 1
+			}
+			zoneFiles: []
+			hpa: enabled: false
+			autoscaler: enabled: false
+			deployment: enabled: true
+		}
+	}
+}
diff --git a/core/installer/values-tmpl/dns-zone-manager.cue b/core/installer/values-tmpl/dns-zone-manager.cue
deleted file mode 100644
index 0fc66bf..0000000
--- a/core/installer/values-tmpl/dns-zone-manager.cue
+++ /dev/null
@@ -1,178 +0,0 @@
-input: {
-	apiConfigMapName: string
-	volume: {
-		size: string
-		claimName: string
-		mountPath: string
-	}
-}
-
-name: "dns-zone-manager"
-namespace: "dns-zone-manager"
-
-images: {
-	dnsZoneController: {
-		repository: "giolekva"
-		name: "dns-ns-controller"
-		tag: "latest"
-		pullPolicy: "Always"
-	}
-	kubeRBACProxy: {
-		registry: "gcr.io"
-		repository: "kubebuilder"
-		name: "kube-rbac-proxy"
-		tag: "v0.13.0"
-		pullPolicy: "IfNotPresent"
-	}
-	coredns: {
-		repository: "coredns"
-		name: "coredns"
-		tag: "1.11.1"
-		pullPolicy: "IfNotPresent"
-	}
-}
-
-charts: {
-	volume: {
-		chart: "charts/volumes"
-		sourceRef: {
-			kind: "GitRepository"
-			name: "pcloud"
-			namespace: global.pcloudEnvName
-		}
-	}
-	dnsZoneController: {
-		chart: "charts/dns-ns-controller"
-		sourceRef: {
-			kind: "GitRepository"
-			name: "pcloud"
-			namespace: global.pcloudEnvName
-		}
-	}
-	coredns: {
-		chart: "charts/coredns"
-		sourceRef: {
-			kind: "GitRepository"
-			name: "pcloud"
-			namespace: global.pcloudEnvName
-		}
-	}
-}
-
-_volumeName: "zone-configs"
-
-helm: {
-	volume: {
-		chart: charts.volume
-		values: {
-			name: input.volume.claimName
-			size: input.volume.size
-			accessMode: "ReadWriteMany"
-		}
-	}
-	"dns-zone-controller": {
-		chart: charts.dnsZoneController
-		values: {
-			installCRDs: true
-			apiConfigMapName: input.apiConfigMapName
-			volume: {
-				claimName: input.volume.claimName
-				mountPath: input.volume.mountPath
-			}
-			image: {
-				repository: images.dnsZoneController.fullName
-				tag: images.dnsZoneController.tag
-				pullPolicy: images.dnsZoneController.pullPolicy
-			}
-			kubeRBACProxy: {
-				image: {
-					repository: images.kubeRBACProxy.fullName
-					tag: images.kubeRBACProxy.tag
-					pullPolicy: images.kubeRBACProxy.pullPolicy
-				}
-			}
-		}
-	}
-	coredns: {
-		chart: charts.coredns
-		values: {
-			image: {
-				repository: images.coredns.fullName
-				tag: images.coredns.tag
-				pullPolicy: images.coredns.pullPolicy
-			}
-			replicaCount: 1
-			resources: {
-				limits: {
-					cpu: "100m"
-					memory: "128Mi"
-				}
-				requests: {
-					cpu: "100m"
-					memory: "128Mi"
-				}
-			}
-			rollingUpdate: {
-				maxUnavailable: 1
-				maxSurge: "25%"
-			}
-			terminationGracePeriodSeconds: 30
-			serviceType: "ClusterIP"
-			service: name: "coredns"
-			serviceAccount: create: false
-			rbac: {
-				create: true
-				pspEnable: false
-			}
-			isClusterService: true
-			securityContext: capabilities: add: ["NET_BIND_SERVICE"]
-			servers: [{
-				zones: [{
-					zone: "."
-				}]
-				port: 53
-				plugins: [
-					{
-						name: "log"
-					},
-					{
-						name: "health"
-						configBlock: "lameduck 5s"
-					},
-					{
-						name: "ready"
-					}
-			]
-			}]
-			extraConfig: import: parameters: "\(input.volume.mountPath)/coredns.conf"
-			extraVolumes: [{
-				name: _volumeName
-				persistentVolumeClaim: claimName: input.volume.claimName
-			}]
-			extraVolumeMounts: [{
-				name: _volumeName
-				mountPath: input.volume.mountPath
-			}]
-			livenessProbe: {
-				enabled: true
-				initialDelaySeconds: 60
-				periodSeconds: 10
-				timeoutSeconds: 5
-				failureThreshold: 5
-				successThreshold: 1
-			}
-			readinessProbe: {
-				enabled: true
-				initialDelaySeconds: 30
-				periodSeconds: 10
-				timeoutSeconds: 5
-				failureThreshold: 5
-				successThreshold: 1
-			}
-			zoneFiles: []
-			hpa: enabled: false
-			autoscaler: enabled: false
-			deployment: enabled: true
-		}
-	}
-}
diff --git a/core/installer/values-tmpl/env-dns.cue b/core/installer/values-tmpl/env-dns.cue
new file mode 100644
index 0000000..5c95a54
--- /dev/null
+++ b/core/installer/values-tmpl/env-dns.cue
@@ -0,0 +1,235 @@
+import (
+	"strings"
+)
+
+input: {}
+
+name: "env-dns"
+namespace: "dns"
+readme: "env-dns"
+description: "Environment local DNS manager"
+icon: ""
+
+images: {
+	coredns: {
+		repository: "coredns"
+		name: "coredns"
+		tag: "1.11.1"
+		pullPolicy: "IfNotPresent"
+	}
+	api: {
+		repository: "giolekva"
+		name: "dns-api"
+		tag: "latest"
+		pullPolicy: "Always"
+	}
+}
+
+charts: {
+	coredns: {
+		chart: "charts/coredns"
+		sourceRef: {
+			kind: "GitRepository"
+			name: "pcloud"
+			namespace: global.id
+		}
+	}
+	api: {
+		chart: "charts/dns-api"
+		sourceRef: {
+			kind: "GitRepository"
+			name: "pcloud"
+			namespace: global.id
+		}
+	}
+	volume: {
+		chart: "charts/volumes"
+		sourceRef: {
+			kind: "GitRepository"
+			name: "pcloud"
+			namespace: global.id
+		}
+	}
+	service: {
+		chart: "charts/service"
+		sourceRef: {
+			kind: "GitRepository"
+			name: "pcloud"
+			namespace: global.id
+		}
+	}
+	ipAddressPool: {
+		chart: "charts/metallb-ipaddresspool"
+		sourceRef: {
+			kind: "GitRepository"
+			name: "pcloud"
+			namespace: global.id
+		}
+	}
+}
+
+volumes: {
+	data: {
+		name: "data"
+		accessMode: "ReadWriteMany"
+		size: "5Gi"
+	}
+}
+
+helm: {
+	coredns: {
+		chart: charts.coredns
+		values: {
+			image: {
+				repository: images.coredns.fullName
+				tag: images.coredns.tag
+				pullPolicy: images.coredns.pullPolicy
+			}
+			replicaCount: 1
+			resources: {
+				limits: {
+					cpu: "100m"
+					memory: "128Mi"
+				}
+				requests: {
+					cpu: "100m"
+					memory: "128Mi"
+				}
+			}
+			rollingUpdate: {
+				maxUnavailable: 1
+				maxSurge: "25%"
+			}
+			terminationGracePeriodSeconds: 30
+			serviceType: "LoadBalancer"
+			service: {
+				name: "coredns"
+				annotations: {
+					"metallb.universe.tf/loadBalancerIPs": global.network.dns
+				}
+			}
+			serviceAccount: create: false
+			rbac: {
+				create: false
+				pspEnable: false
+			}
+			isClusterService: false
+			servers: [{ // single catch-all server; extraConfig below additionally imports coredns.conf from the data volume
+				zones: [{
+					zone: "."
+				}]
+				port: 53
+				plugins: [
+					{
+						name: "log"
+					},
+					{
+						name: "health"
+						configBlock: "lameduck 5s"
+					},
+					{
+						name: "ready"
+					}
+				]
+			}]
+			extraConfig: import: parameters: "\(_mountPath)/coredns.conf"
+			extraVolumes: [{
+				name: volumes.data.name
+				persistentVolumeClaim: claimName: volumes.data.name
+			}]
+			extraVolumeMounts: [{
+				name: volumes.data.name
+				mountPath: _mountPath
+			}]
+			livenessProbe: {
+				enabled: true
+				initialDelaySeconds: 60
+				periodSeconds: 10
+				timeoutSeconds: 5
+				failureThreshold: 5
+				successThreshold: 1
+			}
+			readinessProbe: {
+				enabled: true
+				initialDelaySeconds: 30
+				periodSeconds: 10
+				timeoutSeconds: 5
+				failureThreshold: 5
+				successThreshold: 1
+			}
+			zoneFiles: []
+			hpa: enabled: false
+			autoscaler: enabled: false
+			deployment: enabled: true
+		}
+	}
+	api: {
+		chart: charts.api
+		values: {
+			image: {
+				repository: images.api.fullName
+				tag: images.api.tag
+				pullPolicy: images.api.pullPolicy
+			}
+			config: "coredns.conf"
+			db: "records.db"
+			zone: global.domain
+			publicIP: strings.Join(global.publicIP, ",")
+			privateIP: global.network.ingress
+			nameserverIP: strings.Join(global.nameserverIP, ",")
+			service: type: "ClusterIP"
+			volume: {
+				claimName: volumes.data.name
+				mountPath: _mountPath
+			}
+		}
+	}
+	"data-volume": {
+		chart: charts.volume
+		values: volumes.data
+	}
+	"coredns-svc-cluster": { // extra Service selecting the coredns pods, pinned to global.network.dnsInClusterIP
+		chart: charts.service
+		values: {
+			name: "dns"
+			type: "LoadBalancer"
+			protocol: "TCP" // NOTE(review): "TCP" here vs. the UDP entry in ports below; confirm which one charts/service honours
+			ports: [{
+				name: "udp-53"
+				port: 53
+				protocol: "UDP"
+				targetPort: 53
+			}]
+			targetPort: "http" // NOTE(review): "http" looks unrelated to port 53; verify against charts/service
+			selector:{
+				"app.kubernetes.io/instance": "coredns"
+				"app.kubernetes.io/name": "coredns"
+			}
+			annotations: {
+				"metallb.universe.tf/loadBalancerIPs": global.network.dnsInClusterIP // address comes from the ipaddresspool-dns-in-cluster range
+			}
+		}
+	}
+	"ipaddresspool-dns": {
+		chart: charts.ipAddressPool
+		values: {
+			name: "\(global.id)-dns"
+			autoAssign: false
+			from: global.network.dns
+			to: global.network.dns
+			namespace: "metallb-system"
+		}
+	}
+	"ipaddresspool-dns-in-cluster": {
+		chart: charts.ipAddressPool
+		values: {
+			name: "\(global.id)-dns-in-cluster"
+			autoAssign: false
+			from: global.network.dnsInClusterIP
+			to: global.network.dnsInClusterIP
+			namespace: "metallb-system"
+		}
+	}
+}
+
+_mountPath: "/pcloud"
diff --git a/core/installer/values-tmpl/headscale.cue b/core/installer/values-tmpl/headscale.cue
index fee75ab..08b61ef 100644
--- a/core/installer/values-tmpl/headscale.cue
+++ b/core/installer/values-tmpl/headscale.cue
@@ -74,8 +74,8 @@
 				pullPolicy: images.headscale.pullPolicy
 			}
 			storage: size: "5Gi"
-			ingressClassName: _ingressPublic
-			certificateIssuer: _issuerPublic
+			ingressClassName: ingressPublic
+			certificateIssuer: issuerPublic
 			domain: _domain
 			publicBaseDomain: global.domain
 			ipAddressPool: "\(global.id)-headscale"
diff --git a/core/installer/values-tmpl/ingress-public.cue b/core/installer/values-tmpl/ingress-public.cue
index 2258945..6823a7b 100644
--- a/core/installer/values-tmpl/ingress-public.cue
+++ b/core/installer/values-tmpl/ingress-public.cue
@@ -48,7 +48,7 @@
 	"ingress-public": {
 		chart: charts.ingressNginx
 		values: {
-			fullnameOverride: _ingressPublic
+			fullnameOverride: ingressPublic
 			controller: {
 				kind: "DaemonSet"
 				hostNetwork: true
@@ -56,10 +56,10 @@
 				service: enabled: false
 				ingressClassByName: true
 				ingressClassResource: {
-					name: _ingressPublic
+					name: ingressPublic
 					enabled: true
 					default: false
-					controllerValue: "k8s.io/\(_ingressPublic)"
+					controllerValue: "k8s.io/\(ingressPublic)"
 				}
 				config: {
 					"proxy-body-size": "200M" // TODO(giolekva): configurable
@@ -75,10 +75,10 @@
 				}
 			}
 			tcp: {
-				"53": "\(global.pcloudEnvName)-dns-zone-manager/coredns:53"
+				"53": "\(global.pcloudEnvName)-dns-gateway/coredns:53"
 			}
 			udp: {
-				"53": "\(global.pcloudEnvName)-dns-zone-manager/coredns:53"
+				"53": "\(global.pcloudEnvName)-dns-gateway/coredns:53"
 			}
 		}
 	}
diff --git a/core/installer/values-tmpl/matrix.cue b/core/installer/values-tmpl/matrix.cue
index ca5dc98..97b3aca 100644
--- a/core/installer/values-tmpl/matrix.cue
+++ b/core/installer/values-tmpl/matrix.cue
@@ -88,8 +88,8 @@
 				user: "matrix"
 				password: "matrix"
 			}
-			certificateIssuer: _issuerPublic
-			ingressClassName: _ingressPublic
+			certificateIssuer: issuerPublic
+			ingressClassName: ingressPublic
 			configMerge: {
 				configName: "config-to-merge"
 				fileName: "to-merge.yaml"
diff --git a/core/installer/values-tmpl/private-network.cue b/core/installer/values-tmpl/private-network.cue
index 156b078..1cee202 100644
--- a/core/installer/values-tmpl/private-network.cue
+++ b/core/installer/values-tmpl/private-network.cue
@@ -73,15 +73,15 @@
 					enabled: true
 					type: "LoadBalancer"
 					annotations: {
-						"metallb.universe.tf/address-pool": _ingressPrivate
+						"metallb.universe.tf/address-pool": ingressPrivate
 					}
 				}
 				ingressClassByName: true
 				ingressClassResource: {
-					name: _ingressPrivate
+					name: ingressPrivate
 					enabled: true
 					default: false
-					controllerValue: "k8s.io/\(_ingressPrivate)"
+					controllerValue: "k8s.io/\(ingressPrivate)"
 				}
 				config: {
 					"proxy-body-size": "200M" // TODO(giolekva): configurable
@@ -91,7 +91,7 @@
 					"""
 				}
 				extraArgs: {
-					"default-ssl-certificate": "\(_ingressPrivate)/cert-wildcard.\(global.privateDomain)"
+					"default-ssl-certificate": "\(ingressPrivate)/cert-wildcard.\(global.privateDomain)"
 				}
 				admissionWebhooks: {
 					enabled: false
diff --git a/core/installer/values-tmpl/welcome.cue b/core/installer/values-tmpl/welcome.cue
index 0089ee8..30c6980 100644
--- a/core/installer/values-tmpl/welcome.cue
+++ b/core/installer/values-tmpl/welcome.cue
@@ -40,9 +40,9 @@
 			loginAddr: "https://launcher.\(global.domain)"
 			membershipsInitAddr: "http://memberships-api.\(global.namespacePrefix)core-auth-memberships.svc.cluster.local/api/init"
 			ingress: {
-				className: _ingressPublic
+				className: ingressPublic
 				domain: "welcome.\(global.domain)"
-				certificateIssuer: _issuerPublic
+				certificateIssuer: issuerPublic
 			}
 			clusterRoleName: "\(global.id)-welcome"
 			image: {