DNS: run separate CoreDNS instance for each PCloud env.

Previously, a shared CoreDNS instance was used to handle all domains. This has multiple downsides, the most important of which is security. For example, the DNSSEC keys of all domains were persisted on the same shared volume. Also, the key itself was generated by the PCloud env-manager as part of bootstrapping a new env, which runs counter to one of the main aspirations of PCloud: environment-internal private data must not leak outside of the environment.

With the new approach implemented in this change, each environment starts up its own CoreDNS and DNS record manager servers. The manager generates DNSSEC keys internally and only exposes public information to the outside world. The PCloud infrastructure runs another instance of CoreDNS, which acts as a proxy service forwarding requests to individual environments based on the requested domain.

This simplifies DNS-based TLS challenge solvers, as the private certificate issuer of each env will point directly to the DNS record manager of the same environment.

Change-Id: Ifb0f36d2a133e3b53da22030cc7d6b9099136b3d
diff --git a/core/installer/welcome/appmanager.go b/core/installer/welcome/appmanager.go
index 39d5c3c..e8c929d 100644
--- a/core/installer/welcome/appmanager.go
+++ b/core/installer/welcome/appmanager.go
@@ -211,6 +211,7 @@
 		return err
 	}
 	if err := s.m.Update(a, slug, values); err != nil {
+		fmt.Println(err)
 		return err
 	}
 	ctx, _ := context.WithTimeout(context.Background(), 2*time.Minute)
diff --git a/core/installer/welcome/env.go b/core/installer/welcome/env.go
index 219c67c..856526d 100644
--- a/core/installer/welcome/env.go
+++ b/core/installer/welcome/env.go
@@ -18,6 +18,8 @@
 	"github.com/gorilla/mux"
 
 	"github.com/giolekva/pcloud/core/installer"
+	"github.com/giolekva/pcloud/core/installer/dns"
+	phttp "github.com/giolekva/pcloud/core/installer/http"
 	"github.com/giolekva/pcloud/core/installer/soft"
 	"github.com/giolekva/pcloud/core/installer/tasks"
 )
@@ -78,35 +80,44 @@
 
 type EnvServer struct {
 	port          int
-	ss            *soft.Client
-	repo          installer.RepoIO
+	ss            soft.Client
+	repo          soft.RepoIO
+	repoClient    soft.ClientGetter
 	nsCreator     installer.NamespaceCreator
 	dnsFetcher    installer.ZoneStatusFetcher
 	nameGenerator installer.NameGenerator
-	tasks         map[string]tasks.Task
+	httpClient    phttp.Client
+	dnsClient     dns.Client
+	Tasks         map[string]tasks.Task
 	envInfo       map[string]template.HTML
-	dns           map[string]tasks.DNSZoneRef
+	dns           map[string]installer.EnvDNS
 	dnsPublished  map[string]struct{}
 }
 
 func NewEnvServer(
 	port int,
-	ss *soft.Client,
-	repo installer.RepoIO,
+	ss soft.Client,
+	repo soft.RepoIO,
+	repoClient soft.ClientGetter,
 	nsCreator installer.NamespaceCreator,
 	dnsFetcher installer.ZoneStatusFetcher,
 	nameGenerator installer.NameGenerator,
+	httpClient phttp.Client,
+	dnsClient dns.Client,
 ) *EnvServer {
 	return &EnvServer{
 		port,
 		ss,
 		repo,
+		repoClient,
 		nsCreator,
 		dnsFetcher,
 		nameGenerator,
+		httpClient,
+		dnsClient,
 		make(map[string]tasks.Task),
 		make(map[string]template.HTML),
-		make(map[string]tasks.DNSZoneRef),
+		make(map[string]installer.EnvDNS),
 		make(map[string]struct{}),
 	}
 }
@@ -130,7 +141,7 @@
 		http.Error(w, "Task key not provided", http.StatusBadRequest)
 		return
 	}
-	t, ok := s.tasks[key]
+	t, ok := s.Tasks[key]
 	if !ok {
 		http.Error(w, "Task not found", http.StatusBadRequest)
 		return
@@ -142,15 +153,9 @@
 			http.Error(w, "Task dns configuration not found", http.StatusInternalServerError)
 			return
 		}
-		err, ready, info := s.dnsFetcher.Fetch(dnsRef.Namespace, dnsRef.Name)
-		// TODO(gio): check error type
-		if err != nil && (ready || len(info.Records) > 0) {
-			panic("!! SHOULD NOT REACH !!")
+		if records, err := s.dnsFetcher.Fetch(dnsRef.Address); err == nil {
+			dnsRecords = records
 		}
-		if !ready && len(info.Records) > 0 {
-			panic("!! SHOULD NOT REACH !!")
-		}
-		dnsRecords = info.Records
 	}
 	data := map[string]any{
 		"Root":       t,
@@ -175,13 +180,10 @@
 		http.Error(w, "Task dns configuration not found", http.StatusInternalServerError)
 		return
 	}
-	err, ready, info := s.dnsFetcher.Fetch(dnsRef.Namespace, dnsRef.Name)
-	// TODO(gio): check error type
-	if err != nil && (ready || len(info.Records) > 0) {
-		panic("!! SHOULD NOT REACH !!")
-	}
-	if !ready && len(info.Records) > 0 {
-		panic("!! SHOULD NOT REACH !!")
+	records, err := s.dnsFetcher.Fetch(dnsRef.Address)
+	if err != nil {
+		http.Error(w, "Task dns configuration not found", http.StatusInternalServerError)
+		return
 	}
 	r.ParseForm()
 	if apiToken, err := getFormValue(r.PostForm, "api-token"); err != nil {
@@ -189,8 +191,8 @@
 		return
 	} else {
 		p := NewGandiUpdater(apiToken)
-		zone := strings.Join(strings.Split(info.Zone, ".")[1:], ".") // TODO(gio): this is not gonna work with no subdomain case
-		if err := p.Update(zone, strings.Split(info.Records, "\n")); err != nil {
+		zone := strings.Join(strings.Split(dnsRef.Zone, ".")[1:], ".") // TODO(gio): this is not gonna work with no subdomain case
+		if err := p.Update(zone, strings.Split(records, "\n")); err != nil {
 			http.Error(w, err.Error(), http.StatusInternalServerError)
 			return
 		}
@@ -332,7 +334,7 @@
 		return
 	}
 	var infra installer.InfraConfig
-	if err := installer.ReadYaml(s.repo, "config.yaml", &infra); err != nil {
+	if err := soft.ReadYaml(s.repo, "config.yaml", &infra); err != nil {
 		http.Error(w, err.Error(), http.StatusInternalServerError)
 		return
 	}
@@ -347,7 +349,7 @@
 		req.Name = name
 	}
 	var cidrs installer.EnvCIDRs
-	if err := installer.ReadYaml(s.repo, "env-cidrs.yaml", &cidrs); err != nil {
+	if err := soft.ReadYaml(s.repo, "env-cidrs.yaml", &cidrs); err != nil {
 		http.Error(w, err.Error(), http.StatusInternalServerError)
 		return
 	}
@@ -357,7 +359,7 @@
 		return
 	}
 	cidrs = append(cidrs, installer.EnvCIDR{req.Name, startIP})
-	if err := installer.WriteYaml(s.repo, "env-cidrs.yaml", cidrs); err != nil {
+	if err := soft.WriteYaml(s.repo, "env-cidrs.yaml", cidrs); err != nil {
 		http.Error(w, err.Error(), http.StatusInternalServerError)
 		return
 	}
@@ -365,6 +367,23 @@
 		http.Error(w, err.Error(), http.StatusInternalServerError)
 		return
 	}
+	envNetwork, err := installer.NewEnvNetwork(startIP)
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+	env := installer.EnvConfig{
+		Id:              req.Name,
+		InfraName:       infra.Name,
+		Domain:          req.Domain,
+		PrivateDomain:   fmt.Sprintf("p.%s", req.Domain),
+		ContactEmail:    req.ContactEmail,
+		AdminPublicKey:  req.AdminPublicKey,
+		PublicIP:        infra.PublicIP,
+		NameserverIP:    infra.PublicIP,
+		NamespacePrefix: fmt.Sprintf("%s-", req.Name),
+		Network:         envNetwork,
+	}
 	key := func() string {
 		for {
 			key, err := s.nameGenerator.Generate()
@@ -377,22 +396,17 @@
 		s.envInfo[key] = template.HTML(markdown.ToHTML([]byte(info), nil, nil))
 	}
 	t, dns := tasks.NewCreateEnvTask(
-		tasks.Env{
-			PCloudEnvName:   infra.Name,
-			Name:            req.Name,
-			ContactEmail:    req.ContactEmail,
-			Domain:          req.Domain,
-			AdminPublicKey:  req.AdminPublicKey,
-			NamespacePrefix: fmt.Sprintf("%s-", req.Name),
-		},
-		infra.PublicIP,
-		startIP,
+		env,
 		s.nsCreator,
+		s.dnsFetcher,
+		s.httpClient,
+		s.dnsClient,
 		s.repo,
+		s.repoClient,
 		mgr,
 		infoUpdater,
 	)
-	s.tasks[key] = t
+	s.Tasks[key] = t
 	s.dns[key] = dns
 	go t.Start()
 	http.Redirect(w, r, fmt.Sprintf("/env/%s", key), http.StatusSeeOther)
diff --git a/core/installer/welcome/env_test.go b/core/installer/welcome/env_test.go
new file mode 100644
index 0000000..a689f54
--- /dev/null
+++ b/core/installer/welcome/env_test.go
@@ -0,0 +1,300 @@
+package welcome
+
+import (
+	"bytes"
+	"encoding/json"
+	"golang.org/x/crypto/ssh"
+	"io"
+	"io/fs"
+	"log"
+	"net"
+	"net/http"
+	"strings"
+	"sync"
+	"testing"
+
+	"github.com/go-git/go-billy/v5"
+	"github.com/go-git/go-billy/v5/memfs"
+	"github.com/go-git/go-billy/v5/util"
+	// "github.com/go-git/go-git/v5"
+	// "github.com/go-git/go-git/v5/storage/memory"
+
+	"github.com/giolekva/pcloud/core/installer"
+	"github.com/giolekva/pcloud/core/installer/soft"
+)
+
+type fakeNSCreator struct {
+	t *testing.T
+}
+
+func (f fakeNSCreator) Create(name string) error {
+	f.t.Logf("Create namespace: %s", name)
+	return nil
+}
+
+type fakeZoneStatusFetcher struct {
+	t *testing.T
+}
+
+func (f fakeZoneStatusFetcher) Fetch(addr string) (string, error) {
+	f.t.Logf("Fetching status: %s", addr)
+	return addr, nil
+}
+
+type mockRepoIO struct {
+	soft.RepoFS
+	addr string
+	t    *testing.T
+	l    sync.Locker
+}
+
+func (r mockRepoIO) FullAddress() string {
+	return r.addr
+}
+
+func (r mockRepoIO) Pull() error {
+	r.t.Logf("Pull: %s", r.addr)
+	return nil
+}
+
+func (r mockRepoIO) CommitAndPush(message string) error {
+	r.t.Logf("Commit and push: %s", message)
+	return nil
+}
+
+func (r mockRepoIO) Do(op soft.DoFn, _ ...soft.DoOption) error {
+	r.l.Lock()
+	defer r.l.Unlock()
+	msg, err := op(r)
+	if err != nil {
+		return err
+	}
+	return r.CommitAndPush(msg)
+}
+
+type fakeSoftServeClient struct {
+	t     *testing.T
+	envFS billy.Filesystem
+}
+
+func (f fakeSoftServeClient) Address() string {
+	return ""
+}
+
+func (f fakeSoftServeClient) Signer() ssh.Signer {
+	return nil
+}
+
+func (f fakeSoftServeClient) GetPublicKeys() ([]string, error) {
+	return []string{}, nil
+}
+
+func (f fakeSoftServeClient) GetRepo(name string) (soft.RepoIO, error) {
+	var l sync.Mutex
+	return mockRepoIO{soft.NewBillyRepoFS(f.envFS), "foo.bar", f.t, &l}, nil
+}
+
+func (f fakeSoftServeClient) GetRepoAddress(name string) string {
+	return ""
+}
+
+func (f fakeSoftServeClient) AddRepository(name string) error {
+	return nil
+}
+
+func (f fakeSoftServeClient) AddUser(name, pubKey string) error {
+	return nil
+}
+
+func (f fakeSoftServeClient) AddPublicKey(user string, pubKey string) error {
+	return nil
+}
+
+func (f fakeSoftServeClient) RemovePublicKey(user string, pubKey string) error {
+	return nil
+}
+
+func (f fakeSoftServeClient) MakeUserAdmin(name string) error {
+	return nil
+}
+
+func (f fakeSoftServeClient) AddReadWriteCollaborator(repo, user string) error {
+	return nil
+}
+
+func (f fakeSoftServeClient) AddReadOnlyCollaborator(repo, user string) error {
+	return nil
+}
+
+type fakeClientGetter struct {
+	t     *testing.T
+	envFS billy.Filesystem
+}
+
+func (f fakeClientGetter) Get(addr string, clientPrivateKey []byte, log *log.Logger) (soft.Client, error) {
+	return fakeSoftServeClient{f.t, f.envFS}, nil
+}
+
+const infraConfig = `
+infraAdminPublicKey: Zm9vYmFyCg==
+namespacePrefix: infra-
+pcloudEnvName: infra
+publicIP:
+- 1.1.1.1
+- 2.2.2.2
+`
+
+const envCidrs = ``
+
+type fixedNameGenerator struct{}
+
+func (f fixedNameGenerator) Generate() (string, error) {
+	return "test", nil
+}
+
+type fakeHttpClient struct {
+	t      *testing.T
+	counts map[string]int
+}
+
+func (f fakeHttpClient) Get(addr string) (*http.Response, error) {
+	f.t.Logf("HTTP GET: %s", addr)
+	cnt, ok := f.counts[addr]
+	if !ok {
+		cnt = 0
+	}
+	f.counts[addr] = cnt + 1
+	return &http.Response{
+		Status:     "200 OK",
+		StatusCode: http.StatusOK,
+		Proto:      "HTTP/1.0",
+		ProtoMajor: 1,
+		ProtoMinor: 0,
+		Body:       io.NopCloser(strings.NewReader("ok")),
+	}, nil
+}
+
+type fakeDnsClient struct {
+	t      *testing.T
+	counts map[string]int
+}
+
+func (f fakeDnsClient) Lookup(host string) ([]net.IP, error) {
+	f.t.Logf("HTTP GET: %s", host)
+	return []net.IP{net.ParseIP("1.1.1.1"), net.ParseIP("2.2.2.2")}, nil
+}
+
+func TestCreateNewEnv(t *testing.T) {
+	apps := installer.NewInMemoryAppRepository(installer.CreateAllApps())
+	infraFS := memfs.New()
+	envFS := memfs.New()
+	nsCreator := fakeNSCreator{t}
+	infraRepo := mockRepoIO{soft.NewBillyRepoFS(infraFS), "foo.bar", t, &sync.Mutex{}}
+	infraMgr, err := installer.NewInfraAppManager(infraRepo, nsCreator)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := util.WriteFile(infraFS, "config.yaml", []byte(infraConfig), fs.ModePerm); err != nil {
+		t.Fatal(err)
+	}
+	if err := util.WriteFile(infraFS, "env-cidrs.yaml", []byte(envCidrs), fs.ModePerm); err != nil {
+		t.Fatal(err)
+	}
+	{
+		app, err := installer.FindInfraApp(apps, "dns-gateway")
+		if err != nil {
+			t.Fatal(err)
+		}
+		if err := infraMgr.Install(app, "/infrastructure/dns-gateway", "dns-gateway", map[string]any{
+			"servers": []installer.EnvDNS{},
+		}); err != nil {
+			t.Fatal(err)
+		}
+	}
+	cg := fakeClientGetter{t, envFS}
+	httpClient := fakeHttpClient{t, make(map[string]int)}
+	dnsClient := fakeDnsClient{t, make(map[string]int)}
+	s := NewEnvServer(
+		8181,
+		fakeSoftServeClient{t, envFS},
+		infraRepo,
+		cg,
+		nsCreator,
+		fakeZoneStatusFetcher{t},
+		fixedNameGenerator{},
+		httpClient,
+		dnsClient,
+	)
+	go s.Start()
+	req := createEnvReq{
+		Name:           "test",
+		ContactEmail:   "test@test.t",
+		Domain:         "test.t",
+		AdminPublicKey: "test",
+		SecretToken:    "test",
+	}
+	var buf bytes.Buffer
+	if err := json.NewEncoder(&buf).Encode(req); err != nil {
+		t.Fatal(err)
+	}
+	resp, err := http.Post("http://localhost:8181/", "application/json", &buf)
+	var done sync.WaitGroup
+	done.Add(1)
+	var taskErr error
+	s.Tasks["test"].OnDone(func(err error) {
+		taskErr = err
+		done.Done()
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+	if resp.StatusCode != http.StatusOK {
+		var buf bytes.Buffer
+		io.Copy(&buf, resp.Body)
+		t.Fatal(buf.String())
+	}
+	done.Wait()
+	http.Get("http://localhost:8181/env/test")
+	debugFS(infraFS, t, "/infrastructure/dns-gateway/resources/coredns.yaml")
+	debugFS(envFS, t)
+	if taskErr != nil {
+		t.Fatal(taskErr)
+	}
+	expected := []string{
+		"https://accounts-ui.test.t",
+		"https://welcome.test.t",
+		"https://memberships.p.test.t",
+		"https://headscale.test.t/apple",
+	}
+	for _, e := range expected {
+		if cnt, ok := httpClient.counts[e]; !ok || cnt != 1 {
+			t.Fatal(httpClient.counts)
+		}
+	}
+	if len(httpClient.counts) != 4 {
+		t.Fatal(httpClient.counts)
+	}
+}
+
+func debugFS(bfs billy.Filesystem, t *testing.T, files ...string) {
+	f := map[string]struct{}{}
+	for _, i := range files {
+		f[i] = struct{}{}
+	}
+	t.Log("----- START ------")
+	err := util.Walk(bfs, "/", func(path string, info fs.FileInfo, err error) error {
+		t.Logf("%s %t\n", path, info.IsDir())
+		if _, ok := f[path]; ok && !info.IsDir() {
+			contents, err := util.ReadFile(bfs, path)
+			if err != nil {
+				return err
+			}
+			t.Log(string(contents))
+		}
+		return nil
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+	t.Log("----- END ------")
+}
diff --git a/core/installer/welcome/welcome.go b/core/installer/welcome/welcome.go
index fe6c560..c5d732c 100644
--- a/core/installer/welcome/welcome.go
+++ b/core/installer/welcome/welcome.go
@@ -14,6 +14,7 @@
 	"github.com/gorilla/mux"
 
 	"github.com/giolekva/pcloud/core/installer"
+	"github.com/giolekva/pcloud/core/installer/soft"
 )
 
 //go:embed create-account.html
@@ -27,7 +28,7 @@
 
 type Server struct {
 	port                int
-	repo                installer.RepoIO
+	repo                soft.RepoIO
 	nsCreator           installer.NamespaceCreator
 	createAccountAddr   string
 	loginAddr           string
@@ -36,7 +37,7 @@
 
 func NewServer(
 	port int,
-	repo installer.RepoIO,
+	repo soft.RepoIO,
 	nsCreator installer.NamespaceCreator,
 	createAccountAddr string,
 	loginAddr string,
@@ -250,9 +251,9 @@
 }
 
 func (s *Server) initMemberships(username string) error {
-	return s.repo.Do(func(r installer.RepoFS) (string, error) {
+	return s.repo.Do(func(r soft.RepoFS) (string, error) {
 		var fa firstaccount
-		if err := installer.ReadYaml(r, "first-account.yaml", &fa); err != nil {
+		if err := soft.ReadYaml(r, "first-account.yaml", &fa); err != nil {
 			return "", err
 		}
 		if fa.Created {
@@ -267,7 +268,7 @@
 			return "", err
 		}
 		fa.Created = true
-		if err := installer.WriteYaml(r, "first-account.yaml", fa); err != nil {
+		if err := soft.WriteYaml(r, "first-account.yaml", fa); err != nil {
 			return "", err
 		}
 		return "initialized groups for first account", nil