Clusters: Support persistent storage on remote clusters.
With this merged users can request persistent volumes and PostgreSQL
instances on remote clusters.
This is achieved by the cluster manager installing open-iscsi on all
remote servers and running Longhorn on top of them.
Change-Id: Ic1b24ede12fa32bb99f38e560207230437b45fd6
diff --git a/core/installer/app_configs/app_base.cue b/core/installer/app_configs/app_base.cue
index 0bb511a..3f0036b 100644
--- a/core/installer/app_configs/app_base.cue
+++ b/core/installer/app_configs/app_base.cue
@@ -7,7 +7,6 @@
input: {
cluster?: #Cluster @name(Cluster)
- ...
}
name: string | *""
@@ -53,6 +52,7 @@
}
#Volume: {
+ cluster?: #Cluster
size: string
accessMode: "ReadWriteOnce" | "ReadOnlyMany" | "ReadWriteMany" | "ReadWriteOncePod" | *"ReadWriteOnce"
}
@@ -305,6 +305,8 @@
}
#PostgreSQL: #WithOut & {
+ cluster?: #Cluster
+ _cluster: cluster
name: string
version: "15.3"
initSQL: string | *""
@@ -330,11 +332,19 @@
}
}
volumes: {
- "\(_volumeClaimName)": size: _size
+ "\(_volumeClaimName)": {
+ size: _size
+ if _cluster != _|_ {
+ cluster: _cluster
+ }
+ }
}
helm: {
postgres: {
chart: charts.postgres
+ if _cluster != _|_ {
+ cluster: _cluster
+ }
annotations: {
"dodo.cloud/resource-type": "postgresql"
"dodo.cloud/resource.postgresql.name": name
@@ -578,6 +588,8 @@
}
#WithOut: {
+ cluster?: #Cluster
+ _cluster: cluster
charts: {
volume: {
kind: "GitRepository"
@@ -598,31 +610,42 @@
for k, v in volumes {
"\(k)": #volume & v & {
name: k
+ if _cluster != _|_ {
+ cluster: _cluster
+ }
}
}
}
- helmR: {
- for key, value in volumes {
- "\(key)-volume": #Helm & {
- name: key
+ helm: {
+ for k, v in volumes {
+ "\(k)-volume": {
chart: charts.volume
- info: "Creating disk for \(key)"
+ info: "Creating disk for \(k)"
annotations: {
"dodo.cloud/resource-type": "volume"
- "dodo.cloud/resource.volume.name": value.name
- "dodo.cloud/resource.volume.size": value.size
+ "dodo.cloud/resource.volume.name": v.name
+ "dodo.cloud/resource.volume.size": v.size
}
- values: value
+ values: v
+ if v.cluster != _|_ {
+ cluster: v.cluster
+ }
}
}
}
}
#WithOut: {
+ cluster?: #Cluster
+ _cluster: cluster
postgresql: {...}
postgresql: {
for k, v in postgresql {
- "\(k)": #PostgreSQL & v
+ "\(k)": #PostgreSQL & v & {
+ if _cluster != _|_ {
+ cluster: _cluster
+ }
+ }
}
...
}
diff --git a/core/installer/app_repository.go b/core/installer/app_repository.go
index e4bfa4d..67d2833 100644
--- a/core/installer/app_repository.go
+++ b/core/installer/app_repository.go
@@ -55,6 +55,7 @@
"values-tmpl/env-dns.cue",
"values-tmpl/launcher.cue",
"values-tmpl/cluster-network.cue",
+ "values-tmpl/longhorn.cue",
}
var infraAppConfigs = []string{
diff --git a/core/installer/app_test.go b/core/installer/app_test.go
index 83cf701..bcad861 100644
--- a/core/installer/app_test.go
+++ b/core/installer/app_test.go
@@ -344,6 +344,22 @@
}
}
+func TestLonghorn(t *testing.T) {
+ contents, err := valuesTmpls.ReadFile("values-tmpl/longhorn.cue")
+ if err != nil {
+ t.Fatal(err)
+ }
+ app, err := NewCueEnvApp(CueAppData{
+ "base.cue": []byte(cueBaseConfig),
+ "app.cue": []byte(contents),
+ "global.cue": []byte(cueEnvAppGlobal),
+ })
+ if err != nil {
+ t.Fatal(err)
+ }
+ t.Logf("%+v\n", app.Schema())
+}
+
func TestDNSGateway(t *testing.T) {
contents, err := valuesTmpls.ReadFile("values-tmpl/dns-gateway.cue")
if err != nil {
diff --git a/core/installer/cluster/kube.go b/core/installer/cluster/kube.go
index ee123fa..11fe746 100644
--- a/core/installer/cluster/kube.go
+++ b/core/installer/cluster/kube.go
@@ -25,6 +25,7 @@
serverToken string
controllers []Server
workers []Server
+ storageEnabled bool
}
func NewKubeManager() *KubeManager {
@@ -42,6 +43,7 @@
serverToken: st.ServerToken,
controllers: st.Controllers,
workers: st.Workers,
+ storageEnabled: st.StorageEnabled,
}, nil
}
@@ -57,10 +59,15 @@
m.kubeCfg,
m.controllers,
m.workers,
+ m.storageEnabled,
}
}
-func (m *KubeManager) Init(s Server, setupFn ClusterSetupFunc) (net.IP, error) {
+func (m *KubeManager) EnableStorage() {
+ m.storageEnabled = true
+}
+
+func (m *KubeManager) Init(s Server, setupFn ClusterIngressSetupFunc) (net.IP, error) {
m.l.Lock()
defer m.l.Unlock()
if m.kubeCfg != "" {
diff --git a/core/installer/cluster/manager.go b/core/installer/cluster/manager.go
index c8e62d1..c422535 100644
--- a/core/installer/cluster/manager.go
+++ b/core/installer/cluster/manager.go
@@ -30,14 +30,17 @@
Kubeconfig string `json:"kubeconfig"`
Controllers []Server `json:"controllers"`
Workers []Server `json:"workers"`
+ StorageEnabled bool `json:"storageEnabled"`
}
-type ClusterSetupFunc func(name, kubeconfig, ingressClassName string) (net.IP, error)
+type ClusterIngressSetupFunc func(name, kubeconfig, ingressClassName string) (net.IP, error)
+type ClusterSetupFunc func(m Manager) error
type Manager interface {
- Init(s Server, setupFn ClusterSetupFunc) (net.IP, error)
+ Init(s Server, setupFn ClusterIngressSetupFunc) (net.IP, error)
JoinController(s Server) error
JoinWorker(s Server) error
RemoveServer(name string) error
State() State
+ EnableStorage()
}
diff --git a/core/installer/cluster/ssh.go b/core/installer/cluster/ssh.go
index 751a797..21562a3 100644
--- a/core/installer/cluster/ssh.go
+++ b/core/installer/cluster/ssh.go
@@ -109,7 +109,8 @@
func GetTailscaleIP(c *SSHClient) (string, error) {
fmt.Println("Getting Tailscale IP")
- if _, err := c.Exec("sudo apt-get install net-tools -y"); err != nil {
+ // TODO(gio): install all necessary packages beforehand
+ if _, err := c.Exec("sudo apt-get install net-tools open-iscsi -y"); err != nil {
return "", err
}
ip, err := c.Exec("sudo ifconfig | grep 10.42")
diff --git a/core/installer/schema.go b/core/installer/schema.go
index 3c9e237..7f8410f 100644
--- a/core/installer/schema.go
+++ b/core/installer/schema.go
@@ -64,9 +64,10 @@
kubeconfig: string
ingressClassName: string
}
-
value: #Cluster
-value: { %s }
+
+#Schema: %s
+value: #Schema
`
func isCluster(v cue.Value) bool {
@@ -94,7 +95,7 @@
const networkSchema = `
#Network: {
- name: string
+ name: string
ingressClass: string
certificateIssuer: string | *""
domain: string
@@ -102,9 +103,10 @@
reservePortAddr: string
deallocatePortAddr: string
}
-
value: #Network
-value: { %s }
+
+#Schema: %s
+value: #Schema
`
func isNetwork(v cue.Value) bool {
@@ -140,11 +142,11 @@
reservePortAddr: string
deallocatePortAddr: string
}
-
#Networks: [...#Network]
-
value: #Networks
-value: %s
+
+#Schema: %s
+value: #Schema
`
func isMultiNetwork(v cue.Value) bool {
@@ -175,9 +177,10 @@
enabled: bool | false
groups: string | *""
}
-
value: #Auth
-value: { %s }
+
+#Schema: %s
+value: #Schema
`
func isAuth(v cue.Value) bool {
@@ -208,9 +211,10 @@
public: string
private: string
}
-
value: #SSHKey
-value: { %s }
+
+#Schema: %s
+value: #Schema
`
func isSSHKey(v cue.Value) bool {
diff --git a/core/installer/schema_test.go b/core/installer/schema_test.go
index 04375bf..32baa40 100644
--- a/core/installer/schema_test.go
+++ b/core/installer/schema_test.go
@@ -89,3 +89,15 @@
t.Fatal("is network")
}
}
+
+const emptyInput = "input: {}"
+
+func TestIsNotNetworkEmptyInput(t *testing.T) {
+ v, err := ParseCueAppConfig(CueAppData{"/test.cue": []byte(emptyInput)})
+ if err != nil {
+ t.Fatal(err)
+ }
+ if isNetwork(v.LookupPath(cue.ParsePath("input"))) {
+ t.Fatal("not really network")
+ }
+}
diff --git a/core/installer/tasks/install.go b/core/installer/tasks/install.go
index 6ff939b..b704c68 100644
--- a/core/installer/tasks/install.go
+++ b/core/installer/tasks/install.go
@@ -56,7 +56,7 @@
return &t
}
-func NewClusterInitTask(m cluster.Manager, server cluster.Server, cnc installer.ClusterNetworkConfigurator, repo soft.RepoIO, setupFn cluster.ClusterSetupFunc) Task {
+func NewClusterInitTask(m cluster.Manager, server cluster.Server, cnc installer.ClusterNetworkConfigurator, repo soft.RepoIO, setupFn cluster.ClusterIngressSetupFunc) Task {
d := &dynamicTaskSlice{t: []Task{}}
done := make(chan error)
setupTask := newLeafTask(fmt.Sprintf("Installing dodo on %s", server.IP.String()), func() error {
@@ -89,8 +89,7 @@
return &t
}
-func NewRemoveClusterTask(m cluster.Manager, cnc installer.ClusterNetworkConfigurator,
- repo soft.RepoIO) Task {
+func NewRemoveClusterTask(m cluster.Manager, cnc installer.ClusterNetworkConfigurator, repo soft.RepoIO) Task {
t := newLeafTask(fmt.Sprintf("Removing %s cluster", m.State().Name), func() error {
if err := cnc.RemoveCluster(m.State().Name, m.State().IngressIP); err != nil {
return err
@@ -196,3 +195,31 @@
t := newParentTask("Installing application", true, start, d)
return &t
}
+
+func NewClusterSetupTask(m cluster.Manager, setupFn cluster.ClusterSetupFunc, repo soft.RepoIO, msg string) Task {
+ d := &dynamicTaskSlice{t: []Task{}}
+ done := make(chan error)
+ setupTask := newLeafTask(msg, func() error {
+ return setupFn(m)
+ })
+ d.Append(&setupTask)
+ setupTask.OnDone(func(err error) {
+ if err != nil {
+ done <- err
+ return
+ }
+ _, err = repo.Do(func(fs soft.RepoFS) (string, error) {
+ if err := soft.WriteJson(fs, fmt.Sprintf("/clusters/%s/config.json", m.State().Name), m.State()); err != nil {
+ return "", err
+ }
+ return msg, nil
+ })
+ done <- err
+ })
+ start := func() error {
+ setupTask.Start()
+ return <-done
+ }
+	t := newParentTask(msg, true, start, d)
+ return &t
+}
diff --git a/core/installer/values-tmpl/longhorn.cue b/core/installer/values-tmpl/longhorn.cue
new file mode 100644
index 0000000..80a693c
--- /dev/null
+++ b/core/installer/values-tmpl/longhorn.cue
@@ -0,0 +1,172 @@
+input: {
+}
+
+name: "longhorn"
+namespace: "longhorn"
+_pullPolicy: "IfNotPresent"
+
+out: {
+ images: {
+ longhornEngine: {
+ repository: "longhornio"
+ name: "longhorn-engine"
+ tag: "v1.5.2"
+ pullPolicy: _pullPolicy
+ }
+ longhornManager: {
+ repository: "longhornio"
+ name: "longhorn-manager"
+ tag: "v1.5.2"
+ pullPolicy: _pullPolicy
+ }
+ longhornUI: {
+ repository: "longhornio"
+ name: "longhorn-ui"
+ tag: "v1.5.2"
+ pullPolicy: _pullPolicy
+ }
+ longhornInstanceManager: {
+ repository: "longhornio"
+ name: "longhorn-instance-manager"
+ tag: "v1.5.2"
+ pullPolicy: _pullPolicy
+ }
+ longhornShareManager: {
+ repository: "longhornio"
+ name: "longhorn-share-manager"
+ tag: "v1.5.2"
+ pullPolicy: _pullPolicy
+ }
+ longhornBackingImageManager: {
+ repository: "longhornio"
+ name: "backing-image-manager"
+ tag: "v1.5.2"
+ pullPolicy: _pullPolicy
+ }
+ longhornSupportBundleKit: {
+ repository: "longhornio"
+ name: "support-bundle-kit"
+ tag: "v0.0.27"
+ pullPolicy: _pullPolicy
+ }
+ csiAttacher: {
+ repository: "longhornio"
+ name: "csi-attacher"
+ tag: "v4.2.0"
+ pullPolicy: _pullPolicy
+ }
+ csiProvisioner: {
+ repository: "longhornio"
+ name: "csi-provisioner"
+ tag: "v3.4.1"
+ pullPolicy: _pullPolicy
+ }
+ csiNodeDriverRegistrar: {
+ repository: "longhornio"
+ name: "csi-node-driver-registrar"
+ tag: "v2.7.0"
+ pullPolicy: _pullPolicy
+ }
+ csiResizer: {
+ repository: "longhornio"
+ name: "csi-resizer"
+ tag: "v1.7.0"
+ pullPolicy: _pullPolicy
+ }
+ csiSnapshotter: {
+ repository: "longhornio"
+ name: "csi-snapshotter"
+ tag: "v6.2.1"
+ pullPolicy: _pullPolicy
+ }
+ csiLivenessProbe: {
+ repository: "longhornio"
+ name: "livenessprobe"
+ tag: "v2.9.0"
+ pullPolicy: _pullPolicy
+ }
+ }
+ charts: {
+ longhorn: {
+ kind: "GitRepository"
+ address: "https://code.v1.dodo.cloud/helm-charts"
+ branch: "main"
+ path: "charts/longhorn"
+ }
+ }
+ helm: {
+ longhorn: {
+ chart: charts.longhorn
+ info: "Installing distributed storage servers"
+ values: {
+ image: {
+ longhorn: {
+ engine: {
+ repository: images.longhornEngine.imageName
+ tag: images.longhornEngine.tag
+ }
+ manager: {
+ repository: images.longhornManager.imageName
+ tag: images.longhornManager.tag
+ }
+ ui: {
+ repository: images.longhornUI.imageName
+ tag: images.longhornUI.tag
+ }
+ instanceManager: {
+ repository: images.longhornInstanceManager.imageName
+ tag: images.longhornInstanceManager.tag
+ }
+ shareManager: {
+ repository: images.longhornShareManager.imageName
+ tag: images.longhornShareManager.tag
+ }
+ backingImageManager: {
+ repository: images.longhornBackingImageManager.imageName
+ tag: images.longhornBackingImageManager.tag
+ }
+ supportBundleKit: {
+ repository: images.longhornSupportBundleKit.imageName
+ tag: images.longhornSupportBundleKit.tag
+ }
+ }
+ csi: {
+ attacher: {
+ repository: images.csiAttacher.imageName
+ tag: images.csiAttacher.tag
+ }
+ provisioner: {
+ repository: images.csiProvisioner.imageName
+ tag: images.csiProvisioner.tag
+ }
+ nodeDriverRegistrar: {
+ repository: images.csiNodeDriverRegistrar.imageName
+ tag: images.csiNodeDriverRegistrar.tag
+ }
+ resizer: {
+ repository: images.csiResizer.imageName
+ tag: images.csiResizer.tag
+ }
+ snapshotter: {
+ repository: images.csiSnapshotter.imageName
+ tag: images.csiSnapshotter.tag
+ }
+ livenessProbe: {
+ repository: images.csiLivenessProbe.imageName
+ tag: images.csiLivenessProbe.tag
+ }
+ }
+ pullPolicy: _pullPolicy
+ }
+ // if input.storageDir != _|_ {
+ // defaultSettings: defaultDataPath: input.storageDir
+ // }
+ // if input.volumeDefaultReplicaCount != _|_ {
+ persistence: defaultClassReplicaCount: 1 // input.volumeDefaultReplicaCount
+ // }
+ service: ui: type: "ClusterIP"
+ ingress: enabled: false
+ }
+ }
+ }
+}
diff --git a/core/installer/welcome/appmanager-tmpl/cluster.html b/core/installer/welcome/appmanager-tmpl/cluster.html
index 2ef2fbe..d16ab45 100644
--- a/core/installer/welcome/appmanager-tmpl/cluster.html
+++ b/core/installer/welcome/appmanager-tmpl/cluster.html
@@ -3,10 +3,11 @@
{{ end }}
{{ define "content" }}
-<form action="/clusters/{{ .Cluster.Name }}/remove" method="POST">
+{{ $c := .Cluster }}
+<form action="/clusters/{{ $c.Name }}/remove" method="POST">
<button type="submit" name="remove-cluster">remove cluster</button>
</form>
-<form action="/clusters/{{ .Cluster.Name }}/servers" method="POST" autocomplete="off">
+<form action="/clusters/{{ $c.Name }}/servers" method="POST" autocomplete="off">
<details class="dropdown">
<summary id="type">worker</summary>
<ul>
@@ -30,7 +31,13 @@
<input type="password" name="password" placeholder="password" />
<button type="submit" name="add-server">add server</button>
</form>
-{{ $c := .Cluster }}
+{{- if $c.StorageEnabled }}
+Supports persistent storage<br/>
+{{- else }}
+<form action="/clusters/{{ $c.Name }}/setup-storage" method="POST">
+  <button type="submit" name="setup-storage">setup persistent storage</button>
+</form>
+{{- end }}
<table class="striped">
<thead>
<tr>
@@ -41,7 +48,7 @@
</tr>
</thead>
<tbody>
- {{ range $s := .Cluster.Controllers }}
+ {{ range $s := $c.Controllers }}
<tr>
<th>controller</th>
<th scope="row">{{ $s.Name }}</th>
@@ -53,7 +60,7 @@
</td>
</tr>
{{ end }}
- {{ range $s := .Cluster.Workers }}
+ {{ range $s := $c.Workers }}
<tr>
<th>worker</th>
<th scope="row">{{ $s.Name }}</th>
diff --git a/core/installer/welcome/appmanager.go b/core/installer/welcome/appmanager.go
index 808bf7e..190ca12 100644
--- a/core/installer/welcome/appmanager.go
+++ b/core/installer/welcome/appmanager.go
@@ -149,6 +149,7 @@
r.HandleFunc("/clusters/{cluster}/servers/{server}/remove", s.handleClusterRemoveServer).Methods(http.MethodPost)
r.HandleFunc("/clusters/{cluster}/servers", s.handleClusterAddServer).Methods(http.MethodPost)
r.HandleFunc("/clusters/{name}", s.handleCluster).Methods(http.MethodGet)
+ r.HandleFunc("/clusters/{name}/setup-storage", s.handleClusterSetupStorage).Methods(http.MethodPost)
r.HandleFunc("/clusters/{name}/remove", s.handleRemoveCluster).Methods(http.MethodPost)
r.HandleFunc("/clusters", s.handleAllClusters).Methods(http.MethodGet)
r.HandleFunc("/clusters", s.handleCreateCluster).Methods(http.MethodPost)
@@ -679,6 +680,41 @@
 	}
 }
+func (s *AppManagerServer) handleClusterSetupStorage(w http.ResponseWriter, r *http.Request) {
+	s.l.Lock()
+	defer s.l.Unlock()
+	cName, ok := mux.Vars(r)["name"]
+	if !ok {
+		http.Error(w, "empty name", http.StatusBadRequest)
+		return
+	}
+	if _, ok := s.tasks[cName]; ok {
+ http.Error(w, "cluster task in progress", http.StatusLocked)
+ return
+ }
+ m, err := s.getClusterManager(cName)
+ if err != nil {
+ if errors.Is(err, installer.ErrorNotFound) {
+ http.Error(w, "not found", http.StatusNotFound)
+ } else {
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ }
+ return
+ }
+ task := tasks.NewClusterSetupTask(m, s.setupRemoteClusterStorage(), s.repo, fmt.Sprintf("cluster %s: setting up storage", m.State().Name))
+ task.OnDone(func(err error) {
+ go func() {
+ time.Sleep(30 * time.Second)
+ s.l.Lock()
+ defer s.l.Unlock()
+ delete(s.tasks, cName)
+ }()
+ })
+ go task.Start()
+ s.tasks[cName] = taskForward{task, fmt.Sprintf("/clusters/%s", cName)}
+ http.Redirect(w, r, fmt.Sprintf("/tasks/%s", cName), http.StatusSeeOther)
+}
+
func (s *AppManagerServer) handleClusterRemoveServer(w http.ResponseWriter, r *http.Request) {
s.l.Lock()
defer s.l.Unlock()
@@ -759,7 +793,7 @@
return
}
t := r.PostFormValue("type")
- ip := net.ParseIP(r.PostFormValue("ip"))
+ ip := net.ParseIP(strings.TrimSpace(r.PostFormValue("ip")))
if ip == nil {
http.Error(w, "invalid ip", http.StatusBadRequest)
return
@@ -857,7 +891,7 @@
http.Redirect(w, r, fmt.Sprintf("/tasks/%s", cName), http.StatusSeeOther)
}
-func (s *AppManagerServer) setupRemoteCluster() cluster.ClusterSetupFunc {
+func (s *AppManagerServer) setupRemoteCluster() cluster.ClusterIngressSetupFunc {
const vpnUser = "private-network-proxy"
return func(name, kubeconfig, ingressClassName string) (net.IP, error) {
hostname := fmt.Sprintf("cluster-%s", name)
@@ -872,7 +906,7 @@
}
instanceId := fmt.Sprintf("%s-%s", app.Slug(), name)
appDir := fmt.Sprintf("/clusters/%s/ingress", name)
- namespace := fmt.Sprintf("%scluster-network-%s", env.NamespacePrefix, name)
+ namespace := fmt.Sprintf("%scluster-%s-network", env.NamespacePrefix, name)
rr, err := s.m.Install(app, instanceId, appDir, namespace, map[string]any{
"cluster": map[string]any{
"name": name,
@@ -910,3 +944,42 @@
}
}
}
+
+func (s *AppManagerServer) setupRemoteClusterStorage() cluster.ClusterSetupFunc {
+ return func(cm cluster.Manager) error {
+ name := cm.State().Name
+ t := tasks.NewInstallTask(s.h, func() (installer.ReleaseResources, error) {
+ app, err := installer.FindEnvApp(s.fr, "longhorn")
+ if err != nil {
+ return installer.ReleaseResources{}, err
+ }
+ env, err := s.m.Config()
+ if err != nil {
+ return installer.ReleaseResources{}, err
+ }
+ instanceId := fmt.Sprintf("%s-%s", app.Slug(), name)
+ appDir := fmt.Sprintf("/clusters/%s/storage", name)
+ namespace := fmt.Sprintf("%scluster-%s-storage", env.NamespacePrefix, name)
+ rr, err := s.m.Install(app, instanceId, appDir, namespace, map[string]any{
+ "cluster": name,
+ })
+ if err != nil {
+ return installer.ReleaseResources{}, err
+ }
+		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+		go func() { defer cancel(); s.reconciler.Reconcile(ctx) }()
+ return rr, err
+ })
+ ch := make(chan error)
+ t.OnDone(func(err error) {
+ ch <- err
+ })
+ go t.Start()
+ err := <-ch
+ if err != nil {
+ return err
+ }
+ cm.EnableStorage()
+ return nil
+ }
+}