| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 1 | package dockerimg |
| 2 | |
| 3 | import ( |
| 4 | "bytes" |
| 5 | "context" |
| 6 | "crypto/sha256" |
| 7 | "encoding/hex" |
| 8 | "encoding/json" |
| 9 | "fmt" |
| 10 | "io/fs" |
| 11 | "maps" |
| 12 | "net/http" |
| 13 | "slices" |
| 14 | "strings" |
| 15 | "text/template" |
| 16 | |
| 17 | "sketch.dev/ant" |
| 18 | ) |
| 19 | |
| 20 | func hashInitFiles(initFiles map[string]string) string { |
| 21 | h := sha256.New() |
| 22 | for _, path := range slices.Sorted(maps.Keys(initFiles)) { |
| 23 | fmt.Fprintf(h, "%s\n%s\n\n", path, initFiles[path]) |
| 24 | } |
| David Crawshaw | 1112949 | 2025-04-25 20:41:53 -0700 | [diff] [blame] | 25 | fmt.Fprintf(h, "docker template 1\n%s\n", dockerfileCustomTmpl) |
| 26 | fmt.Fprintf(h, "docker template 2\n%s\n", dockerfileDefaultTmpl) |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 27 | return hex.EncodeToString(h.Sum(nil)) |
| 28 | } |
| 29 | |
| David Crawshaw | 1112949 | 2025-04-25 20:41:53 -0700 | [diff] [blame] | 30 | // DefaultImage is intended to ONLY be used by the pushdockerimg.go script. |
| 31 | func DefaultImage() (name, dockerfile, hash string) { |
| 32 | buf := new(bytes.Buffer) |
| 33 | err := template.Must(template.New("dockerfile").Parse(dockerfileBaseTmpl)).Execute(buf, map[string]string{ |
| 34 | "From": defaultBaseImg, |
| 35 | }) |
| 36 | if err != nil { |
| 37 | panic(err) |
| 38 | } |
| 39 | return dockerfileDefaultImg, buf.String(), hashInitFiles(nil) |
| 40 | } |
| 41 | |
const (
	// dockerfileDefaultImg is the name of the pre-canned image. It is
	// returned by DefaultImage and used as the FROM line of
	// dockerfileDefaultTmpl.
	dockerfileDefaultImg = "ghcr.io/boldsoftware/sketch:v1"

	// defaultBaseImg is the base image recommended to the LLM in the
	// createDockerfile prompt and used by DefaultImage to render
	// dockerfileBaseTmpl.
	defaultBaseImg = "golang:1.24.2-alpine3.21"
)
| 45 | |
// dockerfileBaseTmpl is the text/template source for the base portion of the
// generated dockerfile. Its only template variable is From, the base image
// placed on the FROM line. The rest installs a fixed set of tools with apk,
// configures the Go environment, installs goimports, gopls and gofumpt, and
// creates the /root/.cache/sketch/webui directory.
//
// DefaultImage renders this template on its own; dockerfileCustomTmpl uses it
// as its prefix.
//
// TODO: add semgrep, prettier -- they require node/npm/etc which is more complicated than apk
// If/when we do this, add them into the list of available tools in bash.go.
const dockerfileBaseTmpl = `FROM {{.From}}

RUN apk add bash git make jq sqlite gcc musl-dev linux-headers npm nodejs go github-cli ripgrep fzf python3 curl vim grep

ENV GOTOOLCHAIN=auto
ENV GOPATH=/go
ENV PATH="$GOPATH/bin:$PATH"

RUN go install golang.org/x/tools/cmd/goimports@latest
RUN go install golang.org/x/tools/gopls@latest
RUN go install mvdan.cc/gofumpt@latest

RUN mkdir -p /root/.cache/sketch/webui
`
| David Crawshaw | be10fa9 | 2025-04-18 01:16:00 -0700 | [diff] [blame] | 62 | |
// dockerfileFragment is the repository-specific tail shared by
// dockerfileCustomTmpl and dockerfileDefaultTmpl. It is text/template source
// with three variables:
//
//   - InitFilesHash: stored in the sketch_context image label.
//   - SubDir: appended to /app to form the WORKDIR.
//   - ExtraCmds: additional dockerfile commands inserted before the CMD line.
//
// The fragment also configures the git user from the GIT_USER_EMAIL and
// GIT_USER_NAME build args, copies the build context to /app, and runs
// "go mod download" when a go.mod file is present in the working directory.
const dockerfileFragment = `
ARG GIT_USER_EMAIL
ARG GIT_USER_NAME

RUN git config --global user.email "$GIT_USER_EMAIL" && \
git config --global user.name "$GIT_USER_NAME"

LABEL sketch_context="{{.InitFilesHash}}"
COPY . /app

WORKDIR /app{{.SubDir}}
RUN if [ -f go.mod ]; then go mod download; fi

{{.ExtraCmds}}

CMD ["/bin/sketch"]
`
| 80 | |
| 81 | // dockerfileCustomTmpl is the dockerfile template used when the LLM |
| 82 | // chooses a custom base image. |
| 83 | const dockerfileCustomTmpl = dockerfileBaseTmpl + dockerfileFragment |
| 84 | |
| 85 | // dockerfileDefaultTmpl is the dockerfile used when the LLM went with |
| 86 | // the defaultBaseImg. In this case, we use a pre-canned image. |
| 87 | const dockerfileDefaultTmpl = "FROM " + dockerfileDefaultImg + "\n" + dockerfileFragment |
| David Crawshaw | be10fa9 | 2025-04-18 01:16:00 -0700 | [diff] [blame] | 88 | |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 89 | // createDockerfile creates a Dockerfile for a git repo. |
| 90 | // It expects the relevant initFiles to have been provided. |
| 91 | // If the sketch binary is being executed in a sub-directory of the repository, |
| 92 | // the relative path is provided on subPathWorkingDir. |
| 93 | func createDockerfile(ctx context.Context, httpc *http.Client, antURL, antAPIKey string, initFiles map[string]string, subPathWorkingDir string) (string, error) { |
| 94 | if subPathWorkingDir == "." { |
| 95 | subPathWorkingDir = "" |
| 96 | } else if subPathWorkingDir != "" && subPathWorkingDir[0] != '/' { |
| 97 | subPathWorkingDir = "/" + subPathWorkingDir |
| 98 | } |
| 99 | toolCalled := false |
| 100 | var dockerfileFROM, dockerfileExtraCmds string |
| 101 | runDockerfile := func(ctx context.Context, input json.RawMessage) (string, error) { |
| 102 | // TODO: unmarshal straight into a struct |
| 103 | var m map[string]any |
| 104 | if err := json.Unmarshal(input, &m); err != nil { |
| 105 | return "", fmt.Errorf(`input=%[1]v (%[1]T), wanted a map[string]any, got: %w`, input, err) |
| 106 | } |
| 107 | var ok bool |
| 108 | dockerfileFROM, ok = m["from"].(string) |
| 109 | if !ok { |
| 110 | return "", fmt.Errorf(`input["from"]=%[1]v (%[1]T), wanted a string`, m["path"]) |
| 111 | } |
| 112 | dockerfileExtraCmds, ok = m["extra_cmds"].(string) |
| 113 | if !ok { |
| 114 | return "", fmt.Errorf(`input["extra_cmds"]=%[1]v (%[1]T), wanted a string`, m["path"]) |
| 115 | } |
| 116 | toolCalled = true |
| 117 | return "OK", nil |
| 118 | } |
| 119 | convo := ant.NewConvo(ctx, antAPIKey) |
| 120 | if httpc != nil { |
| 121 | convo.HTTPC = httpc |
| 122 | } |
| 123 | if antURL != "" { |
| 124 | convo.URL = antURL |
| 125 | } |
| 126 | convo.Tools = []*ant.Tool{{ |
| 127 | Name: "dockerfile", |
| 128 | Description: "Helps define a Dockerfile that sets up a dev environment for this project.", |
| 129 | Run: runDockerfile, |
| 130 | InputSchema: ant.MustSchema(`{ |
| 131 | "type": "object", |
| 132 | "required": ["from", "extra_cmds"], |
| 133 | "properties": { |
| 134 | "from": { |
| 135 | "type": "string", |
| 136 | "description": "The alpine base image provided to the dockerfile FROM command" |
| 137 | }, |
| 138 | "extra_cmds": { |
| 139 | "type": "string", |
| 140 | "description": "Extra commands to add to the dockerfile." |
| 141 | } |
| 142 | } |
| 143 | }`), |
| 144 | }} |
| 145 | |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 146 | // TODO: it's basically impossible to one-shot a python env. We need an agent loop for that. |
| 147 | // Right now the prompt contains a set of half-baked workarounds. |
| 148 | |
| 149 | // If you want to edit the model prompt, run: |
| 150 | // |
| Philip Zeyliger | cc3ba22 | 2025-04-23 14:52:21 -0700 | [diff] [blame] | 151 | // go test ./dockerimg -httprecord ".*" -rewritewant |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 152 | // |
| 153 | // Then look at the changes with: |
| 154 | // |
| Philip Zeyliger | cc3ba22 | 2025-04-23 14:52:21 -0700 | [diff] [blame] | 155 | // git diff dockerimg/testdata/*.dockerfile |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 156 | // |
| 157 | // If the dockerfile changes are a strict improvement, commit all the changes. |
| 158 | msg := ant.Message{ |
| 159 | Role: ant.MessageRoleUser, |
| 160 | Content: []ant.Content{{ |
| 161 | Type: ant.ContentTypeText, |
| 162 | Text: ` |
| 163 | Call the dockerfile tool to create a Dockerfile. |
| 164 | The parameters to dockerfile fill out the From and ExtraCmds |
| 165 | template variables in the following Go template: |
| 166 | |
| David Crawshaw | 1112949 | 2025-04-25 20:41:53 -0700 | [diff] [blame] | 167 | ` + "```\n" + dockerfileCustomTmpl + "\n```" + ` |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 168 | |
| 169 | In particular: |
| David Crawshaw | 1112949 | 2025-04-25 20:41:53 -0700 | [diff] [blame] | 170 | - Assume it is primarily a Go project. For a minimal env, prefer ` + defaultBaseImg + ` as a base image. |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 171 | - If any python is needed at all, switch to using a python alpine image as a the base and apk add go. |
| 172 | Favor using uv, and use one of these base images, depending on the preferred python version: |
| 173 | ghcr.io/astral-sh/uv:python3.13-alpine |
| 174 | ghcr.io/astral-sh/uv:python3.12-alpine |
| 175 | ghcr.io/astral-sh/uv:python3.11-alpine |
| 176 | - When using pip to install packages, use: uv pip install --system. |
| 177 | - Python env setup is challenging and often no required, so any RUN commands involving python tooling should be written to let docker build continue if there is a failure. |
| 178 | - Include any tools particular to this repository that can be inferred from the given context. |
| 179 | - Append || true to any apk add commands in case the package does not exist. |
| David Crawshaw | 1112949 | 2025-04-25 20:41:53 -0700 | [diff] [blame] | 180 | - Do NOT expose any ports. |
| 181 | - Do NOT generate any CMD or ENTRYPOINT extra commands. |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 182 | `, |
| 183 | }}, |
| 184 | } |
| 185 | if len(initFiles) > 0 { |
| 186 | msg.Content[0].Text += "Here is the content of several files from the repository that may be relevant:\n\n" |
| 187 | } |
| 188 | |
| 189 | for _, name := range slices.Sorted(maps.Keys(initFiles)) { |
| 190 | msg.Content = append(msg.Content, ant.Content{ |
| 191 | Type: ant.ContentTypeText, |
| 192 | Text: fmt.Sprintf("Here is the contents %s:\n<file>\n%s\n</file>\n\n", name, initFiles[name]), |
| 193 | }) |
| 194 | } |
| 195 | msg.Content = append(msg.Content, ant.Content{ |
| 196 | Type: ant.ContentTypeText, |
| 197 | Text: "Now call the dockerfile tool.", |
| 198 | }) |
| 199 | res, err := convo.SendMessage(msg) |
| 200 | if err != nil { |
| 201 | return "", err |
| 202 | } |
| 203 | if res.StopReason != ant.StopReasonToolUse { |
| 204 | return "", fmt.Errorf("expected stop reason %q, got %q", ant.StopReasonToolUse, res.StopReason) |
| 205 | } |
| 206 | if _, err := convo.ToolResultContents(context.TODO(), res); err != nil { |
| 207 | return "", err |
| 208 | } |
| 209 | if !toolCalled { |
| 210 | return "", fmt.Errorf("no dockerfile returned") |
| 211 | } |
| 212 | |
| David Crawshaw | 1112949 | 2025-04-25 20:41:53 -0700 | [diff] [blame] | 213 | tmpl := dockerfileCustomTmpl |
| 214 | if dockerfileFROM == defaultBaseImg { |
| 215 | // Because the LLM has chosen the image we recommended, we |
| 216 | // can use a pre-canned image of our entire template, which |
| 217 | // saves a lot of build time. |
| 218 | tmpl = dockerfileDefaultTmpl |
| 219 | } |
| 220 | |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 221 | buf := new(bytes.Buffer) |
| David Crawshaw | 1112949 | 2025-04-25 20:41:53 -0700 | [diff] [blame] | 222 | err = template.Must(template.New("dockerfile").Parse(tmpl)).Execute(buf, map[string]string{ |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 223 | "From": dockerfileFROM, |
| 224 | "ExtraCmds": dockerfileExtraCmds, |
| 225 | "InitFilesHash": hashInitFiles(initFiles), |
| 226 | "SubDir": subPathWorkingDir, |
| 227 | }) |
| 228 | if err != nil { |
| 229 | return "", fmt.Errorf("dockerfile template failed: %w", err) |
| 230 | } |
| 231 | |
| 232 | return buf.String(), nil |
| 233 | } |
| 234 | |
| 235 | // For future reference: we can find the current git branch/checkout with: git symbolic-ref -q --short HEAD || git describe --tags --exact-match 2>/dev/null || git rev-parse HEAD |
| 236 | |
// readInitFiles walks fsys from its root and returns the contents of the
// files used as context for dockerfile generation, keyed by their
// slash-separated path relative to the root.
//
// Two kinds of regular files are collected:
//
//   - README files located directly at the root (name starts with "readme",
//     case-insensitively);
//   - anything under .github/workflows/.
//
// Directories named .git or node_modules are skipped entirely, at any depth.
// Non-regular files (for example symlinks) are ignored. The first walk or
// read error aborts the walk and is returned with a nil map.
func readInitFiles(fsys fs.FS) (map[string]string, error) {
	result := make(map[string]string)

	err := fs.WalkDir(fsys, ".", func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if d.IsDir() {
			switch d.Name() {
			case ".git", "node_modules":
				return fs.SkipDir
			}
			return nil
		}
		if !d.Type().IsRegular() {
			return nil
		}

		// Case 1: README files.
		// Only files directly at the root qualify. Matching the prefix
		// against the whole path would also pick up every file below a
		// top-level directory whose name merely starts with "readme".
		// TODO: find README files between the .git root (where we start)
		// and the dir that sketch was initialized. This needs more info
		// plumbed to this function.
		isRootReadme := !strings.Contains(path, "/") &&
			strings.HasPrefix(strings.ToLower(path), "readme")

		// Case 2: GitHub workflow files.
		isWorkflow := strings.HasPrefix(path, ".github/workflows/")

		if !isRootReadme && !isWorkflow {
			return nil
		}
		content, err := fs.ReadFile(fsys, path)
		if err != nil {
			return err
		}
		result[path] = string(content)
		return nil
	})
	if err != nil {
		return nil, err
	}
	return result, nil
}