| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 1 | package dockerimg |
| 2 | |
| 3 | import ( |
| 4 | "bytes" |
| 5 | "context" |
| 6 | "crypto/sha256" |
| Philip Zeyliger | 9df94b5 | 2025-05-18 03:43:14 +0000 | [diff] [blame] | 7 | _ "embed" // Using underscore import to keep embed package for go:embed directive |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 8 | "encoding/hex" |
| 9 | "encoding/json" |
| 10 | "fmt" |
| David Crawshaw | 2a5bd6d | 2025-04-30 14:29:46 -0700 | [diff] [blame] | 11 | "io" |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 12 | "io/fs" |
| 13 | "maps" |
| 14 | "net/http" |
| 15 | "slices" |
| 16 | "strings" |
| 17 | "text/template" |
| 18 | |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 19 | "sketch.dev/llm" |
| 20 | "sketch.dev/llm/conversation" |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 21 | ) |
| 22 | |
| 23 | func hashInitFiles(initFiles map[string]string) string { |
| 24 | h := sha256.New() |
| 25 | for _, path := range slices.Sorted(maps.Keys(initFiles)) { |
| 26 | fmt.Fprintf(h, "%s\n%s\n\n", path, initFiles[path]) |
| 27 | } |
| David Crawshaw | 2a5bd6d | 2025-04-30 14:29:46 -0700 | [diff] [blame] | 28 | fmt.Fprintf(h, "docker template\n%s\n", dockerfileDefaultTmpl) |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 29 | return hex.EncodeToString(h.Sum(nil)) |
| 30 | } |
| 31 | |
| David Crawshaw | 1112949 | 2025-04-25 20:41:53 -0700 | [diff] [blame] | 32 | // DefaultImage is intended to ONLY be used by the pushdockerimg.go script. |
| David Crawshaw | 2a5bd6d | 2025-04-30 14:29:46 -0700 | [diff] [blame] | 33 | func DefaultImage() (name, dockerfile, tag string) { |
| 34 | return dockerImgName, dockerfileBase, dockerfileBaseHash() |
| David Crawshaw | 1112949 | 2025-04-25 20:41:53 -0700 | [diff] [blame] | 35 | } |
| 36 | |
const (
	// dockerImgRepo is the repository path of the sketch image, without a
	// registry host. It is used to build the ghcr.io token and tag-list URLs.
	dockerImgRepo = "boldsoftware/sketch"
	// dockerImgName is the full image reference (registry host plus
	// repository) used in FROM lines and in DefaultImage.
	dockerImgName = "ghcr.io/" + dockerImgRepo
)
| David Crawshaw | 5bff650 | 2025-04-26 09:11:40 -0700 | [diff] [blame] | 41 | |
| David Crawshaw | 2a5bd6d | 2025-04-30 14:29:46 -0700 | [diff] [blame] | 42 | func dockerfileBaseHash() string { |
| 43 | h := sha256.New() |
| 44 | io.WriteString(h, dockerfileBase) |
| 45 | return hex.EncodeToString(h.Sum(nil))[:32] |
| 46 | } |
| David Crawshaw | 1112949 | 2025-04-25 20:41:53 -0700 | [diff] [blame] | 47 | |
// tmpSketchDockerfile is the name of a file that the generated Dockerfile
// deletes from /app right after COPY (see dockerfileFragment).
// NOTE(review): presumably this is the temporary Dockerfile written into the
// build context by the caller — confirm where it is created.
const tmpSketchDockerfile = "tmp-sketch-dockerfile"

// dockerfileBaseData holds the raw bytes of the Dockerfile.base file that
// sits next to this source file; it is filled in at build time by go:embed.
//
//go:embed Dockerfile.base
var dockerfileBaseData []byte

// dockerfileBase is the content of the base Dockerfile as a string.
// Its hash (see dockerfileBaseHash) is used as the published image tag.
var dockerfileBase = string(dockerfileBaseData)
| David Crawshaw | be10fa9 | 2025-04-18 01:16:00 -0700 | [diff] [blame] | 55 | |
// dockerfileFragment is the text/template source for the repository-specific
// tail of the generated Dockerfile. It is appended either to a FROM line for
// the published image (dockerfileDefaultTmpl) or to the full dockerfileBase
// when building locally.
//
// Template fields, filled in by createDockerfile:
//   - .InitFilesHash: recorded in the sketch_context label.
//   - .SubDir: appended to /app to form the working directory.
//   - .ExtraCmds: extra Dockerfile commands, run under a shell without -e so
//     that a failing command does not abort the build.
const dockerfileFragment = `
ARG GIT_USER_EMAIL
ARG GIT_USER_NAME

RUN git config --global user.email "$GIT_USER_EMAIL" && \
git config --global user.name "$GIT_USER_NAME" && \
git config --global http.postBuffer 524288000

LABEL sketch_context="{{.InitFilesHash}}"
COPY . /app
RUN rm -f /app/` + tmpSketchDockerfile + `

WORKDIR /app{{.SubDir}}
RUN if [ -f go.mod ]; then go mod download; fi

# Switch to lenient shell so we are more likely to get past failing extra_cmds.
SHELL ["/bin/bash", "-uo", "pipefail", "-c"]

{{.ExtraCmds}}

# Switch back to strict shell after extra_cmds.
SHELL ["/bin/bash", "-euxo", "pipefail", "-c"]

CMD ["/bin/sketch"]
`
| 81 | |
// dockerfileDefaultTmpl is the default Dockerfile template: a FROM line that
// references the published image tagged with dockerfileBaseHash(), followed
// by dockerfileFragment. It is computed once, at package initialization.
var dockerfileDefaultTmpl = "FROM " + dockerImgName + ":" + dockerfileBaseHash() + "\n" + dockerfileFragment
| David Crawshaw | 1112949 | 2025-04-25 20:41:53 -0700 | [diff] [blame] | 83 | |
| David Crawshaw | 2a5bd6d | 2025-04-30 14:29:46 -0700 | [diff] [blame] | 84 | func readPublishedTags() ([]string, error) { |
| 85 | req, err := http.NewRequest("GET", "https://ghcr.io/token?service=ghcr.io&scope=repository:"+dockerImgRepo+":pull", nil) |
| 86 | if err != nil { |
| 87 | return nil, fmt.Errorf("token: %w", err) |
| 88 | } |
| 89 | res, err := http.DefaultClient.Do(req) |
| 90 | if err != nil { |
| 91 | return nil, fmt.Errorf("token: %w", err) |
| 92 | } |
| 93 | body, err := io.ReadAll(res.Body) |
| 94 | res.Body.Close() |
| 95 | if err != nil || res.StatusCode != 200 { |
| 96 | return nil, fmt.Errorf("token: %d: %s: %w", res.StatusCode, body, err) |
| 97 | } |
| 98 | var tokenBody struct { |
| 99 | Token string `json:"token"` |
| 100 | } |
| 101 | if err := json.Unmarshal(body, &tokenBody); err != nil { |
| 102 | return nil, fmt.Errorf("token: %w: %s", err, body) |
| 103 | } |
| 104 | |
| 105 | req, err = http.NewRequest("GET", "https://ghcr.io/v2/"+dockerImgRepo+"/tags/list", nil) |
| 106 | if err != nil { |
| 107 | return nil, fmt.Errorf("tags: %w", err) |
| 108 | } |
| 109 | req.Header.Set("Authorization", "Bearer "+tokenBody.Token) |
| 110 | res, err = http.DefaultClient.Do(req) |
| 111 | if err != nil { |
| 112 | return nil, fmt.Errorf("tags: %w", err) |
| 113 | } |
| 114 | body, err = io.ReadAll(res.Body) |
| 115 | res.Body.Close() |
| 116 | if err != nil || res.StatusCode != 200 { |
| 117 | return nil, fmt.Errorf("tags: %d: %s: %w", res.StatusCode, body, err) |
| 118 | } |
| 119 | var tags struct { |
| 120 | Tags []string `json:"tags"` |
| 121 | } |
| 122 | if err := json.Unmarshal(body, &tags); err != nil { |
| 123 | return nil, fmt.Errorf("tags: %w: %s", err, body) |
| 124 | } |
| 125 | return tags.Tags, nil |
| 126 | } |
| 127 | |
| 128 | func checkTagExists(tag string) error { |
| 129 | tags, err := readPublishedTags() |
| 130 | if err != nil { |
| 131 | return fmt.Errorf("check tag exists: %w", err) |
| 132 | } |
| 133 | for _, t := range tags { |
| 134 | if t == tag { |
| 135 | return nil // found it |
| 136 | } |
| 137 | } |
| 138 | return fmt.Errorf("check tag exists: %q not found in %v", tag, tags) |
| 139 | } |
| David Crawshaw | be10fa9 | 2025-04-18 01:16:00 -0700 | [diff] [blame] | 140 | |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 141 | // createDockerfile creates a Dockerfile for a git repo. |
| 142 | // It expects the relevant initFiles to have been provided. |
| 143 | // If the sketch binary is being executed in a sub-directory of the repository, |
| 144 | // the relative path is provided on subPathWorkingDir. |
| Pokey Rule | c31e296 | 2025-05-13 10:53:33 +0000 | [diff] [blame] | 145 | func createDockerfile(ctx context.Context, srv llm.Service, initFiles map[string]string, subPathWorkingDir string, verbose bool) (string, error) { |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 146 | if subPathWorkingDir == "." { |
| 147 | subPathWorkingDir = "" |
| 148 | } else if subPathWorkingDir != "" && subPathWorkingDir[0] != '/' { |
| 149 | subPathWorkingDir = "/" + subPathWorkingDir |
| 150 | } |
| 151 | toolCalled := false |
| David Crawshaw | 2a5bd6d | 2025-04-30 14:29:46 -0700 | [diff] [blame] | 152 | var dockerfileExtraCmds string |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 153 | runDockerfile := func(ctx context.Context, input json.RawMessage) ([]llm.Content, error) { |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 154 | // TODO: unmarshal straight into a struct |
| 155 | var m map[string]any |
| 156 | if err := json.Unmarshal(input, &m); err != nil { |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 157 | return nil, fmt.Errorf(`input=%[1]v (%[1]T), wanted a map[string]any, got: %w`, input, err) |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 158 | } |
| 159 | var ok bool |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 160 | dockerfileExtraCmds, ok = m["extra_cmds"].(string) |
| 161 | if !ok { |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 162 | return nil, fmt.Errorf(`input["extra_cmds"]=%[1]v (%[1]T), wanted a string`, m["path"]) |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 163 | } |
| 164 | toolCalled = true |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 165 | return llm.TextContent("OK"), nil |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 166 | } |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 167 | |
| 168 | convo := conversation.New(ctx, srv) |
| 169 | |
| 170 | convo.Tools = []*llm.Tool{{ |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 171 | Name: "dockerfile", |
| 172 | Description: "Helps define a Dockerfile that sets up a dev environment for this project.", |
| 173 | Run: runDockerfile, |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 174 | InputSchema: llm.MustSchema(`{ |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 175 | "type": "object", |
| David Crawshaw | 2a5bd6d | 2025-04-30 14:29:46 -0700 | [diff] [blame] | 176 | "required": ["extra_cmds"], |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 177 | "properties": { |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 178 | "extra_cmds": { |
| 179 | "type": "string", |
| David Crawshaw | 53b02a6 | 2025-05-12 14:46:29 -0700 | [diff] [blame] | 180 | "description": "Extra dockerfile commands to add to the dockerfile. Each command should start with RUN." |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 181 | } |
| 182 | } |
| 183 | }`), |
| 184 | }} |
| 185 | |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 186 | // TODO: it's basically impossible to one-shot a python env. We need an agent loop for that. |
| 187 | // Right now the prompt contains a set of half-baked workarounds. |
| 188 | |
| 189 | // If you want to edit the model prompt, run: |
| 190 | // |
| Philip Zeyliger | cc3ba22 | 2025-04-23 14:52:21 -0700 | [diff] [blame] | 191 | // go test ./dockerimg -httprecord ".*" -rewritewant |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 192 | // |
| 193 | // Then look at the changes with: |
| 194 | // |
| Philip Zeyliger | cc3ba22 | 2025-04-23 14:52:21 -0700 | [diff] [blame] | 195 | // git diff dockerimg/testdata/*.dockerfile |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 196 | // |
| 197 | // If the dockerfile changes are a strict improvement, commit all the changes. |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 198 | msg := llm.Message{ |
| 199 | Role: llm.MessageRoleUser, |
| 200 | Content: []llm.Content{{ |
| 201 | Type: llm.ContentTypeText, |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 202 | Text: ` |
| 203 | Call the dockerfile tool to create a Dockerfile. |
| 204 | The parameters to dockerfile fill out the From and ExtraCmds |
| 205 | template variables in the following Go template: |
| 206 | |
| David Crawshaw | 2a5bd6d | 2025-04-30 14:29:46 -0700 | [diff] [blame] | 207 | ` + "```\n" + dockerfileBase + dockerfileFragment + "\n```" + ` |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 208 | |
| 209 | In particular: |
| David Crawshaw | 2a5bd6d | 2025-04-30 14:29:46 -0700 | [diff] [blame] | 210 | - Assume it is primarily a Go project. |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 211 | - Python env setup is challenging and often no required, so any RUN commands involving python tooling should be written to let docker build continue if there is a failure. |
| 212 | - Include any tools particular to this repository that can be inferred from the given context. |
| David Crawshaw | 2a5bd6d | 2025-04-30 14:29:46 -0700 | [diff] [blame] | 213 | - Append || true to any apt-get install commands in case the package does not exist. |
| 214 | - MINIMIZE the number of extra_cmds generated. Straightforward environments do not need any. |
| David Crawshaw | 1112949 | 2025-04-25 20:41:53 -0700 | [diff] [blame] | 215 | - Do NOT expose any ports. |
| 216 | - Do NOT generate any CMD or ENTRYPOINT extra commands. |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 217 | `, |
| 218 | }}, |
| 219 | } |
| 220 | if len(initFiles) > 0 { |
| 221 | msg.Content[0].Text += "Here is the content of several files from the repository that may be relevant:\n\n" |
| 222 | } |
| 223 | |
| 224 | for _, name := range slices.Sorted(maps.Keys(initFiles)) { |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 225 | msg.Content = append(msg.Content, llm.StringContent(fmt.Sprintf("Here is the contents %s:\n<file>\n%s\n</file>\n\n", name, initFiles[name]))) |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 226 | } |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 227 | msg.Content = append(msg.Content, llm.StringContent("Now call the dockerfile tool.")) |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 228 | res, err := convo.SendMessage(msg) |
| 229 | if err != nil { |
| 230 | return "", err |
| 231 | } |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 232 | if res.StopReason != llm.StopReasonToolUse { |
| 233 | return "", fmt.Errorf("expected stop reason %q, got %q", llm.StopReasonToolUse, res.StopReason) |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 234 | } |
| Josh Bleecher Snyder | 64f2aa8 | 2025-05-14 18:31:05 +0000 | [diff] [blame] | 235 | _, _, err = convo.ToolResultContents(context.TODO(), res) |
| Pokey Rule | c31e296 | 2025-05-13 10:53:33 +0000 | [diff] [blame] | 236 | if err != nil { |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 237 | return "", err |
| 238 | } |
| Pokey Rule | c31e296 | 2025-05-13 10:53:33 +0000 | [diff] [blame] | 239 | |
| 240 | // Print the LLM response when verbose is enabled |
| 241 | if verbose && len(res.Content) > 0 && res.Content[0].Type == llm.ContentTypeText && res.Content[0].Text != "" { |
| 242 | fmt.Printf("\n<llm_response>\n%s\n</llm_response>\n\n", res.Content[0].Text) |
| 243 | } |
| 244 | |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 245 | if !toolCalled { |
| 246 | return "", fmt.Errorf("no dockerfile returned") |
| 247 | } |
| 248 | |
| David Crawshaw | 2a5bd6d | 2025-04-30 14:29:46 -0700 | [diff] [blame] | 249 | tmpl := dockerfileDefaultTmpl |
| 250 | if tag := dockerfileBaseHash(); checkTagExists(tag) != nil { |
| 251 | // In development, if you edit dockerfileBase but don't release |
| 252 | // (as is reasonable for testing things!) the hash won't exist |
| 253 | // yet. In that case, we skip the sketch image and build it ourselves. |
| 254 | fmt.Printf("published container tag %s:%s missing; building locally\n", dockerImgName, tag) |
| 255 | tmpl = dockerfileBase + dockerfileFragment |
| David Crawshaw | 1112949 | 2025-04-25 20:41:53 -0700 | [diff] [blame] | 256 | } |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 257 | buf := new(bytes.Buffer) |
| David Crawshaw | 1112949 | 2025-04-25 20:41:53 -0700 | [diff] [blame] | 258 | err = template.Must(template.New("dockerfile").Parse(tmpl)).Execute(buf, map[string]string{ |
| Josh Bleecher Snyder | c76a392 | 2025-05-01 01:18:56 +0000 | [diff] [blame] | 259 | "ExtraCmds": dockerfileExtraCmds, |
| 260 | "SubDir": subPathWorkingDir, |
| 261 | "InitFilesHash": hashInitFiles(initFiles), |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 262 | }) |
| 263 | if err != nil { |
| 264 | return "", fmt.Errorf("dockerfile template failed: %w", err) |
| 265 | } |
| 266 | |
| 267 | return buf.String(), nil |
| 268 | } |
| 269 | |
| 270 | // For future reference: we can find the current git branch/checkout with: git symbolic-ref -q --short HEAD || git describe --tags --exact-match 2>/dev/null || git rev-parse HEAD |
| 271 | |
// readInitFiles walks fsys from its root and returns the contents of the
// files that are useful context for generating a Dockerfile, keyed by their
// slash-separated path within fsys.
//
// Two kinds of regular files are collected:
//   - files in the root directory whose name starts with "readme"
//     (case-insensitive);
//   - files under .github/workflows/.
//
// The .git and node_modules directories are skipped entirely. Any error from
// walking or reading is returned as is, with a nil map.
func readInitFiles(fsys fs.FS) (map[string]string, error) {
	result := make(map[string]string)

	err := fs.WalkDir(fsys, ".", func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if d.IsDir() && (d.Name() == ".git" || d.Name() == "node_modules") {
			return fs.SkipDir
		}
		if !d.Type().IsRegular() {
			return nil
		}

		// Case 1: README files in the root directory.
		// Requiring a path without a slash keeps files that merely live under
		// a directory named "readme..." from being picked up.
		// TODO: find README files between the .git root (where we start)
		// and the dir that sketch was initialized. This needs more info
		// plumbed to this function.
		isRootReadme := !strings.Contains(path, "/") && strings.HasPrefix(strings.ToLower(path), "readme")

		// Case 2: GitHub workflow files.
		isWorkflow := strings.HasPrefix(path, ".github/workflows/")

		if !isRootReadme && !isWorkflow {
			return nil
		}
		content, err := fs.ReadFile(fsys, path)
		if err != nil {
			return err
		}
		result[path] = string(content)
		return nil
	})
	if err != nil {
		return nil, err
	}
	return result, nil
}