blob: 9c145c1969a9eee8ce72a93390fb01ec961002a7 [file] [log] [blame]
Earl Lee2e463fb2025-04-17 11:22:22 -07001package dockerimg
2
3import (
4 "bytes"
5 "context"
6 "crypto/sha256"
7 "encoding/hex"
8 "encoding/json"
9 "fmt"
David Crawshaw2a5bd6d2025-04-30 14:29:46 -070010 "io"
Earl Lee2e463fb2025-04-17 11:22:22 -070011 "io/fs"
12 "maps"
13 "net/http"
14 "slices"
15 "strings"
16 "text/template"
17
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070018 "sketch.dev/llm"
19 "sketch.dev/llm/conversation"
Earl Lee2e463fb2025-04-17 11:22:22 -070020)
21
22func hashInitFiles(initFiles map[string]string) string {
23 h := sha256.New()
24 for _, path := range slices.Sorted(maps.Keys(initFiles)) {
25 fmt.Fprintf(h, "%s\n%s\n\n", path, initFiles[path])
26 }
David Crawshaw2a5bd6d2025-04-30 14:29:46 -070027 fmt.Fprintf(h, "docker template\n%s\n", dockerfileDefaultTmpl)
Earl Lee2e463fb2025-04-17 11:22:22 -070028 return hex.EncodeToString(h.Sum(nil))
29}
30
David Crawshaw11129492025-04-25 20:41:53 -070031// DefaultImage is intended to ONLY be used by the pushdockerimg.go script.
David Crawshaw2a5bd6d2025-04-30 14:29:46 -070032func DefaultImage() (name, dockerfile, tag string) {
33 return dockerImgName, dockerfileBase, dockerfileBaseHash()
David Crawshaw11129492025-04-25 20:41:53 -070034}
35
// Coordinates of the published base image.
const (
	// dockerImgRepo is the repository path used in the ghcr.io token and
	// tag-listing requests made by readPublishedTags.
	dockerImgRepo = "boldsoftware/sketch"
	// dockerImgName is the untagged image reference on ghcr.io; a tag is
	// appended to it to form the FROM line of the default template.
	dockerImgName = "ghcr.io/" + dockerImgRepo
)
David Crawshaw5bff6502025-04-26 09:11:40 -070040
David Crawshaw2a5bd6d2025-04-30 14:29:46 -070041func dockerfileBaseHash() string {
42 h := sha256.New()
43 io.WriteString(h, dockerfileBase)
44 return hex.EncodeToString(h.Sum(nil))[:32]
45}
David Crawshaw11129492025-04-25 20:41:53 -070046
// tmpSketchDockerfile is the name of a file that dockerfileFragment removes
// from /app immediately after copying the build context into the image.
// NOTE(review): presumably the generated Dockerfile is written into the build
// context under this name by the caller — confirm against the build code.
const tmpSketchDockerfile = "tmp-sketch-dockerfile"
48
// dockerfileBase is the Dockerfile text for the shared base image: a
// golang:1.24-bookworm image with a set of apt packages and Go tools
// installed.
//
// The image tag is derived from a hash of this exact text (see
// dockerfileBaseHash), so any change here, including whitespace or comments,
// produces a different tag.
const dockerfileBase = `FROM golang:1.24-bookworm

# Switch from dash to bash by default.
SHELL ["/bin/bash", "-euxo", "pipefail", "-c"]

# attempt to keep package installs lean
RUN printf '%s\n' \
 'path-exclude=/usr/share/man/*' \
 'path-exclude=/usr/share/doc/*' \
 'path-exclude=/usr/share/doc-base/*' \
 'path-exclude=/usr/share/info/*' \
 'path-exclude=/usr/share/locale/*' \
 'path-exclude=/usr/share/groff/*' \
 'path-exclude=/usr/share/lintian/*' \
 'path-exclude=/usr/share/zoneinfo/*' \
 > /etc/dpkg/dpkg.cfg.d/01_nodoc

RUN apt-get update; \
 apt-get install -y --no-install-recommends \
 git jq sqlite3 npm nodejs gh ripgrep fzf python3 curl vim chromium && \
 apt-get clean && \
 rm -rf /var/lib/apt/lists/* && \
 rm -rf /usr/share/{doc,doc-base,info,lintian,man,groff,locale,zoneinfo}/*

ENV PATH="$GOPATH/bin:$PATH"

# While these binaries install generally useful supporting packages,
# the specific versions are rarely what a user wants so there is no
# point polluting the base image module with them.

RUN go install golang.org/x/tools/cmd/goimports@latest; \
 go install golang.org/x/tools/gopls@latest; \
 go install mvdan.cc/gofumpt@latest; \
 go clean -cache -testcache -modcache

ENV GOTOOLCHAIN=auto
ENV SKETCH=1

RUN mkdir -p /root/.cache/sketch/webui
`
David Crawshawbe10fa92025-04-18 01:16:00 -070089
// dockerfileFragment is the per-repository tail of the generated Dockerfile.
// It is a text/template body, appended after either a FROM line or the full
// dockerfileBase, and is executed by createDockerfile with these fields:
//
//   - InitFilesHash: recorded in the sketch_context label.
//   - SubDir: appended to /app to form the working directory.
//   - ExtraCmds: additional Dockerfile commands, run under a lenient shell.
const dockerfileFragment = `
ARG GIT_USER_EMAIL
ARG GIT_USER_NAME

RUN git config --global user.email "$GIT_USER_EMAIL" && \
 git config --global user.name "$GIT_USER_NAME" && \
 git config --global http.postBuffer 524288000

LABEL sketch_context="{{.InitFilesHash}}"
COPY . /app
RUN rm -f /app/` + tmpSketchDockerfile + `

WORKDIR /app{{.SubDir}}
RUN if [ -f go.mod ]; then go mod download; fi

# Switch to lenient shell so we are more likely to get past failing extra_cmds.
SHELL ["/bin/bash", "-uo", "pipefail", "-c"]

{{.ExtraCmds}}

# Switch back to strict shell after extra_cmds.
SHELL ["/bin/bash", "-euxo", "pipefail", "-c"]

CMD ["/bin/sketch"]
`
115
// dockerfileDefaultTmpl is the default Dockerfile template: a FROM line that
// references the published base image, tagged with dockerfileBaseHash,
// followed by dockerfileFragment. It is a var rather than a const because the
// tag is computed by a function call during package initialization.
var dockerfileDefaultTmpl = "FROM " + dockerImgName + ":" + dockerfileBaseHash() + "\n" + dockerfileFragment
David Crawshaw11129492025-04-25 20:41:53 -0700117
David Crawshaw2a5bd6d2025-04-30 14:29:46 -0700118func readPublishedTags() ([]string, error) {
119 req, err := http.NewRequest("GET", "https://ghcr.io/token?service=ghcr.io&scope=repository:"+dockerImgRepo+":pull", nil)
120 if err != nil {
121 return nil, fmt.Errorf("token: %w", err)
122 }
123 res, err := http.DefaultClient.Do(req)
124 if err != nil {
125 return nil, fmt.Errorf("token: %w", err)
126 }
127 body, err := io.ReadAll(res.Body)
128 res.Body.Close()
129 if err != nil || res.StatusCode != 200 {
130 return nil, fmt.Errorf("token: %d: %s: %w", res.StatusCode, body, err)
131 }
132 var tokenBody struct {
133 Token string `json:"token"`
134 }
135 if err := json.Unmarshal(body, &tokenBody); err != nil {
136 return nil, fmt.Errorf("token: %w: %s", err, body)
137 }
138
139 req, err = http.NewRequest("GET", "https://ghcr.io/v2/"+dockerImgRepo+"/tags/list", nil)
140 if err != nil {
141 return nil, fmt.Errorf("tags: %w", err)
142 }
143 req.Header.Set("Authorization", "Bearer "+tokenBody.Token)
144 res, err = http.DefaultClient.Do(req)
145 if err != nil {
146 return nil, fmt.Errorf("tags: %w", err)
147 }
148 body, err = io.ReadAll(res.Body)
149 res.Body.Close()
150 if err != nil || res.StatusCode != 200 {
151 return nil, fmt.Errorf("tags: %d: %s: %w", res.StatusCode, body, err)
152 }
153 var tags struct {
154 Tags []string `json:"tags"`
155 }
156 if err := json.Unmarshal(body, &tags); err != nil {
157 return nil, fmt.Errorf("tags: %w: %s", err, body)
158 }
159 return tags.Tags, nil
160}
161
162func checkTagExists(tag string) error {
163 tags, err := readPublishedTags()
164 if err != nil {
165 return fmt.Errorf("check tag exists: %w", err)
166 }
167 for _, t := range tags {
168 if t == tag {
169 return nil // found it
170 }
171 }
172 return fmt.Errorf("check tag exists: %q not found in %v", tag, tags)
173}
David Crawshawbe10fa92025-04-18 01:16:00 -0700174
Earl Lee2e463fb2025-04-17 11:22:22 -0700175// createDockerfile creates a Dockerfile for a git repo.
176// It expects the relevant initFiles to have been provided.
177// If the sketch binary is being executed in a sub-directory of the repository,
178// the relative path is provided on subPathWorkingDir.
Pokey Rulec31e2962025-05-13 10:53:33 +0000179func createDockerfile(ctx context.Context, srv llm.Service, initFiles map[string]string, subPathWorkingDir string, verbose bool) (string, error) {
Earl Lee2e463fb2025-04-17 11:22:22 -0700180 if subPathWorkingDir == "." {
181 subPathWorkingDir = ""
182 } else if subPathWorkingDir != "" && subPathWorkingDir[0] != '/' {
183 subPathWorkingDir = "/" + subPathWorkingDir
184 }
185 toolCalled := false
David Crawshaw2a5bd6d2025-04-30 14:29:46 -0700186 var dockerfileExtraCmds string
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700187 runDockerfile := func(ctx context.Context, input json.RawMessage) ([]llm.Content, error) {
Earl Lee2e463fb2025-04-17 11:22:22 -0700188 // TODO: unmarshal straight into a struct
189 var m map[string]any
190 if err := json.Unmarshal(input, &m); err != nil {
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700191 return nil, fmt.Errorf(`input=%[1]v (%[1]T), wanted a map[string]any, got: %w`, input, err)
Earl Lee2e463fb2025-04-17 11:22:22 -0700192 }
193 var ok bool
Earl Lee2e463fb2025-04-17 11:22:22 -0700194 dockerfileExtraCmds, ok = m["extra_cmds"].(string)
195 if !ok {
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700196 return nil, fmt.Errorf(`input["extra_cmds"]=%[1]v (%[1]T), wanted a string`, m["path"])
Earl Lee2e463fb2025-04-17 11:22:22 -0700197 }
198 toolCalled = true
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700199 return llm.TextContent("OK"), nil
Earl Lee2e463fb2025-04-17 11:22:22 -0700200 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700201
202 convo := conversation.New(ctx, srv)
203
204 convo.Tools = []*llm.Tool{{
Earl Lee2e463fb2025-04-17 11:22:22 -0700205 Name: "dockerfile",
206 Description: "Helps define a Dockerfile that sets up a dev environment for this project.",
207 Run: runDockerfile,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700208 InputSchema: llm.MustSchema(`{
Earl Lee2e463fb2025-04-17 11:22:22 -0700209 "type": "object",
David Crawshaw2a5bd6d2025-04-30 14:29:46 -0700210 "required": ["extra_cmds"],
Earl Lee2e463fb2025-04-17 11:22:22 -0700211 "properties": {
Earl Lee2e463fb2025-04-17 11:22:22 -0700212 "extra_cmds": {
213 "type": "string",
David Crawshaw53b02a62025-05-12 14:46:29 -0700214 "description": "Extra dockerfile commands to add to the dockerfile. Each command should start with RUN."
Earl Lee2e463fb2025-04-17 11:22:22 -0700215 }
216 }
217}`),
218 }}
219
Earl Lee2e463fb2025-04-17 11:22:22 -0700220 // TODO: it's basically impossible to one-shot a python env. We need an agent loop for that.
221 // Right now the prompt contains a set of half-baked workarounds.
222
223 // If you want to edit the model prompt, run:
224 //
Philip Zeyligercc3ba222025-04-23 14:52:21 -0700225 // go test ./dockerimg -httprecord ".*" -rewritewant
Earl Lee2e463fb2025-04-17 11:22:22 -0700226 //
227 // Then look at the changes with:
228 //
Philip Zeyligercc3ba222025-04-23 14:52:21 -0700229 // git diff dockerimg/testdata/*.dockerfile
Earl Lee2e463fb2025-04-17 11:22:22 -0700230 //
231 // If the dockerfile changes are a strict improvement, commit all the changes.
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700232 msg := llm.Message{
233 Role: llm.MessageRoleUser,
234 Content: []llm.Content{{
235 Type: llm.ContentTypeText,
Earl Lee2e463fb2025-04-17 11:22:22 -0700236 Text: `
237Call the dockerfile tool to create a Dockerfile.
238The parameters to dockerfile fill out the From and ExtraCmds
239template variables in the following Go template:
240
David Crawshaw2a5bd6d2025-04-30 14:29:46 -0700241` + "```\n" + dockerfileBase + dockerfileFragment + "\n```" + `
Earl Lee2e463fb2025-04-17 11:22:22 -0700242
243In particular:
David Crawshaw2a5bd6d2025-04-30 14:29:46 -0700244- Assume it is primarily a Go project.
Earl Lee2e463fb2025-04-17 11:22:22 -0700245- Python env setup is challenging and often no required, so any RUN commands involving python tooling should be written to let docker build continue if there is a failure.
246- Include any tools particular to this repository that can be inferred from the given context.
David Crawshaw2a5bd6d2025-04-30 14:29:46 -0700247- Append || true to any apt-get install commands in case the package does not exist.
248- MINIMIZE the number of extra_cmds generated. Straightforward environments do not need any.
David Crawshaw11129492025-04-25 20:41:53 -0700249- Do NOT expose any ports.
250- Do NOT generate any CMD or ENTRYPOINT extra commands.
Earl Lee2e463fb2025-04-17 11:22:22 -0700251`,
252 }},
253 }
254 if len(initFiles) > 0 {
255 msg.Content[0].Text += "Here is the content of several files from the repository that may be relevant:\n\n"
256 }
257
258 for _, name := range slices.Sorted(maps.Keys(initFiles)) {
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700259 msg.Content = append(msg.Content, llm.StringContent(fmt.Sprintf("Here is the contents %s:\n<file>\n%s\n</file>\n\n", name, initFiles[name])))
Earl Lee2e463fb2025-04-17 11:22:22 -0700260 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700261 msg.Content = append(msg.Content, llm.StringContent("Now call the dockerfile tool."))
Earl Lee2e463fb2025-04-17 11:22:22 -0700262 res, err := convo.SendMessage(msg)
263 if err != nil {
264 return "", err
265 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700266 if res.StopReason != llm.StopReasonToolUse {
267 return "", fmt.Errorf("expected stop reason %q, got %q", llm.StopReasonToolUse, res.StopReason)
Earl Lee2e463fb2025-04-17 11:22:22 -0700268 }
Josh Bleecher Snyder64f2aa82025-05-14 18:31:05 +0000269 _, _, err = convo.ToolResultContents(context.TODO(), res)
Pokey Rulec31e2962025-05-13 10:53:33 +0000270 if err != nil {
Earl Lee2e463fb2025-04-17 11:22:22 -0700271 return "", err
272 }
Pokey Rulec31e2962025-05-13 10:53:33 +0000273
274 // Print the LLM response when verbose is enabled
275 if verbose && len(res.Content) > 0 && res.Content[0].Type == llm.ContentTypeText && res.Content[0].Text != "" {
276 fmt.Printf("\n<llm_response>\n%s\n</llm_response>\n\n", res.Content[0].Text)
277 }
278
Earl Lee2e463fb2025-04-17 11:22:22 -0700279 if !toolCalled {
280 return "", fmt.Errorf("no dockerfile returned")
281 }
282
David Crawshaw2a5bd6d2025-04-30 14:29:46 -0700283 tmpl := dockerfileDefaultTmpl
284 if tag := dockerfileBaseHash(); checkTagExists(tag) != nil {
285 // In development, if you edit dockerfileBase but don't release
286 // (as is reasonable for testing things!) the hash won't exist
287 // yet. In that case, we skip the sketch image and build it ourselves.
288 fmt.Printf("published container tag %s:%s missing; building locally\n", dockerImgName, tag)
289 tmpl = dockerfileBase + dockerfileFragment
David Crawshaw11129492025-04-25 20:41:53 -0700290 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700291 buf := new(bytes.Buffer)
David Crawshaw11129492025-04-25 20:41:53 -0700292 err = template.Must(template.New("dockerfile").Parse(tmpl)).Execute(buf, map[string]string{
Josh Bleecher Snyderc76a3922025-05-01 01:18:56 +0000293 "ExtraCmds": dockerfileExtraCmds,
294 "SubDir": subPathWorkingDir,
295 "InitFilesHash": hashInitFiles(initFiles),
Earl Lee2e463fb2025-04-17 11:22:22 -0700296 })
297 if err != nil {
298 return "", fmt.Errorf("dockerfile template failed: %w", err)
299 }
300
301 return buf.String(), nil
302}
303
304// For future reference: we can find the current git branch/checkout with: git symbolic-ref -q --short HEAD || git describe --tags --exact-match 2>/dev/null || git rev-parse HEAD
305
// readInitFiles walks fsys from its root and returns the contents of the
// files used to seed Dockerfile generation, keyed by slash-separated path.
//
// Two kinds of regular files are collected:
//   - files in the root directory whose name starts with "readme"
//     (case-insensitive), and
//   - files under .github/workflows/.
//
// The .git and node_modules directories are skipped entirely. Any error from
// walking or reading aborts the walk and is returned.
func readInitFiles(fsys fs.FS) (map[string]string, error) {
	result := make(map[string]string)

	err := fs.WalkDir(fsys, ".", func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if d.IsDir() {
			if d.Name() == ".git" || d.Name() == "node_modules" {
				return fs.SkipDir
			}
			return nil
		}
		if !d.Type().IsRegular() || !isInitFile(path) {
			return nil
		}
		content, err := fs.ReadFile(fsys, path)
		if err != nil {
			return err
		}
		result[path] = string(content)
		return nil
	})
	if err != nil {
		return nil, err
	}
	return result, nil
}

// isInitFile reports whether the slash-separated path, relative to the walk
// root, names a file that readInitFiles should collect.
func isInitFile(path string) bool {
	// Case 1: README files in the root directory. The check for "/" keeps
	// files inside a directory whose name merely starts with "readme"
	// (e.g. readme-assets/logo.png) from being treated as README files.
	// TODO: find README files between the .git root (where we start)
	// and the dir that sketch was initialized. This needs more info
	// plumbed to this function.
	if !strings.Contains(path, "/") && strings.HasPrefix(strings.ToLower(path), "readme") {
		return true
	}

	// Case 2: GitHub workflow files.
	return strings.HasPrefix(path, ".github/workflows/")
}