blob: 83430050f5b5be06db8dadc98da0fc2abce5e7d4 [file] [log] [blame]
Earl Lee2e463fb2025-04-17 11:22:22 -07001package dockerimg
2
3import (
4 "bytes"
5 "context"
6 "crypto/sha256"
Philip Zeyliger9df94b52025-05-18 03:43:14 +00007 _ "embed" // Using underscore import to keep embed package for go:embed directive
Earl Lee2e463fb2025-04-17 11:22:22 -07008 "encoding/hex"
9 "encoding/json"
10 "fmt"
David Crawshaw2a5bd6d2025-04-30 14:29:46 -070011 "io"
Earl Lee2e463fb2025-04-17 11:22:22 -070012 "io/fs"
13 "maps"
14 "net/http"
15 "slices"
16 "strings"
17 "text/template"
18
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070019 "sketch.dev/llm"
20 "sketch.dev/llm/conversation"
Earl Lee2e463fb2025-04-17 11:22:22 -070021)
22
23func hashInitFiles(initFiles map[string]string) string {
24 h := sha256.New()
25 for _, path := range slices.Sorted(maps.Keys(initFiles)) {
26 fmt.Fprintf(h, "%s\n%s\n\n", path, initFiles[path])
27 }
David Crawshaw2a5bd6d2025-04-30 14:29:46 -070028 fmt.Fprintf(h, "docker template\n%s\n", dockerfileDefaultTmpl)
Earl Lee2e463fb2025-04-17 11:22:22 -070029 return hex.EncodeToString(h.Sum(nil))
30}
31
David Crawshaw11129492025-04-25 20:41:53 -070032// DefaultImage is intended to ONLY be used by the pushdockerimg.go script.
David Crawshaw2a5bd6d2025-04-30 14:29:46 -070033func DefaultImage() (name, dockerfile, tag string) {
34 return dockerImgName, dockerfileBase, dockerfileBaseHash()
David Crawshaw11129492025-04-25 20:41:53 -070035}
36
// dockerImgRepo is the repository path of the sketch image within the
// container registry.
const dockerImgRepo = "boldsoftware/sketch"

// dockerImgName is the untagged image reference on ghcr.io.
const dockerImgName = "ghcr.io/" + dockerImgRepo
David Crawshaw5bff6502025-04-26 09:11:40 -070041
David Crawshaw2a5bd6d2025-04-30 14:29:46 -070042func dockerfileBaseHash() string {
43 h := sha256.New()
44 io.WriteString(h, dockerfileBase)
45 return hex.EncodeToString(h.Sum(nil))[:32]
46}
David Crawshaw11129492025-04-25 20:41:53 -070047
David Crawshaw8fd51042025-05-05 12:52:43 -070048const tmpSketchDockerfile = "tmp-sketch-dockerfile"
49
Philip Zeyliger9df94b52025-05-18 03:43:14 +000050//go:embed Dockerfile.base
51var dockerfileBaseData []byte
David Crawshawbe10fa92025-04-18 01:16:00 -070052
Philip Zeyliger9df94b52025-05-18 03:43:14 +000053// dockerfileBase is the content of the base Dockerfile
54var dockerfileBase = string(dockerfileBaseData)
David Crawshawbe10fa92025-04-18 01:16:00 -070055
// dockerfileFragment is the text/template body appended after the base image
// section of the generated Dockerfile. It configures the git identity from
// build args, copies the build context into /app (removing the temporary
// Dockerfile named by tmpSketchDockerfile), sets WORKDIR to /app{{.SubDir}},
// downloads Go modules when a go.mod is present, inserts {{.ExtraCmds}} under
// a lenient shell so failing extra commands are less likely to abort the
// build, restores the strict shell, and sets CMD to /bin/sketch.
// The template variables used are InitFilesHash, SubDir and ExtraCmds.
David Crawshaw11129492025-04-25 20:41:53 -070056const dockerfileFragment = `
David Crawshawbe10fa92025-04-18 01:16:00 -070057ARG GIT_USER_EMAIL
58ARG GIT_USER_NAME
59
60RUN git config --global user.email "$GIT_USER_EMAIL" && \
David Crawshawca535582025-05-03 13:04:34 -070061 git config --global user.name "$GIT_USER_NAME" && \
62 git config --global http.postBuffer 524288000
David Crawshawbe10fa92025-04-18 01:16:00 -070063
Josh Bleecher Snyderc76a3922025-05-01 01:18:56 +000064LABEL sketch_context="{{.InitFilesHash}}"
David Crawshawbe10fa92025-04-18 01:16:00 -070065COPY . /app
David Crawshaw8fd51042025-05-05 12:52:43 -070066RUN rm -f /app/` + tmpSketchDockerfile + `
David Crawshawbe10fa92025-04-18 01:16:00 -070067
68WORKDIR /app{{.SubDir}}
69RUN if [ -f go.mod ]; then go mod download; fi
70
David Crawshawb2064de2025-05-05 09:12:19 -070071# Switch to lenient shell so we are more likely to get past failing extra_cmds.
72SHELL ["/bin/bash", "-uo", "pipefail", "-c"]
73
David Crawshaw11129492025-04-25 20:41:53 -070074{{.ExtraCmds}}
75
David Crawshawb2064de2025-05-05 09:12:19 -070076# Switch back to strict shell after extra_cmds.
77SHELL ["/bin/bash", "-euxo", "pipefail", "-c"]
78
David Crawshaw11129492025-04-25 20:41:53 -070079CMD ["/bin/sketch"]
80`
81
David Crawshaw2a5bd6d2025-04-30 14:29:46 -070082var dockerfileDefaultTmpl = "FROM " + dockerImgName + ":" + dockerfileBaseHash() + "\n" + dockerfileFragment
David Crawshaw11129492025-04-25 20:41:53 -070083
David Crawshaw2a5bd6d2025-04-30 14:29:46 -070084func readPublishedTags() ([]string, error) {
85 req, err := http.NewRequest("GET", "https://ghcr.io/token?service=ghcr.io&scope=repository:"+dockerImgRepo+":pull", nil)
86 if err != nil {
87 return nil, fmt.Errorf("token: %w", err)
88 }
89 res, err := http.DefaultClient.Do(req)
90 if err != nil {
91 return nil, fmt.Errorf("token: %w", err)
92 }
93 body, err := io.ReadAll(res.Body)
94 res.Body.Close()
95 if err != nil || res.StatusCode != 200 {
96 return nil, fmt.Errorf("token: %d: %s: %w", res.StatusCode, body, err)
97 }
98 var tokenBody struct {
99 Token string `json:"token"`
100 }
101 if err := json.Unmarshal(body, &tokenBody); err != nil {
102 return nil, fmt.Errorf("token: %w: %s", err, body)
103 }
104
105 req, err = http.NewRequest("GET", "https://ghcr.io/v2/"+dockerImgRepo+"/tags/list", nil)
106 if err != nil {
107 return nil, fmt.Errorf("tags: %w", err)
108 }
109 req.Header.Set("Authorization", "Bearer "+tokenBody.Token)
110 res, err = http.DefaultClient.Do(req)
111 if err != nil {
112 return nil, fmt.Errorf("tags: %w", err)
113 }
114 body, err = io.ReadAll(res.Body)
115 res.Body.Close()
116 if err != nil || res.StatusCode != 200 {
117 return nil, fmt.Errorf("tags: %d: %s: %w", res.StatusCode, body, err)
118 }
119 var tags struct {
120 Tags []string `json:"tags"`
121 }
122 if err := json.Unmarshal(body, &tags); err != nil {
123 return nil, fmt.Errorf("tags: %w: %s", err, body)
124 }
125 return tags.Tags, nil
126}
127
128func checkTagExists(tag string) error {
129 tags, err := readPublishedTags()
130 if err != nil {
131 return fmt.Errorf("check tag exists: %w", err)
132 }
133 for _, t := range tags {
134 if t == tag {
135 return nil // found it
136 }
137 }
138 return fmt.Errorf("check tag exists: %q not found in %v", tag, tags)
139}
David Crawshawbe10fa92025-04-18 01:16:00 -0700140
Earl Lee2e463fb2025-04-17 11:22:22 -0700141// createDockerfile creates a Dockerfile for a git repo.
142// It expects the relevant initFiles to have been provided.
143// If the sketch binary is being executed in a sub-directory of the repository,
144// the relative path is provided on subPathWorkingDir.
Pokey Rulec31e2962025-05-13 10:53:33 +0000145func createDockerfile(ctx context.Context, srv llm.Service, initFiles map[string]string, subPathWorkingDir string, verbose bool) (string, error) {
Earl Lee2e463fb2025-04-17 11:22:22 -0700146 if subPathWorkingDir == "." {
147 subPathWorkingDir = ""
148 } else if subPathWorkingDir != "" && subPathWorkingDir[0] != '/' {
149 subPathWorkingDir = "/" + subPathWorkingDir
150 }
151 toolCalled := false
David Crawshaw2a5bd6d2025-04-30 14:29:46 -0700152 var dockerfileExtraCmds string
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700153 runDockerfile := func(ctx context.Context, input json.RawMessage) ([]llm.Content, error) {
Earl Lee2e463fb2025-04-17 11:22:22 -0700154 // TODO: unmarshal straight into a struct
155 var m map[string]any
156 if err := json.Unmarshal(input, &m); err != nil {
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700157 return nil, fmt.Errorf(`input=%[1]v (%[1]T), wanted a map[string]any, got: %w`, input, err)
Earl Lee2e463fb2025-04-17 11:22:22 -0700158 }
159 var ok bool
Earl Lee2e463fb2025-04-17 11:22:22 -0700160 dockerfileExtraCmds, ok = m["extra_cmds"].(string)
161 if !ok {
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700162 return nil, fmt.Errorf(`input["extra_cmds"]=%[1]v (%[1]T), wanted a string`, m["path"])
Earl Lee2e463fb2025-04-17 11:22:22 -0700163 }
164 toolCalled = true
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700165 return llm.TextContent("OK"), nil
Earl Lee2e463fb2025-04-17 11:22:22 -0700166 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700167
philip.zeyliger882e7ea2025-06-20 14:31:16 +0000168 convo := conversation.New(ctx, srv, nil)
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700169
170 convo.Tools = []*llm.Tool{{
Earl Lee2e463fb2025-04-17 11:22:22 -0700171 Name: "dockerfile",
172 Description: "Helps define a Dockerfile that sets up a dev environment for this project.",
173 Run: runDockerfile,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700174 InputSchema: llm.MustSchema(`{
Earl Lee2e463fb2025-04-17 11:22:22 -0700175 "type": "object",
David Crawshaw2a5bd6d2025-04-30 14:29:46 -0700176 "required": ["extra_cmds"],
Earl Lee2e463fb2025-04-17 11:22:22 -0700177 "properties": {
Earl Lee2e463fb2025-04-17 11:22:22 -0700178 "extra_cmds": {
179 "type": "string",
David Crawshaw53b02a62025-05-12 14:46:29 -0700180 "description": "Extra dockerfile commands to add to the dockerfile. Each command should start with RUN."
Earl Lee2e463fb2025-04-17 11:22:22 -0700181 }
182 }
183}`),
184 }}
185
Earl Lee2e463fb2025-04-17 11:22:22 -0700186 // TODO: it's basically impossible to one-shot a python env. We need an agent loop for that.
187 // Right now the prompt contains a set of half-baked workarounds.
188
189 // If you want to edit the model prompt, run:
190 //
Philip Zeyligercc3ba222025-04-23 14:52:21 -0700191 // go test ./dockerimg -httprecord ".*" -rewritewant
Earl Lee2e463fb2025-04-17 11:22:22 -0700192 //
193 // Then look at the changes with:
194 //
Philip Zeyligercc3ba222025-04-23 14:52:21 -0700195 // git diff dockerimg/testdata/*.dockerfile
Earl Lee2e463fb2025-04-17 11:22:22 -0700196 //
197 // If the dockerfile changes are a strict improvement, commit all the changes.
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700198 msg := llm.Message{
199 Role: llm.MessageRoleUser,
200 Content: []llm.Content{{
201 Type: llm.ContentTypeText,
Earl Lee2e463fb2025-04-17 11:22:22 -0700202 Text: `
203Call the dockerfile tool to create a Dockerfile.
204The parameters to dockerfile fill out the From and ExtraCmds
205template variables in the following Go template:
206
David Crawshaw2a5bd6d2025-04-30 14:29:46 -0700207` + "```\n" + dockerfileBase + dockerfileFragment + "\n```" + `
Earl Lee2e463fb2025-04-17 11:22:22 -0700208
209In particular:
David Crawshaw2a5bd6d2025-04-30 14:29:46 -0700210- Assume it is primarily a Go project.
Earl Lee2e463fb2025-04-17 11:22:22 -0700211- Python env setup is challenging and often no required, so any RUN commands involving python tooling should be written to let docker build continue if there is a failure.
212- Include any tools particular to this repository that can be inferred from the given context.
David Crawshaw2a5bd6d2025-04-30 14:29:46 -0700213- Append || true to any apt-get install commands in case the package does not exist.
214- MINIMIZE the number of extra_cmds generated. Straightforward environments do not need any.
David Crawshaw11129492025-04-25 20:41:53 -0700215- Do NOT expose any ports.
216- Do NOT generate any CMD or ENTRYPOINT extra commands.
Earl Lee2e463fb2025-04-17 11:22:22 -0700217`,
218 }},
219 }
220 if len(initFiles) > 0 {
221 msg.Content[0].Text += "Here is the content of several files from the repository that may be relevant:\n\n"
222 }
223
224 for _, name := range slices.Sorted(maps.Keys(initFiles)) {
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700225 msg.Content = append(msg.Content, llm.StringContent(fmt.Sprintf("Here is the contents %s:\n<file>\n%s\n</file>\n\n", name, initFiles[name])))
Earl Lee2e463fb2025-04-17 11:22:22 -0700226 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700227 msg.Content = append(msg.Content, llm.StringContent("Now call the dockerfile tool."))
Earl Lee2e463fb2025-04-17 11:22:22 -0700228 res, err := convo.SendMessage(msg)
229 if err != nil {
230 return "", err
231 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700232 if res.StopReason != llm.StopReasonToolUse {
233 return "", fmt.Errorf("expected stop reason %q, got %q", llm.StopReasonToolUse, res.StopReason)
Earl Lee2e463fb2025-04-17 11:22:22 -0700234 }
Josh Bleecher Snyder64f2aa82025-05-14 18:31:05 +0000235 _, _, err = convo.ToolResultContents(context.TODO(), res)
Pokey Rulec31e2962025-05-13 10:53:33 +0000236 if err != nil {
Earl Lee2e463fb2025-04-17 11:22:22 -0700237 return "", err
238 }
Pokey Rulec31e2962025-05-13 10:53:33 +0000239
240 // Print the LLM response when verbose is enabled
241 if verbose && len(res.Content) > 0 && res.Content[0].Type == llm.ContentTypeText && res.Content[0].Text != "" {
242 fmt.Printf("\n<llm_response>\n%s\n</llm_response>\n\n", res.Content[0].Text)
243 }
244
Earl Lee2e463fb2025-04-17 11:22:22 -0700245 if !toolCalled {
246 return "", fmt.Errorf("no dockerfile returned")
247 }
248
David Crawshaw2a5bd6d2025-04-30 14:29:46 -0700249 tmpl := dockerfileDefaultTmpl
250 if tag := dockerfileBaseHash(); checkTagExists(tag) != nil {
251 // In development, if you edit dockerfileBase but don't release
252 // (as is reasonable for testing things!) the hash won't exist
253 // yet. In that case, we skip the sketch image and build it ourselves.
254 fmt.Printf("published container tag %s:%s missing; building locally\n", dockerImgName, tag)
255 tmpl = dockerfileBase + dockerfileFragment
David Crawshaw11129492025-04-25 20:41:53 -0700256 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700257 buf := new(bytes.Buffer)
David Crawshaw11129492025-04-25 20:41:53 -0700258 err = template.Must(template.New("dockerfile").Parse(tmpl)).Execute(buf, map[string]string{
Josh Bleecher Snyderc76a3922025-05-01 01:18:56 +0000259 "ExtraCmds": dockerfileExtraCmds,
260 "SubDir": subPathWorkingDir,
261 "InitFilesHash": hashInitFiles(initFiles),
Earl Lee2e463fb2025-04-17 11:22:22 -0700262 })
263 if err != nil {
264 return "", fmt.Errorf("dockerfile template failed: %w", err)
265 }
266
267 return buf.String(), nil
268}
269
270// For future reference: we can find the current git branch/checkout with: git symbolic-ref -q --short HEAD || git describe --tags --exact-match 2>/dev/null || git rev-parse HEAD
271
// readInitFiles walks fsys from its root and returns the contents of the
// files used to seed Dockerfile generation, keyed by slash-separated path.
//
// Collected files are:
//   - README files in the root directory (name starts with "readme",
//     case-insensitively), and
//   - anything under .github/workflows/.
//
// The .git and node_modules directories are skipped entirely, and
// non-regular files are ignored. Any walk or read error aborts the walk and
// is returned.
func readInitFiles(fsys fs.FS) (map[string]string, error) {
	result := make(map[string]string)

	err := fs.WalkDir(fsys, ".", func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if d.IsDir() {
			switch d.Name() {
			case ".git", "node_modules":
				return fs.SkipDir
			}
			return nil
		}
		if !d.Type().IsRegular() || !isInitFile(path) {
			return nil
		}
		content, err := fs.ReadFile(fsys, path)
		if err != nil {
			return err
		}
		result[path] = string(content)
		return nil
	})
	if err != nil {
		return nil, err
	}
	return result, nil
}

// isInitFile reports whether the slash-separated path (relative to the walk
// root) names a file that readInitFiles should collect.
func isInitFile(path string) bool {
	// GitHub workflow files.
	if strings.HasPrefix(path, ".github/workflows/") {
		return true
	}
	// README files in the root directory only. Requiring a path without a
	// separator keeps files that merely live under a directory whose name
	// starts with "readme" from being picked up.
	// TODO: find README files between the .git root (where we start)
	// and the dir that sketch was initialized. This needs more info
	// plumbed to this function.
	return !strings.Contains(path, "/") && strings.HasPrefix(strings.ToLower(path), "readme")
}