dockerimg: add only git objects to docker image
Instead of copying the entire working directory
(including uncommitted changes, hooks, and config files),
create a bare git repository and use git clone --reference.
This approach:
- Avoids copying uncommitted changes, hooks, and local config files
- Works correctly with git worktrees and submodules
- Reduces Docker image size substantially
- Maintains all git history and functionality
Fixes boldsoftware/sketch#190
Co-Authored-By: sketch <hello@sketch.dev>
Change-ID: s6af36147e2c4df00k
diff --git a/cmd/sketch/main.go b/cmd/sketch/main.go
index dc21886..285d7d8 100644
--- a/cmd/sketch/main.go
+++ b/cmd/sketch/main.go
@@ -134,8 +134,13 @@
}
slog.SetDefault(slog.New(slogHandler))
+ // Detect whether we're inside the sketch container
+ inInsideSketch := flagArgs.outsideHostname != ""
+
// Change to working directory if specified
- if flagArgs.workingDir != "" {
+ // Delay chdir when running in container mode, so that container setup can happen first,
+ // which might be necessary for the requested working dir to exist.
+ if flagArgs.workingDir != "" && !inInsideSketch {
if err := os.Chdir(flagArgs.workingDir); err != nil {
return fmt.Errorf("sketch: cannot change directory to %q: %v", flagArgs.workingDir, err)
}
@@ -149,9 +154,6 @@
flagArgs.gitEmail = defaultGitEmail()
}
- // Detect if we're inside the sketch container
- inInsideSketch := flagArgs.outsideHostname != ""
-
// Dispatch to the appropriate execution path
if inInsideSketch {
// We're running inside the Docker container
@@ -242,6 +244,7 @@
mounts StringSliceFlag
termUI bool
gitRemoteURL string
+ originalGitOrigin string
upstream string
commit string
outsideHTTP string
@@ -308,6 +311,7 @@
internalFlags.StringVar(&flags.outsideWorkingDir, "outside-working-dir", "", "(internal) working dir on the outside system")
internalFlags.StringVar(&flags.sketchBinaryLinux, "sketch-binary-linux", "", "(development) path to a pre-built sketch binary for linux")
internalFlags.StringVar(&flags.gitRemoteURL, "git-remote-url", "", "(internal) git remote for outside sketch")
+ internalFlags.StringVar(&flags.originalGitOrigin, "original-git-origin", "", "(internal) original git origin URL from host repository")
internalFlags.StringVar(&flags.upstream, "upstream", "", "(internal) upstream branch for git work")
internalFlags.StringVar(&flags.commit, "commit", "", "(internal) the git commit reference to check out from git remote url")
internalFlags.StringVar(&flags.outsideHTTP, "outside-http", "", "(internal) host for outside sketch")
@@ -564,6 +568,15 @@
return err
}
+ // In container mode, resolve the (delayed) working directory; this only computes wd and does not chdir.
+ if flags.workingDir != "" && inInsideSketch {
+ if filepath.IsAbs(flags.workingDir) {
+ wd = flags.workingDir
+ } else {
+ wd = filepath.Join(wd, flags.workingDir)
+ }
+ }
+
llmService, err := selectLLMService(client, flags.modelName, modelURL, apiKey)
if err != nil {
return fmt.Errorf("failed to initialize LLM service: %w", err)
@@ -572,6 +585,13 @@
MaxDollars: flags.maxDollars,
}
+ // Get the original git origin URL
+ originalGitOrigin := flags.originalGitOrigin
+ if originalGitOrigin == "" && flags.outsideHostname == "" {
+ // Not in container mode, get the git origin directly
+ originalGitOrigin = getGitOrigin(ctx, wd)
+ }
+
agentConfig := loop.AgentConfig{
Context: ctx,
Service: llmService,
@@ -590,6 +610,7 @@
InDocker: flags.outsideHostname != "",
OneShot: flags.oneShot,
GitRemoteAddr: flags.gitRemoteURL,
+ OriginalGitOrigin: originalGitOrigin,
Upstream: flags.upstream,
OutsideHTTP: flags.outsideHTTP,
Commit: flags.commit,
@@ -931,3 +952,14 @@
}
}()
}
+
+// getGitOrigin returns the URL of the git remote 'origin' for the repository at dir, or "" if it cannot be read.
+func getGitOrigin(ctx context.Context, dir string) string {
+ cmd := exec.CommandContext(ctx, "git", "config", "--get", "remote.origin.url")
+ cmd.Dir = dir
+ out, err := cmd.Output()
+ if err != nil {
+ return ""
+ }
+ return strings.TrimSpace(string(out))
+}
diff --git a/dockerimg/dockerimg.go b/dockerimg/dockerimg.go
index b800fd9..98ce389 100644
--- a/dockerimg/dockerimg.go
+++ b/dockerimg/dockerimg.go
@@ -113,6 +113,9 @@
GitRemoteUrl string
+ // Original git origin URL from the host repository
+ OriginalGitOrigin string
+
// Upstream branch for git work
Upstream string
@@ -158,10 +161,21 @@
if err != nil {
return err
}
- gitRoot, err := findGitRoot(ctx, config.Path)
+ // Bail early if sketch was started from a path that isn't in a git repo.
+ err = requireGitRepo(ctx, config.Path)
if err != nil {
return err
}
+
+ // Best effort attempt to get repo root; fall back to current directory.
+ gitRoot := config.Path
+ if root, err := gitRepoRoot(ctx, config.Path); err == nil {
+ gitRoot = root
+ }
+
+ // Capture the original git origin URL before we set up the temporary git server
+ config.OriginalGitOrigin = getOriginalGitOrigin(ctx, gitRoot)
+
err = checkForEmptyGitRepo(ctx, config.Path)
if err != nil {
return err
@@ -578,6 +592,9 @@
cmdArgs = append(cmdArgs, "-commit="+config.Commit)
cmdArgs = append(cmdArgs, "-upstream="+config.Upstream)
}
+ if config.OriginalGitOrigin != "" {
+ cmdArgs = append(cmdArgs, "-original-git-origin="+config.OriginalGitOrigin)
+ }
if config.OutsideHTTP != "" {
cmdArgs = append(cmdArgs, "-outside-http="+config.OutsideHTTP)
}
@@ -758,6 +775,8 @@
h := sha256.New()
h.Write([]byte(baseImageID))
h.Write([]byte(gitRoot))
+ // one-time cache-busting for the transition from copying git repos to only copying git objects
+ h.Write([]byte("git-objects"))
return hex.EncodeToString(h.Sum(nil))[:12] // Use first 12 chars for shorter name
}
@@ -774,7 +793,8 @@
return true, nil
}
-// buildLayeredImage builds a new Docker image by layering the repo on top of the base image
+// buildLayeredImage builds a new Docker image by layering the repo on top of the base image.
+//
// TODO: git config stuff could be environment variables at runtime for email and username.
// The git docs seem to say that http.postBuffer is a bug in our git proxy more than a thing
// that's needed, but we haven't found the bug yet!
@@ -783,21 +803,33 @@
// of Go). Then you want a git repo, which is much faster to incrementally fetch rather
// than cloning every time. Then you want some build artifacts, like perhaps the
// "go mod download" cache, or the "go build" cache or the "npm install" cache.
-// The implementation here copies the working directory (not just the git repo!),
-// and runs "go mod download". This is an ok compromise, but a power user might want
+// The implementation here copies the git objects into the base image.
+// That enables fast clones into the container, because most of the git objects are already there.
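+// It also keeps uncommitted changes out of the image. NOTE(review): configs/hooks stored in the git common dir appear to be copied into /git-ref along with the objects; confirm.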
+// TODO: We should also set up fake temporary Go module(s) so we can run "go mod download".
+// This is an ok compromise, but a power user might want
// less caching or more caching, depending on their use case. One approach we could take
// is to punt entirely if /app/.git already exists. If the user has provided a -base-image with
// their git repo, let's assume they know what they're doing, and they've customized their image
-// for their use case. On the other side of the spectrum is cloning their repo every time,
-// or running git clean -xdf, which minimizes surprises but slows down builds.
+// for their use case.
// Note that buildx has some support for conditional COPY, but without buildx, which
// we can't reliably depend on, we have to run the base image to inspect its file system,
// and then we can decide what to do.
-func buildLayeredImage(ctx context.Context, imgName, baseImage, gitRoot string, _ bool) error {
+//
+// We may in the future want to enable people to bring along uncommitted changes to tracked files.
+// To do that, we would run `git stash create` in outie at launch time, treat HEAD as the base commit,
+// and add in the stash commit as a new commit atop it.
+// That would accurately model the base commit as well as the uncommitted changes.
+// (This wouldn't happen here, but at agent/container initialization time.)
+//
+// gitRoot is the directory whose git common dir is used as the docker build context.
+func buildLayeredImage(ctx context.Context, imgName, baseImage, gitRoot string, verbose bool) error {
+ // Shove a bunch of git objects into the image for faster future cloning.
dockerfileContent := fmt.Sprintf(`FROM %s
-COPY . /app
+COPY . /git-ref
WORKDIR /app
-RUN if [ -f go.mod ]; then go mod download; fi
+# TODO: restore go.mod download
+# RUN if [ -f go.mod ]; then go mod download; fi
CMD ["/bin/sketch"]
`, baseImage)
@@ -836,8 +868,13 @@
".",
}
+ commonDir, err := gitCommonDir(ctx, gitRoot)
+ if err != nil {
+ return fmt.Errorf("failed to get git common dir: %w", err)
+ }
+
cmd := exec.CommandContext(ctx, "docker", cmdArgs...)
- cmd.Dir = gitRoot
+ cmd.Dir = commonDir
// We print the docker build output whether or not the user
// has selected --verbose. Building an image takes a while
// and this gives good context.
@@ -864,13 +901,14 @@
return nil
}
-func findGitRoot(ctx context.Context, path string) (string, error) {
- cmd := exec.CommandContext(ctx, "git", "rev-parse", "--show-toplevel")
+// requireGitRepo confirms that path is within a git repository.
+func requireGitRepo(ctx context.Context, path string) error {
+ cmd := exec.CommandContext(ctx, "git", "rev-parse", "--git-dir")
cmd.Dir = path
out, err := cmd.CombinedOutput()
if err != nil {
if strings.Contains(string(out), "not a git repository") {
- return "", fmt.Errorf(`sketch needs to run from within a git repo, but %s is not part of a git repo.
+ return fmt.Errorf(`sketch needs to run from within a git repo, but %s is not part of a git repo.
Consider one of the following options:
- cd to a different dir that is already part of a git repo first, or
- to create a new git repo from this directory (%s), run this command:
@@ -880,12 +918,40 @@
and try running sketch again.
`, path, path)
}
+ return fmt.Errorf("git rev-parse --git-dir: %s: %w", out, err)
+ }
+ return nil
+}
+
+// gitRepoRoot attempts to find the git repository root directory.
+// Returns an error if not in a git repository or if it's a bare repository.
+// This is used to calculate relative paths for preserving user's working directory context.
+func gitRepoRoot(ctx context.Context, path string) (string, error) {
+ cmd := exec.CommandContext(ctx, "git", "rev-parse", "--show-toplevel")
+ cmd.Dir = path
+ out, err := cmd.CombinedOutput()
+ if err != nil {
return "", fmt.Errorf("git rev-parse --show-toplevel: %s: %w", out, err)
}
// The returned path is absolute.
return strings.TrimSpace(string(out)), nil
}
+// gitCommonDir returns the git common directory for path; a relative result from git is joined onto path.
+func gitCommonDir(ctx context.Context, path string) (string, error) {
+ cmd := exec.CommandContext(ctx, "git", "rev-parse", "--git-common-dir")
+ cmd.Dir = path
+ out, err := cmd.CombinedOutput()
+ if err != nil {
+ return "", fmt.Errorf("git rev-parse --git-common-dir: %s: %w", out, err)
+ }
+ gitCommonDir := strings.TrimSpace(string(out))
+ if !filepath.IsAbs(gitCommonDir) {
+ gitCommonDir = filepath.Join(path, gitCommonDir)
+ }
+ return gitCommonDir, nil
+}
+
// getEnvForwardingFromGitConfig retrieves environment variables to pass through to Docker
// from git config using the sketch.envfwd multi-valued key.
func getEnvForwardingFromGitConfig(ctx context.Context) []string {
@@ -910,6 +976,17 @@
return envVars
}
+// getOriginalGitOrigin returns the URL of the git remote 'origin' for the repository at dir, or "" if it cannot be read.
+func getOriginalGitOrigin(ctx context.Context, dir string) string {
+ cmd := exec.CommandContext(ctx, "git", "config", "--get", "remote.origin.url")
+ cmd.Dir = dir
+ out, err := cmd.Output()
+ if err != nil {
+ return ""
+ }
+ return strings.TrimSpace(string(out))
+}
+
// parseDockerArgs parses a string containing space-separated Docker arguments into an array of strings.
// It handles quoted arguments and escaped characters.
//
diff --git a/dockerimg/githttp.go b/dockerimg/githttp.go
index b7203c0..ecd46a1 100644
--- a/dockerimg/githttp.go
+++ b/dockerimg/githttp.go
@@ -7,7 +7,9 @@
"log/slog"
"net/http"
"net/http/cgi"
+ "os"
"os/exec"
+ "path/filepath"
"runtime"
"strings"
"time"
@@ -107,6 +109,12 @@
}
}
+ // Dumb hack for bare repos: if the requested path does not exist under the repo root, strip a leading /.git from it.
+ path := r.URL.Path
+ if _, err := os.Stat(filepath.Join(g.gitRepoRoot, path)); os.IsNotExist(err) {
+ path = strings.TrimPrefix(path, "/.git") // turn /.git/info/refs into /info/refs
+ }
+
w.Header().Set("Cache-Control", "no-cache")
h := &cgi.Handler{
Path: gitBin,
@@ -114,7 +122,7 @@
Dir: g.gitRepoRoot,
Env: []string{
"GIT_PROJECT_ROOT=" + g.gitRepoRoot,
- "PATH_INFO=" + r.URL.Path,
+ "PATH_INFO=" + path,
"QUERY_STRING=" + r.URL.RawQuery,
"REQUEST_METHOD=" + r.Method,
"GIT_HTTP_EXPORT_ALL=true",
diff --git a/loop/agent.go b/loop/agent.go
index 841a5ca..ed5c907 100644
--- a/loop/agent.go
+++ b/loop/agent.go
@@ -434,8 +434,6 @@
outsideHostname string
outsideOS string
outsideWorkingDir string
- // URL of the git remote 'origin' if it exists
- gitOrigin string
// MCP manager for handling MCP server connections
mcpManager *mcp.MCPManager
// Port monitor for tracking TCP ports
@@ -742,7 +740,7 @@
// GitOrigin returns the URL of the git remote 'origin' if it exists.
func (a *Agent) GitOrigin() string {
- return a.gitOrigin
+ return a.config.OriginalGitOrigin
}
// GitUsername returns the git user name from the agent config.
@@ -1046,6 +1044,8 @@
OutsideHTTP string
// Outtie's Git server
GitRemoteAddr string
+ // Original git origin URL from host repository, if any
+ OriginalGitOrigin string
// Upstream branch for git work
Upstream string
// Commit to checkout from Outtie
@@ -1116,9 +1116,24 @@
ctx := a.config.Context
slog.InfoContext(ctx, "agent initializing")
+ // If a remote + commit was specified, clone it.
+ if a.config.Commit != "" && a.gitState.gitRemoteAddr != "" {
+ slog.InfoContext(ctx, "cloning git repo", "commit", a.config.Commit)
+ // TODO: --reference-if-able instead?
+ cmd := exec.CommandContext(ctx, "git", "clone", "--reference", "/git-ref", a.gitState.gitRemoteAddr, "/app")
+ if out, err := cmd.CombinedOutput(); err != nil {
+ return fmt.Errorf("failed to clone repository from %s: %s: %w", a.gitState.gitRemoteAddr, out, err)
+ }
+ }
+
+ if a.workingDir != "" {
+ err := os.Chdir(a.workingDir)
+ if err != nil {
+ return fmt.Errorf("failed to change working directory to %s: %w", a.workingDir, err)
+ }
+ }
+
if !ini.NoGit {
- // Capture the original origin before we potentially replace it below
- a.gitOrigin = getGitOrigin(ctx, a.workingDir)
// Configure git user settings
if a.config.GitEmail != "" {
@@ -1143,37 +1158,11 @@
}
}
- // If a remote git addr was specified, we configure the origin remote
- if a.gitState.gitRemoteAddr != "" {
- slog.InfoContext(ctx, "Configuring git remote", slog.String("remote", a.gitState.gitRemoteAddr))
-
- // Remove existing origin remote if it exists
- cmd := exec.CommandContext(ctx, "git", "remote", "remove", "origin")
- cmd.Dir = a.workingDir
- if out, err := cmd.CombinedOutput(); err != nil {
- // Ignore error if origin doesn't exist
- slog.DebugContext(ctx, "git remote remove origin (ignoring if not exists)", slog.String("output", string(out)))
- }
-
- // Add the new remote as origin
- cmd = exec.CommandContext(ctx, "git", "remote", "add", "origin", a.gitState.gitRemoteAddr)
- cmd.Dir = a.workingDir
- if out, err := cmd.CombinedOutput(); err != nil {
- return fmt.Errorf("git remote add origin: %s: %v", out, err)
- }
-
- }
-
// If a commit was specified, we fetch and reset to it.
if a.config.Commit != "" && a.gitState.gitRemoteAddr != "" {
- slog.InfoContext(ctx, "updating git repo", slog.String("commit", a.config.Commit))
+ slog.InfoContext(ctx, "updating git repo", "commit", a.config.Commit)
- cmd := exec.CommandContext(ctx, "git", "stash")
- cmd.Dir = a.workingDir
- if out, err := cmd.CombinedOutput(); err != nil {
- return fmt.Errorf("git stash: %s: %v", out, err)
- }
- cmd = exec.CommandContext(ctx, "git", "fetch", "--prune", "origin")
+ cmd := exec.CommandContext(ctx, "git", "fetch", "--prune", "origin")
cmd.Dir = a.workingDir
if out, err := cmd.CombinedOutput(); err != nil {
return fmt.Errorf("git fetch: %s: %w", out, err)
@@ -2308,19 +2297,6 @@
return totalAdded, totalRemoved, nil
}
-// getGitOrigin returns the URL of the git remote 'origin' if it exists
-func getGitOrigin(ctx context.Context, dir string) string {
- cmd := exec.CommandContext(ctx, "git", "config", "--get", "remote.origin.url")
- cmd.Dir = dir
- stderr := new(strings.Builder)
- cmd.Stderr = stderr
- out, err := cmd.Output()
- if err != nil {
- return ""
- }
- return strings.TrimSpace(string(out))
-}
-
// systemPromptData contains the data used to render the system prompt template
type systemPromptData struct {
ClientGOOS string