dockerimg: add only git objects to docker image

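Instead of copying the entire working directory
(including uncommitted changes, hooks, and config files),
create a bare git repository in the image (the host repo's git common
directory, copied to /git-ref) and populate /app at container start
using git clone --reference /git-ref.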

This approach:
- Avoids copying uncommitted changes, hooks, and local config files
- Works correctly with git worktrees and submodules
- Reduces Docker image size substantially
- Maintains all git history and functionality

Fixes boldsoftware/sketch#190

Co-Authored-By: sketch <hello@sketch.dev>
Change-ID: s6af36147e2c4df00k
diff --git a/cmd/sketch/main.go b/cmd/sketch/main.go
index dc21886..285d7d8 100644
--- a/cmd/sketch/main.go
+++ b/cmd/sketch/main.go
@@ -134,8 +134,13 @@
 	}
 	slog.SetDefault(slog.New(slogHandler))
 
+	// Detect whether we're inside the sketch container
+	inInsideSketch := flagArgs.outsideHostname != ""
+
 	// Change to working directory if specified
-	if flagArgs.workingDir != "" {
+	// Delay chdir when running in container mode, so that container setup can happen first,
+	// which might be necessary for the requested working dir to exist.
+	if flagArgs.workingDir != "" && !inInsideSketch {
 		if err := os.Chdir(flagArgs.workingDir); err != nil {
 			return fmt.Errorf("sketch: cannot change directory to %q: %v", flagArgs.workingDir, err)
 		}
@@ -149,9 +154,6 @@
 		flagArgs.gitEmail = defaultGitEmail()
 	}
 
-	// Detect if we're inside the sketch container
-	inInsideSketch := flagArgs.outsideHostname != ""
-
 	// Dispatch to the appropriate execution path
 	if inInsideSketch {
 		// We're running inside the Docker container
@@ -242,6 +244,7 @@
 	mounts              StringSliceFlag
 	termUI              bool
 	gitRemoteURL        string
+	originalGitOrigin   string
 	upstream            string
 	commit              string
 	outsideHTTP         string
@@ -308,6 +311,7 @@
 	internalFlags.StringVar(&flags.outsideWorkingDir, "outside-working-dir", "", "(internal) working dir on the outside system")
 	internalFlags.StringVar(&flags.sketchBinaryLinux, "sketch-binary-linux", "", "(development) path to a pre-built sketch binary for linux")
 	internalFlags.StringVar(&flags.gitRemoteURL, "git-remote-url", "", "(internal) git remote for outside sketch")
+	internalFlags.StringVar(&flags.originalGitOrigin, "original-git-origin", "", "(internal) original git origin URL from host repository")
 	internalFlags.StringVar(&flags.upstream, "upstream", "", "(internal) upstream branch for git work")
 	internalFlags.StringVar(&flags.commit, "commit", "", "(internal) the git commit reference to check out from git remote url")
 	internalFlags.StringVar(&flags.outsideHTTP, "outside-http", "", "(internal) host for outside sketch")
@@ -564,6 +568,15 @@
 		return err
 	}
 
+	// In container mode, resolve the working directory for the (delayed) chdir; no chdir happens here.
+	if flags.workingDir != "" && inInsideSketch {
+		if filepath.IsAbs(flags.workingDir) {
+			wd = flags.workingDir
+		} else {
+			wd = filepath.Join(wd, flags.workingDir)
+		}
+	}
+
 	llmService, err := selectLLMService(client, flags.modelName, modelURL, apiKey)
 	if err != nil {
 		return fmt.Errorf("failed to initialize LLM service: %w", err)
@@ -572,6 +585,13 @@
 		MaxDollars: flags.maxDollars,
 	}
 
+	// Get the original git origin URL
+	originalGitOrigin := flags.originalGitOrigin
+	if originalGitOrigin == "" && flags.outsideHostname == "" {
+		// Not in container mode, get the git origin directly
+		originalGitOrigin = getGitOrigin(ctx, wd)
+	}
+
 	agentConfig := loop.AgentConfig{
 		Context:           ctx,
 		Service:           llmService,
@@ -590,6 +610,7 @@
 		InDocker:            flags.outsideHostname != "",
 		OneShot:             flags.oneShot,
 		GitRemoteAddr:       flags.gitRemoteURL,
+		OriginalGitOrigin:   originalGitOrigin,
 		Upstream:            flags.upstream,
 		OutsideHTTP:         flags.outsideHTTP,
 		Commit:              flags.commit,
@@ -931,3 +952,14 @@
 		}
 	}()
 }
+
+// getGitOrigin returns the URL of the git remote 'origin' if it exists
+func getGitOrigin(ctx context.Context, dir string) string {
+	cmd := exec.CommandContext(ctx, "git", "config", "--get", "remote.origin.url")
+	cmd.Dir = dir
+	out, err := cmd.Output()
+	if err != nil {
+		return ""
+	}
+	return strings.TrimSpace(string(out))
+}
diff --git a/dockerimg/dockerimg.go b/dockerimg/dockerimg.go
index b800fd9..98ce389 100644
--- a/dockerimg/dockerimg.go
+++ b/dockerimg/dockerimg.go
@@ -113,6 +113,9 @@
 
 	GitRemoteUrl string
 
+	// Original git origin URL from the host repository
+	OriginalGitOrigin string
+
 	// Upstream branch for git work
 	Upstream string
 
@@ -158,10 +161,21 @@
 	if err != nil {
 		return err
 	}
-	gitRoot, err := findGitRoot(ctx, config.Path)
+	// Bail early if sketch was started from a path that isn't in a git repo.
+	err = requireGitRepo(ctx, config.Path)
 	if err != nil {
 		return err
 	}
+
+	// Best effort attempt to get repo root; fall back to current directory.
+	gitRoot := config.Path
+	if root, err := gitRepoRoot(ctx, config.Path); err == nil {
+		gitRoot = root
+	}
+
+	// Capture the original git origin URL before we set up the temporary git server
+	config.OriginalGitOrigin = getOriginalGitOrigin(ctx, gitRoot)
+
 	err = checkForEmptyGitRepo(ctx, config.Path)
 	if err != nil {
 		return err
@@ -578,6 +592,9 @@
 		cmdArgs = append(cmdArgs, "-commit="+config.Commit)
 		cmdArgs = append(cmdArgs, "-upstream="+config.Upstream)
 	}
+	if config.OriginalGitOrigin != "" {
+		cmdArgs = append(cmdArgs, "-original-git-origin="+config.OriginalGitOrigin)
+	}
 	if config.OutsideHTTP != "" {
 		cmdArgs = append(cmdArgs, "-outside-http="+config.OutsideHTTP)
 	}
@@ -758,6 +775,8 @@
 	h := sha256.New()
 	h.Write([]byte(baseImageID))
 	h.Write([]byte(gitRoot))
+	// one-time cache-busting for the transition from copying git repos to only copying git objects
+	h.Write([]byte("git-objects"))
 	return hex.EncodeToString(h.Sum(nil))[:12] // Use first 12 chars for shorter name
 }
 
@@ -774,7 +793,8 @@
 	return true, nil
 }
 
-// buildLayeredImage builds a new Docker image by layering the repo on top of the base image
+// buildLayeredImage builds a new Docker image by layering the repo on top of the base image.
+//
 // TODO: git config stuff could be environment variables at runtime for email and username.
 // The git docs seem to say that http.postBuffer is a bug in our git proxy more than a thing
 // that's needed, but we haven't found the bug yet!
@@ -783,21 +803,33 @@
 // of Go). Then you want a git repo, which is much faster to incrementally fetch rather
 // than cloning every time. Then you want some build artifacts, like perhaps the
 // "go mod download" cache, or the "go build" cache or the "npm install" cache.
-// The implementation here copies the working directory (not just the git repo!),
-// and runs "go mod download". This is an ok compromise, but a power user might want
+// The implementation here copies the git objects into the base image.
+// That enables fast clones into the container, because most of the git objects are already there.
+// It also avoids copying uncommitted changes, configs/hooks, etc.
+// TODO: We should also set up fake temporary Go module(s) so we can run "go mod download".
+// This is an ok compromise, but a power user might want
 // less caching or more caching, depending on their use case. One approach we could take
 // is to punt entirely if /app/.git already exists. If the user has provided a -base-image with
 // their git repo, let's assume they know what they're doing, and they've customized their image
-// for their use case. On the other side of the spectrum is cloning their repo every time,
-// or running git clean -xdf, which minimizes surprises but slows down builds.
+// for their use case.
 // Note that buildx has some support for conditional COPY, but without buildx, which
 // we can't reliably depend on, we have to run the base image to inspect its file system,
 // and then we can decide what to do.
-func buildLayeredImage(ctx context.Context, imgName, baseImage, gitRoot string, _ bool) error {
+//
+// We may in the future want to enable people to bring along uncommitted changes to tracked files.
+// To do that, we would run `git stash create` in outie at launch time, treat HEAD as the base commit,
+// and add in the stash commit as a new commit atop it.
+// That would accurately model the base commit as well as the uncommitted changes.
+// (This wouldn't happen here, but at agent/container initialization time.)
+//
+// gitRoot is a directory inside the git repository whose git common directory is copied into the image at /git-ref.
+func buildLayeredImage(ctx context.Context, imgName, baseImage, gitRoot string, verbose bool) error {
+	// Shove a bunch of git objects into the image for faster future cloning.
 	dockerfileContent := fmt.Sprintf(`FROM %s
-COPY . /app
+COPY . /git-ref
 WORKDIR /app
-RUN if [ -f go.mod ]; then go mod download; fi
+# TODO: restore go.mod download
+# RUN if [ -f go.mod ]; then go mod download; fi
 CMD ["/bin/sketch"]
 `, baseImage)
 
@@ -836,8 +868,13 @@
 		".",
 	}
 
+	commonDir, err := gitCommonDir(ctx, gitRoot)
+	if err != nil {
+		return fmt.Errorf("failed to get git common dir: %w", err)
+	}
+
 	cmd := exec.CommandContext(ctx, "docker", cmdArgs...)
-	cmd.Dir = gitRoot
+	cmd.Dir = commonDir
 	// We print the docker build output whether or not the user
 	// has selected --verbose. Building an image takes a while
 	// and this gives good context.
@@ -864,13 +901,14 @@
 	return nil
 }
 
-func findGitRoot(ctx context.Context, path string) (string, error) {
-	cmd := exec.CommandContext(ctx, "git", "rev-parse", "--show-toplevel")
+// requireGitRepo confirms that path is within a git repository.
+func requireGitRepo(ctx context.Context, path string) error {
+	cmd := exec.CommandContext(ctx, "git", "rev-parse", "--git-dir")
 	cmd.Dir = path
 	out, err := cmd.CombinedOutput()
 	if err != nil {
 		if strings.Contains(string(out), "not a git repository") {
-			return "", fmt.Errorf(`sketch needs to run from within a git repo, but %s is not part of a git repo.
+			return fmt.Errorf(`sketch needs to run from within a git repo, but %s is not part of a git repo.
 Consider one of the following options:
 	- cd to a different dir that is already part of a git repo first, or
 	- to create a new git repo from this directory (%s), run this command:
@@ -880,12 +918,40 @@
 and try running sketch again.
 `, path, path)
 		}
+		return fmt.Errorf("git rev-parse --git-dir: %s: %w", out, err)
+	}
+	return nil
+}
+
+// gitRepoRoot attempts to find the git repository root directory.
+// Returns an error if not in a git repository or if it's a bare repository.
+// This is used to calculate relative paths for preserving user's working directory context.
+func gitRepoRoot(ctx context.Context, path string) (string, error) {
+	cmd := exec.CommandContext(ctx, "git", "rev-parse", "--show-toplevel")
+	cmd.Dir = path
+	out, err := cmd.CombinedOutput()
+	if err != nil {
 		return "", fmt.Errorf("git rev-parse --show-toplevel: %s: %w", out, err)
 	}
 	// The returned path is absolute.
 	return strings.TrimSpace(string(out)), nil
 }
 
+// gitCommonDir finds the git common directory for path.
+func gitCommonDir(ctx context.Context, path string) (string, error) {
+	cmd := exec.CommandContext(ctx, "git", "rev-parse", "--git-common-dir")
+	cmd.Dir = path
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		return "", fmt.Errorf("git rev-parse --git-common-dir: %s: %w", out, err)
+	}
+	gitCommonDir := strings.TrimSpace(string(out))
+	if !filepath.IsAbs(gitCommonDir) {
+		gitCommonDir = filepath.Join(path, gitCommonDir)
+	}
+	return gitCommonDir, nil
+}
+
 // getEnvForwardingFromGitConfig retrieves environment variables to pass through to Docker
 // from git config using the sketch.envfwd multi-valued key.
 func getEnvForwardingFromGitConfig(ctx context.Context) []string {
@@ -910,6 +976,17 @@
 	return envVars
 }
 
+// getOriginalGitOrigin returns the URL of the git remote 'origin' if it exists in the given directory
+func getOriginalGitOrigin(ctx context.Context, dir string) string {
+	cmd := exec.CommandContext(ctx, "git", "config", "--get", "remote.origin.url")
+	cmd.Dir = dir
+	out, err := cmd.Output()
+	if err != nil {
+		return ""
+	}
+	return strings.TrimSpace(string(out))
+}
+
 // parseDockerArgs parses a string containing space-separated Docker arguments into an array of strings.
 // It handles quoted arguments and escaped characters.
 //
diff --git a/dockerimg/githttp.go b/dockerimg/githttp.go
index b7203c0..ecd46a1 100644
--- a/dockerimg/githttp.go
+++ b/dockerimg/githttp.go
@@ -7,7 +7,9 @@
 	"log/slog"
 	"net/http"
 	"net/http/cgi"
+	"os"
 	"os/exec"
+	"path/filepath"
 	"runtime"
 	"strings"
 	"time"
@@ -107,6 +109,12 @@
 		}
 	}
 
+	// Dumb hack for bare repos: if the requested path does not exist under the repo root, strip any leading "/.git".
+	path := r.URL.Path
+	if _, err := os.Stat(filepath.Join(g.gitRepoRoot, path)); os.IsNotExist(err) {
+		path = strings.TrimPrefix(path, "/.git") // turn /.git/info/refs into /info/refs
+	}
+
 	w.Header().Set("Cache-Control", "no-cache")
 	h := &cgi.Handler{
 		Path: gitBin,
@@ -114,7 +122,7 @@
 		Dir:  g.gitRepoRoot,
 		Env: []string{
 			"GIT_PROJECT_ROOT=" + g.gitRepoRoot,
-			"PATH_INFO=" + r.URL.Path,
+			"PATH_INFO=" + path,
 			"QUERY_STRING=" + r.URL.RawQuery,
 			"REQUEST_METHOD=" + r.Method,
 			"GIT_HTTP_EXPORT_ALL=true",
diff --git a/loop/agent.go b/loop/agent.go
index 841a5ca..ed5c907 100644
--- a/loop/agent.go
+++ b/loop/agent.go
@@ -434,8 +434,6 @@
 	outsideHostname   string
 	outsideOS         string
 	outsideWorkingDir string
-	// URL of the git remote 'origin' if it exists
-	gitOrigin string
 	// MCP manager for handling MCP server connections
 	mcpManager *mcp.MCPManager
 	// Port monitor for tracking TCP ports
@@ -742,7 +740,7 @@
 
 // GitOrigin returns the URL of the git remote 'origin' if it exists.
 func (a *Agent) GitOrigin() string {
-	return a.gitOrigin
+	return a.config.OriginalGitOrigin
 }
 
 // GitUsername returns the git user name from the agent config.
@@ -1046,6 +1044,8 @@
 	OutsideHTTP string
 	// Outtie's Git server
 	GitRemoteAddr string
+	// Original git origin URL from host repository, if any
+	OriginalGitOrigin string
 	// Upstream branch for git work
 	Upstream string
 	// Commit to checkout from Outtie
@@ -1116,9 +1116,24 @@
 	ctx := a.config.Context
 	slog.InfoContext(ctx, "agent initializing")
 
+	// If a remote + commit was specified, clone it.
+	if a.config.Commit != "" && a.gitState.gitRemoteAddr != "" {
+		slog.InfoContext(ctx, "cloning git repo", "commit", a.config.Commit)
+		// TODO: --reference-if-able instead?
+		cmd := exec.CommandContext(ctx, "git", "clone", "--reference", "/git-ref", a.gitState.gitRemoteAddr, "/app")
+		if out, err := cmd.CombinedOutput(); err != nil {
+			return fmt.Errorf("failed to clone repository from %s: %s: %w", a.gitState.gitRemoteAddr, out, err)
+		}
+	}
+
+	if a.workingDir != "" {
+		err := os.Chdir(a.workingDir)
+		if err != nil {
+			return fmt.Errorf("failed to change working directory to %s: %w", a.workingDir, err)
+		}
+	}
+
 	if !ini.NoGit {
-		// Capture the original origin before we potentially replace it below
-		a.gitOrigin = getGitOrigin(ctx, a.workingDir)
 
 		// Configure git user settings
 		if a.config.GitEmail != "" {
@@ -1143,37 +1158,11 @@
 		}
 	}
 
-	// If a remote git addr was specified, we configure the origin remote
-	if a.gitState.gitRemoteAddr != "" {
-		slog.InfoContext(ctx, "Configuring git remote", slog.String("remote", a.gitState.gitRemoteAddr))
-
-		// Remove existing origin remote if it exists
-		cmd := exec.CommandContext(ctx, "git", "remote", "remove", "origin")
-		cmd.Dir = a.workingDir
-		if out, err := cmd.CombinedOutput(); err != nil {
-			// Ignore error if origin doesn't exist
-			slog.DebugContext(ctx, "git remote remove origin (ignoring if not exists)", slog.String("output", string(out)))
-		}
-
-		// Add the new remote as origin
-		cmd = exec.CommandContext(ctx, "git", "remote", "add", "origin", a.gitState.gitRemoteAddr)
-		cmd.Dir = a.workingDir
-		if out, err := cmd.CombinedOutput(); err != nil {
-			return fmt.Errorf("git remote add origin: %s: %v", out, err)
-		}
-
-	}
-
 	// If a commit was specified, we fetch and reset to it.
 	if a.config.Commit != "" && a.gitState.gitRemoteAddr != "" {
-		slog.InfoContext(ctx, "updating git repo", slog.String("commit", a.config.Commit))
+		slog.InfoContext(ctx, "updating git repo", "commit", a.config.Commit)
 
-		cmd := exec.CommandContext(ctx, "git", "stash")
-		cmd.Dir = a.workingDir
-		if out, err := cmd.CombinedOutput(); err != nil {
-			return fmt.Errorf("git stash: %s: %v", out, err)
-		}
-		cmd = exec.CommandContext(ctx, "git", "fetch", "--prune", "origin")
+		cmd := exec.CommandContext(ctx, "git", "fetch", "--prune", "origin")
 		cmd.Dir = a.workingDir
 		if out, err := cmd.CombinedOutput(); err != nil {
 			return fmt.Errorf("git fetch: %s: %w", out, err)
@@ -2308,19 +2297,6 @@
 	return totalAdded, totalRemoved, nil
 }
 
-// getGitOrigin returns the URL of the git remote 'origin' if it exists
-func getGitOrigin(ctx context.Context, dir string) string {
-	cmd := exec.CommandContext(ctx, "git", "config", "--get", "remote.origin.url")
-	cmd.Dir = dir
-	stderr := new(strings.Builder)
-	cmd.Stderr = stderr
-	out, err := cmd.Output()
-	if err != nil {
-		return ""
-	}
-	return strings.TrimSpace(string(out))
-}
-
 // systemPromptData contains the data used to render the system prompt template
 type systemPromptData struct {
 	ClientGOOS         string