dockerimg: add only git objects to docker image
Instead of copying the entire working directory
(including uncommitted changes, hooks, and config files),
create a bare git repository and use git clone --reference.
This approach:
- Avoids copying uncommitted changes, hooks, and local config files
- Works correctly with git worktrees and submodules
- Reduces Docker image size substantially
- Maintains all git history and functionality
Fixes boldsoftware/sketch#190
Co-Authored-By: sketch <hello@sketch.dev>
Change-ID: s6af36147e2c4df00k
diff --git a/dockerimg/dockerimg.go b/dockerimg/dockerimg.go
index b800fd9..98ce389 100644
--- a/dockerimg/dockerimg.go
+++ b/dockerimg/dockerimg.go
@@ -113,6 +113,9 @@
GitRemoteUrl string
+ // Original git origin URL from the host repository
+ OriginalGitOrigin string
+
// Upstream branch for git work
Upstream string
@@ -158,10 +161,21 @@
if err != nil {
return err
}
- gitRoot, err := findGitRoot(ctx, config.Path)
+ // Bail early if sketch was started from a path that isn't in a git repo.
+ err = requireGitRepo(ctx, config.Path)
if err != nil {
return err
}
+
+ // Best effort attempt to get repo root; fall back to current directory.
+ gitRoot := config.Path
+ if root, err := gitRepoRoot(ctx, config.Path); err == nil {
+ gitRoot = root
+ }
+
+ // Capture the original git origin URL before we set up the temporary git server
+ config.OriginalGitOrigin = getOriginalGitOrigin(ctx, gitRoot)
+
err = checkForEmptyGitRepo(ctx, config.Path)
if err != nil {
return err
@@ -578,6 +592,9 @@
cmdArgs = append(cmdArgs, "-commit="+config.Commit)
cmdArgs = append(cmdArgs, "-upstream="+config.Upstream)
}
+ if config.OriginalGitOrigin != "" {
+ cmdArgs = append(cmdArgs, "-original-git-origin="+config.OriginalGitOrigin)
+ }
if config.OutsideHTTP != "" {
cmdArgs = append(cmdArgs, "-outside-http="+config.OutsideHTTP)
}
@@ -758,6 +775,8 @@
h := sha256.New()
h.Write([]byte(baseImageID))
h.Write([]byte(gitRoot))
+ // one-time cache-busting for the transition from copying git repos to only copying git objects
+ h.Write([]byte("git-objects"))
return hex.EncodeToString(h.Sum(nil))[:12] // Use first 12 chars for shorter name
}
@@ -774,7 +793,8 @@
return true, nil
}
-// buildLayeredImage builds a new Docker image by layering the repo on top of the base image
+// buildLayeredImage builds a new Docker image by layering the repo on top of the base image.
+//
// TODO: git config stuff could be environment variables at runtime for email and username.
// The git docs seem to say that http.postBuffer is a bug in our git proxy more than a thing
// that's needed, but we haven't found the bug yet!
@@ -783,21 +803,33 @@
// of Go). Then you want a git repo, which is much faster to incrementally fetch rather
// than cloning every time. Then you want some build artifacts, like perhaps the
// "go mod download" cache, or the "go build" cache or the "npm install" cache.
-// The implementation here copies the working directory (not just the git repo!),
-// and runs "go mod download". This is an ok compromise, but a power user might want
+// The implementation here copies the git objects into the base image.
+// That enables fast clones into the container, because most of the git objects are already there.
+// It also avoids copying uncommitted changes, configs/hooks, etc.
+// TODO: We should also set up fake temporary Go module(s) so we can run "go mod download".
+// Copying only the git objects is an ok compromise, but a power user might want
// less caching or more caching, depending on their use case. One approach we could take
// is to punt entirely if /app/.git already exists. If the user has provided a -base-image with
// their git repo, let's assume they know what they're doing, and they've customized their image
-// for their use case. On the other side of the spectrum is cloning their repo every time,
-// or running git clean -xdf, which minimizes surprises but slows down builds.
+// for their use case.
// Note that buildx has some support for conditional COPY, but without buildx, which
// we can't reliably depend on, we have to run the base image to inspect its file system,
// and then we can decide what to do.
-func buildLayeredImage(ctx context.Context, imgName, baseImage, gitRoot string, _ bool) error {
+//
+// We may in the future want to enable people to bring along uncommitted changes to tracked files.
+// To do that, we would run `git stash create` in outie at launch time, treat HEAD as the base commit,
+// and add in the stash commit as a new commit atop it.
+// That would accurately model the base commit as well as the uncommitted changes.
+// (This wouldn't happen here, but at agent/container initialization time.)
+//
+// gitRoot is a path inside the host git repository; its git common dir is what gets copied into the image.
+func buildLayeredImage(ctx context.Context, imgName, baseImage, gitRoot string, verbose bool) error {
+ // Shove a bunch of git objects into the image for faster future cloning.
dockerfileContent := fmt.Sprintf(`FROM %s
-COPY . /app
+COPY . /git-ref
WORKDIR /app
-RUN if [ -f go.mod ]; then go mod download; fi
+# TODO: restore go.mod download
+# RUN if [ -f go.mod ]; then go mod download; fi
CMD ["/bin/sketch"]
`, baseImage)
@@ -836,8 +868,13 @@
".",
}
+ commonDir, err := gitCommonDir(ctx, gitRoot)
+ if err != nil {
+ return fmt.Errorf("failed to get git common dir: %w", err)
+ }
+
cmd := exec.CommandContext(ctx, "docker", cmdArgs...)
- cmd.Dir = gitRoot
+ cmd.Dir = commonDir
// We print the docker build output whether or not the user
// has selected --verbose. Building an image takes a while
// and this gives good context.
@@ -864,13 +901,14 @@
return nil
}
-func findGitRoot(ctx context.Context, path string) (string, error) {
- cmd := exec.CommandContext(ctx, "git", "rev-parse", "--show-toplevel")
+// requireGitRepo confirms that path is within a git repository.
+func requireGitRepo(ctx context.Context, path string) error {
+ cmd := exec.CommandContext(ctx, "git", "rev-parse", "--git-dir")
cmd.Dir = path
out, err := cmd.CombinedOutput()
if err != nil {
if strings.Contains(string(out), "not a git repository") {
- return "", fmt.Errorf(`sketch needs to run from within a git repo, but %s is not part of a git repo.
+ return fmt.Errorf(`sketch needs to run from within a git repo, but %s is not part of a git repo.
Consider one of the following options:
- cd to a different dir that is already part of a git repo first, or
- to create a new git repo from this directory (%s), run this command:
@@ -880,12 +918,40 @@
and try running sketch again.
`, path, path)
}
+ return fmt.Errorf("git rev-parse --git-dir: %s: %w", out, err)
+ }
+ return nil
+}
+
+// gitRepoRoot attempts to find the git repository root directory.
+// Returns an error if not in a git repository or if it's a bare repository.
+// This is used to calculate relative paths for preserving user's working directory context.
+func gitRepoRoot(ctx context.Context, path string) (string, error) {
+ cmd := exec.CommandContext(ctx, "git", "rev-parse", "--show-toplevel")
+ cmd.Dir = path
+ out, err := cmd.CombinedOutput()
+ if err != nil {
return "", fmt.Errorf("git rev-parse --show-toplevel: %s: %w", out, err)
}
// The returned path is absolute.
return strings.TrimSpace(string(out)), nil
}
+// gitCommonDir returns the git common directory for path; a relative result from git is joined onto path.
+func gitCommonDir(ctx context.Context, path string) (string, error) {
+ cmd := exec.CommandContext(ctx, "git", "rev-parse", "--git-common-dir")
+ cmd.Dir = path
+ out, err := cmd.CombinedOutput()
+ if err != nil {
+ return "", fmt.Errorf("git rev-parse --git-common-dir: %s: %w", out, err)
+ }
+ gitCommonDir := strings.TrimSpace(string(out))
+ if !filepath.IsAbs(gitCommonDir) {
+ gitCommonDir = filepath.Join(path, gitCommonDir)
+ }
+ return gitCommonDir, nil
+}
+
// getEnvForwardingFromGitConfig retrieves environment variables to pass through to Docker
// from git config using the sketch.envfwd multi-valued key.
func getEnvForwardingFromGitConfig(ctx context.Context) []string {
@@ -910,6 +976,17 @@
return envVars
}
+// getOriginalGitOrigin returns the URL of the git remote 'origin' as configured in dir, or "" if it cannot be read.
+func getOriginalGitOrigin(ctx context.Context, dir string) string {
+ cmd := exec.CommandContext(ctx, "git", "config", "--get", "remote.origin.url")
+ cmd.Dir = dir
+ out, err := cmd.Output()
+ if err != nil {
+ return ""
+ }
+ return strings.TrimSpace(string(out))
+}
+
// parseDockerArgs parses a string containing space-separated Docker arguments into an array of strings.
// It handles quoted arguments and escaped characters.
//
diff --git a/dockerimg/githttp.go b/dockerimg/githttp.go
index b7203c0..ecd46a1 100644
--- a/dockerimg/githttp.go
+++ b/dockerimg/githttp.go
@@ -7,7 +7,9 @@
"log/slog"
"net/http"
"net/http/cgi"
+ "os"
"os/exec"
+ "path/filepath"
"runtime"
"strings"
"time"
@@ -107,6 +109,12 @@
}
}
+ // Dumb hack for bare repos: if the requested path does not exist under gitRepoRoot, strip a leading /.git from it.
+ path := r.URL.Path
+ if _, err := os.Stat(filepath.Join(g.gitRepoRoot, path)); os.IsNotExist(err) {
+ path = strings.TrimPrefix(path, "/.git") // turn /.git/info/refs into /info/refs
+ }
+
w.Header().Set("Cache-Control", "no-cache")
h := &cgi.Handler{
Path: gitBin,
@@ -114,7 +122,7 @@
Dir: g.gitRepoRoot,
Env: []string{
"GIT_PROJECT_ROOT=" + g.gitRepoRoot,
- "PATH_INFO=" + r.URL.Path,
+ "PATH_INFO=" + path,
"QUERY_STRING=" + r.URL.RawQuery,
"REQUEST_METHOD=" + r.Method,
"GIT_HTTP_EXPORT_ALL=true",