| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 1 | // Copyright 2024 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | // Package httprr implements HTTP record and replay, mainly for use in tests. |
| 6 | // |
| 7 | // [Open] creates a new [RecordReplay]. Whether it is recording or replaying |
| 8 | // is controlled by the -httprecord flag, which is defined by this package |
| 9 | // only in test programs (built by “go test”). |
| 10 | // See the [Open] documentation for more details. |
| 11 | package httprr |
| 12 | |
| 13 | import ( |
| 14 | "bufio" |
| 15 | "bytes" |
| 16 | "cmp" |
| 17 | "context" |
| 18 | "flag" |
| 19 | "fmt" |
| 20 | "io" |
| 21 | "net/http" |
| 22 | "os" |
| 23 | "regexp" |
| 24 | "strconv" |
| 25 | "strings" |
| 26 | "sync" |
| 27 | "testing" |
| 28 | ) |
| 29 | |
// record points at the value of the -httprecord flag: a regular expression
// selecting which trace files to re-record. It starts out pointing at an
// empty string and is only redirected to a real flag value in test binaries.
var record = new(string)

// init defines the -httprecord flag, but only when the program
// is a test binary.
func init() {
	if !testing.Testing() {
		return
	}
	record = flag.String("httprecord", "", "re-record traces for files matching `regexp`")
}
| 37 | |
// RecordReplay is an [http.RoundTripper] with two modes of operation:
// record and replay.
//
// When recording, it forwards each request to another RoundTripper
// and appends the (request, response) pair to a log file.
//
// When replaying, it answers each request by looking up an identical
// request in the log and returning the response stored with it.
type RecordReplay struct {
	file string            // name of the log file being read or written
	real http.RoundTripper // transport used for real round trips

	mu        sync.Mutex
	reqScrub  []func(*http.Request) error // request scrubbers, in registration order
	respScrub []func(*bytes.Buffer) error // response scrubbers, in registration order
	replay    map[string]string           // replay mode: logged response keyed by logged request
	record    *os.File                    // record mode: log file being written
	writeErr  error                       // record mode: log write error encountered, if any
}
| 56 | |
| 57 | // ScrubReq adds new request scrubbing functions to rr. |
| 58 | // |
| 59 | // Before using a request as a lookup key or saving it in the record/replay log, |
| 60 | // the RecordReplay calls each scrub function, in the order they were registered, |
| 61 | // to canonicalize non-deterministic parts of the request and remove secrets. |
| 62 | // Scrubbing only applies to a copy of the request used in the record/replay log; |
| 63 | // the unmodified original request is sent to the actual server in recording mode. |
| 64 | // A scrub function can assume that if req.Body is not nil, then it has type [*Body]. |
| 65 | // |
| 66 | // Calling ScrubReq adds to the list of registered request scrubbing functions; |
| 67 | // it does not replace those registered by earlier calls. |
| 68 | func (rr *RecordReplay) ScrubReq(scrubs ...func(req *http.Request) error) { |
| 69 | rr.reqScrub = append(rr.reqScrub, scrubs...) |
| 70 | } |
| 71 | |
| 72 | // ScrubResp adds new response scrubbing functions to rr. |
| 73 | // |
| 74 | // Before using a response as a lookup key or saving it in the record/replay log, |
| 75 | // the RecordReplay calls each scrub function on a byte representation of the |
| 76 | // response, in the order they were registered, to canonicalize non-deterministic |
| 77 | // parts of the response and remove secrets. |
| 78 | // |
| 79 | // Calling ScrubResp adds to the list of registered response scrubbing functions; |
| 80 | // it does not replace those registered by earlier calls. |
| 81 | // |
| 82 | // Clients should be careful when loading the bytes into [*http.Response] using |
| 83 | // [http.ReadResponse]. This function can set [http.Response].Close to true even |
| 84 | // when the original response had it false. See code in go/src/net/http.Response.Write |
| 85 | // and go/src/net/http.Write for more info. |
| 86 | func (rr *RecordReplay) ScrubResp(scrubs ...func(*bytes.Buffer) error) { |
| 87 | rr.respScrub = append(rr.respScrub, scrubs...) |
| 88 | } |
| 89 | |
| 90 | // Recording reports whether the rr is in recording mode. |
| 91 | func (rr *RecordReplay) Recording() bool { |
| 92 | return rr.record != nil |
| 93 | } |
| 94 | |
| 95 | // Open opens a new record/replay log in the named file and |
| 96 | // returns a [RecordReplay] backed by that file. |
| 97 | // |
| 98 | // By default Open expects the file to exist and contain a |
| 99 | // previously-recorded log of (request, response) pairs, |
| 100 | // which [RecordReplay.RoundTrip] consults to prepare its responses. |
| 101 | // |
| 102 | // If the command-line flag -httprecord is set to a non-empty |
| 103 | // regular expression that matches file, then Open creates |
| 104 | // the file as a new log. In that mode, [RecordReplay.RoundTrip] |
| 105 | // makes actual HTTP requests using rt but then logs the requests and |
| 106 | // responses to the file for replaying in a future run. |
| 107 | func Open(file string, rt http.RoundTripper) (*RecordReplay, error) { |
| 108 | record, err := Recording(file) |
| 109 | if err != nil { |
| 110 | return nil, err |
| 111 | } |
| 112 | if record { |
| 113 | return create(file, rt) |
| 114 | } |
| 115 | return open(file, rt) |
| 116 | } |
| 117 | |
| 118 | // OpenForRecording opens the file for recording. |
| 119 | func OpenForRecording(file string, rt http.RoundTripper) (*RecordReplay, error) { |
| 120 | return create(file, rt) |
| 121 | } |
| 122 | |
| 123 | // Recording reports whether the "-httprecord" flag is set |
| 124 | // for the given file. |
| 125 | // It return an error if the flag is set to an invalid value. |
| 126 | func Recording(file string) (bool, error) { |
| 127 | if *record != "" { |
| 128 | re, err := regexp.Compile(*record) |
| 129 | if err != nil { |
| 130 | return false, fmt.Errorf("invalid -httprecord flag: %v", err) |
| 131 | } |
| 132 | if re.MatchString(file) { |
| 133 | return true, nil |
| 134 | } |
| 135 | } |
| 136 | return false, nil |
| 137 | } |
| 138 | |
| 139 | // creates creates a new record-mode RecordReplay in the file. |
| 140 | func create(file string, rt http.RoundTripper) (*RecordReplay, error) { |
| 141 | f, err := os.Create(file) |
| 142 | if err != nil { |
| 143 | return nil, err |
| 144 | } |
| 145 | |
| 146 | // Write header line. |
| 147 | // Each round-trip will write a new request-response record. |
| 148 | if _, err := fmt.Fprintf(f, "httprr trace v1\n"); err != nil { |
| 149 | // unreachable unless write error immediately after os.Create |
| 150 | f.Close() |
| 151 | return nil, err |
| 152 | } |
| 153 | rr := &RecordReplay{ |
| 154 | file: file, |
| 155 | real: rt, |
| 156 | record: f, |
| 157 | } |
| 158 | return rr, nil |
| 159 | } |
| 160 | |
| 161 | // open opens a replay-mode RecordReplay using the data in the file. |
| 162 | func open(file string, rt http.RoundTripper) (*RecordReplay, error) { |
| 163 | // Note: To handle larger traces without storing entirely in memory, |
| 164 | // could instead read the file incrementally, storing a map[hash]offsets |
| 165 | // and then reread the relevant part of the file during RoundTrip. |
| 166 | bdata, err := os.ReadFile(file) |
| 167 | if err != nil { |
| 168 | return nil, err |
| 169 | } |
| 170 | |
| 171 | // Trace begins with header line. |
| 172 | data := string(bdata) |
| 173 | line, data, ok := strings.Cut(data, "\n") |
| 174 | if !ok || line != "httprr trace v1" { |
| 175 | return nil, fmt.Errorf("read %s: not an httprr trace", file) |
| 176 | } |
| 177 | |
| 178 | replay := make(map[string]string) |
| 179 | for data != "" { |
| 180 | // Each record starts with a line of the form "n1 n2\n" |
| 181 | // followed by n1 bytes of request encoding and |
| 182 | // n2 bytes of response encoding. |
| 183 | line, data, ok = strings.Cut(data, "\n") |
| 184 | f1, f2, _ := strings.Cut(line, " ") |
| 185 | n1, err1 := strconv.Atoi(f1) |
| 186 | n2, err2 := strconv.Atoi(f2) |
| 187 | if !ok || err1 != nil || err2 != nil || n1 > len(data) || n2 > len(data[n1:]) { |
| 188 | return nil, fmt.Errorf("read %s: corrupt httprr trace", file) |
| 189 | } |
| 190 | var req, resp string |
| 191 | req, resp, data = data[:n1], data[n1:n1+n2], data[n1+n2:] |
| 192 | replay[req] = resp |
| 193 | } |
| 194 | |
| 195 | rr := &RecordReplay{ |
| 196 | file: file, |
| 197 | real: rt, |
| 198 | replay: replay, |
| 199 | } |
| 200 | return rr, nil |
| 201 | } |
| 202 | |
| 203 | // Client returns an http.Client using rr as its transport. |
| 204 | // It is a shorthand for: |
| 205 | // |
| 206 | // return &http.Client{Transport: rr} |
| 207 | // |
| 208 | // For more complicated uses, use rr or the [RecordReplay.RoundTrip] method directly. |
| 209 | func (rr *RecordReplay) Client() *http.Client { |
| 210 | return &http.Client{Transport: rr} |
| 211 | } |
| 212 | |
// A Body is an io.ReadCloser used as an HTTP request body.
// In a Scrubber, if req.Body != nil, then req.Body is guaranteed
// to have type *Body, making it easy to access the body to change it.
type Body struct {
	Data       []byte // complete contents of the body
	ReadOffset int    // index in Data of the next byte Read will return
}

// Read reads from the body, implementing io.Reader.
//
// It copies bytes from Data starting at ReadOffset into p and advances
// ReadOffset by the number of bytes copied. It returns io.EOF only when
// no unread data remains; in particular, a call with an empty p while data
// remains returns 0, nil rather than a premature io.EOF, and a ReadOffset
// at or beyond len(Data) reports io.EOF instead of panicking.
func (b *Body) Read(p []byte) (int, error) {
	if b.ReadOffset >= len(b.Data) {
		return 0, io.EOF
	}
	n := copy(p, b.Data[b.ReadOffset:])
	b.ReadOffset += n
	return n, nil
}

// Close is a no-op, implementing io.Closer.
// It always returns nil.
func (b *Body) Close() error {
	return nil
}
| 235 | |
| 236 | // RoundTrip implements [http.RoundTripper]. |
| 237 | // |
| 238 | // If rr has been opened in record mode, RoundTrip passes the requests on to |
| 239 | // the RoundTripper specified in the call to [Open] and then logs the |
| 240 | // (request, response) pair to the underlying file. |
| 241 | // |
| 242 | // If rr has been opened in replay mode, RoundTrip looks up the request in the log |
| 243 | // and then responds with the previously logged response. |
| 244 | // If the log does not contain req, RoundTrip returns an error. |
| 245 | func (rr *RecordReplay) RoundTrip(req *http.Request) (*http.Response, error) { |
| 246 | reqWire, err := rr.reqWire(req) |
| 247 | if err != nil { |
| 248 | return nil, err |
| 249 | } |
| 250 | |
| 251 | // If we're in replay mode, replay a response. |
| 252 | if rr.replay != nil { |
| 253 | return rr.replayRoundTrip(req, reqWire) |
| 254 | } |
| 255 | |
| 256 | // Otherwise run a real round trip and save the request-response pair. |
| 257 | // But if we've had a log write error already, don't bother. |
| 258 | if err := rr.writeError(); err != nil { |
| 259 | return nil, err |
| 260 | } |
| 261 | resp, err := rr.real.RoundTrip(req) |
| 262 | if err != nil { |
| 263 | return nil, err |
| 264 | } |
| 265 | |
| 266 | // Encode resp and decode to get a copy for our caller. |
| 267 | respWire, err := rr.respWire(resp) |
| 268 | if err != nil { |
| 269 | return nil, err |
| 270 | } |
| 271 | if err := rr.writeLog(reqWire, respWire); err != nil { |
| 272 | return nil, err |
| 273 | } |
| 274 | return resp, nil |
| 275 | } |
| 276 | |
| 277 | // reqWire returns the wire-format HTTP request key to be |
| 278 | // used for request when saving to the log or looking up in a |
| 279 | // previously written log. It consumes the original req.Body |
| 280 | // but modifies req.Body to be an equivalent [*Body]. |
| 281 | func (rr *RecordReplay) reqWire(req *http.Request) (string, error) { |
| 282 | // rkey is the scrubbed request used as a lookup key. |
| 283 | // Clone req including req.Body. |
| 284 | rkey := req.Clone(context.Background()) |
| 285 | if req.Body != nil { |
| 286 | body, err := io.ReadAll(req.Body) |
| 287 | req.Body.Close() |
| 288 | if err != nil { |
| 289 | return "", err |
| 290 | } |
| 291 | req.Body = &Body{Data: body} |
| 292 | rkey.Body = &Body{Data: bytes.Clone(body)} |
| 293 | } |
| 294 | |
| 295 | // Canonicalize and scrub request key. |
| 296 | for _, scrub := range rr.reqScrub { |
| 297 | if err := scrub(rkey); err != nil { |
| 298 | return "", err |
| 299 | } |
| 300 | } |
| 301 | |
| 302 | // Now that scrubbers are done potentially modifying body, set length. |
| 303 | if rkey.Body != nil { |
| 304 | rkey.ContentLength = int64(len(rkey.Body.(*Body).Data)) |
| 305 | } |
| 306 | |
| 307 | // Serialize rkey to produce the log entry. |
| 308 | // Use WriteProxy instead of Write to preserve the URL's scheme. |
| 309 | var key strings.Builder |
| 310 | if err := rkey.WriteProxy(&key); err != nil { |
| 311 | return "", err |
| 312 | } |
| 313 | return key.String(), nil |
| 314 | } |
| 315 | |
| 316 | // respWire returns the wire-format HTTP response log entry. |
| 317 | // It modifies resp but leaves an equivalent response in its place. |
| 318 | func (rr *RecordReplay) respWire(resp *http.Response) (string, error) { |
| 319 | var key bytes.Buffer |
| 320 | if err := resp.Write(&key); err != nil { |
| 321 | return "", err |
| 322 | } |
| 323 | resp2, err := http.ReadResponse(bufio.NewReader(bytes.NewReader(key.Bytes())), resp.Request) |
| 324 | if err != nil { |
| 325 | // unreachable unless resp.Write does not round-trip with http.ReadResponse |
| 326 | return "", err |
| 327 | } |
| 328 | *resp = *resp2 |
| 329 | |
| 330 | for _, scrub := range rr.respScrub { |
| 331 | if err := scrub(&key); err != nil { |
| 332 | return "", err |
| 333 | } |
| 334 | } |
| 335 | return key.String(), nil |
| 336 | } |
| 337 | |
| 338 | // replayRoundTrip implements RoundTrip using the replay log. |
| 339 | func (rr *RecordReplay) replayRoundTrip(req *http.Request, reqLog string) (*http.Response, error) { |
| 340 | respLog, ok := rr.replay[reqLog] |
| 341 | if !ok { |
| 342 | return nil, fmt.Errorf("cached HTTP response not found for:\n%s", reqLog) |
| 343 | } |
| 344 | resp, err := http.ReadResponse(bufio.NewReader(strings.NewReader(respLog)), req) |
| 345 | if err != nil { |
| 346 | return nil, fmt.Errorf("read %s: corrupt httprr trace: %v", rr.file, err) |
| 347 | } |
| 348 | return resp, nil |
| 349 | } |
| 350 | |
| 351 | // writeError reports any previous log write error. |
| 352 | func (rr *RecordReplay) writeError() error { |
| 353 | rr.mu.Lock() |
| 354 | defer rr.mu.Unlock() |
| 355 | return rr.writeErr |
| 356 | } |
| 357 | |
| 358 | // writeLog writes the request-response pair to the log. |
| 359 | // If a write fails, writeLog arranges for rr.broken to return |
| 360 | // an error and deletes the underlying log. |
| 361 | func (rr *RecordReplay) writeLog(reqWire, respWire string) error { |
| 362 | rr.mu.Lock() |
| 363 | defer rr.mu.Unlock() |
| 364 | |
| 365 | if rr.writeErr != nil { |
| 366 | // Unreachable unless concurrent I/O error. |
| 367 | // Caller should have checked already. |
| 368 | return rr.writeErr |
| 369 | } |
| 370 | |
| 371 | _, err1 := fmt.Fprintf(rr.record, "%d %d\n", len(reqWire), len(respWire)) |
| 372 | _, err2 := rr.record.WriteString(reqWire) |
| 373 | _, err3 := rr.record.WriteString(respWire) |
| 374 | if err := cmp.Or(err1, err2, err3); err != nil { |
| 375 | rr.writeErr = err |
| 376 | rr.record.Close() |
| 377 | os.Remove(rr.file) |
| 378 | return err |
| 379 | } |
| 380 | |
| 381 | return nil |
| 382 | } |
| 383 | |
| 384 | // Close closes the RecordReplay. |
| 385 | // It is a no-op in replay mode. |
| 386 | func (rr *RecordReplay) Close() error { |
| 387 | if rr.writeErr != nil { |
| 388 | return rr.writeErr |
| 389 | } |
| 390 | if rr.record != nil { |
| 391 | return rr.record.Close() |
| 392 | } |
| 393 | return nil |
| 394 | } |