llm/ant: retry more on failure
diff --git a/llm/ant/ant.go b/llm/ant/ant.go
index fdf2fde..d2d3f3e 100644
--- a/llm/ant/ant.go
+++ b/llm/ant/ant.go
@@ -5,6 +5,7 @@
"cmp"
"context"
"encoding/json"
+ "errors"
"fmt"
"io"
"log/slog"
@@ -425,13 +426,22 @@
httpc := cmp.Or(s.HTTPC, http.DefaultClient)
// retry loop
+ var errs error // accumulated errors across all attempts
for attempts := 0; ; attempts++ {
+ if attempts > 10 {
+ return nil, fmt.Errorf("anthropic request failed after %d attempts: %w", attempts, errs)
+ }
+ if attempts > 0 {
+ sleep := backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
+ slog.WarnContext(ctx, "anthropic request sleep before retry", "sleep", sleep, "attempts", attempts)
+ time.Sleep(sleep)
+ }
if dumpText {
fmt.Printf("RAW REQUEST:\n%s\n\n", payload)
}
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(payload))
if err != nil {
- return nil, err
+ return nil, errors.Join(errs, err)
}
req.Header.Set("Content-Type", "application/json")
@@ -452,10 +462,15 @@
resp, err := httpc.Do(req)
if err != nil {
- return nil, err
+ errs = errors.Join(errs, err)
+ continue
}
- buf, _ := io.ReadAll(resp.Body)
+ buf, err := io.ReadAll(resp.Body)
resp.Body.Close()
+ if err != nil {
+ errs = errors.Join(errs, err)
+ continue
+ }
switch {
case resp.StatusCode == http.StatusOK:
@@ -465,7 +480,7 @@
var response response
err = json.NewDecoder(bytes.NewReader(buf)).Decode(&response)
if err != nil {
- return nil, err
+ return nil, errors.Join(errs, err)
}
if response.StopReason == "max_tokens" && !largerMaxTokens {
slog.InfoContext(ctx, "anthropic_retrying_with_larger_tokens", "message", "Retrying Anthropic API call with larger max tokens size")
@@ -484,20 +499,24 @@
return toLLMResponse(&response), nil
case resp.StatusCode >= 500 && resp.StatusCode < 600:
- // overloaded or unhappy, in one form or another
- sleep := backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
- slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode, "sleep", sleep)
- time.Sleep(sleep)
+ // server error, retry
+ slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode)
+ errs = errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf))
+ continue
case resp.StatusCode == 429:
- // rate limited. wait 1 minute as a starting point, because that's the rate limiting window.
- // and then add some additional time for backoff.
- sleep := time.Minute + backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
- slog.WarnContext(ctx, "anthropic_request_rate_limited", "response", string(buf), "sleep", sleep)
- time.Sleep(sleep)
- // case resp.StatusCode == 400:
- // TODO: parse ErrorResponse, make (*ErrorResponse) implement error
+ // rate limited, retry
+ slog.WarnContext(ctx, "anthropic_request_rate_limited", "response", string(buf))
+ errs = errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf))
+ continue
+ case resp.StatusCode >= 400 && resp.StatusCode < 500:
+ // some other 400, probably unrecoverable
+ slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode)
+ return nil, errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf))
default:
- return nil, fmt.Errorf("API request failed with status %s\n%s", resp.Status, buf)
+ // ...retry, I guess?
+ slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode)
+ errs = errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf))
+ continue
}
}
}