blob: 5bde1b177639f145ad411f358c87c57a0e6782a0 [file] [log] [blame]
Earl Lee2e463fb2025-04-17 11:22:22 -07001package loop
2
3import (
4 "context"
5 "net/http"
6 "os"
7 "strings"
8 "testing"
9 "time"
10
11 "sketch.dev/ant"
12 "sketch.dev/httprr"
13)
14
15// TestAgentLoop tests that the Agent loop functionality works correctly.
16// It uses the httprr package to record HTTP interactions for replay in tests.
17// When failing, rebuild with "go test ./sketch/loop -run TestAgentLoop -httprecord .*agent_loop.*"
18// as necessary.
19func TestAgentLoop(t *testing.T) {
20 ctx := context.Background()
21
22 // Setup httprr recorder
23 rrPath := "testdata/agent_loop.httprr"
24 rr, err := httprr.Open(rrPath, http.DefaultTransport)
25 if err != nil && !os.IsNotExist(err) {
26 t.Fatal(err)
27 }
28
29 if rr.Recording() {
30 // Skip the test if API key is not available
31 if os.Getenv("ANTHROPIC_API_KEY") == "" {
32 t.Fatal("ANTHROPIC_API_KEY not set, required for HTTP recording")
33 }
34 }
35
36 // Create HTTP client
37 var client *http.Client
38 if rr != nil {
39 // Scrub API keys from requests for security
40 rr.ScrubReq(func(req *http.Request) error {
41 req.Header.Del("x-api-key")
42 req.Header.Del("anthropic-api-key")
43 return nil
44 })
45 client = rr.Client()
46 } else {
47 client = &http.Client{Transport: http.DefaultTransport}
48 }
49
50 // Create a new agent with the httprr client
51 origWD, err := os.Getwd()
52 if err != nil {
53 t.Fatal(err)
54 }
55 if err := os.Chdir("/"); err != nil {
56 t.Fatal(err)
57 }
58 budget := ant.Budget{MaxResponses: 100}
59 wd, err := os.Getwd()
60 if err != nil {
61 t.Fatal(err)
62 }
63
64 cfg := AgentConfig{
65 Context: ctx,
66 APIKey: os.Getenv("ANTHROPIC_API_KEY"),
67 HTTPC: client,
68 Budget: budget,
69 GitUsername: "Test Agent",
70 GitEmail: "totallyhuman@sketch.dev",
71 SessionID: "test-session-id",
72 ClientGOOS: "linux",
73 ClientGOARCH: "amd64",
74 }
75 agent := NewAgent(cfg)
76 if err := os.Chdir(origWD); err != nil {
77 t.Fatal(err)
78 }
79 err = agent.Init(AgentInit{WorkingDir: wd, NoGit: true})
80 if err != nil {
81 t.Fatal(err)
82 }
83
84 // Setup a test message that will trigger a simple, predictable response
85 userMessage := "What tools are available to you? Please just list them briefly."
86
87 // Send the message to the agent
88 agent.UserMessage(ctx, userMessage)
89
90 // Process a single loop iteration to avoid long-running tests
91 agent.InnerLoop(ctx)
92
93 // Collect responses with a timeout
94 var responses []AgentMessage
95 timeout := time.After(10 * time.Second)
96 done := false
97
98 for !done {
99 select {
100 case <-timeout:
101 t.Log("Timeout reached while waiting for agent responses")
102 done = true
103 default:
104 select {
105 case msg := <-agent.outbox:
106 t.Logf("Received message: Type=%s, EndOfTurn=%v, Content=%q", msg.Type, msg.EndOfTurn, msg.Content)
107 responses = append(responses, msg)
108 if msg.EndOfTurn {
109 done = true
110 }
111 default:
112 // No more messages available right now
113 time.Sleep(100 * time.Millisecond)
114 }
115 }
116 }
117
118 // Verify we got at least one response
119 if len(responses) == 0 {
120 t.Fatal("No responses received from agent")
121 }
122
123 // Log the received responses for debugging
124 t.Logf("Received %d responses", len(responses))
125
126 // Find the final agent response (with EndOfTurn=true)
127 var finalResponse *AgentMessage
128 for i := range responses {
129 if responses[i].Type == AgentMessageType && responses[i].EndOfTurn {
130 finalResponse = &responses[i]
131 break
132 }
133 }
134
135 // Verify we got a final agent response
136 if finalResponse == nil {
137 t.Fatal("No final agent response received")
138 }
139
140 // Check that the response contains tools information
141 if !strings.Contains(strings.ToLower(finalResponse.Content), "tool") {
142 t.Error("Expected response to mention tools")
143 }
144
145 // Count how many tool use messages we received
146 toolUseCount := 0
147 for _, msg := range responses {
148 if msg.Type == ToolUseMessageType {
149 toolUseCount++
150 }
151 }
152
153 t.Logf("Agent used %d tools in its response", toolUseCount)
154}
Philip Zeyliger99a9a022025-04-27 15:15:25 +0000155
156func TestAgentTracksOutstandingCalls(t *testing.T) {
157 agent := &Agent{
158 outstandingLLMCalls: make(map[string]struct{}),
159 outstandingToolCalls: make(map[string]string),
160 }
161
162 // Check initial state
163 if count := agent.OutstandingLLMCallCount(); count != 0 {
164 t.Errorf("Expected 0 outstanding LLM calls, got %d", count)
165 }
166
167 if tools := agent.OutstandingToolCalls(); len(tools) != 0 {
168 t.Errorf("Expected 0 outstanding tool calls, got %d", len(tools))
169 }
170
171 // Add some calls
172 agent.mu.Lock()
173 agent.outstandingLLMCalls["llm1"] = struct{}{}
174 agent.outstandingToolCalls["tool1"] = "bash"
175 agent.outstandingToolCalls["tool2"] = "think"
176 agent.mu.Unlock()
177
178 // Check tracking works
179 if count := agent.OutstandingLLMCallCount(); count != 1 {
180 t.Errorf("Expected 1 outstanding LLM call, got %d", count)
181 }
182
183 tools := agent.OutstandingToolCalls()
184 if len(tools) != 2 {
185 t.Errorf("Expected 2 outstanding tool calls, got %d", len(tools))
186 }
187
188 // Check removal
189 agent.mu.Lock()
190 delete(agent.outstandingLLMCalls, "llm1")
191 delete(agent.outstandingToolCalls, "tool1")
192 agent.mu.Unlock()
193
194 if count := agent.OutstandingLLMCallCount(); count != 0 {
195 t.Errorf("Expected 0 outstanding LLM calls after removal, got %d", count)
196 }
197
198 tools = agent.OutstandingToolCalls()
199 if len(tools) != 1 {
200 t.Errorf("Expected 1 outstanding tool call after removal, got %d", len(tools))
201 }
202
203 if tools[0] != "think" {
204 t.Errorf("Expected 'think' tool remaining, got %s", tools[0])
205 }
206}