blob: 0e5eb439f720ad8134302c3f392f2052a187f2c7 [file] [log] [blame]
Earl Lee2e463fb2025-04-17 11:22:22 -07001//go:build goexperiment.synctest
2
3package loop
4
5import (
6 "context"
Sean McCullough3871a092025-05-05 21:54:56 +00007 "encoding/json"
Earl Lee2e463fb2025-04-17 11:22:22 -07008 "fmt"
Earl Lee2e463fb2025-04-17 11:22:22 -07009 "testing"
10 "testing/synctest"
11
Sean McCullough3871a092025-05-05 21:54:56 +000012 "sketch.dev/llm"
13 "sketch.dev/llm/conversation"
Earl Lee2e463fb2025-04-17 11:22:22 -070014)
15
16func TestLoop_OneTurn_Basic(t *testing.T) {
17 synctest.Run(func() {
18 mockConvo := NewMockConvo(t)
19
20 agent := &Agent{
Sean McCullough3871a092025-05-05 21:54:56 +000021 convo: mockConvo,
22 inbox: make(chan string, 1),
Earl Lee2e463fb2025-04-17 11:22:22 -070023 }
Sean McCullough3871a092025-05-05 21:54:56 +000024 agent.stateMachine = NewStateMachine()
25 userMsg := llm.UserStringMessage("hi")
26 userMsgResponse := &llm.Response{}
Earl Lee2e463fb2025-04-17 11:22:22 -070027 mockConvo.ExpectCall("SendMessage", userMsg).Return(userMsgResponse, nil)
28
29 ctx, cancel := context.WithCancel(context.Background())
30 defer cancel()
31
32 go agent.Loop(ctx)
33
34 agent.UserMessage(ctx, "hi")
35
36 // This makes sure the SendMessage call happens before we assert the expectations.
37 synctest.Wait()
38
39 // Verify results
40 mockConvo.AssertExpectations(t)
41 })
42}
43
44func TestLoop_ToolCall_Basic(t *testing.T) {
45 synctest.Run(func() {
46 mockConvo := NewMockConvo(t)
47
48 agent := &Agent{
Sean McCullough3871a092025-05-05 21:54:56 +000049 convo: mockConvo,
50 inbox: make(chan string, 1),
Earl Lee2e463fb2025-04-17 11:22:22 -070051 }
Sean McCullough3871a092025-05-05 21:54:56 +000052 agent.stateMachine = NewStateMachine()
53 userMsg := llm.Message{
54 Role: llm.MessageRoleUser,
55 Content: []llm.Content{
56 {Type: llm.ContentTypeText, Text: "hi"},
Earl Lee2e463fb2025-04-17 11:22:22 -070057 },
58 }
Sean McCullough3871a092025-05-05 21:54:56 +000059 userMsgResponse := &llm.Response{
60 StopReason: llm.StopReasonToolUse,
61 Content: []llm.Content{
Earl Lee2e463fb2025-04-17 11:22:22 -070062 {
Sean McCullough3871a092025-05-05 21:54:56 +000063 Type: llm.ContentTypeToolUse,
Earl Lee2e463fb2025-04-17 11:22:22 -070064 ID: "tool1",
65 ToolName: "test_tool",
66 ToolInput: []byte(`{"param":"value"}`),
67 },
68 },
Sean McCullough3871a092025-05-05 21:54:56 +000069 Usage: llm.Usage{
Earl Lee2e463fb2025-04-17 11:22:22 -070070 InputTokens: 100,
71 OutputTokens: 200,
72 },
73 }
74
Sean McCullough3871a092025-05-05 21:54:56 +000075 toolUseContents := []llm.Content{
Earl Lee2e463fb2025-04-17 11:22:22 -070076 {
Sean McCullough3871a092025-05-05 21:54:56 +000077 Type: llm.ContentTypeToolResult,
Earl Lee2e463fb2025-04-17 11:22:22 -070078 ToolUseID: "tool1",
79 Text: "",
80 ToolResult: "This is a tool result",
81 ToolError: false,
82 },
83 }
Sean McCullough3871a092025-05-05 21:54:56 +000084 toolUseResultsMsg := llm.Message{
85 Role: llm.MessageRoleUser,
Earl Lee2e463fb2025-04-17 11:22:22 -070086 Content: toolUseContents,
87 }
Sean McCullough3871a092025-05-05 21:54:56 +000088 toolUseResponse := &llm.Response{
89 StopReason: llm.StopReasonEndTurn,
90 Content: []llm.Content{
Earl Lee2e463fb2025-04-17 11:22:22 -070091 {
Sean McCullough3871a092025-05-05 21:54:56 +000092 Type: llm.ContentTypeText,
Earl Lee2e463fb2025-04-17 11:22:22 -070093 Text: "tool_use contents accepted",
94 },
95 },
Sean McCullough3871a092025-05-05 21:54:56 +000096 Usage: llm.Usage{
Earl Lee2e463fb2025-04-17 11:22:22 -070097 InputTokens: 50,
98 OutputTokens: 75,
99 },
100 }
101
102 // Set up the mock response for tool results
103 mockConvo.ExpectCall("SendMessage", userMsg).Return(userMsgResponse, nil)
104 mockConvo.ExpectCall("ToolResultContents", userMsgResponse).Return(toolUseContents, nil)
105 mockConvo.ExpectCall("SendMessage", toolUseResultsMsg).Return(toolUseResponse, nil)
106
107 ctx, cancel := context.WithCancel(context.Background())
108 defer cancel()
109
110 go agent.Loop(ctx)
111
112 agent.UserMessage(ctx, "hi")
113
114 // This makes sure the SendMessage call happens before we assert the expectations.
115 synctest.Wait()
116
117 // Verify results
118 mockConvo.AssertExpectations(t)
119 })
120}
121
122func TestLoop_ToolCall_UserCancelsDuringToolResultContents(t *testing.T) {
123 synctest.Run(func() {
124 mockConvo := NewMockConvo(t)
125
126 agent := &Agent{
Sean McCullough3871a092025-05-05 21:54:56 +0000127 convo: mockConvo,
128 inbox: make(chan string, 1),
Earl Lee2e463fb2025-04-17 11:22:22 -0700129 }
Sean McCullough3871a092025-05-05 21:54:56 +0000130 agent.stateMachine = NewStateMachine()
131 userMsg := llm.UserStringMessage("hi")
132 userMsgResponse := &llm.Response{
133 StopReason: llm.StopReasonToolUse,
134 Content: []llm.Content{
Earl Lee2e463fb2025-04-17 11:22:22 -0700135 {
Sean McCullough3871a092025-05-05 21:54:56 +0000136 Type: llm.ContentTypeToolUse,
Earl Lee2e463fb2025-04-17 11:22:22 -0700137 ID: "tool1",
138 ToolName: "test_tool",
139 ToolInput: []byte(`{"param":"value"}`),
140 },
141 },
Sean McCullough3871a092025-05-05 21:54:56 +0000142 Usage: llm.Usage{
Earl Lee2e463fb2025-04-17 11:22:22 -0700143 InputTokens: 100,
144 OutputTokens: 200,
145 },
146 }
Sean McCullough3871a092025-05-05 21:54:56 +0000147 toolUseResultsMsg := llm.UserStringMessage(cancelToolUseMessage)
148 toolUseResponse := &llm.Response{
149 StopReason: llm.StopReasonEndTurn,
150 Content: []llm.Content{
Earl Lee2e463fb2025-04-17 11:22:22 -0700151 {
Sean McCullough3871a092025-05-05 21:54:56 +0000152 Type: llm.ContentTypeText,
Earl Lee2e463fb2025-04-17 11:22:22 -0700153 Text: "tool_use contents accepted",
154 },
155 },
Sean McCullough3871a092025-05-05 21:54:56 +0000156 Usage: llm.Usage{
Earl Lee2e463fb2025-04-17 11:22:22 -0700157 InputTokens: 50,
158 OutputTokens: 75,
159 },
160 }
161
162 // Set up the mock response for tool results
163
164 userCancelError := fmt.Errorf("user canceled")
165 // This allows the test to block the InnerLoop goroutine that invokes ToolResultsContents so
166 // we can force its context to cancel while it's blocked.
167 waitForToolResultContents := make(chan any, 1)
168
169 mockConvo.ExpectCall("SendMessage", userMsg).Return(userMsgResponse, nil)
170 mockConvo.ExpectCall("ToolResultContents",
Sean McCullough3871a092025-05-05 21:54:56 +0000171 userMsgResponse).BlockAndReturn(waitForToolResultContents, []llm.Content{}, userCancelError)
Earl Lee2e463fb2025-04-17 11:22:22 -0700172 mockConvo.ExpectCall("SendMessage", toolUseResultsMsg).Return(toolUseResponse, nil)
173
174 ctx, cancel := context.WithCancel(context.Background())
175 defer cancel()
176
177 go agent.Loop(ctx)
178
179 // This puts one message into agent.inbox, which should un-block the GatherMessages call
180 // at the top of agent.InnerLoop.
181 agent.UserMessage(ctx, "hi")
182
183 // This makes sure the first SendMessage call happens before we proceed with the cancel.
184 synctest.Wait()
185
186 // The goroutine executing ToolResultContents call should be blocked, simulating a long
187 // running operation that the user wishes to cancel while it's still in progress.
188 // This call invokes that InnerLoop context's cancel() func.
Sean McCullough3871a092025-05-05 21:54:56 +0000189 agent.CancelTurn(userCancelError)
Earl Lee2e463fb2025-04-17 11:22:22 -0700190
191 // This tells the goroutine that's in mockConvo.ToolResultContents to proceed.
192 waitForToolResultContents <- nil
193
194 // This makes sure the final SendMessage call happens before we assert the expectations.
195 synctest.Wait()
196
197 // Verify results
198 mockConvo.AssertExpectations(t)
199 })
200}
201
202func TestLoop_ToolCall_UserCancelsDuringToolResultContents_AndContinuesToChat(t *testing.T) {
203 synctest.Run(func() {
204 mockConvo := NewMockConvo(t)
205
206 agent := &Agent{
Sean McCullough3871a092025-05-05 21:54:56 +0000207 convo: mockConvo,
208 inbox: make(chan string, 1),
Earl Lee2e463fb2025-04-17 11:22:22 -0700209 }
Sean McCullough3871a092025-05-05 21:54:56 +0000210 agent.stateMachine = NewStateMachine()
211 userMsg := llm.Message{
212 Role: llm.MessageRoleUser,
213 Content: []llm.Content{
214 {Type: llm.ContentTypeText, Text: "hi"},
Earl Lee2e463fb2025-04-17 11:22:22 -0700215 },
216 }
Sean McCullough3871a092025-05-05 21:54:56 +0000217 userMsgResponse := &llm.Response{
218 StopReason: llm.StopReasonToolUse,
219 Content: []llm.Content{
Earl Lee2e463fb2025-04-17 11:22:22 -0700220 {
Sean McCullough3871a092025-05-05 21:54:56 +0000221 Type: llm.ContentTypeToolUse,
Earl Lee2e463fb2025-04-17 11:22:22 -0700222 ID: "tool1",
223 ToolName: "test_tool",
224 ToolInput: []byte(`{"param":"value"}`),
225 },
226 },
Sean McCullough3871a092025-05-05 21:54:56 +0000227 Usage: llm.Usage{
Earl Lee2e463fb2025-04-17 11:22:22 -0700228 InputTokens: 100,
229 OutputTokens: 200,
230 },
231 }
Sean McCullough3871a092025-05-05 21:54:56 +0000232 toolUseResultsMsg := llm.Message{
233 Role: llm.MessageRoleUser,
234 Content: []llm.Content{
235 {Type: llm.ContentTypeText, Text: cancelToolUseMessage},
Earl Lee2e463fb2025-04-17 11:22:22 -0700236 },
237 }
Sean McCullough3871a092025-05-05 21:54:56 +0000238 toolUseResultResponse := &llm.Response{
239 StopReason: llm.StopReasonEndTurn,
240 Content: []llm.Content{
Earl Lee2e463fb2025-04-17 11:22:22 -0700241 {
Sean McCullough3871a092025-05-05 21:54:56 +0000242 Type: llm.ContentTypeText,
Earl Lee2e463fb2025-04-17 11:22:22 -0700243 Text: "awaiting further instructions",
244 },
245 },
Sean McCullough3871a092025-05-05 21:54:56 +0000246 Usage: llm.Usage{
Earl Lee2e463fb2025-04-17 11:22:22 -0700247 InputTokens: 50,
248 OutputTokens: 75,
249 },
250 }
Sean McCullough3871a092025-05-05 21:54:56 +0000251 userFollowUpMsg := llm.Message{
252 Role: llm.MessageRoleUser,
253 Content: []llm.Content{
254 {Type: llm.ContentTypeText, Text: "that was the wrong thing to do"},
Earl Lee2e463fb2025-04-17 11:22:22 -0700255 },
256 }
Sean McCullough3871a092025-05-05 21:54:56 +0000257 userFollowUpResponse := &llm.Response{
258 StopReason: llm.StopReasonEndTurn,
259 Content: []llm.Content{
Earl Lee2e463fb2025-04-17 11:22:22 -0700260 {
Sean McCullough3871a092025-05-05 21:54:56 +0000261 Type: llm.ContentTypeText,
Earl Lee2e463fb2025-04-17 11:22:22 -0700262 Text: "sorry about that",
263 },
264 },
Sean McCullough3871a092025-05-05 21:54:56 +0000265 Usage: llm.Usage{
Earl Lee2e463fb2025-04-17 11:22:22 -0700266 InputTokens: 100,
267 OutputTokens: 200,
268 },
269 }
270 // Set up the mock response for tool results
271
272 userCancelError := fmt.Errorf("user canceled")
273 // This allows the test to block the InnerLoop goroutine that invokes ToolResultsContents so
274 // we can force its context to cancel while it's blocked.
275 waitForToolResultContents := make(chan any, 1)
276
277 mockConvo.ExpectCall("SendMessage", userMsg).Return(userMsgResponse, nil)
278 mockConvo.ExpectCall("ToolResultContents",
Sean McCullough3871a092025-05-05 21:54:56 +0000279 userMsgResponse).BlockAndReturn(waitForToolResultContents, []llm.Content{}, userCancelError)
Earl Lee2e463fb2025-04-17 11:22:22 -0700280 mockConvo.ExpectCall("SendMessage", toolUseResultsMsg).Return(toolUseResultResponse, nil)
281
282 mockConvo.ExpectCall("SendMessage", userFollowUpMsg).Return(userFollowUpResponse, nil)
283
284 ctx, cancel := context.WithCancel(context.Background())
285 defer cancel()
286
287 go agent.Loop(ctx)
288
289 // This puts one message into agent.inbox, which should un-block the GatherMessages call
290 // at the top of agent.InnerLoop.
291 agent.UserMessage(ctx, "hi")
292
293 // This makes sure the first SendMessage call happens before we proceed with the cancel.
294 synctest.Wait()
295
296 // The goroutine executing ToolResultContents call should be blocked, simulating a long
297 // running operation that the user wishes to cancel while it's still in progress.
298 // This call invokes that InnerLoop context's cancel() func.
Sean McCullough3871a092025-05-05 21:54:56 +0000299 agent.CancelTurn(userCancelError)
Earl Lee2e463fb2025-04-17 11:22:22 -0700300
301 // This tells the goroutine that's in mockConvo.ToolResultContents to proceed.
302 waitForToolResultContents <- nil
303
304 // Allow InnerLoop to handle the cancellation logic before continuing the conversation.
305 synctest.Wait()
306
307 agent.UserMessage(ctx, "that was the wrong thing to do")
308
309 synctest.Wait()
310
311 // Verify results
312 mockConvo.AssertExpectations(t)
313 })
314}
315
Sean McCullough3871a092025-05-05 21:54:56 +0000316func TestProcessTurn_UserCancels(t *testing.T) {
Earl Lee2e463fb2025-04-17 11:22:22 -0700317 synctest.Run(func() {
318 mockConvo := NewMockConvo(t)
319
320 agent := &Agent{
Sean McCullough3871a092025-05-05 21:54:56 +0000321 convo: mockConvo,
322 inbox: make(chan string, 1),
Earl Lee2e463fb2025-04-17 11:22:22 -0700323 }
Sean McCullough3871a092025-05-05 21:54:56 +0000324 agent.stateMachine = NewStateMachine()
Earl Lee2e463fb2025-04-17 11:22:22 -0700325
326 // Define test message
327 // This simulates something that would result in claude responding with tool_use responses.
Sean McCullough3871a092025-05-05 21:54:56 +0000328 userMsg := llm.UserStringMessage("use test_tool for something")
Earl Lee2e463fb2025-04-17 11:22:22 -0700329 // Mock initial response with tool use
Sean McCullough3871a092025-05-05 21:54:56 +0000330 userMsgResponse := &llm.Response{
331 StopReason: llm.StopReasonToolUse,
332 Content: []llm.Content{
Earl Lee2e463fb2025-04-17 11:22:22 -0700333 {
Sean McCullough3871a092025-05-05 21:54:56 +0000334 Type: llm.ContentTypeToolUse,
Earl Lee2e463fb2025-04-17 11:22:22 -0700335 ID: "tool1",
336 ToolName: "test_tool",
337 ToolInput: []byte(`{"param":"value"}`),
338 },
339 },
Sean McCullough3871a092025-05-05 21:54:56 +0000340 Usage: llm.Usage{
Earl Lee2e463fb2025-04-17 11:22:22 -0700341 InputTokens: 100,
342 OutputTokens: 200,
343 },
344 }
Sean McCullough3871a092025-05-05 21:54:56 +0000345 canceledToolUseContents := []llm.Content{
Earl Lee2e463fb2025-04-17 11:22:22 -0700346 {
Sean McCullough3871a092025-05-05 21:54:56 +0000347 Type: llm.ContentTypeToolResult,
Earl Lee2e463fb2025-04-17 11:22:22 -0700348 ToolUseID: "tool1",
349 ToolError: true,
350 ToolResult: "user canceled this tool_use",
351 },
352 }
Sean McCullough3871a092025-05-05 21:54:56 +0000353 canceledToolUseMsg := llm.Message{
354 Role: llm.MessageRoleUser,
355 Content: append(canceledToolUseContents, llm.StringContent(cancelToolUseMessage)),
Earl Lee2e463fb2025-04-17 11:22:22 -0700356 }
357 // Set up expected behaviors
358 waitForSendMessage := make(chan any)
359 mockConvo.ExpectCall("SendMessage", userMsg).BlockAndReturn(waitForSendMessage, userMsgResponse, nil)
360
361 mockConvo.ExpectCall("ToolResultCancelContents", userMsgResponse).Return(canceledToolUseContents, nil)
362 mockConvo.ExpectCall("SendMessage", canceledToolUseMsg).Return(
Sean McCullough3871a092025-05-05 21:54:56 +0000363 &llm.Response{
364 StopReason: llm.StopReasonToolUse,
Earl Lee2e463fb2025-04-17 11:22:22 -0700365 }, nil)
366
367 ctx, cancel := context.WithCancelCause(context.Background())
368
Sean McCullough3871a092025-05-05 21:54:56 +0000369 // Run one iteration of the processing loop
370 go agent.processTurn(ctx)
Earl Lee2e463fb2025-04-17 11:22:22 -0700371
372 // Send a message to the agent's inbox
373 agent.UserMessage(ctx, "use test_tool for something")
374
375 synctest.Wait()
376
377 // cancel the context before we even call InnerLoop with it, so it will
378 // be .Done() the first time it checks.
379 cancel(fmt.Errorf("user canceled"))
380
381 // unblock the InnerLoop goroutine's SendMessage call
382 waitForSendMessage <- nil
383
384 synctest.Wait()
385
386 // Verify results
387 mockConvo.AssertExpectations(t)
Earl Lee2e463fb2025-04-17 11:22:22 -0700388 })
389}
390
Sean McCullough3871a092025-05-05 21:54:56 +0000391func TestProcessTurn_UserDoesNotCancel(t *testing.T) {
Earl Lee2e463fb2025-04-17 11:22:22 -0700392 mockConvo := NewMockConvo(t)
393
394 agent := &Agent{
Sean McCullough3871a092025-05-05 21:54:56 +0000395 convo: mockConvo,
396 inbox: make(chan string, 100),
Earl Lee2e463fb2025-04-17 11:22:22 -0700397 }
Sean McCullough3871a092025-05-05 21:54:56 +0000398 agent.stateMachine = NewStateMachine()
Earl Lee2e463fb2025-04-17 11:22:22 -0700399
400 // Define test message
401 // This simulates something that would result in claude
402 // responding with tool_use responses.
403 testMsg := "use test_tool for something"
404
405 // Mock initial response with tool use
Sean McCullough3871a092025-05-05 21:54:56 +0000406 initialResponse := &llm.Response{
407 StopReason: llm.StopReasonToolUse,
408 Content: []llm.Content{
Earl Lee2e463fb2025-04-17 11:22:22 -0700409 {
Sean McCullough3871a092025-05-05 21:54:56 +0000410 Type: llm.ContentTypeToolUse,
Earl Lee2e463fb2025-04-17 11:22:22 -0700411 ID: "tool1",
412 ToolName: "test_tool",
413 ToolInput: []byte(`{"param":"value"}`),
414 },
415 },
Sean McCullough3871a092025-05-05 21:54:56 +0000416 Usage: llm.Usage{
Earl Lee2e463fb2025-04-17 11:22:22 -0700417 InputTokens: 100,
418 OutputTokens: 200,
419 },
420 }
421
422 // Set up expected behaviors
423 mockConvo.ExpectCall("SendMessage", nil).Return(initialResponse, nil)
424
Sean McCullough3871a092025-05-05 21:54:56 +0000425 toolUseContents := []llm.Content{
Earl Lee2e463fb2025-04-17 11:22:22 -0700426 {
Sean McCullough3871a092025-05-05 21:54:56 +0000427 Type: llm.ContentTypeToolResult,
Earl Lee2e463fb2025-04-17 11:22:22 -0700428 ToolUseID: "tool1",
429 Text: "",
430 ToolResult: "This is a tool result",
431 ToolError: false,
432 },
433 }
Sean McCullough3871a092025-05-05 21:54:56 +0000434 toolUseResponse := &llm.Response{
435 // StopReason: llm.StopReasonEndTurn,
436 Content: []llm.Content{
Earl Lee2e463fb2025-04-17 11:22:22 -0700437 {
Sean McCullough3871a092025-05-05 21:54:56 +0000438 Type: llm.ContentTypeText,
Earl Lee2e463fb2025-04-17 11:22:22 -0700439 Text: "tool_use contents accepted",
440 },
441 },
Sean McCullough3871a092025-05-05 21:54:56 +0000442 Usage: llm.Usage{
Earl Lee2e463fb2025-04-17 11:22:22 -0700443 InputTokens: 50,
444 OutputTokens: 75,
445 },
446 }
447
448 ctx, cancel := context.WithCancel(context.Background())
449 defer cancel()
450
451 // Setting up the mock response for tool results
452 mockConvo.ExpectCall("ToolResultContents", initialResponse).Return(toolUseContents, nil)
453 mockConvo.ExpectCall("SendMessage", nil).Return(toolUseResponse, nil)
Sean McCullough3871a092025-05-05 21:54:56 +0000454 // mockConvo, as a mock, isn't able to run the loop in conversation.Convo that makes this agent.OnToolResult callback.
Earl Lee2e463fb2025-04-17 11:22:22 -0700455 // So we "mock" it out here by calling it explicitly, in order to make sure it calls .pushToOutbox with this message.
456 // This is not a good situation.
Sean McCullough3871a092025-05-05 21:54:56 +0000457 // conversation.Convo and loop.Agent seem to be excessively coupled, and aware of each others' internal details.
458 // TODO: refactor (or clarify in docs somewhere) the boundary between what conversation.Convo is responsible
Earl Lee2e463fb2025-04-17 11:22:22 -0700459 // for vs what loop.Agent is responsible for.
Sean McCullough3871a092025-05-05 21:54:56 +0000460 antConvo := &conversation.Convo{}
Earl Lee2e463fb2025-04-17 11:22:22 -0700461 res := ""
Sean McCullough3871a092025-05-05 21:54:56 +0000462 agent.OnToolResult(ctx, antConvo, "tool1", "test_tool", json.RawMessage(`{"param":"value"}`), toolUseContents[0], &res, nil)
Earl Lee2e463fb2025-04-17 11:22:22 -0700463
464 // Send a message to the agent's inbox
465 agent.UserMessage(ctx, testMsg)
466
Sean McCullough3871a092025-05-05 21:54:56 +0000467 // Run one iteration of the processing loop
468 agent.processTurn(ctx)
Earl Lee2e463fb2025-04-17 11:22:22 -0700469
470 // Verify results
471 mockConvo.AssertExpectations(t)
Earl Lee2e463fb2025-04-17 11:22:22 -0700472}