blob: f79e73ef5d63fc4385fa4a16e8dce99931bdfeb4 [file] [log] [blame]
Earl Lee2e463fb2025-04-17 11:22:22 -07001//go:build goexperiment.synctest
2
3package loop
4
5import (
6 "context"
7 "fmt"
8 "strings"
9 "testing"
10 "testing/synctest"
11
12 "sketch.dev/ant"
13)
14
15func TestLoop_OneTurn_Basic(t *testing.T) {
16 synctest.Run(func() {
17 mockConvo := NewMockConvo(t)
18
19 agent := &Agent{
20 convo: mockConvo,
21 inbox: make(chan string, 1),
22 outbox: make(chan AgentMessage, 1),
23 }
24 userMsg := ant.Message{
25 Role: "user",
26 Content: []ant.Content{
27 {Type: "text", Text: "hi"},
28 },
29 }
30 userMsgResponse := &ant.MessageResponse{}
31 mockConvo.ExpectCall("SendMessage", userMsg).Return(userMsgResponse, nil)
32
33 ctx, cancel := context.WithCancel(context.Background())
34 defer cancel()
35
36 go agent.Loop(ctx)
37
38 agent.UserMessage(ctx, "hi")
39
40 // This makes sure the SendMessage call happens before we assert the expectations.
41 synctest.Wait()
42
43 // Verify results
44 mockConvo.AssertExpectations(t)
45 })
46}
47
48func TestLoop_ToolCall_Basic(t *testing.T) {
49 synctest.Run(func() {
50 mockConvo := NewMockConvo(t)
51
52 agent := &Agent{
53 convo: mockConvo,
54 inbox: make(chan string, 1),
55 outbox: make(chan AgentMessage, 1),
56 }
57 userMsg := ant.Message{
58 Role: "user",
59 Content: []ant.Content{
60 {Type: "text", Text: "hi"},
61 },
62 }
63 userMsgResponse := &ant.MessageResponse{
64 StopReason: ant.StopReasonToolUse,
65 Content: []ant.Content{
66 {
67 Type: ant.ContentTypeToolUse,
68 ID: "tool1",
69 ToolName: "test_tool",
70 ToolInput: []byte(`{"param":"value"}`),
71 },
72 },
73 Usage: ant.Usage{
74 InputTokens: 100,
75 OutputTokens: 200,
76 },
77 }
78
79 toolUseContents := []ant.Content{
80 {
81 Type: ant.ContentTypeToolResult,
82 ToolUseID: "tool1",
83 Text: "",
84 ToolResult: "This is a tool result",
85 ToolError: false,
86 },
87 }
88 toolUseResultsMsg := ant.Message{
89 Role: "user",
90 Content: toolUseContents,
91 }
92 toolUseResponse := &ant.MessageResponse{
93 StopReason: ant.StopReasonEndTurn,
94 Content: []ant.Content{
95 {
96 Type: ant.ContentTypeText,
97 Text: "tool_use contents accepted",
98 },
99 },
100 Usage: ant.Usage{
101 InputTokens: 50,
102 OutputTokens: 75,
103 },
104 }
105
106 // Set up the mock response for tool results
107 mockConvo.ExpectCall("SendMessage", userMsg).Return(userMsgResponse, nil)
108 mockConvo.ExpectCall("ToolResultContents", userMsgResponse).Return(toolUseContents, nil)
109 mockConvo.ExpectCall("SendMessage", toolUseResultsMsg).Return(toolUseResponse, nil)
110
111 ctx, cancel := context.WithCancel(context.Background())
112 defer cancel()
113
114 go agent.Loop(ctx)
115
116 agent.UserMessage(ctx, "hi")
117
118 // This makes sure the SendMessage call happens before we assert the expectations.
119 synctest.Wait()
120
121 // Verify results
122 mockConvo.AssertExpectations(t)
123 })
124}
125
126func TestLoop_ToolCall_UserCancelsDuringToolResultContents(t *testing.T) {
127 synctest.Run(func() {
128 mockConvo := NewMockConvo(t)
129
130 agent := &Agent{
131 convo: mockConvo,
132 inbox: make(chan string, 1),
133 outbox: make(chan AgentMessage, 10), // don't let anything block on outbox.
134 }
135 userMsg := ant.Message{
136 Role: "user",
137 Content: []ant.Content{
138 {Type: "text", Text: "hi"},
139 },
140 }
141 userMsgResponse := &ant.MessageResponse{
142 StopReason: ant.StopReasonToolUse,
143 Content: []ant.Content{
144 {
145 Type: ant.ContentTypeToolUse,
146 ID: "tool1",
147 ToolName: "test_tool",
148 ToolInput: []byte(`{"param":"value"}`),
149 },
150 },
151 Usage: ant.Usage{
152 InputTokens: 100,
153 OutputTokens: 200,
154 },
155 }
156 toolUseResultsMsg := ant.Message{
157 Role: "user",
158 Content: []ant.Content{
159 {Type: "text", Text: cancelToolUseMessage},
160 },
161 }
162 toolUseResponse := &ant.MessageResponse{
163 StopReason: ant.StopReasonEndTurn,
164 Content: []ant.Content{
165 {
166 Type: ant.ContentTypeText,
167 Text: "tool_use contents accepted",
168 },
169 },
170 Usage: ant.Usage{
171 InputTokens: 50,
172 OutputTokens: 75,
173 },
174 }
175
176 // Set up the mock response for tool results
177
178 userCancelError := fmt.Errorf("user canceled")
179 // This allows the test to block the InnerLoop goroutine that invokes ToolResultsContents so
180 // we can force its context to cancel while it's blocked.
181 waitForToolResultContents := make(chan any, 1)
182
183 mockConvo.ExpectCall("SendMessage", userMsg).Return(userMsgResponse, nil)
184 mockConvo.ExpectCall("ToolResultContents",
185 userMsgResponse).BlockAndReturn(waitForToolResultContents, []ant.Content{}, userCancelError)
186 mockConvo.ExpectCall("SendMessage", toolUseResultsMsg).Return(toolUseResponse, nil)
187
188 ctx, cancel := context.WithCancel(context.Background())
189 defer cancel()
190
191 go agent.Loop(ctx)
192
193 // This puts one message into agent.inbox, which should un-block the GatherMessages call
194 // at the top of agent.InnerLoop.
195 agent.UserMessage(ctx, "hi")
196
197 // This makes sure the first SendMessage call happens before we proceed with the cancel.
198 synctest.Wait()
199
200 // The goroutine executing ToolResultContents call should be blocked, simulating a long
201 // running operation that the user wishes to cancel while it's still in progress.
202 // This call invokes that InnerLoop context's cancel() func.
203 agent.CancelInnerLoop(userCancelError)
204
205 // This tells the goroutine that's in mockConvo.ToolResultContents to proceed.
206 waitForToolResultContents <- nil
207
208 // This makes sure the final SendMessage call happens before we assert the expectations.
209 synctest.Wait()
210
211 // Verify results
212 mockConvo.AssertExpectations(t)
213 })
214}
215
216func TestLoop_ToolCall_UserCancelsDuringToolResultContents_AndContinuesToChat(t *testing.T) {
217 synctest.Run(func() {
218 mockConvo := NewMockConvo(t)
219
220 agent := &Agent{
221 convo: mockConvo,
222 inbox: make(chan string, 1),
223 outbox: make(chan AgentMessage, 10), // don't let anything block on outbox.
224 }
225 userMsg := ant.Message{
226 Role: "user",
227 Content: []ant.Content{
228 {Type: "text", Text: "hi"},
229 },
230 }
231 userMsgResponse := &ant.MessageResponse{
232 StopReason: ant.StopReasonToolUse,
233 Content: []ant.Content{
234 {
235 Type: ant.ContentTypeToolUse,
236 ID: "tool1",
237 ToolName: "test_tool",
238 ToolInput: []byte(`{"param":"value"}`),
239 },
240 },
241 Usage: ant.Usage{
242 InputTokens: 100,
243 OutputTokens: 200,
244 },
245 }
246 toolUseResultsMsg := ant.Message{
247 Role: "user",
248 Content: []ant.Content{
249 {Type: "text", Text: cancelToolUseMessage},
250 },
251 }
252 toolUseResultResponse := &ant.MessageResponse{
253 StopReason: ant.StopReasonEndTurn,
254 Content: []ant.Content{
255 {
256 Type: ant.ContentTypeText,
257 Text: "awaiting further instructions",
258 },
259 },
260 Usage: ant.Usage{
261 InputTokens: 50,
262 OutputTokens: 75,
263 },
264 }
265 userFollowUpMsg := ant.Message{
266 Role: "user",
267 Content: []ant.Content{
268 {Type: "text", Text: "that was the wrong thing to do"},
269 },
270 }
271 userFollowUpResponse := &ant.MessageResponse{
272 StopReason: ant.StopReasonEndTurn,
273 Content: []ant.Content{
274 {
275 Type: ant.ContentTypeText,
276 Text: "sorry about that",
277 },
278 },
279 Usage: ant.Usage{
280 InputTokens: 100,
281 OutputTokens: 200,
282 },
283 }
284 // Set up the mock response for tool results
285
286 userCancelError := fmt.Errorf("user canceled")
287 // This allows the test to block the InnerLoop goroutine that invokes ToolResultsContents so
288 // we can force its context to cancel while it's blocked.
289 waitForToolResultContents := make(chan any, 1)
290
291 mockConvo.ExpectCall("SendMessage", userMsg).Return(userMsgResponse, nil)
292 mockConvo.ExpectCall("ToolResultContents",
293 userMsgResponse).BlockAndReturn(waitForToolResultContents, []ant.Content{}, userCancelError)
294 mockConvo.ExpectCall("SendMessage", toolUseResultsMsg).Return(toolUseResultResponse, nil)
295
296 mockConvo.ExpectCall("SendMessage", userFollowUpMsg).Return(userFollowUpResponse, nil)
297
298 ctx, cancel := context.WithCancel(context.Background())
299 defer cancel()
300
301 go agent.Loop(ctx)
302
303 // This puts one message into agent.inbox, which should un-block the GatherMessages call
304 // at the top of agent.InnerLoop.
305 agent.UserMessage(ctx, "hi")
306
307 // This makes sure the first SendMessage call happens before we proceed with the cancel.
308 synctest.Wait()
309
310 // The goroutine executing ToolResultContents call should be blocked, simulating a long
311 // running operation that the user wishes to cancel while it's still in progress.
312 // This call invokes that InnerLoop context's cancel() func.
313 agent.CancelInnerLoop(userCancelError)
314
315 // This tells the goroutine that's in mockConvo.ToolResultContents to proceed.
316 waitForToolResultContents <- nil
317
318 // Allow InnerLoop to handle the cancellation logic before continuing the conversation.
319 synctest.Wait()
320
321 agent.UserMessage(ctx, "that was the wrong thing to do")
322
323 synctest.Wait()
324
325 // Verify results
326 mockConvo.AssertExpectations(t)
327 })
328}
329
330func TestInnerLoop_UserCancels(t *testing.T) {
331 synctest.Run(func() {
332 mockConvo := NewMockConvo(t)
333
334 agent := &Agent{
335 convo: mockConvo,
336 inbox: make(chan string, 1),
337 outbox: make(chan AgentMessage, 10), // don't block on outbox
338 }
339
340 // Define test message
341 // This simulates something that would result in claude responding with tool_use responses.
342 userMsg := ant.Message{
343 Role: "user",
344 Content: []ant.Content{
345 {Type: "text", Text: "use test_tool for something"},
346 },
347 }
348 // Mock initial response with tool use
349 userMsgResponse := &ant.MessageResponse{
350 StopReason: ant.StopReasonToolUse,
351 Content: []ant.Content{
352 {
353 Type: ant.ContentTypeToolUse,
354 ID: "tool1",
355 ToolName: "test_tool",
356 ToolInput: []byte(`{"param":"value"}`),
357 },
358 },
359 Usage: ant.Usage{
360 InputTokens: 100,
361 OutputTokens: 200,
362 },
363 }
364 canceledToolUseContents := []ant.Content{
365 {
366 Type: ant.ContentTypeToolResult,
367 ToolUseID: "tool1",
368 ToolError: true,
369 ToolResult: "user canceled this tool_use",
370 },
371 }
372 canceledToolUseMsg := ant.Message{
373 Role: "user",
374 Content: append(canceledToolUseContents, ant.Content{
375 Type: ant.ContentTypeText,
376 Text: cancelToolUseMessage,
377 }),
378 }
379 // Set up expected behaviors
380 waitForSendMessage := make(chan any)
381 mockConvo.ExpectCall("SendMessage", userMsg).BlockAndReturn(waitForSendMessage, userMsgResponse, nil)
382
383 mockConvo.ExpectCall("ToolResultCancelContents", userMsgResponse).Return(canceledToolUseContents, nil)
384 mockConvo.ExpectCall("SendMessage", canceledToolUseMsg).Return(
385 &ant.MessageResponse{
386 StopReason: ant.StopReasonToolUse,
387 }, nil)
388
389 ctx, cancel := context.WithCancelCause(context.Background())
390
391 // Run one iteration of InnerLoop
392 go agent.InnerLoop(ctx)
393
394 // Send a message to the agent's inbox
395 agent.UserMessage(ctx, "use test_tool for something")
396
397 synctest.Wait()
398
399 // cancel the context before we even call InnerLoop with it, so it will
400 // be .Done() the first time it checks.
401 cancel(fmt.Errorf("user canceled"))
402
403 // unblock the InnerLoop goroutine's SendMessage call
404 waitForSendMessage <- nil
405
406 synctest.Wait()
407
408 // Verify results
409 mockConvo.AssertExpectations(t)
410
411 // Get all messages from outbox and verify their types/content
412 var messages []AgentMessage
413
414 // Collect messages until outbox is empty or we have 10 messages
415 for i := 0; i < 10; i++ {
416 select {
417 case msg := <-agent.outbox:
418 messages = append(messages, msg)
419 default:
420 // No more messages
421 i = 10 // Exit the loop
422 }
423 }
424
425 // Print out the messages we got for debugging
426 t.Logf("Received %d messages from outbox", len(messages))
427 for i, msg := range messages {
428 t.Logf("Message %d: Type=%s, Content=%s, EndOfTurn=%t", i, msg.Type, msg.Content, msg.EndOfTurn)
429 if msg.ToolName != "" {
430 t.Logf(" Tool: Name=%s, Input=%s, Result=%s, Error=%v",
431 msg.ToolName, msg.ToolInput, msg.ToolResult, msg.ToolError)
432 }
433 }
434
435 // Basic checks
436 if len(messages) < 1 {
437 t.Errorf("Should have at least one message, got %d", len(messages))
438 }
439
440 // The main thing we want to verify: when user cancels, the response processing stops
441 // and appropriate messages are sent
442
443 // Check if we have an error message about cancellation
444 hasCancelErrorMessage := false
445 for _, msg := range messages {
446 if msg.Type == ErrorMessageType && msg.Content == userCancelMessage {
447 hasCancelErrorMessage = true
448 break
449 }
450 }
451
452 // Check if we have a tool message with error
453 hasToolError := false
454 for _, msg := range messages {
455 if msg.Type == ToolUseMessageType &&
456 msg.ToolError && strings.Contains(msg.ToolResult, "user canceled") {
457 hasToolError = true
458 break
459 }
460 }
461
462 // We should have at least one of these messages
463 if !(hasCancelErrorMessage || hasToolError) {
464 t.Errorf("Should have either an error message or a tool with error about cancellation")
465 }
466 })
467}
468
469func TestInnerLoop_UserDoesNotCancel(t *testing.T) {
470 mockConvo := NewMockConvo(t)
471
472 agent := &Agent{
473 convo: mockConvo,
474 inbox: make(chan string, 100),
475 outbox: make(chan AgentMessage, 100),
476 }
477
478 // Define test message
479 // This simulates something that would result in claude
480 // responding with tool_use responses.
481 testMsg := "use test_tool for something"
482
483 // Mock initial response with tool use
484 initialResponse := &ant.MessageResponse{
485 StopReason: ant.StopReasonToolUse,
486 Content: []ant.Content{
487 {
488 Type: ant.ContentTypeToolUse,
489 ID: "tool1",
490 ToolName: "test_tool",
491 ToolInput: []byte(`{"param":"value"}`),
492 },
493 },
494 Usage: ant.Usage{
495 InputTokens: 100,
496 OutputTokens: 200,
497 },
498 }
499
500 // Set up expected behaviors
501 mockConvo.ExpectCall("SendMessage", nil).Return(initialResponse, nil)
502
503 toolUseContents := []ant.Content{
504 {
505 Type: ant.ContentTypeToolResult,
506 ToolUseID: "tool1",
507 Text: "",
508 ToolResult: "This is a tool result",
509 ToolError: false,
510 },
511 }
512 toolUseResponse := &ant.MessageResponse{
513 // StopReason: ant.StopReasonEndTurn,
514 Content: []ant.Content{
515 {
516 Type: ant.ContentTypeText,
517 Text: "tool_use contents accepted",
518 },
519 },
520 Usage: ant.Usage{
521 InputTokens: 50,
522 OutputTokens: 75,
523 },
524 }
525
526 ctx, cancel := context.WithCancel(context.Background())
527 defer cancel()
528
529 // Setting up the mock response for tool results
530 mockConvo.ExpectCall("ToolResultContents", initialResponse).Return(toolUseContents, nil)
531 mockConvo.ExpectCall("SendMessage", nil).Return(toolUseResponse, nil)
532 // mockConvo, as a mock, isn't able to run the loop in ant.Convo that makes this agent.OnToolResult callback.
533 // So we "mock" it out here by calling it explicitly, in order to make sure it calls .pushToOutbox with this message.
534 // This is not a good situation.
535 // ant.Convo and loop.Agent seem to be excessively coupled, and aware of each others' internal details.
536 // TODO: refactor (or clarify in docs somewhere) the boundary between what ant.Convo is responsible
537 // for vs what loop.Agent is responsible for.
538 antConvo := &ant.Convo{}
539 res := ""
540 agent.OnToolResult(ctx, antConvo, "tool1", nil, toolUseContents[0], &res, nil)
541
542 // Send a message to the agent's inbox
543 agent.UserMessage(ctx, testMsg)
544
545 // Run one iteration of InnerLoop
546 agent.InnerLoop(ctx)
547
548 // Verify results
549 mockConvo.AssertExpectations(t)
550
551 // Get all messages from outbox and verify their types/content
552 var messages []AgentMessage
553
554 // Collect messages until outbox is empty or we have 10 messages
555 for i := 0; i < 10; i++ {
556 select {
557 case msg := <-agent.outbox:
558 messages = append(messages, msg)
559 default:
560 // No more messages
561 i = 10 // Exit the loop
562 }
563 }
564
565 // Print out the messages we got for debugging
566 t.Logf("Received %d messages from outbox", len(messages))
567 for i, msg := range messages {
568 t.Logf("Message %d: Type=%s, Content=%s, EndOfTurn=%t", i, msg.Type, msg.Content, msg.EndOfTurn)
569 if msg.ToolName != "" {
570 t.Logf(" Tool: Name=%s, Input=%s, Result=%s, Error=%v",
571 msg.ToolName, msg.ToolInput, msg.ToolResult, msg.ToolError)
572 }
573 }
574
575 // Basic checks
576 if len(messages) < 1 {
577 t.Errorf("Should have at least one message, got %d", len(messages))
578 }
579
580 // The main thing we want to verify: when user cancels, the response processing stops
581 // and appropriate messages are sent
582
583 // Check if we have an error message about cancellation
584 hasCancelErrorMessage := false
585 for _, msg := range messages {
586 if msg.Type == ErrorMessageType && msg.Content == userCancelMessage {
587 hasCancelErrorMessage = true
588 break
589 }
590 }
591
592 // Check if we have a tool message with error
593 hasToolError := false
594 for _, msg := range messages {
595 if msg.Type == ToolUseMessageType &&
596 msg.ToolError && strings.Contains(msg.ToolResult, "user canceled") {
597 hasToolError = true
598 break
599 }
600 }
601
602 if hasCancelErrorMessage || hasToolError {
603 t.Errorf("Should not have either an error message nor a tool with error about cancellation")
604 }
605}