Blame - loop/agent_user_cancel_test.go - sketch

blob: f79e73ef5d63fc4385fa4a16e8dce99931bdfeb4 [file] [log] [blame]

Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	1	//go:build goexperiment.synctest
				2
				3	package loop
				4
				5	import (
				6	"context"
				7	"fmt"
				8	"strings"
				9	"testing"
				10	"testing/synctest"
				11
				12	"sketch.dev/ant"
				13	)
				14
				15	func TestLoop_OneTurn_Basic(t *testing.T) {
				16	synctest.Run(func() {
				17	mockConvo := NewMockConvo(t)
				18
				19	agent := &Agent{
				20	convo: mockConvo,
				21	inbox: make(chan string, 1),
				22	outbox: make(chan AgentMessage, 1),
				23	}
				24	userMsg := ant.Message{
				25	Role: "user",
				26	Content: []ant.Content{
				27	{Type: "text", Text: "hi"},
				28	},
				29	}
				30	userMsgResponse := &ant.MessageResponse{}
				31	mockConvo.ExpectCall("SendMessage", userMsg).Return(userMsgResponse, nil)
				32
				33	ctx, cancel := context.WithCancel(context.Background())
				34	defer cancel()
				35
				36	go agent.Loop(ctx)
				37
				38	agent.UserMessage(ctx, "hi")
				39
				40	// This makes sure the SendMessage call happens before we assert the expectations.
				41	synctest.Wait()
				42
				43	// Verify results
				44	mockConvo.AssertExpectations(t)
				45	})
				46	}
				47
				48	func TestLoop_ToolCall_Basic(t *testing.T) {
				49	synctest.Run(func() {
				50	mockConvo := NewMockConvo(t)
				51
				52	agent := &Agent{
				53	convo: mockConvo,
				54	inbox: make(chan string, 1),
				55	outbox: make(chan AgentMessage, 1),
				56	}
				57	userMsg := ant.Message{
				58	Role: "user",
				59	Content: []ant.Content{
				60	{Type: "text", Text: "hi"},
				61	},
				62	}
				63	userMsgResponse := &ant.MessageResponse{
				64	StopReason: ant.StopReasonToolUse,
				65	Content: []ant.Content{
				66	{
				67	Type: ant.ContentTypeToolUse,
				68	ID: "tool1",
				69	ToolName: "test_tool",
				70	ToolInput: []byte(`{"param":"value"}`),
				71	},
				72	},
				73	Usage: ant.Usage{
				74	InputTokens: 100,
				75	OutputTokens: 200,
				76	},
				77	}
				78
				79	toolUseContents := []ant.Content{
				80	{
				81	Type: ant.ContentTypeToolResult,
				82	ToolUseID: "tool1",
				83	Text: "",
				84	ToolResult: "This is a tool result",
				85	ToolError: false,
				86	},
				87	}
				88	toolUseResultsMsg := ant.Message{
				89	Role: "user",
				90	Content: toolUseContents,
				91	}
				92	toolUseResponse := &ant.MessageResponse{
				93	StopReason: ant.StopReasonEndTurn,
				94	Content: []ant.Content{
				95	{
				96	Type: ant.ContentTypeText,
				97	Text: "tool_use contents accepted",
				98	},
				99	},
				100	Usage: ant.Usage{
				101	InputTokens: 50,
				102	OutputTokens: 75,
				103	},
				104	}
				105
				106	// Set up the mock response for tool results
				107	mockConvo.ExpectCall("SendMessage", userMsg).Return(userMsgResponse, nil)
				108	mockConvo.ExpectCall("ToolResultContents", userMsgResponse).Return(toolUseContents, nil)
				109	mockConvo.ExpectCall("SendMessage", toolUseResultsMsg).Return(toolUseResponse, nil)
				110
				111	ctx, cancel := context.WithCancel(context.Background())
				112	defer cancel()
				113
				114	go agent.Loop(ctx)
				115
				116	agent.UserMessage(ctx, "hi")
				117
				118	// This makes sure the SendMessage call happens before we assert the expectations.
				119	synctest.Wait()
				120
				121	// Verify results
				122	mockConvo.AssertExpectations(t)
				123	})
				124	}
				125
				126	func TestLoop_ToolCall_UserCancelsDuringToolResultContents(t *testing.T) {
				127	synctest.Run(func() {
				128	mockConvo := NewMockConvo(t)
				129
				130	agent := &Agent{
				131	convo: mockConvo,
				132	inbox: make(chan string, 1),
				133	outbox: make(chan AgentMessage, 10), // don't let anything block on outbox.
				134	}
				135	userMsg := ant.Message{
				136	Role: "user",
				137	Content: []ant.Content{
				138	{Type: "text", Text: "hi"},
				139	},
				140	}
				141	userMsgResponse := &ant.MessageResponse{
				142	StopReason: ant.StopReasonToolUse,
				143	Content: []ant.Content{
				144	{
				145	Type: ant.ContentTypeToolUse,
				146	ID: "tool1",
				147	ToolName: "test_tool",
				148	ToolInput: []byte(`{"param":"value"}`),
				149	},
				150	},
				151	Usage: ant.Usage{
				152	InputTokens: 100,
				153	OutputTokens: 200,
				154	},
				155	}
				156	toolUseResultsMsg := ant.Message{
				157	Role: "user",
				158	Content: []ant.Content{
				159	{Type: "text", Text: cancelToolUseMessage},
				160	},
				161	}
				162	toolUseResponse := &ant.MessageResponse{
				163	StopReason: ant.StopReasonEndTurn,
				164	Content: []ant.Content{
				165	{
				166	Type: ant.ContentTypeText,
				167	Text: "tool_use contents accepted",
				168	},
				169	},
				170	Usage: ant.Usage{
				171	InputTokens: 50,
				172	OutputTokens: 75,
				173	},
				174	}
				175
				176	// Set up the mock response for tool results
				177
				178	userCancelError := fmt.Errorf("user canceled")
				179	// This allows the test to block the InnerLoop goroutine that invokes ToolResultsContents so
				180	// we can force its context to cancel while it's blocked.
				181	waitForToolResultContents := make(chan any, 1)
				182
				183	mockConvo.ExpectCall("SendMessage", userMsg).Return(userMsgResponse, nil)
				184	mockConvo.ExpectCall("ToolResultContents",
				185	userMsgResponse).BlockAndReturn(waitForToolResultContents, []ant.Content{}, userCancelError)
				186	mockConvo.ExpectCall("SendMessage", toolUseResultsMsg).Return(toolUseResponse, nil)
				187
				188	ctx, cancel := context.WithCancel(context.Background())
				189	defer cancel()
				190
				191	go agent.Loop(ctx)
				192
				193	// This puts one message into agent.inbox, which should un-block the GatherMessages call
				194	// at the top of agent.InnerLoop.
				195	agent.UserMessage(ctx, "hi")
				196
				197	// This makes sure the first SendMessage call happens before we proceed with the cancel.
				198	synctest.Wait()
				199
				200	// The goroutine executing ToolResultContents call should be blocked, simulating a long
				201	// running operation that the user wishes to cancel while it's still in progress.
				202	// This call invokes that InnerLoop context's cancel() func.
				203	agent.CancelInnerLoop(userCancelError)
				204
				205	// This tells the goroutine that's in mockConvo.ToolResultContents to proceed.
				206	waitForToolResultContents <- nil
				207
				208	// This makes sure the final SendMessage call happens before we assert the expectations.
				209	synctest.Wait()
				210
				211	// Verify results
				212	mockConvo.AssertExpectations(t)
				213	})
				214	}
				215
				216	func TestLoop_ToolCall_UserCancelsDuringToolResultContents_AndContinuesToChat(t *testing.T) {
				217	synctest.Run(func() {
				218	mockConvo := NewMockConvo(t)
				219
				220	agent := &Agent{
				221	convo: mockConvo,
				222	inbox: make(chan string, 1),
				223	outbox: make(chan AgentMessage, 10), // don't let anything block on outbox.
				224	}
				225	userMsg := ant.Message{
				226	Role: "user",
				227	Content: []ant.Content{
				228	{Type: "text", Text: "hi"},
				229	},
				230	}
				231	userMsgResponse := &ant.MessageResponse{
				232	StopReason: ant.StopReasonToolUse,
				233	Content: []ant.Content{
				234	{
				235	Type: ant.ContentTypeToolUse,
				236	ID: "tool1",
				237	ToolName: "test_tool",
				238	ToolInput: []byte(`{"param":"value"}`),
				239	},
				240	},
				241	Usage: ant.Usage{
				242	InputTokens: 100,
				243	OutputTokens: 200,
				244	},
				245	}
				246	toolUseResultsMsg := ant.Message{
				247	Role: "user",
				248	Content: []ant.Content{
				249	{Type: "text", Text: cancelToolUseMessage},
				250	},
				251	}
				252	toolUseResultResponse := &ant.MessageResponse{
				253	StopReason: ant.StopReasonEndTurn,
				254	Content: []ant.Content{
				255	{
				256	Type: ant.ContentTypeText,
				257	Text: "awaiting further instructions",
				258	},
				259	},
				260	Usage: ant.Usage{
				261	InputTokens: 50,
				262	OutputTokens: 75,
				263	},
				264	}
				265	userFollowUpMsg := ant.Message{
				266	Role: "user",
				267	Content: []ant.Content{
				268	{Type: "text", Text: "that was the wrong thing to do"},
				269	},
				270	}
				271	userFollowUpResponse := &ant.MessageResponse{
				272	StopReason: ant.StopReasonEndTurn,
				273	Content: []ant.Content{
				274	{
				275	Type: ant.ContentTypeText,
				276	Text: "sorry about that",
				277	},
				278	},
				279	Usage: ant.Usage{
				280	InputTokens: 100,
				281	OutputTokens: 200,
				282	},
				283	}
				284	// Set up the mock response for tool results
				285
				286	userCancelError := fmt.Errorf("user canceled")
				287	// This allows the test to block the InnerLoop goroutine that invokes ToolResultsContents so
				288	// we can force its context to cancel while it's blocked.
				289	waitForToolResultContents := make(chan any, 1)
				290
				291	mockConvo.ExpectCall("SendMessage", userMsg).Return(userMsgResponse, nil)
				292	mockConvo.ExpectCall("ToolResultContents",
				293	userMsgResponse).BlockAndReturn(waitForToolResultContents, []ant.Content{}, userCancelError)
				294	mockConvo.ExpectCall("SendMessage", toolUseResultsMsg).Return(toolUseResultResponse, nil)
				295
				296	mockConvo.ExpectCall("SendMessage", userFollowUpMsg).Return(userFollowUpResponse, nil)
				297
				298	ctx, cancel := context.WithCancel(context.Background())
				299	defer cancel()
				300
				301	go agent.Loop(ctx)
				302
				303	// This puts one message into agent.inbox, which should un-block the GatherMessages call
				304	// at the top of agent.InnerLoop.
				305	agent.UserMessage(ctx, "hi")
				306
				307	// This makes sure the first SendMessage call happens before we proceed with the cancel.
				308	synctest.Wait()
				309
				310	// The goroutine executing ToolResultContents call should be blocked, simulating a long
				311	// running operation that the user wishes to cancel while it's still in progress.
				312	// This call invokes that InnerLoop context's cancel() func.
				313	agent.CancelInnerLoop(userCancelError)
				314
				315	// This tells the goroutine that's in mockConvo.ToolResultContents to proceed.
				316	waitForToolResultContents <- nil
				317
				318	// Allow InnerLoop to handle the cancellation logic before continuing the conversation.
				319	synctest.Wait()
				320
				321	agent.UserMessage(ctx, "that was the wrong thing to do")
				322
				323	synctest.Wait()
				324
				325	// Verify results
				326	mockConvo.AssertExpectations(t)
				327	})
				328	}
				329
				330	func TestInnerLoop_UserCancels(t *testing.T) {
				331	synctest.Run(func() {
				332	mockConvo := NewMockConvo(t)
				333
				334	agent := &Agent{
				335	convo: mockConvo,
				336	inbox: make(chan string, 1),
				337	outbox: make(chan AgentMessage, 10), // don't block on outbox
				338	}
				339
				340	// Define test message
				341	// This simulates something that would result in claude responding with tool_use responses.
				342	userMsg := ant.Message{
				343	Role: "user",
				344	Content: []ant.Content{
				345	{Type: "text", Text: "use test_tool for something"},
				346	},
				347	}
				348	// Mock initial response with tool use
				349	userMsgResponse := &ant.MessageResponse{
				350	StopReason: ant.StopReasonToolUse,
				351	Content: []ant.Content{
				352	{
				353	Type: ant.ContentTypeToolUse,
				354	ID: "tool1",
				355	ToolName: "test_tool",
				356	ToolInput: []byte(`{"param":"value"}`),
				357	},
				358	},
				359	Usage: ant.Usage{
				360	InputTokens: 100,
				361	OutputTokens: 200,
				362	},
				363	}
				364	canceledToolUseContents := []ant.Content{
				365	{
				366	Type: ant.ContentTypeToolResult,
				367	ToolUseID: "tool1",
				368	ToolError: true,
				369	ToolResult: "user canceled this tool_use",
				370	},
				371	}
				372	canceledToolUseMsg := ant.Message{
				373	Role: "user",
				374	Content: append(canceledToolUseContents, ant.Content{
				375	Type: ant.ContentTypeText,
				376	Text: cancelToolUseMessage,
				377	}),
				378	}
				379	// Set up expected behaviors
				380	waitForSendMessage := make(chan any)
				381	mockConvo.ExpectCall("SendMessage", userMsg).BlockAndReturn(waitForSendMessage, userMsgResponse, nil)
				382
				383	mockConvo.ExpectCall("ToolResultCancelContents", userMsgResponse).Return(canceledToolUseContents, nil)
				384	mockConvo.ExpectCall("SendMessage", canceledToolUseMsg).Return(
				385	&ant.MessageResponse{
				386	StopReason: ant.StopReasonToolUse,
				387	}, nil)
				388
				389	ctx, cancel := context.WithCancelCause(context.Background())
				390
				391	// Run one iteration of InnerLoop
				392	go agent.InnerLoop(ctx)
				393
				394	// Send a message to the agent's inbox
				395	agent.UserMessage(ctx, "use test_tool for something")
				396
				397	synctest.Wait()
				398
				399	// cancel the context before we even call InnerLoop with it, so it will
				400	// be .Done() the first time it checks.
				401	cancel(fmt.Errorf("user canceled"))
				402
				403	// unblock the InnerLoop goroutine's SendMessage call
				404	waitForSendMessage <- nil
				405
				406	synctest.Wait()
				407
				408	// Verify results
				409	mockConvo.AssertExpectations(t)
				410
				411	// Get all messages from outbox and verify their types/content
				412	var messages []AgentMessage
				413
				414	// Collect messages until outbox is empty or we have 10 messages
				415	for i := 0; i < 10; i++ {
				416	select {
				417	case msg := <-agent.outbox:
				418	messages = append(messages, msg)
				419	default:
				420	// No more messages
				421	i = 10 // Exit the loop
				422	}
				423	}
				424
				425	// Print out the messages we got for debugging
				426	t.Logf("Received %d messages from outbox", len(messages))
				427	for i, msg := range messages {
				428	t.Logf("Message %d: Type=%s, Content=%s, EndOfTurn=%t", i, msg.Type, msg.Content, msg.EndOfTurn)
				429	if msg.ToolName != "" {
				430	t.Logf(" Tool: Name=%s, Input=%s, Result=%s, Error=%v",
				431	msg.ToolName, msg.ToolInput, msg.ToolResult, msg.ToolError)
				432	}
				433	}
				434
				435	// Basic checks
				436	if len(messages) < 1 {
				437	t.Errorf("Should have at least one message, got %d", len(messages))
				438	}
				439
				440	// The main thing we want to verify: when user cancels, the response processing stops
				441	// and appropriate messages are sent
				442
				443	// Check if we have an error message about cancellation
				444	hasCancelErrorMessage := false
				445	for _, msg := range messages {
				446	if msg.Type == ErrorMessageType && msg.Content == userCancelMessage {
				447	hasCancelErrorMessage = true
				448	break
				449	}
				450	}
				451
				452	// Check if we have a tool message with error
				453	hasToolError := false
				454	for _, msg := range messages {
				455	if msg.Type == ToolUseMessageType &&
				456	msg.ToolError && strings.Contains(msg.ToolResult, "user canceled") {
				457	hasToolError = true
				458	break
				459	}
				460	}
				461
				462	// We should have at least one of these messages
				463	if !(hasCancelErrorMessage \|\| hasToolError) {
				464	t.Errorf("Should have either an error message or a tool with error about cancellation")
				465	}
				466	})
				467	}
				468
				469	func TestInnerLoop_UserDoesNotCancel(t *testing.T) {
				470	mockConvo := NewMockConvo(t)
				471
				472	agent := &Agent{
				473	convo: mockConvo,
				474	inbox: make(chan string, 100),
				475	outbox: make(chan AgentMessage, 100),
				476	}
				477
				478	// Define test message
				479	// This simulates something that would result in claude
				480	// responding with tool_use responses.
				481	testMsg := "use test_tool for something"
				482
				483	// Mock initial response with tool use
				484	initialResponse := &ant.MessageResponse{
				485	StopReason: ant.StopReasonToolUse,
				486	Content: []ant.Content{
				487	{
				488	Type: ant.ContentTypeToolUse,
				489	ID: "tool1",
				490	ToolName: "test_tool",
				491	ToolInput: []byte(`{"param":"value"}`),
				492	},
				493	},
				494	Usage: ant.Usage{
				495	InputTokens: 100,
				496	OutputTokens: 200,
				497	},
				498	}
				499
				500	// Set up expected behaviors
				501	mockConvo.ExpectCall("SendMessage", nil).Return(initialResponse, nil)
				502
				503	toolUseContents := []ant.Content{
				504	{
				505	Type: ant.ContentTypeToolResult,
				506	ToolUseID: "tool1",
				507	Text: "",
				508	ToolResult: "This is a tool result",
				509	ToolError: false,
				510	},
				511	}
				512	toolUseResponse := &ant.MessageResponse{
				513	// StopReason: ant.StopReasonEndTurn,
				514	Content: []ant.Content{
				515	{
				516	Type: ant.ContentTypeText,
				517	Text: "tool_use contents accepted",
				518	},
				519	},
				520	Usage: ant.Usage{
				521	InputTokens: 50,
				522	OutputTokens: 75,
				523	},
				524	}
				525
				526	ctx, cancel := context.WithCancel(context.Background())
				527	defer cancel()
				528
				529	// Setting up the mock response for tool results
				530	mockConvo.ExpectCall("ToolResultContents", initialResponse).Return(toolUseContents, nil)
				531	mockConvo.ExpectCall("SendMessage", nil).Return(toolUseResponse, nil)
				532	// mockConvo, as a mock, isn't able to run the loop in ant.Convo that makes this agent.OnToolResult callback.
				533	// So we "mock" it out here by calling it explicitly, in order to make sure it calls .pushToOutbox with this message.
				534	// This is not a good situation.
				535	// ant.Convo and loop.Agent seem to be excessively coupled, and aware of each others' internal details.
				536	// TODO: refactor (or clarify in docs somewhere) the boundary between what ant.Convo is responsible
				537	// for vs what loop.Agent is responsible for.
				538	antConvo := &ant.Convo{}
				539	res := ""
				540	agent.OnToolResult(ctx, antConvo, "tool1", nil, toolUseContents[0], &res, nil)
				541
				542	// Send a message to the agent's inbox
				543	agent.UserMessage(ctx, testMsg)
				544
				545	// Run one iteration of InnerLoop
				546	agent.InnerLoop(ctx)
				547
				548	// Verify results
				549	mockConvo.AssertExpectations(t)
				550
				551	// Get all messages from outbox and verify their types/content
				552	var messages []AgentMessage
				553
				554	// Collect messages until outbox is empty or we have 10 messages
				555	for i := 0; i < 10; i++ {
				556	select {
				557	case msg := <-agent.outbox:
				558	messages = append(messages, msg)
				559	default:
				560	// No more messages
				561	i = 10 // Exit the loop
				562	}
				563	}
				564
				565	// Print out the messages we got for debugging
				566	t.Logf("Received %d messages from outbox", len(messages))
				567	for i, msg := range messages {
				568	t.Logf("Message %d: Type=%s, Content=%s, EndOfTurn=%t", i, msg.Type, msg.Content, msg.EndOfTurn)
				569	if msg.ToolName != "" {
				570	t.Logf(" Tool: Name=%s, Input=%s, Result=%s, Error=%v",
				571	msg.ToolName, msg.ToolInput, msg.ToolResult, msg.ToolError)
				572	}
				573	}
				574
				575	// Basic checks
				576	if len(messages) < 1 {
				577	t.Errorf("Should have at least one message, got %d", len(messages))
				578	}
				579
				580	// The main thing we want to verify: when user cancels, the response processing stops
				581	// and appropriate messages are sent
				582
				583	// Check if we have an error message about cancellation
				584	hasCancelErrorMessage := false
				585	for _, msg := range messages {
				586	if msg.Type == ErrorMessageType && msg.Content == userCancelMessage {
				587	hasCancelErrorMessage = true
				588	break
				589	}
				590	}
				591
				592	// Check if we have a tool message with error
				593	hasToolError := false
				594	for _, msg := range messages {
				595	if msg.Type == ToolUseMessageType &&
				596	msg.ToolError && strings.Contains(msg.ToolResult, "user canceled") {
				597	hasToolError = true
				598	break
				599	}
				600	}
				601
				602	if hasCancelErrorMessage \|\| hasToolError {
				603	t.Errorf("Should not have either an error message nor a tool with error about cancellation")
				604	}
				605	}