Blame - loop/agent_user_cancel_test.go - sketch

blob: e55ba17929dac41edea76a949bc9130aaca11916 [file] [log] [blame]

Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	1	//go:build goexperiment.synctest
				2
				3	package loop
				4
				5	import (
				6	"context"
				7	"fmt"
				8	"strings"
				9	"testing"
				10	"testing/synctest"
				11
				12	"sketch.dev/ant"
				13	)
				14
				15	func TestLoop_OneTurn_Basic(t *testing.T) {
				16	synctest.Run(func() {
				17	mockConvo := NewMockConvo(t)
				18
				19	agent := &Agent{
				20	convo: mockConvo,
				21	inbox: make(chan string, 1),
				22	outbox: make(chan AgentMessage, 1),
				23	}
Josh Bleecher Snyder	a3dcd86	2025-04-30 19:47:16 +0000	[diff] [blame]	24	userMsg := ant.UserStringMessage("hi")
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	25	userMsgResponse := &ant.MessageResponse{}
				26	mockConvo.ExpectCall("SendMessage", userMsg).Return(userMsgResponse, nil)
				27
				28	ctx, cancel := context.WithCancel(context.Background())
				29	defer cancel()
				30
				31	go agent.Loop(ctx)
				32
				33	agent.UserMessage(ctx, "hi")
				34
				35	// This makes sure the SendMessage call happens before we assert the expectations.
				36	synctest.Wait()
				37
				38	// Verify results
				39	mockConvo.AssertExpectations(t)
				40	})
				41	}
				42
				43	func TestLoop_ToolCall_Basic(t *testing.T) {
				44	synctest.Run(func() {
				45	mockConvo := NewMockConvo(t)
				46
				47	agent := &Agent{
				48	convo: mockConvo,
				49	inbox: make(chan string, 1),
				50	outbox: make(chan AgentMessage, 1),
				51	}
				52	userMsg := ant.Message{
Josh Bleecher Snyder	a3dcd86	2025-04-30 19:47:16 +0000	[diff] [blame]	53	Role: ant.MessageRoleUser,
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	54	Content: []ant.Content{
Josh Bleecher Snyder	a3dcd86	2025-04-30 19:47:16 +0000	[diff] [blame]	55	{Type: ant.ContentTypeText, Text: "hi"},
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	56	},
				57	}
				58	userMsgResponse := &ant.MessageResponse{
				59	StopReason: ant.StopReasonToolUse,
				60	Content: []ant.Content{
				61	{
				62	Type: ant.ContentTypeToolUse,
				63	ID: "tool1",
				64	ToolName: "test_tool",
				65	ToolInput: []byte(`{"param":"value"}`),
				66	},
				67	},
				68	Usage: ant.Usage{
				69	InputTokens: 100,
				70	OutputTokens: 200,
				71	},
				72	}
				73
				74	toolUseContents := []ant.Content{
				75	{
				76	Type: ant.ContentTypeToolResult,
				77	ToolUseID: "tool1",
				78	Text: "",
				79	ToolResult: "This is a tool result",
				80	ToolError: false,
				81	},
				82	}
				83	toolUseResultsMsg := ant.Message{
Josh Bleecher Snyder	a3dcd86	2025-04-30 19:47:16 +0000	[diff] [blame]	84	Role: ant.MessageRoleUser,
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	85	Content: toolUseContents,
				86	}
				87	toolUseResponse := &ant.MessageResponse{
				88	StopReason: ant.StopReasonEndTurn,
				89	Content: []ant.Content{
				90	{
				91	Type: ant.ContentTypeText,
				92	Text: "tool_use contents accepted",
				93	},
				94	},
				95	Usage: ant.Usage{
				96	InputTokens: 50,
				97	OutputTokens: 75,
				98	},
				99	}
				100
				101	// Set up the mock response for tool results
				102	mockConvo.ExpectCall("SendMessage", userMsg).Return(userMsgResponse, nil)
				103	mockConvo.ExpectCall("ToolResultContents", userMsgResponse).Return(toolUseContents, nil)
				104	mockConvo.ExpectCall("SendMessage", toolUseResultsMsg).Return(toolUseResponse, nil)
				105
				106	ctx, cancel := context.WithCancel(context.Background())
				107	defer cancel()
				108
				109	go agent.Loop(ctx)
				110
				111	agent.UserMessage(ctx, "hi")
				112
				113	// This makes sure the SendMessage call happens before we assert the expectations.
				114	synctest.Wait()
				115
				116	// Verify results
				117	mockConvo.AssertExpectations(t)
				118	})
				119	}
				120
				121	func TestLoop_ToolCall_UserCancelsDuringToolResultContents(t *testing.T) {
				122	synctest.Run(func() {
				123	mockConvo := NewMockConvo(t)
				124
				125	agent := &Agent{
				126	convo: mockConvo,
				127	inbox: make(chan string, 1),
				128	outbox: make(chan AgentMessage, 10), // don't let anything block on outbox.
				129	}
Josh Bleecher Snyder	a3dcd86	2025-04-30 19:47:16 +0000	[diff] [blame]	130	userMsg := ant.UserStringMessage("hi")
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	131	userMsgResponse := &ant.MessageResponse{
				132	StopReason: ant.StopReasonToolUse,
				133	Content: []ant.Content{
				134	{
				135	Type: ant.ContentTypeToolUse,
				136	ID: "tool1",
				137	ToolName: "test_tool",
				138	ToolInput: []byte(`{"param":"value"}`),
				139	},
				140	},
				141	Usage: ant.Usage{
				142	InputTokens: 100,
				143	OutputTokens: 200,
				144	},
				145	}
Josh Bleecher Snyder	a3dcd86	2025-04-30 19:47:16 +0000	[diff] [blame]	146	toolUseResultsMsg := ant.UserStringMessage(cancelToolUseMessage)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	147	toolUseResponse := &ant.MessageResponse{
				148	StopReason: ant.StopReasonEndTurn,
				149	Content: []ant.Content{
				150	{
				151	Type: ant.ContentTypeText,
				152	Text: "tool_use contents accepted",
				153	},
				154	},
				155	Usage: ant.Usage{
				156	InputTokens: 50,
				157	OutputTokens: 75,
				158	},
				159	}
				160
				161	// Set up the mock response for tool results
				162
				163	userCancelError := fmt.Errorf("user canceled")
				164	// This allows the test to block the InnerLoop goroutine that invokes ToolResultsContents so
				165	// we can force its context to cancel while it's blocked.
				166	waitForToolResultContents := make(chan any, 1)
				167
				168	mockConvo.ExpectCall("SendMessage", userMsg).Return(userMsgResponse, nil)
				169	mockConvo.ExpectCall("ToolResultContents",
				170	userMsgResponse).BlockAndReturn(waitForToolResultContents, []ant.Content{}, userCancelError)
				171	mockConvo.ExpectCall("SendMessage", toolUseResultsMsg).Return(toolUseResponse, nil)
				172
				173	ctx, cancel := context.WithCancel(context.Background())
				174	defer cancel()
				175
				176	go agent.Loop(ctx)
				177
				178	// This puts one message into agent.inbox, which should un-block the GatherMessages call
				179	// at the top of agent.InnerLoop.
				180	agent.UserMessage(ctx, "hi")
				181
				182	// This makes sure the first SendMessage call happens before we proceed with the cancel.
				183	synctest.Wait()
				184
				185	// The goroutine executing ToolResultContents call should be blocked, simulating a long
				186	// running operation that the user wishes to cancel while it's still in progress.
				187	// This call invokes that InnerLoop context's cancel() func.
				188	agent.CancelInnerLoop(userCancelError)
				189
				190	// This tells the goroutine that's in mockConvo.ToolResultContents to proceed.
				191	waitForToolResultContents <- nil
				192
				193	// This makes sure the final SendMessage call happens before we assert the expectations.
				194	synctest.Wait()
				195
				196	// Verify results
				197	mockConvo.AssertExpectations(t)
				198	})
				199	}
				200
				201	func TestLoop_ToolCall_UserCancelsDuringToolResultContents_AndContinuesToChat(t *testing.T) {
				202	synctest.Run(func() {
				203	mockConvo := NewMockConvo(t)
				204
				205	agent := &Agent{
				206	convo: mockConvo,
				207	inbox: make(chan string, 1),
				208	outbox: make(chan AgentMessage, 10), // don't let anything block on outbox.
				209	}
				210	userMsg := ant.Message{
Josh Bleecher Snyder	a3dcd86	2025-04-30 19:47:16 +0000	[diff] [blame]	211	Role: ant.MessageRoleUser,
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	212	Content: []ant.Content{
Josh Bleecher Snyder	a3dcd86	2025-04-30 19:47:16 +0000	[diff] [blame]	213	{Type: ant.ContentTypeText, Text: "hi"},
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	214	},
				215	}
				216	userMsgResponse := &ant.MessageResponse{
				217	StopReason: ant.StopReasonToolUse,
				218	Content: []ant.Content{
				219	{
				220	Type: ant.ContentTypeToolUse,
				221	ID: "tool1",
				222	ToolName: "test_tool",
				223	ToolInput: []byte(`{"param":"value"}`),
				224	},
				225	},
				226	Usage: ant.Usage{
				227	InputTokens: 100,
				228	OutputTokens: 200,
				229	},
				230	}
				231	toolUseResultsMsg := ant.Message{
Josh Bleecher Snyder	a3dcd86	2025-04-30 19:47:16 +0000	[diff] [blame]	232	Role: ant.MessageRoleUser,
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	233	Content: []ant.Content{
Josh Bleecher Snyder	a3dcd86	2025-04-30 19:47:16 +0000	[diff] [blame]	234	{Type: ant.ContentTypeText, Text: cancelToolUseMessage},
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	235	},
				236	}
				237	toolUseResultResponse := &ant.MessageResponse{
				238	StopReason: ant.StopReasonEndTurn,
				239	Content: []ant.Content{
				240	{
				241	Type: ant.ContentTypeText,
				242	Text: "awaiting further instructions",
				243	},
				244	},
				245	Usage: ant.Usage{
				246	InputTokens: 50,
				247	OutputTokens: 75,
				248	},
				249	}
				250	userFollowUpMsg := ant.Message{
Josh Bleecher Snyder	a3dcd86	2025-04-30 19:47:16 +0000	[diff] [blame]	251	Role: ant.MessageRoleUser,
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	252	Content: []ant.Content{
Josh Bleecher Snyder	a3dcd86	2025-04-30 19:47:16 +0000	[diff] [blame]	253	{Type: ant.ContentTypeText, Text: "that was the wrong thing to do"},
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	254	},
				255	}
				256	userFollowUpResponse := &ant.MessageResponse{
				257	StopReason: ant.StopReasonEndTurn,
				258	Content: []ant.Content{
				259	{
				260	Type: ant.ContentTypeText,
				261	Text: "sorry about that",
				262	},
				263	},
				264	Usage: ant.Usage{
				265	InputTokens: 100,
				266	OutputTokens: 200,
				267	},
				268	}
				269	// Set up the mock response for tool results
				270
				271	userCancelError := fmt.Errorf("user canceled")
				272	// This allows the test to block the InnerLoop goroutine that invokes ToolResultsContents so
				273	// we can force its context to cancel while it's blocked.
				274	waitForToolResultContents := make(chan any, 1)
				275
				276	mockConvo.ExpectCall("SendMessage", userMsg).Return(userMsgResponse, nil)
				277	mockConvo.ExpectCall("ToolResultContents",
				278	userMsgResponse).BlockAndReturn(waitForToolResultContents, []ant.Content{}, userCancelError)
				279	mockConvo.ExpectCall("SendMessage", toolUseResultsMsg).Return(toolUseResultResponse, nil)
				280
				281	mockConvo.ExpectCall("SendMessage", userFollowUpMsg).Return(userFollowUpResponse, nil)
				282
				283	ctx, cancel := context.WithCancel(context.Background())
				284	defer cancel()
				285
				286	go agent.Loop(ctx)
				287
				288	// This puts one message into agent.inbox, which should un-block the GatherMessages call
				289	// at the top of agent.InnerLoop.
				290	agent.UserMessage(ctx, "hi")
				291
				292	// This makes sure the first SendMessage call happens before we proceed with the cancel.
				293	synctest.Wait()
				294
				295	// The goroutine executing ToolResultContents call should be blocked, simulating a long
				296	// running operation that the user wishes to cancel while it's still in progress.
				297	// This call invokes that InnerLoop context's cancel() func.
				298	agent.CancelInnerLoop(userCancelError)
				299
				300	// This tells the goroutine that's in mockConvo.ToolResultContents to proceed.
				301	waitForToolResultContents <- nil
				302
				303	// Allow InnerLoop to handle the cancellation logic before continuing the conversation.
				304	synctest.Wait()
				305
				306	agent.UserMessage(ctx, "that was the wrong thing to do")
				307
				308	synctest.Wait()
				309
				310	// Verify results
				311	mockConvo.AssertExpectations(t)
				312	})
				313	}
				314
				315	func TestInnerLoop_UserCancels(t *testing.T) {
				316	synctest.Run(func() {
				317	mockConvo := NewMockConvo(t)
				318
				319	agent := &Agent{
				320	convo: mockConvo,
				321	inbox: make(chan string, 1),
				322	outbox: make(chan AgentMessage, 10), // don't block on outbox
				323	}
				324
				325	// Define test message
				326	// This simulates something that would result in claude responding with tool_use responses.
Josh Bleecher Snyder	a3dcd86	2025-04-30 19:47:16 +0000	[diff] [blame]	327	userMsg := ant.UserStringMessage("use test_tool for something")
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	328	// Mock initial response with tool use
				329	userMsgResponse := &ant.MessageResponse{
				330	StopReason: ant.StopReasonToolUse,
				331	Content: []ant.Content{
				332	{
				333	Type: ant.ContentTypeToolUse,
				334	ID: "tool1",
				335	ToolName: "test_tool",
				336	ToolInput: []byte(`{"param":"value"}`),
				337	},
				338	},
				339	Usage: ant.Usage{
				340	InputTokens: 100,
				341	OutputTokens: 200,
				342	},
				343	}
				344	canceledToolUseContents := []ant.Content{
				345	{
				346	Type: ant.ContentTypeToolResult,
				347	ToolUseID: "tool1",
				348	ToolError: true,
				349	ToolResult: "user canceled this tool_use",
				350	},
				351	}
				352	canceledToolUseMsg := ant.Message{
Josh Bleecher Snyder	a3dcd86	2025-04-30 19:47:16 +0000	[diff] [blame]	353	Role: ant.MessageRoleUser,
				354	Content: append(canceledToolUseContents, ant.StringContent(cancelToolUseMessage)),
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	355	}
				356	// Set up expected behaviors
				357	waitForSendMessage := make(chan any)
				358	mockConvo.ExpectCall("SendMessage", userMsg).BlockAndReturn(waitForSendMessage, userMsgResponse, nil)
				359
				360	mockConvo.ExpectCall("ToolResultCancelContents", userMsgResponse).Return(canceledToolUseContents, nil)
				361	mockConvo.ExpectCall("SendMessage", canceledToolUseMsg).Return(
				362	&ant.MessageResponse{
				363	StopReason: ant.StopReasonToolUse,
				364	}, nil)
				365
				366	ctx, cancel := context.WithCancelCause(context.Background())
				367
				368	// Run one iteration of InnerLoop
				369	go agent.InnerLoop(ctx)
				370
				371	// Send a message to the agent's inbox
				372	agent.UserMessage(ctx, "use test_tool for something")
				373
				374	synctest.Wait()
				375
				376	// cancel the context before we even call InnerLoop with it, so it will
				377	// be .Done() the first time it checks.
				378	cancel(fmt.Errorf("user canceled"))
				379
				380	// unblock the InnerLoop goroutine's SendMessage call
				381	waitForSendMessage <- nil
				382
				383	synctest.Wait()
				384
				385	// Verify results
				386	mockConvo.AssertExpectations(t)
				387
				388	// Get all messages from outbox and verify their types/content
				389	var messages []AgentMessage
				390
				391	// Collect messages until outbox is empty or we have 10 messages
				392	for i := 0; i < 10; i++ {
				393	select {
				394	case msg := <-agent.outbox:
				395	messages = append(messages, msg)
				396	default:
				397	// No more messages
				398	i = 10 // Exit the loop
				399	}
				400	}
				401
				402	// Print out the messages we got for debugging
				403	t.Logf("Received %d messages from outbox", len(messages))
				404	for i, msg := range messages {
				405	t.Logf("Message %d: Type=%s, Content=%s, EndOfTurn=%t", i, msg.Type, msg.Content, msg.EndOfTurn)
				406	if msg.ToolName != "" {
				407	t.Logf(" Tool: Name=%s, Input=%s, Result=%s, Error=%v",
				408	msg.ToolName, msg.ToolInput, msg.ToolResult, msg.ToolError)
				409	}
				410	}
				411
				412	// Basic checks
				413	if len(messages) < 1 {
				414	t.Errorf("Should have at least one message, got %d", len(messages))
				415	}
				416
				417	// The main thing we want to verify: when user cancels, the response processing stops
				418	// and appropriate messages are sent
				419
				420	// Check if we have an error message about cancellation
				421	hasCancelErrorMessage := false
				422	for _, msg := range messages {
				423	if msg.Type == ErrorMessageType && msg.Content == userCancelMessage {
				424	hasCancelErrorMessage = true
				425	break
				426	}
				427	}
				428
				429	// Check if we have a tool message with error
				430	hasToolError := false
				431	for _, msg := range messages {
				432	if msg.Type == ToolUseMessageType &&
				433	msg.ToolError && strings.Contains(msg.ToolResult, "user canceled") {
				434	hasToolError = true
				435	break
				436	}
				437	}
				438
				439	// We should have at least one of these messages
				440	if !(hasCancelErrorMessage \|\| hasToolError) {
				441	t.Errorf("Should have either an error message or a tool with error about cancellation")
				442	}
				443	})
				444	}
				445
				446	func TestInnerLoop_UserDoesNotCancel(t *testing.T) {
				447	mockConvo := NewMockConvo(t)
				448
				449	agent := &Agent{
				450	convo: mockConvo,
				451	inbox: make(chan string, 100),
				452	outbox: make(chan AgentMessage, 100),
				453	}
				454
				455	// Define test message
				456	// This simulates something that would result in claude
				457	// responding with tool_use responses.
				458	testMsg := "use test_tool for something"
				459
				460	// Mock initial response with tool use
				461	initialResponse := &ant.MessageResponse{
				462	StopReason: ant.StopReasonToolUse,
				463	Content: []ant.Content{
				464	{
				465	Type: ant.ContentTypeToolUse,
				466	ID: "tool1",
				467	ToolName: "test_tool",
				468	ToolInput: []byte(`{"param":"value"}`),
				469	},
				470	},
				471	Usage: ant.Usage{
				472	InputTokens: 100,
				473	OutputTokens: 200,
				474	},
				475	}
				476
				477	// Set up expected behaviors
				478	mockConvo.ExpectCall("SendMessage", nil).Return(initialResponse, nil)
				479
				480	toolUseContents := []ant.Content{
				481	{
				482	Type: ant.ContentTypeToolResult,
				483	ToolUseID: "tool1",
				484	Text: "",
				485	ToolResult: "This is a tool result",
				486	ToolError: false,
				487	},
				488	}
				489	toolUseResponse := &ant.MessageResponse{
				490	// StopReason: ant.StopReasonEndTurn,
				491	Content: []ant.Content{
				492	{
				493	Type: ant.ContentTypeText,
				494	Text: "tool_use contents accepted",
				495	},
				496	},
				497	Usage: ant.Usage{
				498	InputTokens: 50,
				499	OutputTokens: 75,
				500	},
				501	}
				502
				503	ctx, cancel := context.WithCancel(context.Background())
				504	defer cancel()
				505
				506	// Setting up the mock response for tool results
				507	mockConvo.ExpectCall("ToolResultContents", initialResponse).Return(toolUseContents, nil)
				508	mockConvo.ExpectCall("SendMessage", nil).Return(toolUseResponse, nil)
				509	// mockConvo, as a mock, isn't able to run the loop in ant.Convo that makes this agent.OnToolResult callback.
				510	// So we "mock" it out here by calling it explicitly, in order to make sure it calls .pushToOutbox with this message.
				511	// This is not a good situation.
				512	// ant.Convo and loop.Agent seem to be excessively coupled, and aware of each others' internal details.
				513	// TODO: refactor (or clarify in docs somewhere) the boundary between what ant.Convo is responsible
				514	// for vs what loop.Agent is responsible for.
				515	antConvo := &ant.Convo{}
				516	res := ""
				517	agent.OnToolResult(ctx, antConvo, "tool1", nil, toolUseContents[0], &res, nil)
				518
				519	// Send a message to the agent's inbox
				520	agent.UserMessage(ctx, testMsg)
				521
				522	// Run one iteration of InnerLoop
				523	agent.InnerLoop(ctx)
				524
				525	// Verify results
				526	mockConvo.AssertExpectations(t)
				527
				528	// Get all messages from outbox and verify their types/content
				529	var messages []AgentMessage
				530
				531	// Collect messages until outbox is empty or we have 10 messages
				532	for i := 0; i < 10; i++ {
				533	select {
				534	case msg := <-agent.outbox:
				535	messages = append(messages, msg)
				536	default:
				537	// No more messages
				538	i = 10 // Exit the loop
				539	}
				540	}
				541
				542	// Print out the messages we got for debugging
				543	t.Logf("Received %d messages from outbox", len(messages))
				544	for i, msg := range messages {
				545	t.Logf("Message %d: Type=%s, Content=%s, EndOfTurn=%t", i, msg.Type, msg.Content, msg.EndOfTurn)
				546	if msg.ToolName != "" {
				547	t.Logf(" Tool: Name=%s, Input=%s, Result=%s, Error=%v",
				548	msg.ToolName, msg.ToolInput, msg.ToolResult, msg.ToolError)
				549	}
				550	}
				551
				552	// Basic checks
				553	if len(messages) < 1 {
				554	t.Errorf("Should have at least one message, got %d", len(messages))
				555	}
				556
				557	// The main thing we want to verify: when user cancels, the response processing stops
				558	// and appropriate messages are sent
				559
				560	// Check if we have an error message about cancellation
				561	hasCancelErrorMessage := false
				562	for _, msg := range messages {
				563	if msg.Type == ErrorMessageType && msg.Content == userCancelMessage {
				564	hasCancelErrorMessage = true
				565	break
				566	}
				567	}
				568
				569	// Check if we have a tool message with error
				570	hasToolError := false
				571	for _, msg := range messages {
				572	if msg.Type == ToolUseMessageType &&
				573	msg.ToolError && strings.Contains(msg.ToolResult, "user canceled") {
				574	hasToolError = true
				575	break
				576	}
				577	}
				578
				579	if hasCancelErrorMessage \|\| hasToolError {
				580	t.Errorf("Should not have either an error message nor a tool with error about cancellation")
				581	}
				582	}