Blame - loop/agent_test.go - sketch

blob: 5bde1b177639f145ad411f358c87c57a0e6782a0 [file] [log] [blame]

Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	1	package loop
				2
				3	import (
				4	"context"
				5	"net/http"
				6	"os"
				7	"strings"
				8	"testing"
				9	"time"
				10
				11	"sketch.dev/ant"
				12	"sketch.dev/httprr"
				13	)
				14
				15	// TestAgentLoop tests that the Agent loop functionality works correctly.
				16	// It uses the httprr package to record HTTP interactions for replay in tests.
				17	// When failing, rebuild with "go test ./sketch/loop -run TestAgentLoop -httprecord .agent_loop."
				18	// as necessary.
				19	func TestAgentLoop(t *testing.T) {
				20	ctx := context.Background()
				21
				22	// Setup httprr recorder
				23	rrPath := "testdata/agent_loop.httprr"
				24	rr, err := httprr.Open(rrPath, http.DefaultTransport)
				25	if err != nil && !os.IsNotExist(err) {
				26	t.Fatal(err)
				27	}
				28
				29	if rr.Recording() {
				30	// Skip the test if API key is not available
				31	if os.Getenv("ANTHROPIC_API_KEY") == "" {
				32	t.Fatal("ANTHROPIC_API_KEY not set, required for HTTP recording")
				33	}
				34	}
				35
				36	// Create HTTP client
				37	var client *http.Client
				38	if rr != nil {
				39	// Scrub API keys from requests for security
				40	rr.ScrubReq(func(req *http.Request) error {
				41	req.Header.Del("x-api-key")
				42	req.Header.Del("anthropic-api-key")
				43	return nil
				44	})
				45	client = rr.Client()
				46	} else {
				47	client = &http.Client{Transport: http.DefaultTransport}
				48	}
				49
				50	// Create a new agent with the httprr client
				51	origWD, err := os.Getwd()
				52	if err != nil {
				53	t.Fatal(err)
				54	}
				55	if err := os.Chdir("/"); err != nil {
				56	t.Fatal(err)
				57	}
				58	budget := ant.Budget{MaxResponses: 100}
				59	wd, err := os.Getwd()
				60	if err != nil {
				61	t.Fatal(err)
				62	}
				63
				64	cfg := AgentConfig{
				65	Context: ctx,
				66	APIKey: os.Getenv("ANTHROPIC_API_KEY"),
				67	HTTPC: client,
				68	Budget: budget,
				69	GitUsername: "Test Agent",
				70	GitEmail: "totallyhuman@sketch.dev",
				71	SessionID: "test-session-id",
				72	ClientGOOS: "linux",
				73	ClientGOARCH: "amd64",
				74	}
				75	agent := NewAgent(cfg)
				76	if err := os.Chdir(origWD); err != nil {
				77	t.Fatal(err)
				78	}
				79	err = agent.Init(AgentInit{WorkingDir: wd, NoGit: true})
				80	if err != nil {
				81	t.Fatal(err)
				82	}
				83
				84	// Setup a test message that will trigger a simple, predictable response
				85	userMessage := "What tools are available to you? Please just list them briefly."
				86
				87	// Send the message to the agent
				88	agent.UserMessage(ctx, userMessage)
				89
				90	// Process a single loop iteration to avoid long-running tests
				91	agent.InnerLoop(ctx)
				92
				93	// Collect responses with a timeout
				94	var responses []AgentMessage
				95	timeout := time.After(10 * time.Second)
				96	done := false
				97
				98	for !done {
				99	select {
				100	case <-timeout:
				101	t.Log("Timeout reached while waiting for agent responses")
				102	done = true
				103	default:
				104	select {
				105	case msg := <-agent.outbox:
				106	t.Logf("Received message: Type=%s, EndOfTurn=%v, Content=%q", msg.Type, msg.EndOfTurn, msg.Content)
				107	responses = append(responses, msg)
				108	if msg.EndOfTurn {
				109	done = true
				110	}
				111	default:
				112	// No more messages available right now
				113	time.Sleep(100 * time.Millisecond)
				114	}
				115	}
				116	}
				117
				118	// Verify we got at least one response
				119	if len(responses) == 0 {
				120	t.Fatal("No responses received from agent")
				121	}
				122
				123	// Log the received responses for debugging
				124	t.Logf("Received %d responses", len(responses))
				125
				126	// Find the final agent response (with EndOfTurn=true)
				127	var finalResponse *AgentMessage
				128	for i := range responses {
				129	if responses[i].Type == AgentMessageType && responses[i].EndOfTurn {
				130	finalResponse = &responses[i]
				131	break
				132	}
				133	}
				134
				135	// Verify we got a final agent response
				136	if finalResponse == nil {
				137	t.Fatal("No final agent response received")
				138	}
				139
				140	// Check that the response contains tools information
				141	if !strings.Contains(strings.ToLower(finalResponse.Content), "tool") {
				142	t.Error("Expected response to mention tools")
				143	}
				144
				145	// Count how many tool use messages we received
				146	toolUseCount := 0
				147	for _, msg := range responses {
				148	if msg.Type == ToolUseMessageType {
				149	toolUseCount++
				150	}
				151	}
				152
				153	t.Logf("Agent used %d tools in its response", toolUseCount)
				154	}
Philip Zeyliger	99a9a02	2025-04-27 15:15:25 +0000	[diff] [blame^]	155
				156	func TestAgentTracksOutstandingCalls(t *testing.T) {
				157	agent := &Agent{
				158	outstandingLLMCalls: make(map[string]struct{}),
				159	outstandingToolCalls: make(map[string]string),
				160	}
				161
				162	// Check initial state
				163	if count := agent.OutstandingLLMCallCount(); count != 0 {
				164	t.Errorf("Expected 0 outstanding LLM calls, got %d", count)
				165	}
				166
				167	if tools := agent.OutstandingToolCalls(); len(tools) != 0 {
				168	t.Errorf("Expected 0 outstanding tool calls, got %d", len(tools))
				169	}
				170
				171	// Add some calls
				172	agent.mu.Lock()
				173	agent.outstandingLLMCalls["llm1"] = struct{}{}
				174	agent.outstandingToolCalls["tool1"] = "bash"
				175	agent.outstandingToolCalls["tool2"] = "think"
				176	agent.mu.Unlock()
				177
				178	// Check tracking works
				179	if count := agent.OutstandingLLMCallCount(); count != 1 {
				180	t.Errorf("Expected 1 outstanding LLM call, got %d", count)
				181	}
				182
				183	tools := agent.OutstandingToolCalls()
				184	if len(tools) != 2 {
				185	t.Errorf("Expected 2 outstanding tool calls, got %d", len(tools))
				186	}
				187
				188	// Check removal
				189	agent.mu.Lock()
				190	delete(agent.outstandingLLMCalls, "llm1")
				191	delete(agent.outstandingToolCalls, "tool1")
				192	agent.mu.Unlock()
				193
				194	if count := agent.OutstandingLLMCallCount(); count != 0 {
				195	t.Errorf("Expected 0 outstanding LLM calls after removal, got %d", count)
				196	}
				197
				198	tools = agent.OutstandingToolCalls()
				199	if len(tools) != 1 {
				200	t.Errorf("Expected 1 outstanding tool call after removal, got %d", len(tools))
				201	}
				202
				203	if tools[0] != "think" {
				204	t.Errorf("Expected 'think' tool remaining, got %s", tools[0])
				205	}
				206	}