Blame - loop/agent_test.go - sketch

blob: 124f2fcc7cd775ac0bae23c9a13ae5f2317b6a3f [file] [log] [blame]

Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	1	package loop
				2
				3	import (
Josh Bleecher Snyder	4d5e997	2025-05-01 15:56:37 -0700	[diff] [blame]	4	"cmp"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	5	"context"
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	6	"fmt"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	7	"net/http"
				8	"os"
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	9	"slices"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	10	"strings"
				11	"testing"
				12	"time"
				13
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	14	"sketch.dev/httprr"
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	15	"sketch.dev/llm"
				16	"sketch.dev/llm/ant"
				17	"sketch.dev/llm/conversation"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	18	)
				19
				20	// TestAgentLoop tests that the Agent loop functionality works correctly.
				21	// It uses the httprr package to record HTTP interactions for replay in tests.
				22	// When failing, rebuild with "go test ./sketch/loop -run TestAgentLoop -httprecord .agent_loop."
				23	// as necessary.
				24	func TestAgentLoop(t *testing.T) {
				25	ctx := context.Background()
				26
				27	// Setup httprr recorder
				28	rrPath := "testdata/agent_loop.httprr"
				29	rr, err := httprr.Open(rrPath, http.DefaultTransport)
				30	if err != nil && !os.IsNotExist(err) {
				31	t.Fatal(err)
				32	}
				33
				34	if rr.Recording() {
				35	// Skip the test if API key is not available
				36	if os.Getenv("ANTHROPIC_API_KEY") == "" {
				37	t.Fatal("ANTHROPIC_API_KEY not set, required for HTTP recording")
				38	}
				39	}
				40
				41	// Create HTTP client
				42	var client *http.Client
				43	if rr != nil {
				44	// Scrub API keys from requests for security
				45	rr.ScrubReq(func(req *http.Request) error {
				46	req.Header.Del("x-api-key")
				47	req.Header.Del("anthropic-api-key")
				48	return nil
				49	})
				50	client = rr.Client()
				51	} else {
				52	client = &http.Client{Transport: http.DefaultTransport}
				53	}
				54
				55	// Create a new agent with the httprr client
				56	origWD, err := os.Getwd()
				57	if err != nil {
				58	t.Fatal(err)
				59	}
				60	if err := os.Chdir("/"); err != nil {
				61	t.Fatal(err)
				62	}
Philip Zeyliger	e6c294d	2025-06-04 16:55:21 +0000	[diff] [blame]	63	budget := conversation.Budget{MaxDollars: 10.0}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	64	wd, err := os.Getwd()
				65	if err != nil {
				66	t.Fatal(err)
				67	}
				68
David Crawshaw	3659d87	2025-05-05 17:52:23 -0700	[diff] [blame]	69	apiKey := cmp.Or(os.Getenv("OUTER_SKETCH_MODEL_API_KEY"), os.Getenv("ANTHROPIC_API_KEY"))
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	70	cfg := AgentConfig{
Philip Zeyliger	bc8c8dc	2025-05-21 13:19:13 -0700	[diff] [blame]	71	Context: ctx,
				72	WorkingDir: wd,
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	73	Service: &ant.Service{
				74	APIKey: apiKey,
				75	HTTPC: client,
				76	},
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	77	Budget: budget,
				78	GitUsername: "Test Agent",
				79	GitEmail: "totallyhuman@sketch.dev",
				80	SessionID: "test-session-id",
				81	ClientGOOS: "linux",
				82	ClientGOARCH: "amd64",
				83	}
				84	agent := NewAgent(cfg)
Josh Bleecher Snyder	8a0de52	2025-07-24 19:29:07 +0000	[diff] [blame]	85
				86	// Use fixed time for deterministic tests
				87	fixedTime := time.Date(2025, 7, 25, 19, 37, 57, 0, time.UTC)
				88	agent.now = func() time.Time { return fixedTime }
				89
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	90	if err := os.Chdir(origWD); err != nil {
				91	t.Fatal(err)
				92	}
Philip Zeyliger	bc8c8dc	2025-05-21 13:19:13 -0700	[diff] [blame]	93	err = agent.Init(AgentInit{NoGit: true})
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	94	if err != nil {
				95	t.Fatal(err)
				96	}
				97
				98	// Setup a test message that will trigger a simple, predictable response
Josh Bleecher Snyder	3b44cc3	2025-07-22 02:28:14 +0000	[diff] [blame]	99	userMessage := "What tools are available to you? Please just list them briefly."
				100
				101	// Set a slug so that the agent doesn't have to.
				102	agent.SetSlug("list-available-tools")
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	103
				104	// Send the message to the agent
				105	agent.UserMessage(ctx, userMessage)
				106
				107	// Process a single loop iteration to avoid long-running tests
Sean McCullough	885a16a	2025-04-30 02:49:25 +0000	[diff] [blame]	108	agent.processTurn(ctx)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	109
				110	// Collect responses with a timeout
				111	var responses []AgentMessage
Philip Zeyliger	9373c07	2025-05-01 10:27:01 -0700	[diff] [blame]	112	ctx2, cancel := context.WithDeadline(ctx, time.Now().Add(10*time.Second))
				113	defer cancel()
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	114	done := false
Philip Zeyliger	b7c5875	2025-05-01 10:10:17 -0700	[diff] [blame]	115	it := agent.NewIterator(ctx2, 0)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	116
				117	for !done {
Philip Zeyliger	b7c5875	2025-05-01 10:10:17 -0700	[diff] [blame]	118	msg := it.Next()
				119	t.Logf("Received message: Type=%s, EndOfTurn=%v, Content=%q", msg.Type, msg.EndOfTurn, msg.Content)
				120	responses = append(responses, *msg)
				121	if msg.EndOfTurn {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	122	done = true
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	123	}
				124	}
				125
				126	// Verify we got at least one response
				127	if len(responses) == 0 {
				128	t.Fatal("No responses received from agent")
				129	}
				130
				131	// Log the received responses for debugging
				132	t.Logf("Received %d responses", len(responses))
				133
				134	// Find the final agent response (with EndOfTurn=true)
				135	var finalResponse *AgentMessage
				136	for i := range responses {
				137	if responses[i].Type == AgentMessageType && responses[i].EndOfTurn {
				138	finalResponse = &responses[i]
				139	break
				140	}
				141	}
				142
				143	// Verify we got a final agent response
				144	if finalResponse == nil {
				145	t.Fatal("No final agent response received")
				146	}
				147
				148	// Check that the response contains tools information
				149	if !strings.Contains(strings.ToLower(finalResponse.Content), "tool") {
				150	t.Error("Expected response to mention tools")
				151	}
				152
				153	// Count how many tool use messages we received
				154	toolUseCount := 0
				155	for _, msg := range responses {
				156	if msg.Type == ToolUseMessageType {
				157	toolUseCount++
				158	}
				159	}
				160
				161	t.Logf("Agent used %d tools in its response", toolUseCount)
				162	}
Philip Zeyliger	99a9a02	2025-04-27 15:15:25 +0000	[diff] [blame]	163
				164	func TestAgentTracksOutstandingCalls(t *testing.T) {
				165	agent := &Agent{
				166	outstandingLLMCalls: make(map[string]struct{}),
				167	outstandingToolCalls: make(map[string]string),
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	168	stateMachine: NewStateMachine(),
Philip Zeyliger	99a9a02	2025-04-27 15:15:25 +0000	[diff] [blame]	169	}
				170
				171	// Check initial state
				172	if count := agent.OutstandingLLMCallCount(); count != 0 {
				173	t.Errorf("Expected 0 outstanding LLM calls, got %d", count)
				174	}
				175
				176	if tools := agent.OutstandingToolCalls(); len(tools) != 0 {
				177	t.Errorf("Expected 0 outstanding tool calls, got %d", len(tools))
				178	}
				179
				180	// Add some calls
				181	agent.mu.Lock()
				182	agent.outstandingLLMCalls["llm1"] = struct{}{}
				183	agent.outstandingToolCalls["tool1"] = "bash"
				184	agent.outstandingToolCalls["tool2"] = "think"
				185	agent.mu.Unlock()
				186
				187	// Check tracking works
				188	if count := agent.OutstandingLLMCallCount(); count != 1 {
				189	t.Errorf("Expected 1 outstanding LLM call, got %d", count)
				190	}
				191
				192	tools := agent.OutstandingToolCalls()
				193	if len(tools) != 2 {
				194	t.Errorf("Expected 2 outstanding tool calls, got %d", len(tools))
				195	}
				196
				197	// Check removal
				198	agent.mu.Lock()
				199	delete(agent.outstandingLLMCalls, "llm1")
				200	delete(agent.outstandingToolCalls, "tool1")
				201	agent.mu.Unlock()
				202
				203	if count := agent.OutstandingLLMCallCount(); count != 0 {
				204	t.Errorf("Expected 0 outstanding LLM calls after removal, got %d", count)
				205	}
				206
				207	tools = agent.OutstandingToolCalls()
				208	if len(tools) != 1 {
				209	t.Errorf("Expected 1 outstanding tool call after removal, got %d", len(tools))
				210	}
				211
				212	if tools[0] != "think" {
				213	t.Errorf("Expected 'think' tool remaining, got %s", tools[0])
				214	}
				215	}
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	216
				217	// TestAgentProcessTurnWithNilResponse tests the scenario where Agent.processTurn receives
				218	// a nil value for initialResp from processUserMessage.
				219	func TestAgentProcessTurnWithNilResponse(t *testing.T) {
				220	// Create a mock conversation that will return nil and error
				221	mockConvo := &MockConvoInterface{
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	222	sendMessageFunc: func(message llm.Message) (*llm.Response, error) {
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	223	return nil, fmt.Errorf("test error: simulating nil response")
				224	},
				225	}
				226
				227	// Create a minimal Agent instance for testing
				228	agent := &Agent{
				229	convo: mockConvo,
				230	inbox: make(chan string, 10),
Philip Zeyliger	9373c07	2025-05-01 10:27:01 -0700	[diff] [blame]	231	subscribers: []chan *AgentMessage{},
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	232	outstandingLLMCalls: make(map[string]struct{}),
				233	outstandingToolCalls: make(map[string]string),
				234	}
				235
				236	// Create a test context
				237	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
				238	defer cancel()
				239
				240	// Push a test message to the inbox so that processUserMessage will try to process it
				241	agent.inbox <- "Test message"
				242
				243	// Call processTurn - it should exit early without panic when initialResp is nil
				244	agent.processTurn(ctx)
				245
Philip Zeyliger	9373c07	2025-05-01 10:27:01 -0700	[diff] [blame]	246	// Verify error message was added to history
				247	agent.mu.Lock()
				248	defer agent.mu.Unlock()
				249
Josh Bleecher Snyder	3b44cc3	2025-07-22 02:28:14 +0000	[diff] [blame]	250	// There should be exactly two messages: slug + error
				251	if len(agent.history) != 2 {
				252	t.Errorf("Expected exactly two messages (slug + error), got %d", len(agent.history))
Philip Zeyliger	9373c07	2025-05-01 10:27:01 -0700	[diff] [blame]	253	} else {
Josh Bleecher Snyder	3b44cc3	2025-07-22 02:28:14 +0000	[diff] [blame]	254	slugMsg := agent.history[0]
				255	if slugMsg.Type != SlugMessageType {
				256	t.Errorf("Expected first message to be slug, got message type: %s", slugMsg.Type)
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	257	}
Josh Bleecher Snyder	3b44cc3	2025-07-22 02:28:14 +0000	[diff] [blame]	258	errorMsg := agent.history[1]
				259	if errorMsg.Type != ErrorMessageType {
				260	t.Errorf("Expected second message to be error, got message type: %s", errorMsg.Type)
				261	}
				262	if !strings.Contains(errorMsg.Content, "simulating nil response") {
				263	t.Errorf("Expected error message to contain 'simulating nil response', got: %s", errorMsg.Content)
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	264	}
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	265	}
				266	}
				267
				268	// MockConvoInterface implements the ConvoInterface for testing
				269	type MockConvoInterface struct {
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	270	sendMessageFunc func(message llm.Message) (*llm.Response, error)
				271	sendUserTextMessageFunc func(s string, otherContents ...llm.Content) (*llm.Response, error)
Josh Bleecher Snyder	64f2aa8	2025-05-14 18:31:05 +0000	[diff] [blame]	272	toolResultContentsFunc func(ctx context.Context, resp *llm.Response) ([]llm.Content, bool, error)
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	273	toolResultCancelContentsFunc func(resp *llm.Response) ([]llm.Content, error)
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	274	cancelToolUseFunc func(toolUseID string, cause error) error
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	275	cumulativeUsageFunc func() conversation.CumulativeUsage
Philip Zeyliger	b8a8f35	2025-06-02 07:39:37 -0700	[diff] [blame]	276	lastUsageFunc func() llm.Usage
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	277	resetBudgetFunc func(conversation.Budget)
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	278	overBudgetFunc func() error
Philip Zeyliger	2c4db09	2025-04-28 16:57:50 -0700	[diff] [blame]	279	getIDFunc func() string
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	280	subConvoWithHistoryFunc func() *conversation.Convo
Philip Zeyliger	43a0bfc	2025-07-14 14:54:27 -0700	[diff] [blame]	281	debugJSONFunc func() ([]byte, error)
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	282	}
				283
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	284	func (m MockConvoInterface) SendMessage(message llm.Message) (llm.Response, error) {
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	285	if m.sendMessageFunc != nil {
				286	return m.sendMessageFunc(message)
				287	}
				288	return nil, nil
				289	}
				290
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	291	func (m MockConvoInterface) SendUserTextMessage(s string, otherContents ...llm.Content) (llm.Response, error) {
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	292	if m.sendUserTextMessageFunc != nil {
				293	return m.sendUserTextMessageFunc(s, otherContents...)
				294	}
				295	return nil, nil
				296	}
				297
Josh Bleecher Snyder	64f2aa8	2025-05-14 18:31:05 +0000	[diff] [blame]	298	func (m MockConvoInterface) ToolResultContents(ctx context.Context, resp llm.Response) ([]llm.Content, bool, error) {
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	299	if m.toolResultContentsFunc != nil {
				300	return m.toolResultContentsFunc(ctx, resp)
				301	}
Josh Bleecher Snyder	64f2aa8	2025-05-14 18:31:05 +0000	[diff] [blame]	302	return nil, false, nil
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	303	}
				304
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	305	func (m MockConvoInterface) ToolResultCancelContents(resp llm.Response) ([]llm.Content, error) {
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	306	if m.toolResultCancelContentsFunc != nil {
				307	return m.toolResultCancelContentsFunc(resp)
				308	}
				309	return nil, nil
				310	}
				311
				312	func (m *MockConvoInterface) CancelToolUse(toolUseID string, cause error) error {
				313	if m.cancelToolUseFunc != nil {
				314	return m.cancelToolUseFunc(toolUseID, cause)
				315	}
				316	return nil
				317	}
				318
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	319	func (m *MockConvoInterface) CumulativeUsage() conversation.CumulativeUsage {
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	320	if m.cumulativeUsageFunc != nil {
				321	return m.cumulativeUsageFunc()
				322	}
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	323	return conversation.CumulativeUsage{}
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	324	}
				325
Philip Zeyliger	b8a8f35	2025-06-02 07:39:37 -0700	[diff] [blame]	326	func (m *MockConvoInterface) LastUsage() llm.Usage {
				327	if m.lastUsageFunc != nil {
				328	return m.lastUsageFunc()
				329	}
				330	return llm.Usage{}
				331	}
				332
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	333	func (m *MockConvoInterface) ResetBudget(budget conversation.Budget) {
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	334	if m.resetBudgetFunc != nil {
				335	m.resetBudgetFunc(budget)
				336	}
				337	}
				338
				339	func (m *MockConvoInterface) OverBudget() error {
				340	if m.overBudgetFunc != nil {
				341	return m.overBudgetFunc()
				342	}
				343	return nil
				344	}
				345
Philip Zeyliger	2c4db09	2025-04-28 16:57:50 -0700	[diff] [blame]	346	func (m *MockConvoInterface) GetID() string {
				347	if m.getIDFunc != nil {
				348	return m.getIDFunc()
				349	}
				350	return "mock-convo-id"
				351	}
				352
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	353	func (m MockConvoInterface) SubConvoWithHistory() conversation.Convo {
Philip Zeyliger	2c4db09	2025-04-28 16:57:50 -0700	[diff] [blame]	354	if m.subConvoWithHistoryFunc != nil {
				355	return m.subConvoWithHistoryFunc()
				356	}
				357	return nil
				358	}
				359
Philip Zeyliger	43a0bfc	2025-07-14 14:54:27 -0700	[diff] [blame]	360	func (m *MockConvoInterface) DebugJSON() ([]byte, error) {
				361	if m.debugJSONFunc != nil {
				362	return m.debugJSONFunc()
				363	}
				364	return []byte(`[{"role": "user", "content": [{"type": "text", "text": "mock conversation"}]}]`), nil
				365	}
				366
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	367	// TestAgentProcessTurnWithNilResponseNilError tests the scenario where Agent.processTurn receives
				368	// a nil value for initialResp and nil error from processUserMessage.
				369	// This test verifies that the implementation properly handles this edge case.
				370	func TestAgentProcessTurnWithNilResponseNilError(t *testing.T) {
				371	// Create a mock conversation that will return nil response and nil error
				372	mockConvo := &MockConvoInterface{
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	373	sendMessageFunc: func(message llm.Message) (*llm.Response, error) {
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	374	return nil, nil // This is unusual but now handled gracefully
				375	},
				376	}
				377
				378	// Create a minimal Agent instance for testing
				379	agent := &Agent{
				380	convo: mockConvo,
				381	inbox: make(chan string, 10),
Philip Zeyliger	9373c07	2025-05-01 10:27:01 -0700	[diff] [blame]	382	subscribers: []chan *AgentMessage{},
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	383	outstandingLLMCalls: make(map[string]struct{}),
				384	outstandingToolCalls: make(map[string]string),
				385	}
				386
				387	// Create a test context
				388	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
				389	defer cancel()
				390
				391	// Push a test message to the inbox so that processUserMessage will try to process it
				392	agent.inbox <- "Test message"
				393
				394	// Call processTurn - it should handle nil initialResp with a descriptive error
				395	err := agent.processTurn(ctx)
				396
				397	// Verify we get the expected error
				398	if err == nil {
				399	t.Error("Expected processTurn to return an error for nil initialResp, but got nil")
				400	} else if !strings.Contains(err.Error(), "unexpected nil response") {
				401	t.Errorf("Expected error about nil response, got: %v", err)
				402	} else {
				403	t.Logf("As expected, processTurn returned error: %v", err)
				404	}
				405
Philip Zeyliger	9373c07	2025-05-01 10:27:01 -0700	[diff] [blame]	406	// Verify error message was added to history
				407	agent.mu.Lock()
				408	defer agent.mu.Unlock()
				409
Josh Bleecher Snyder	3b44cc3	2025-07-22 02:28:14 +0000	[diff] [blame]	410	// There should be exactly two messages: slug + error
				411	if len(agent.history) != 2 {
				412	t.Errorf("Expected exactly two messages (slug + error), got %d", len(agent.history))
Philip Zeyliger	9373c07	2025-05-01 10:27:01 -0700	[diff] [blame]	413	} else {
Josh Bleecher Snyder	3b44cc3	2025-07-22 02:28:14 +0000	[diff] [blame]	414	slugMsg := agent.history[0]
				415	if slugMsg.Type != SlugMessageType {
				416	t.Errorf("Expected first message to be slug, got message type: %s", slugMsg.Type)
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	417	}
Josh Bleecher Snyder	3b44cc3	2025-07-22 02:28:14 +0000	[diff] [blame]	418	errorMsg := agent.history[1]
				419	if errorMsg.Type != ErrorMessageType {
				420	t.Errorf("Expected second message to be error, got message type: %s", errorMsg.Type)
				421	}
				422	if !strings.Contains(errorMsg.Content, "unexpected nil response") {
				423	t.Errorf("Expected error about nil response, got: %s", errorMsg.Content)
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	424	}
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	425	}
				426	}
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	427
				428	func TestAgentStateMachine(t *testing.T) {
				429	// Create a simplified test for the state machine functionality
				430	agent := &Agent{
				431	stateMachine: NewStateMachine(),
				432	}
				433
				434	// Initially the state should be Ready
				435	if state := agent.CurrentState(); state != StateReady {
				436	t.Errorf("Expected initial state to be StateReady, got %s", state)
				437	}
				438
				439	// Test manual transitions to verify state tracking
				440	ctx := context.Background()
				441
				442	// Track transitions
				443	var transitions []State
				444	agent.stateMachine.SetTransitionCallback(func(ctx context.Context, from, to State, event TransitionEvent) {
				445	transitions = append(transitions, to)
				446	t.Logf("State transition: %s -> %s (%s)", from, to, event.Description)
				447	})
				448
				449	// Perform a valid sequence of transitions (based on the state machine rules)
				450	expectedStates := []State{
				451	StateWaitingForUserInput,
				452	StateSendingToLLM,
				453	StateProcessingLLMResponse,
				454	StateToolUseRequested,
				455	StateCheckingForCancellation,
				456	StateRunningTool,
				457	StateCheckingGitCommits,
				458	StateRunningAutoformatters,
				459	StateCheckingBudget,
				460	StateGatheringAdditionalMessages,
				461	StateSendingToolResults,
				462	StateProcessingLLMResponse,
				463	StateEndOfTurn,
				464	}
				465
				466	// Manually perform each transition
				467	for _, state := range expectedStates {
				468	err := agent.stateMachine.Transition(ctx, state, "Test transition to "+state.String())
				469	if err != nil {
				470	t.Errorf("Failed to transition to %s: %v", state, err)
				471	}
				472	}
				473
				474	// Check if we recorded the right number of transitions
				475	if len(transitions) != len(expectedStates) {
				476	t.Errorf("Expected %d state transitions, got %d", len(expectedStates), len(transitions))
				477	}
				478
				479	// Check each transition matched what we expected
				480	for i, expected := range expectedStates {
				481	if i < len(transitions) {
				482	if transitions[i] != expected {
				483	t.Errorf("Transition %d: expected %s, got %s", i, expected, transitions[i])
				484	}
				485	}
				486	}
				487
				488	// Verify the current state is the last one we transitioned to
				489	if state := agent.CurrentState(); state != expectedStates[len(expectedStates)-1] {
				490	t.Errorf("Expected current state to be %s, got %s", expectedStates[len(expectedStates)-1], state)
				491	}
				492
				493	// Test force transition
				494	agent.stateMachine.ForceTransition(ctx, StateCancelled, "Testing force transition")
				495
				496	// Verify current state was updated
				497	if state := agent.CurrentState(); state != StateCancelled {
				498	t.Errorf("Expected forced state to be StateCancelled, got %s", state)
				499	}
				500	}
				501
				502	// mockConvoInterface is a mock implementation of ConvoInterface for testing
				503	type mockConvoInterface struct {
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	504	SendMessageFunc func(message llm.Message) (*llm.Response, error)
Josh Bleecher Snyder	64f2aa8	2025-05-14 18:31:05 +0000	[diff] [blame]	505	ToolResultContentsFunc func(ctx context.Context, resp *llm.Response) ([]llm.Content, bool, error)
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	506	}
				507
				508	func (c *mockConvoInterface) GetID() string {
				509	return "mockConvoInterface-id"
				510	}
				511
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	512	func (c mockConvoInterface) SubConvoWithHistory() conversation.Convo {
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	513	return nil
				514	}
				515
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	516	func (m *mockConvoInterface) CumulativeUsage() conversation.CumulativeUsage {
				517	return conversation.CumulativeUsage{}
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	518	}
				519
Philip Zeyliger	b8a8f35	2025-06-02 07:39:37 -0700	[diff] [blame]	520	func (m *mockConvoInterface) LastUsage() llm.Usage {
				521	return llm.Usage{}
				522	}
				523
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	524	func (m *mockConvoInterface) ResetBudget(conversation.Budget) {}
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	525
				526	func (m *mockConvoInterface) OverBudget() error {
				527	return nil
				528	}
				529
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	530	func (m mockConvoInterface) SendMessage(message llm.Message) (llm.Response, error) {
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	531	if m.SendMessageFunc != nil {
				532	return m.SendMessageFunc(message)
				533	}
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	534	return &llm.Response{StopReason: llm.StopReasonEndTurn}, nil
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	535	}
				536
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	537	func (m mockConvoInterface) SendUserTextMessage(s string, otherContents ...llm.Content) (llm.Response, error) {
				538	return m.SendMessage(llm.UserStringMessage(s))
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	539	}
				540
Josh Bleecher Snyder	64f2aa8	2025-05-14 18:31:05 +0000	[diff] [blame]	541	func (m mockConvoInterface) ToolResultContents(ctx context.Context, resp llm.Response) ([]llm.Content, bool, error) {
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	542	if m.ToolResultContentsFunc != nil {
				543	return m.ToolResultContentsFunc(ctx, resp)
				544	}
Josh Bleecher Snyder	64f2aa8	2025-05-14 18:31:05 +0000	[diff] [blame]	545	return []llm.Content{}, false, nil
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	546	}
				547
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	548	func (m mockConvoInterface) ToolResultCancelContents(resp llm.Response) ([]llm.Content, error) {
				549	return []llm.Content{llm.StringContent("Tool use cancelled")}, nil
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	550	}
				551
				552	func (m *mockConvoInterface) CancelToolUse(toolUseID string, cause error) error {
				553	return nil
				554	}
				555
Philip Zeyliger	43a0bfc	2025-07-14 14:54:27 -0700	[diff] [blame]	556	func (m *mockConvoInterface) DebugJSON() ([]byte, error) {
				557	return []byte(`[{"role": "user", "content": [{"type": "text", "text": "mock conversation"}]}]`), nil
				558	}
				559
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	560	func TestAgentProcessTurnStateTransitions(t *testing.T) {
				561	// Create a mock ConvoInterface for testing
				562	mockConvo := &mockConvoInterface{}
				563
				564	// Use the testing context
				565	ctx := t.Context()
				566
				567	// Create an agent with the state machine
				568	agent := &Agent{
Philip Zeyliger	f287299	2025-05-22 10:35:28 -0700	[diff] [blame]	569	convo: mockConvo,
				570	config: AgentConfig{Context: ctx},
				571	inbox: make(chan string, 10),
				572	ready: make(chan struct{}),
				573
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	574	outstandingLLMCalls: make(map[string]struct{}),
				575	outstandingToolCalls: make(map[string]string),
				576	stateMachine: NewStateMachine(),
				577	startOfTurn: time.Now(),
Philip Zeyliger	9373c07	2025-05-01 10:27:01 -0700	[diff] [blame]	578	subscribers: []chan *AgentMessage{},
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	579	}
				580
				581	// Verify initial state
				582	if state := agent.CurrentState(); state != StateReady {
				583	t.Errorf("Expected initial state to be StateReady, got %s", state)
				584	}
				585
				586	// Add a message to the inbox so we don't block in GatherMessages
				587	agent.inbox <- "Test message"
				588
				589	// Setup the mock to simulate a model response with end of turn
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	590	mockConvo.SendMessageFunc = func(message llm.Message) (*llm.Response, error) {
				591	return &llm.Response{
				592	StopReason: llm.StopReasonEndTurn,
				593	Content: []llm.Content{
				594	llm.StringContent("This is a test response"),
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	595	},
				596	}, nil
				597	}
				598
				599	// Track state transitions
				600	var transitions []State
				601	agent.stateMachine.SetTransitionCallback(func(ctx context.Context, from, to State, event TransitionEvent) {
				602	transitions = append(transitions, to)
				603	t.Logf("State transition: %s -> %s (%s)", from, to, event.Description)
				604	})
				605
				606	// Process a turn, which should trigger state transitions
				607	agent.processTurn(ctx)
				608
				609	// The minimum expected states for a simple end-of-turn response
				610	minExpectedStates := []State{
				611	StateWaitingForUserInput,
				612	StateSendingToLLM,
				613	StateProcessingLLMResponse,
				614	StateEndOfTurn,
				615	}
				616
				617	// Verify we have at least the minimum expected states
				618	if len(transitions) < len(minExpectedStates) {
				619	t.Errorf("Expected at least %d state transitions, got %d", len(minExpectedStates), len(transitions))
				620	}
				621
				622	// Check that the transitions follow the expected sequence
				623	for i, expected := range minExpectedStates {
				624	if i < len(transitions) {
				625	if transitions[i] != expected {
				626	t.Errorf("Transition %d: expected %s, got %s", i, expected, transitions[i])
				627	}
				628	}
				629	}
				630
				631	// Verify the final state is EndOfTurn
				632	if state := agent.CurrentState(); state != StateEndOfTurn {
				633	t.Errorf("Expected final state to be StateEndOfTurn, got %s", state)
				634	}
				635	}
				636
				637	func TestAgentProcessTurnWithToolUse(t *testing.T) {
				638	// Create a mock ConvoInterface for testing
				639	mockConvo := &mockConvoInterface{}
				640
				641	// Setup a test context
				642	ctx := context.Background()
				643
				644	// Create an agent with the state machine
				645	agent := &Agent{
Philip Zeyliger	f287299	2025-05-22 10:35:28 -0700	[diff] [blame]	646	convo: mockConvo,
				647	config: AgentConfig{Context: ctx},
				648	inbox: make(chan string, 10),
				649	ready: make(chan struct{}),
				650
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	651	outstandingLLMCalls: make(map[string]struct{}),
				652	outstandingToolCalls: make(map[string]string),
				653	stateMachine: NewStateMachine(),
				654	startOfTurn: time.Now(),
Philip Zeyliger	9373c07	2025-05-01 10:27:01 -0700	[diff] [blame]	655	subscribers: []chan *AgentMessage{},
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	656	}
				657
				658	// Add a message to the inbox so we don't block in GatherMessages
				659	agent.inbox <- "Test message"
				660
				661	// First response requests a tool
				662	firstResponseDone := false
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	663	mockConvo.SendMessageFunc = func(message llm.Message) (*llm.Response, error) {
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	664	if !firstResponseDone {
				665	firstResponseDone = true
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	666	return &llm.Response{
				667	StopReason: llm.StopReasonToolUse,
				668	Content: []llm.Content{
				669	llm.StringContent("I'll use a tool"),
				670	{Type: llm.ContentTypeToolUse, ToolName: "test_tool", ToolInput: []byte("{}"), ID: "test_id"},
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	671	},
				672	}, nil
				673	}
				674	// Second response ends the turn
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	675	return &llm.Response{
				676	StopReason: llm.StopReasonEndTurn,
				677	Content: []llm.Content{
				678	llm.StringContent("Finished using the tool"),
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	679	},
				680	}, nil
				681	}
				682
				683	// Tool result content handler
Josh Bleecher Snyder	64f2aa8	2025-05-14 18:31:05 +0000	[diff] [blame]	684	mockConvo.ToolResultContentsFunc = func(ctx context.Context, resp *llm.Response) ([]llm.Content, bool, error) {
				685	return []llm.Content{llm.StringContent("Tool executed successfully")}, false, nil
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	686	}
				687
				688	// Track state transitions
				689	var transitions []State
				690	agent.stateMachine.SetTransitionCallback(func(ctx context.Context, from, to State, event TransitionEvent) {
				691	transitions = append(transitions, to)
				692	t.Logf("State transition: %s -> %s (%s)", from, to, event.Description)
				693	})
				694
				695	// Process a turn with tool use
				696	agent.processTurn(ctx)
				697
				698	// Define expected states for a tool use flow
				699	expectedToolStates := []State{
				700	StateWaitingForUserInput,
				701	StateSendingToLLM,
				702	StateProcessingLLMResponse,
				703	StateToolUseRequested,
				704	StateCheckingForCancellation,
				705	StateRunningTool,
				706	}
				707
				708	// Verify that these states are present in order
				709	for i, expectedState := range expectedToolStates {
				710	if i >= len(transitions) {
				711	t.Errorf("Missing expected transition to %s; only got %d transitions", expectedState, len(transitions))
				712	continue
				713	}
				714	if transitions[i] != expectedState {
				715	t.Errorf("Expected transition %d to be %s, got %s", i, expectedState, transitions[i])
				716	}
				717	}
				718
				719	// Also verify we eventually reached EndOfTurn
				720	if !slices.Contains(transitions, StateEndOfTurn) {
				721	t.Errorf("Expected to eventually reach StateEndOfTurn, but never did")
				722	}
				723	}
Philip Zeyliger	72252cb	2025-05-10 17:00:08 -0700	[diff] [blame]	724
Philip Zeyliger	72252cb	2025-05-10 17:00:08 -0700	[diff] [blame]	725	func TestPushToOutbox(t *testing.T) {
				726	// Create a new agent
				727	a := &Agent{
				728	outstandingLLMCalls: make(map[string]struct{}),
				729	outstandingToolCalls: make(map[string]string),
				730	stateMachine: NewStateMachine(),
				731	subscribers: make([]chan *AgentMessage, 0),
				732	}
				733
				734	// Create a channel to receive messages
				735	messageCh := make(chan *AgentMessage, 1)
				736
				737	// Add the channel to the subscribers list
				738	a.mu.Lock()
				739	a.subscribers = append(a.subscribers, messageCh)
				740	a.mu.Unlock()
				741
				742	// We need to set the text that would be produced by our modified contentToString function
				743	resultText := "test resultnested result" // Directly set the expected output
				744
				745	// In a real-world scenario, this would be coming from a toolResult that contained nested content
				746
				747	m := AgentMessage{
				748	Type: ToolUseMessageType,
				749	ToolResult: resultText,
				750	}
				751
				752	// Push the message to the outbox
				753	a.pushToOutbox(context.Background(), m)
				754
				755	// Receive the message from the subscriber
				756	received := <-messageCh
				757
				758	// Check that the Content field contains the concatenated text from ToolResult
				759	expected := "test resultnested result"
				760	if received.Content != expected {
				761	t.Errorf("Expected Content to be %q, got %q", expected, received.Content)
				762	}
				763	}
Josh Bleecher Snyder	3b44cc3	2025-07-22 02:28:14 +0000	[diff] [blame]	764
				765	// TestCleanSlugName tests the slug cleaning function
				766	func TestCleanSlugName(t *testing.T) {
				767	tests := []struct {
				768	name string
				769	input string
				770	want string
				771	}{
				772	{"simple lowercase", "fix-bug", "fix-bug"},
				773	{"uppercase to lowercase", "FIX-BUG", "fix-bug"},
				774	{"spaces to hyphens", "fix login bug", "fix-login-bug"},
				775	{"mixed case and spaces", "Fix Login Bug", "fix-login-bug"},
				776	{"special characters removed", "fix_bug@home!", "fixbughome"},
				777	{"multiple hyphens preserved", "fix--bug---here", "fix--bug---here"},
				778	{"leading/trailing hyphens preserved", "-fix-bug-", "-fix-bug-"},
				779	{"numbers preserved", "fix-bug-v2", "fix-bug-v2"},
				780	{"empty string", "", ""},
				781	{"only special chars", "@#$%", ""},
				782	}
				783
				784	for _, tt := range tests {
				785	t.Run(tt.name, func(t *testing.T) {
				786	got := cleanSlugName(tt.input)
				787	if got != tt.want {
				788	t.Errorf("cleanSlugName(%q) = %q, want %q", tt.input, got, tt.want)
				789	}
				790	})
				791	}
				792	}
				793
				794	// TestAutoGenerateSlugInputValidation tests input validation for auto slug generation
				795	func TestAutoGenerateSlugInputValidation(t *testing.T) {
				796	// Test soleText with empty input
				797	emptyContents := []llm.Content{}
				798	_, err := soleText(emptyContents)
				799	if err == nil {
				800	t.Errorf("Expected error for empty contents, got nil")
				801	}
				802
				803	// Test with non-text content only
				804	nonTextContents := []llm.Content{
				805	{Type: llm.ContentTypeToolUse, ToolName: "bash"},
				806	}
				807	_, err = soleText(nonTextContents)
				808	if err == nil {
				809	t.Errorf("Expected error for non-text contents, got nil")
				810	}
				811
				812	// Test slug formatting
				813	testInputs := []string{
				814	"Fix the login bug",
				815	"Add user authentication system",
				816	"Refactor API endpoints",
				817	"Update documentation",
				818	}
				819
				820	for _, input := range testInputs {
				821	slug := cleanSlugName(strings.ToLower(strings.ReplaceAll(input, " ", "-")))
				822	if slug == "" {
				823	t.Errorf("cleanSlugName produced empty result for input %q", input)
				824	}
				825	if !strings.Contains(slug, "-") {
				826	// We expect most multi-word inputs to contain hyphens after processing
				827	t.Logf("Input %q produced slug %q (no hyphen found, might be single word)", input, slug)
				828	}
				829	}
				830	}
				831
				832	// TestSoleText tests the soleText helper function
				833	func TestSoleText(t *testing.T) {
				834	tests := []struct {
				835	name string
				836	contents []llm.Content
				837	wantText string
				838	wantErr bool
				839	}{
				840	{
				841	name: "single text content",
				842	contents: []llm.Content{
				843	{Type: llm.ContentTypeText, Text: " Hello world "},
				844	},
				845	wantText: "Hello world",
				846	wantErr: false,
				847	},
				848	{
				849	name: "empty slice",
				850	contents: []llm.Content{},
				851	wantText: "",
				852	wantErr: true,
				853	},
				854	{
				855	name: "multiple contents",
				856	contents: []llm.Content{
				857	{Type: llm.ContentTypeText, Text: "First"},
				858	{Type: llm.ContentTypeText, Text: "Second"},
				859	},
				860	wantText: "",
				861	wantErr: true,
				862	},
				863	{
				864	name: "non-text content",
				865	contents: []llm.Content{
				866	{Type: llm.ContentTypeToolUse, ToolName: "bash"},
				867	},
				868	wantText: "",
				869	wantErr: true,
				870	},
				871	{
				872	name: "empty text content",
				873	contents: []llm.Content{
				874	{Type: llm.ContentTypeText, Text: ""},
				875	},
				876	wantText: "",
				877	wantErr: true,
				878	},
				879	}
				880
				881	for _, tt := range tests {
				882	t.Run(tt.name, func(t *testing.T) {
				883	gotText, err := soleText(tt.contents)
				884	if (err != nil) != tt.wantErr {
				885	t.Errorf("soleText() error = %v, wantErr %v", err, tt.wantErr)
				886	return
				887	}
				888	if gotText != tt.wantText {
				889	t.Errorf("soleText() gotText = %v, want %v", gotText, tt.wantText)
				890	}
				891	})
				892	}
				893	}
Josh Bleecher Snyder	8a0de52	2025-07-24 19:29:07 +0000	[diff] [blame]	894
				895	// TestSystemPromptIncludesDateTime tests that the system prompt includes current date/time
				896	func TestSystemPromptIncludesDateTime(t *testing.T) {
				897	ctx := context.Background()
				898
				899	// Create a minimal agent config for testing
				900	config := AgentConfig{
				901	Context: ctx,
				902	ClientGOOS: "linux",
				903	ClientGOARCH: "amd64",
				904	}
				905
				906	// Create agent
				907	agent := NewAgent(config)
				908
				909	// Use fixed time for deterministic tests
				910	fixedTime := time.Date(2025, 7, 25, 19, 37, 57, 0, time.UTC)
				911	agent.now = func() time.Time { return fixedTime }
				912
				913	// Set minimal required fields for rendering
				914	agent.workingDir = "/tmp"
				915	agent.repoRoot = "/tmp"
				916
				917	// Mock SketchGitBase to return a valid commit hash
				918	// We'll override this by setting a method that returns a fixed value
				919	// Since we can't easily mock the git calls, we'll work around it
				920
				921	// Render the system prompt
				922	systemPrompt := agent.renderSystemPrompt()
				923
Josh Bleecher Snyder	9224eb0	2025-07-26 04:45:05 +0000	[diff] [blame]	924	// Check that the system prompt contains a current_date section
				925	if !strings.Contains(systemPrompt, "<current_date>") {
				926	t.Error("System prompt should contain <current_date> section")
Josh Bleecher Snyder	8a0de52	2025-07-24 19:29:07 +0000	[diff] [blame]	927	}
				928
Josh Bleecher Snyder	9224eb0	2025-07-26 04:45:05 +0000	[diff] [blame]	929	// Check that it contains what looks like a date
				930	// The format is "2006-01-02" (time.DateOnly)
				931	if !strings.Contains(systemPrompt, "-") {
				932	t.Error("System prompt should contain a formatted date")
Josh Bleecher Snyder	8a0de52	2025-07-24 19:29:07 +0000	[diff] [blame]	933	}
				934
Josh Bleecher Snyder	9224eb0	2025-07-26 04:45:05 +0000	[diff] [blame]	935	// Verify the expected fixed date (2025-07-25)
				936	expectedDate := "2025-07-25"
				937	if !strings.Contains(systemPrompt, expectedDate) {
				938	t.Errorf("System prompt should contain expected fixed date %s", expectedDate)
Josh Bleecher Snyder	8a0de52	2025-07-24 19:29:07 +0000	[diff] [blame]	939	}
				940
				941	// Print part of the system prompt for manual verification in test output
Josh Bleecher Snyder	9224eb0	2025-07-26 04:45:05 +0000	[diff] [blame]	942	// Find the current_date section
				943	start := strings.Index(systemPrompt, "<current_date>")
Josh Bleecher Snyder	8a0de52	2025-07-24 19:29:07 +0000	[diff] [blame]	944	if start != -1 {
Josh Bleecher Snyder	9224eb0	2025-07-26 04:45:05 +0000	[diff] [blame]	945	end := strings.Index(systemPrompt[start:], "</current_date>") + start
Josh Bleecher Snyder	8a0de52	2025-07-24 19:29:07 +0000	[diff] [blame]	946	if end > start {
Josh Bleecher Snyder	9224eb0	2025-07-26 04:45:05 +0000	[diff] [blame]	947	dateSection := systemPrompt[start : end+len("</current_date>")]
				948	t.Logf("Date section in system prompt: %s", dateSection)
Josh Bleecher Snyder	8a0de52	2025-07-24 19:29:07 +0000	[diff] [blame]	949	}
				950	}
				951	}