Refactor everything

Change-Id: Ic3a37c38cfecba943c91f6ae545ce1c5b551c0d5
diff --git a/server/agent/thinker.go b/server/agent/thinker.go
new file mode 100644
index 0000000..8e70230
--- /dev/null
+++ b/server/agent/thinker.go
@@ -0,0 +1,522 @@
+package agent
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"log/slog"
+	"strings"
+
+	"github.com/iomodo/staff/llm"
+	"github.com/iomodo/staff/tm"
+	"golang.org/x/text/cases"
+	"golang.org/x/text/language"
+)
+
+// Thinker bundles an LLM provider with the request settings and team roles
+// used to evaluate tasks, split them into subtasks, and draft solutions.
+type Thinker struct {
+	// LLM backend and the settings sent along with chat completion requests.
+	llmProvider  llm.LLMProvider
+	model        string // TODO: abstract away in llmProvider
+	systemPrompt string // TODO: abstract away in llmProvider
+	maxTokens    int
+	temperature  float64
+
+	// roles lists the existing team roles; they are offered to the LLM in the
+	// system prompts and used to validate the roles it assigns to subtasks.
+	roles []string
+
+	// logger receives decision summaries and warnings.
+	logger *slog.Logger
+}
+
+// NewThinker returns a Thinker configured with the given LLM provider, model,
+// system prompt, sampling settings, team roles, and logger.
+//
+// The systemPrompt argument is stored on the Thinker; GenerateSolution sends it
+// as the system message of its request.
+func NewThinker(llmProvider llm.LLMProvider, model string, systemPrompt string, maxTokens int, temperature float64, roles []string, logger *slog.Logger) *Thinker {
+	return &Thinker{
+		llmProvider:  llmProvider,
+		model:        model,
+		systemPrompt: systemPrompt,
+		maxTokens:    maxTokens,
+		temperature:  temperature,
+		roles:        roles,
+		logger:       logger,
+	}
+}
+
+// ShouldGenerateSubtasks reports whether task should be broken down into
+// subtasks.
+//
+// Tasks that have a parent and tasks already marked as evaluated are never
+// split. Otherwise the LLM is asked; on success the task is marked as evaluated
+// and the LLM's verdict is returned. If the LLM call fails, the task is left
+// unmarked and a heuristic decides instead: high priority, or a description
+// whose len exceeds 200.
+func (t *Thinker) ShouldGenerateSubtasks(task *tm.Task) bool {
+	// Subtasks are not split further, and a task is only evaluated once.
+	if task.ParentTaskID != "" || task.SubtasksEvaluated {
+		return false
+	}
+
+	verdict, err := t.shouldGenerateSubtasks(context.Background(), task)
+	if err != nil {
+		t.logger.Warn("Failed to get LLM subtask decision for task",
+			slog.String("task_id", task.ID),
+			slog.String("error", err.Error()))
+
+		// No LLM verdict available: fall back to simple heuristics.
+		isHighPriority := task.Priority == tm.PriorityHigh
+		return isHighPriority || len(task.Description) > 200
+	}
+
+	task.SubtasksEvaluated = true
+	t.logger.Info("LLM subtask decision for task",
+		slog.String("task_id", task.ID),
+		slog.Bool("needs_subtasks", verdict.NeedsSubtasks),
+		slog.Int("complexity_score", verdict.ComplexityScore),
+		slog.String("reasoning", verdict.Reasoning))
+
+	return verdict.NeedsSubtasks
+}
+
+// GenerateSubtasksForTask asks the LLM to break task down into subtasks and
+// returns the parsed analysis.
+//
+// The request uses the Thinker's model with a fixed limit of 4000 tokens and a
+// temperature of 0.3. An error is returned when the LLM call fails, when the
+// response carries no choices, or when the first choice cannot be parsed.
+func (t *Thinker) GenerateSubtasksForTask(ctx context.Context, task *tm.Task) (*tm.SubtaskAnalysis, error) {
+	// The request takes pointers, so the fixed settings need addressable homes.
+	maxTokens, temperature := 4000, 0.3
+
+	resp, err := t.llmProvider.ChatCompletion(ctx, llm.ChatCompletionRequest{
+		Model: t.model,
+		Messages: []llm.Message{
+			{Role: llm.RoleSystem, Content: getSubtaskAnalysisSystemPrompt(t.roles)},
+			{Role: llm.RoleUser, Content: buildSubtaskAnalysisPrompt(task)},
+		},
+		MaxTokens:   &maxTokens,
+		Temperature: &temperature,
+	})
+	if err != nil {
+		return nil, fmt.Errorf("LLM analysis failed: %w", err)
+	}
+	if len(resp.Choices) == 0 {
+		return nil, fmt.Errorf("no response from LLM")
+	}
+
+	// Only the first choice is considered.
+	reply := resp.Choices[0].Message.Content
+	analysis, err := parseSubtaskAnalysis(reply, task.ID, t.roles, t.logger)
+	if err != nil {
+		return nil, fmt.Errorf("failed to parse LLM response: %w", err)
+	}
+
+	return analysis, nil
+}
+
+// GenerateSolution asks the LLM for a solution to task and returns the text of
+// the first choice.
+//
+// The request is built from the Thinker's model, system prompt, token limit,
+// and temperature. An error is returned when the LLM call fails or when the
+// response carries no choices.
+func (t *Thinker) GenerateSolution(ctx context.Context, task *tm.Task) (string, error) {
+	messages := []llm.Message{
+		{Role: llm.RoleSystem, Content: t.systemPrompt},
+		{Role: llm.RoleUser, Content: buildTaskPrompt(task)},
+	}
+
+	resp, err := t.llmProvider.ChatCompletion(ctx, llm.ChatCompletionRequest{
+		Model:       t.model,
+		Messages:    messages,
+		MaxTokens:   &t.maxTokens,
+		Temperature: &t.temperature,
+	})
+	if err != nil {
+		return "", fmt.Errorf("LLM request failed: %w", err)
+	}
+	if len(resp.Choices) == 0 {
+		return "", fmt.Errorf("no response from LLM")
+	}
+
+	return resp.Choices[0].Message.Content, nil
+}
+
+// shouldGenerateSubtasks asks the LLM whether task needs to be split into
+// subtasks, given the Thinker's roles, and returns the parsed decision.
+//
+// The request uses the Thinker's model with a fixed limit of 1000 tokens and a
+// temperature of 0.3. An error is returned when the LLM call fails, when the
+// response carries no choices, or when the first choice cannot be parsed.
+func (t *Thinker) shouldGenerateSubtasks(ctx context.Context, task *tm.Task) (*tm.SubtaskDecision, error) {
+	// The request takes pointers, so the fixed settings need addressable homes.
+	maxTokens, temperature := 1000, 0.3
+
+	resp, err := t.llmProvider.ChatCompletion(ctx, llm.ChatCompletionRequest{
+		Model: t.model,
+		Messages: []llm.Message{
+			{Role: llm.RoleSystem, Content: getSubtaskDecisionSystemPrompt(t.roles)},
+			{Role: llm.RoleUser, Content: buildSubtaskDecisionPrompt(task)},
+		},
+		MaxTokens:   &maxTokens,
+		Temperature: &temperature,
+	})
+	if err != nil {
+		return nil, fmt.Errorf("LLM decision failed: %w", err)
+	}
+	if len(resp.Choices) == 0 {
+		return nil, fmt.Errorf("no response from LLM")
+	}
+
+	// Only the first choice is considered.
+	decision, err := parseSubtaskDecision(resp.Choices[0].Message.Content)
+	if err != nil {
+		return nil, fmt.Errorf("failed to parse LLM decision: %w", err)
+	}
+
+	return decision, nil
+}
+
+func buildSubtaskDecisionPrompt(task *tm.Task) string {
+	return fmt.Sprintf(`Please evaluate whether the following task needs to be broken down into subtasks:
+
+**Task Title:** %s
+
+**Description:** %s
+
+**Priority:** %s
+
+**Current Status:** %s
+
+Consider:
+- Can this be completed by a single agent with existing capabilities?
+- Does it require multiple specialized skills?
+- Is the scope too large for one person?
+- Are there logical components that could be parallelized?
+
+Provide your decision in the JSON format specified in the system prompt.`,
+		task.Title,
+		task.Description,
+		task.Priority,
+		task.Status)
+}
+
+func getSubtaskDecisionSystemPrompt(roles []string) string {
+	availableRoles := strings.Join(roles, ", ")
+
+	return fmt.Sprintf(`You are an expert project manager and task analyst. Your job is to determine whether a task needs to be broken down into subtasks.
+
+Currently available team roles and their capabilities: %s
+
+When evaluating a task, consider:
+1. Task complexity and scope
+2. Whether multiple specialized skills are needed
+3. If the task can be completed by a single agent with current capabilities
+4. Whether new agent roles might be needed for specialized skills
+
+Respond with a JSON object in this exact format:
+{
+  "needs_subtasks": true/false,
+  "reasoning": "Clear explanation of why subtasks are or aren't needed",
+  "complexity_score": 5,
+  "required_skills": ["skill1", "skill2", "skill3"]
+}
+
+Complexity score should be 1-10 where:
+- 1-3: Simple tasks that can be handled by one agent
+- 4-6: Moderate complexity, might benefit from subtasks
+- 7-10: Complex tasks that definitely need breaking down
+
+Required skills should list all technical/domain skills needed to complete the task.`, availableRoles)
+}
+
+// parseSubtaskDecision extracts the JSON object embedded in an LLM response and
+// decodes it into a tm.SubtaskDecision.
+//
+// The object is taken to span from the first '{' to the last '}', so text
+// around it (for example a markdown fence) is ignored. An error is returned
+// when no such span exists or when the span cannot be unmarshalled.
+func parseSubtaskDecision(response string) (*tm.SubtaskDecision, error) {
+	jsonStart := strings.Index(response, "{")
+	jsonEnd := strings.LastIndex(response, "}")
+
+	// jsonEnd < jsonStart covers both a missing '}' (-1) and a '}' that only
+	// occurs before the first '{'; slicing in the latter case would panic.
+	if jsonStart == -1 || jsonEnd < jsonStart {
+		return nil, fmt.Errorf("no JSON found in LLM response")
+	}
+
+	jsonStr := response[jsonStart : jsonEnd+1]
+
+	var decision tm.SubtaskDecision
+	if err := json.Unmarshal([]byte(jsonStr), &decision); err != nil {
+		return nil, fmt.Errorf("failed to unmarshal JSON: %w", err)
+	}
+
+	return &decision, nil
+}
+
+func buildSubtaskAnalysisPrompt(task *tm.Task) string {
+	return fmt.Sprintf(`Please analyze the following task and break it down into subtasks:
+
+**Task Title:** %s
+
+**Description:** %s
+
+**Priority:** %s
+
+**Current Status:** %s
+
+Please analyze this task and provide a detailed breakdown into subtasks. Consider:
+- Technical complexity and requirements
+- Logical task dependencies 
+- Appropriate skill sets needed for each subtask
+- Risk factors and potential blockers
+- Estimated effort for each component
+
+Provide the analysis in the JSON format specified in the system prompt.`,
+		task.Title,
+		task.Description,
+		task.Priority,
+		task.Status)
+}
+
+func getSubtaskAnalysisSystemPrompt(roles []string) string {
+	availableRoles := strings.Join(roles, ", ")
+
+	return fmt.Sprintf(`You are an expert project manager and technical architect. Your job is to analyze complex tasks and break them down into well-defined subtasks that can be assigned to specialized team members.
+
+Currently available team roles: %s
+
+When analyzing a task, you should:
+1. Understand the task requirements and scope
+2. Break it down into logical, manageable subtasks
+3. Assign each subtask to the most appropriate team role OR propose creating new agents
+4. Estimate effort and identify dependencies
+5. Provide a clear execution strategy
+
+If you need specialized skills not covered by existing roles, propose new agent creation.
+
+Respond with a JSON object in this exact format:
+{
+  "analysis_summary": "Brief analysis of the task and approach",
+  "subtasks": [
+    {
+      "title": "Subtask title",
+      "description": "Detailed description of what needs to be done",
+      "priority": "high|medium|low",
+      "assigned_to": "role_name",
+      "estimated_hours": 8,
+      "dependencies": ["subtask_index_1", "subtask_index_2"],
+      "required_skills": ["skill1", "skill2"]
+    }
+  ],
+  "agent_creations": [
+    {
+      "role": "new_role_name",
+      "skills": ["specialized_skill1", "specialized_skill2"],
+      "description": "Description of what this agent does",
+      "justification": "Why this new agent is needed"
+    }
+  ],
+  "recommended_approach": "High-level strategy for executing these subtasks",
+  "estimated_total_hours": 40,
+  "risk_assessment": "Potential risks and mitigation strategies"
+}
+
+For existing roles, use: %s
+For new agents, propose appropriate role names and skill sets.
+Dependencies should reference subtask indices (e.g., ["0", "1"] means depends on first and second subtasks).`, availableRoles, availableRoles)
+}
+
+// parseSubtaskAnalysis extracts the JSON object embedded in an LLM response and
+// converts it into a tm.SubtaskAnalysis for the task identified by parentTaskID.
+//
+// The object is taken to span from the first '{' to the last '}', so text
+// around it (for example a markdown fence) is ignored. Subtask priorities are
+// matched case-insensitively against "high" and "low"; anything else becomes
+// medium. After conversion the assignments are reconciled against agentRoles
+// by validateAndHandleAgentAssignments; an error from that step is only logged.
+//
+// An error is returned when no JSON span exists or when it cannot be
+// unmarshalled.
+func parseSubtaskAnalysis(response string, parentTaskID string, agentRoles []string, logger *slog.Logger) (*tm.SubtaskAnalysis, error) {
+	jsonStart := strings.Index(response, "{")
+	jsonEnd := strings.LastIndex(response, "}")
+
+	// jsonEnd < jsonStart covers both a missing '}' (-1) and a '}' that only
+	// occurs before the first '{'; slicing in the latter case would panic.
+	if jsonStart == -1 || jsonEnd < jsonStart {
+		return nil, fmt.Errorf("no JSON found in LLM response")
+	}
+
+	jsonStr := response[jsonStart : jsonEnd+1]
+
+	// Wire format of the LLM answer; priorities arrive as free-form strings and
+	// are mapped onto tm priorities below.
+	var rawAnalysis struct {
+		AnalysisSummary string `json:"analysis_summary"`
+		Subtasks        []struct {
+			Title          string   `json:"title"`
+			Description    string   `json:"description"`
+			Priority       string   `json:"priority"`
+			AssignedTo     string   `json:"assigned_to"`
+			EstimatedHours int      `json:"estimated_hours"`
+			Dependencies   []string `json:"dependencies"`
+			RequiredSkills []string `json:"required_skills"`
+		} `json:"subtasks"`
+		AgentCreations      []tm.AgentCreationProposal `json:"agent_creations"`
+		RecommendedApproach string                     `json:"recommended_approach"`
+		EstimatedTotalHours int                        `json:"estimated_total_hours"`
+		RiskAssessment      string                     `json:"risk_assessment"`
+	}
+
+	if err := json.Unmarshal([]byte(jsonStr), &rawAnalysis); err != nil {
+		return nil, fmt.Errorf("failed to unmarshal JSON: %w", err)
+	}
+
+	// Convert to our types
+	analysis := &tm.SubtaskAnalysis{
+		ParentTaskID:        parentTaskID,
+		AnalysisSummary:     rawAnalysis.AnalysisSummary,
+		AgentCreations:      rawAnalysis.AgentCreations,
+		RecommendedApproach: rawAnalysis.RecommendedApproach,
+		EstimatedTotalHours: rawAnalysis.EstimatedTotalHours,
+		RiskAssessment:      rawAnalysis.RiskAssessment,
+	}
+
+	// Convert subtasks
+	for _, st := range rawAnalysis.Subtasks {
+		priority := tm.PriorityMedium // default for missing or unrecognized values
+		switch strings.ToLower(st.Priority) {
+		case "high":
+			priority = tm.PriorityHigh
+		case "low":
+			priority = tm.PriorityLow
+		}
+
+		subtask := tm.SubtaskProposal{
+			Title:          st.Title,
+			Description:    st.Description,
+			Priority:       priority,
+			AssignedTo:     st.AssignedTo,
+			EstimatedHours: st.EstimatedHours,
+			Dependencies:   st.Dependencies,
+			RequiredSkills: st.RequiredSkills,
+		}
+
+		analysis.Subtasks = append(analysis.Subtasks, subtask)
+	}
+
+	// Validate agent assignments and handle new agent creation. A failure here
+	// is reported but does not discard the parsed analysis.
+	if err := validateAndHandleAgentAssignments(analysis, agentRoles, logger); err != nil {
+		logger.Warn("Warning during agent assignment handling", slog.String("error", err.Error()))
+	}
+
+	return analysis, nil
+}
+
+// validateAndHandleAgentAssignments reconciles the subtask assignments in
+// analysis with the roles that exist (agentRoles) or are proposed by the
+// analysis itself. It modifies analysis in place.
+//
+// For every proposed agent creation a high-priority "Create ... Agent" subtask
+// assigned to "ceo" is prepended, and the numeric dependency indices of all
+// subtasks behind it are shifted by one. Afterwards every subtask, including
+// the prepended ones, whose role is neither in agentRoles nor proposed is
+// reassigned to the first entry of agentRoles ("ceo" when agentRoles is empty)
+// and a warning is logged. The returned error is always nil.
+func validateAndHandleAgentAssignments(analysis *tm.SubtaskAnalysis, agentRoles []string, logger *slog.Logger) error {
+	// Roles that will be available: existing ones plus the proposed new ones.
+	known := make(map[string]bool)
+	for _, role := range agentRoles {
+		known[role] = true
+	}
+
+	for _, proposal := range analysis.AgentCreations {
+		known[proposal.Role] = true
+
+		roleTitle := cases.Title(language.English).String(proposal.Role)
+		skillList := strings.Join(proposal.Skills, ", ")
+		creation := tm.SubtaskProposal{
+			Title:          fmt.Sprintf("Create %s Agent", roleTitle),
+			Description:    fmt.Sprintf("Create and configure a new %s agent with skills: %s. %s", proposal.Role, skillList, proposal.Justification),
+			Priority:       tm.PriorityHigh,
+			AssignedTo:     "ceo",
+			EstimatedHours: 4,
+			Dependencies:   []string{},
+			RequiredSkills: []string{"agent_configuration", "system_design"},
+		}
+
+		// Prepend so that agent creation comes before the work that needs it.
+		analysis.Subtasks = append([]tm.SubtaskProposal{creation}, analysis.Subtasks...)
+
+		// Everything behind the new head moved one slot; bump the numeric
+		// dependency indices to match. Non-numeric entries are left alone.
+		shifted := analysis.Subtasks[1:]
+		for i := range shifted {
+			deps := shifted[i].Dependencies
+			for j := range deps {
+				if idx := parseDependencyIndex(deps[j]); idx >= 0 {
+					deps[j] = fmt.Sprintf("%d", idx+1)
+				}
+			}
+		}
+	}
+
+	// Role that takes over subtasks assigned to an unknown role.
+	fallback := "ceo"
+	if len(agentRoles) > 0 {
+		fallback = agentRoles[0]
+	}
+
+	for i := range analysis.Subtasks {
+		st := &analysis.Subtasks[i]
+		if known[st.AssignedTo] {
+			continue
+		}
+		logger.Warn("Unknown agent role for subtask, using default",
+			slog.String("unknown_role", st.AssignedTo),
+			slog.String("subtask_title", st.Title),
+			slog.String("assigned_role", fallback))
+		st.AssignedTo = fallback
+	}
+
+	return nil
+}
+
+func parseDependencyIndex(dep string) int {
+	var idx int
+	if _, err := fmt.Sscanf(dep, "%d", &idx); err == nil {
+		return idx
+	}
+	return -1 // Invalid dependency format
+}
+
+// generateSubtaskPRContent renders the markdown body of a pull request that
+// describes the subtasks proposed in analysis.
+//
+// The text contains a header with the parent task ID, the analysis summary,
+// the recommended approach and total estimate, a list of task file names
+// ("<parent>-subtask-<n>.md", numbered from 1), an optional risk assessment,
+// the proposed subtasks with their dependencies, and review instructions.
+// The function only builds the string.
+func generateSubtaskPRContent(analysis *tm.SubtaskAnalysis) string {
+	var b strings.Builder
+	parentID := analysis.ParentTaskID
+	subtasks := analysis.Subtasks
+
+	// Header and overall analysis.
+	fmt.Fprintf(&b, "# Subtasks Created for Task %s\n\n", parentID)
+	fmt.Fprintf(&b, "This PR creates **%d individual task files** in `/operations/tasks/` ready for agent assignment.\n\n", len(subtasks))
+	fmt.Fprintf(&b, "✅ **Parent task `%s` has been marked as completed** - the complex task has been successfully broken down into actionable subtasks.\n\n", parentID)
+	fmt.Fprintf(&b, "## Analysis Summary\n%s\n\n", analysis.AnalysisSummary)
+	fmt.Fprintf(&b, "## Recommended Approach\n%s\n\n", analysis.RecommendedApproach)
+	fmt.Fprintf(&b, "**Estimated Total Hours:** %d\n\n", analysis.EstimatedTotalHours)
+
+	// One entry per task file.
+	b.WriteString("## Created Task Files\n\n")
+	for i := range subtasks {
+		st := subtasks[i]
+		number := i + 1
+		fileID := fmt.Sprintf("%s-subtask-%d", parentID, number)
+		fmt.Fprintf(&b, "### %d. `%s.md`\n", number, fileID)
+		fmt.Fprintf(&b, "- **Title:** %s\n", st.Title)
+		fmt.Fprintf(&b, "- **Assigned to:** %s\n", st.AssignedTo)
+		fmt.Fprintf(&b, "- **Priority:** %s\n", st.Priority)
+		fmt.Fprintf(&b, "- **Estimated Hours:** %d\n", st.EstimatedHours)
+		fmt.Fprintf(&b, "- **Description:** %s\n\n", st.Description)
+	}
+
+	if analysis.RiskAssessment != "" {
+		fmt.Fprintf(&b, "## Risk Assessment\n%s\n\n", analysis.RiskAssessment)
+	}
+
+	// Same subtasks again, this time including their dependencies.
+	b.WriteString("## Proposed Subtasks\n\n")
+	for i := range subtasks {
+		st := subtasks[i]
+		fmt.Fprintf(&b, "### %d. %s\n", i+1, st.Title)
+		fmt.Fprintf(&b, "- **Assigned to:** %s\n", st.AssignedTo)
+		fmt.Fprintf(&b, "- **Priority:** %s\n", st.Priority)
+		fmt.Fprintf(&b, "- **Estimated Hours:** %d\n", st.EstimatedHours)
+		if len(st.Dependencies) > 0 {
+			fmt.Fprintf(&b, "- **Dependencies:** %s\n", strings.Join(st.Dependencies, ", "))
+		}
+		fmt.Fprintf(&b, "- **Description:** %s\n\n", st.Description)
+	}
+
+	// Footer with reviewer instructions.
+	b.WriteString("---\n")
+	b.WriteString("*Generated by Staff AI Agent System*\n\n")
+	b.WriteString("**Instructions:**\n")
+	b.WriteString("- Review the proposed subtasks\n")
+	b.WriteString("- Approve or request changes\n")
+	b.WriteString("- When merged, the subtasks will be automatically created and assigned\n")
+
+	return b.String()
+}
+
+// buildTaskPrompt creates a detailed prompt for the LLM
+func buildTaskPrompt(task *tm.Task) string {
+	return fmt.Sprintf(`Task: %s
+
+Priority: %s
+Description: %s
+
+Please provide a complete solution for this task. Include:
+1. Detailed implementation plan
+2. Code changes needed (if applicable)
+3. Files to be created or modified
+4. Testing considerations
+5. Any dependencies or prerequisites
+
+Your response should be comprehensive and actionable.`,
+		task.Title,
+		task.Priority,
+		task.Description)
+}