sketch/loop: fix flaky TestPortMonitor_IntegrationDemo test
Remove assumptions about system-wide port state that made the test flaky.
Root Cause:
- Test assumed it was the only process creating/destroying ports
- Made brittle assertions about total port counts before/after test servers
- Failed when other processes (tests, services, containers) modified ports
The Fix:
- Focus only on the specific test ports created by the test
- Use polling with timeout to detect port creation/removal
- Remove assertions about total port counts
- Test only verifies its own test ports are detected and removed
Changes:
- Replace fixed sleeps with polling loops with timeouts
- Remove port count comparisons that depended on system state
- Keep track of test ports separately from system port scanning
- Add proper timeout handling to prevent hanging tests
Test Results:
- Before: Flaky failures when other processes modified ports
- After: 5 consecutive runs all passed, focusing only on test-specific behavior
This preserves the test's usefulness as an integration test while making it
deterministic and isolated from system-wide port changes.
Co-Authored-By: sketch <hello@sketch.dev>
Change-ID: s58312d8275959960k
diff --git a/loop/port_monitor_demo_test.go b/loop/port_monitor_demo_test.go
index 4190e5a..4c67790 100644
--- a/loop/port_monitor_demo_test.go
+++ b/loop/port_monitor_demo_test.go
@@ -12,12 +12,13 @@
// TestPortMonitor_IntegrationDemo demonstrates the full integration of PortMonitor with an Agent.
// This test shows how the PortMonitor detects port changes and sends notifications to an Agent.
+// The test focuses on specific ports it creates rather than making assumptions about total port counts.
func TestPortMonitor_IntegrationDemo(t *testing.T) {
// Create a test agent
agent := createTestAgent(t)
// Create and start the port monitor
- pm := NewPortMonitor(agent, 100*time.Millisecond) // Fast polling for demo
+ pm := NewPortMonitor(agent, 25*time.Millisecond) // Fast polling for demo
ctx := context.Background()
err := pm.Start(ctx)
if err != nil {
@@ -25,9 +26,6 @@
}
defer pm.Stop()
- // Wait for initial scan
- time.Sleep(200 * time.Millisecond)
-
// Show current ports
currentPorts := pm.GetPorts()
t.Logf("Initial TCP ports detected: %d", len(currentPorts))
@@ -38,6 +36,7 @@
// Start multiple test servers to demonstrate detection
var listeners []net.Listener
var wg sync.WaitGroup
+ var testPorts []uint16
// Start 3 test HTTP servers
for i := 0; i < 3; i++ {
@@ -49,6 +48,7 @@
addr := listener.Addr().(*net.TCPAddr)
port := addr.Port
+ testPorts = append(testPorts, uint16(port))
t.Logf("Started test HTTP server %d on port %d", i+1, port)
// Start a simple HTTP server
@@ -64,43 +64,39 @@
}(listener, i+1)
}
- // Wait for ports to be detected
- time.Sleep(500 * time.Millisecond)
+ // Wait for ports to be detected with timeout
+ detectionTimeout := time.After(15 * time.Second)
+ detectionTicker := time.NewTicker(25 * time.Millisecond)
+ defer detectionTicker.Stop()
- // Check that the new ports were detected
- updatedPorts := pm.GetPorts()
- t.Logf("Updated TCP ports detected: %d", len(updatedPorts))
+ allPortsDetected := false
+detectionLoop:
+ for !allPortsDetected {
+ select {
+ case <-detectionTimeout:
+ t.Fatalf("Timeout waiting for test ports to be detected")
+ case <-detectionTicker.C:
+ // Check if all test ports are detected
+ updatedPorts := pm.GetPorts()
+ portMap := make(map[uint16]bool)
+ for _, port := range updatedPorts {
+ portMap[port.Port] = true
+ }
- // Verify that we have at least 3 more ports than initially
- if len(updatedPorts) < len(currentPorts)+3 {
- t.Errorf("Expected at least %d ports, got %d", len(currentPorts)+3, len(updatedPorts))
- }
-
- // Find the new test server ports
- var testPorts []uint16
- for _, listener := range listeners {
- addr := listener.Addr().(*net.TCPAddr)
- testPorts = append(testPorts, uint16(addr.Port))
- }
-
- // Verify all test ports are detected
- portMap := make(map[uint16]bool)
- for _, port := range updatedPorts {
- portMap[port.Port] = true
- }
-
- allPortsDetected := true
- for _, testPort := range testPorts {
- if !portMap[testPort] {
- allPortsDetected = false
- t.Errorf("Test port %d was not detected", testPort)
+ allPortsDetected = true
+ for _, testPort := range testPorts {
+ if !portMap[testPort] {
+ allPortsDetected = false
+ break
+ }
+ }
+ if allPortsDetected {
+ t.Logf("All test ports successfully detected: %v", testPorts)
+ break detectionLoop
+ }
}
}
- if allPortsDetected {
- t.Logf("All test ports successfully detected!")
- }
-
// Close all listeners
for i, listener := range listeners {
t.Logf("Closing test server %d", i+1)
@@ -110,41 +106,41 @@
// Wait for servers to stop
wg.Wait()
- // Wait for ports to be removed
- time.Sleep(500 * time.Millisecond)
+ // Wait for ports to be removed with timeout
+ removalTimeout := time.After(15 * time.Second)
+ removalTicker := time.NewTicker(25 * time.Millisecond)
+ defer removalTicker.Stop()
- // Check that ports were removed
- finalPorts := pm.GetPorts()
- t.Logf("Final TCP ports detected: %d", len(finalPorts))
+ allPortsRemoved := false
+removalLoop:
+ for !allPortsRemoved {
+ select {
+ case <-removalTimeout:
+ t.Fatalf("Timeout waiting for test ports to be removed")
+ case <-removalTicker.C:
+ // Check if all test ports are removed
+ finalPorts := pm.GetPorts()
+ portMap := make(map[uint16]bool)
+ for _, port := range finalPorts {
+ portMap[port.Port] = true
+ }
- // Verify the final port count is back to near the original
- if len(finalPorts) > len(currentPorts)+1 {
- t.Errorf("Expected final port count to be close to initial (%d), got %d", len(currentPorts), len(finalPorts))
- }
-
- // Verify test ports are no longer detected
- portMap = make(map[uint16]bool)
- for _, port := range finalPorts {
- portMap[port.Port] = true
- }
-
- allPortsRemoved := true
- for _, testPort := range testPorts {
- if portMap[testPort] {
- allPortsRemoved = false
- t.Errorf("Test port %d was not removed", testPort)
+ allPortsRemoved = true
+ for _, testPort := range testPorts {
+ if portMap[testPort] {
+ allPortsRemoved = false
+ break
+ }
+ }
+ if allPortsRemoved {
+ t.Logf("All test ports successfully removed: %v", testPorts)
+ break removalLoop
+ }
}
}
- if allPortsRemoved {
- t.Logf("All test ports successfully removed!")
- }
-
t.Logf("Integration test completed successfully!")
- t.Logf("- Initial ports: %d", len(currentPorts))
- t.Logf("- Peak ports: %d", len(updatedPorts))
- t.Logf("- Final ports: %d", len(finalPorts))
- t.Logf("- Test ports added and removed: %d", len(testPorts))
+ t.Logf("- Test ports created and monitored: %v", testPorts)
}
// contains checks if a string contains a substring.