From 2ba0f4ae27fb5fc09c5c52a47eac930c14b66f11 Mon Sep 17 00:00:00 2001
From: Masonmason
Date: Thu, 24 Jul 2025 19:10:37 +0800
Subject: [PATCH 1/2] fix: Remove duplicate inference result logging to
 prevent terminal spam
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Comment out print() statements in InferencePipeline that duplicate GUI callback output
- Prevents each inference result from appearing multiple times in terminal
- Keeps logging system clean while maintaining GUI formatted display
- This was causing terminal output to show each result 2-3 times due to:
  1. InferencePipeline print() statements captured by StdoutCapture
  2. Same results formatted and sent via terminal_output callback

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 cluster4npu_ui/core/functions/InferencePipeline.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/cluster4npu_ui/core/functions/InferencePipeline.py b/cluster4npu_ui/core/functions/InferencePipeline.py
index 63fec81..d697e35 100644
--- a/cluster4npu_ui/core/functions/InferencePipeline.py
+++ b/cluster4npu_ui/core/functions/InferencePipeline.py
@@ -244,10 +244,12 @@ class PipelineStage:
                 # Handle tuple results like (probability, result_string)
                 prob, result_str = result
                 if prob is not None and result_str is not None:
-                    print(f"[Stage {self.stage_id}] ✅ Inference result: prob={prob:.3f}, result={result_str}")
+                    # Avoid duplicate logging - handled by GUI callback formatting
+                    # print(f"[Stage {self.stage_id}] ✅ Inference result: prob={prob:.3f}, result={result_str}")
                     inference_result = result
             elif isinstance(result, dict) and result:  # Non-empty dict
-                print(f"[Stage {self.stage_id}] ✅ Dict result: {result}")
+                # Avoid duplicate logging - handled by GUI callback formatting
+                # print(f"[Stage {self.stage_id}] ✅ Dict result: {result}")
                 inference_result = result
             else:
                 inference_result = result

From f41d9ae5c8b5788a03ba418c2bb7980b76eff279 Mon Sep 17 00:00:00 2001
From: Masonmason
Date: Thu, 24 Jul 2025 19:17:18 +0800
Subject: [PATCH 2/2] feat: Implement output queue based FPS calculation for
 accurate throughput measurement
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add time-window based FPS calculation using output queue timestamps
- Replace misleading "Theoretical FPS" (based on processing time) with real "Pipeline FPS"
- Track actual inference output generation rate over 10-second sliding window
- Add thread-safe FPS calculation with proper timestamp management
- Display realistic FPS values (4-9 FPS) instead of inflated values (90+ FPS)

Key improvements:
- _record_output_timestamp(): Records when each output is generated
- get_current_fps(): Calculates FPS based on actual throughput over time window
- Thread-safe implementation with fps_lock for concurrent access
- Automatic cleanup of old timestamps outside the time window
- Integration with GUI display to show meaningful FPS metrics

This provides users with accurate inference throughput measurements that
reflect real-world performance, especially important for multi-dongle setups
where understanding actual scaling is crucial.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .../core/functions/InferencePipeline.py | 46 +++++++++++++++++++
 cluster4npu_ui/ui/dialogs/deployment.py | 15 ++++--
 2 files changed, 56 insertions(+), 5 deletions(-)

diff --git a/cluster4npu_ui/core/functions/InferencePipeline.py b/cluster4npu_ui/core/functions/InferencePipeline.py
index d697e35..7fc6ee2 100644
--- a/cluster4npu_ui/core/functions/InferencePipeline.py
+++ b/cluster4npu_ui/core/functions/InferencePipeline.py
@@ -350,6 +350,11 @@ class InferencePipeline:
         self.completed_counter = 0
         self.error_counter = 0
 
+        # FPS calculation based on output queue throughput
+        self.fps_window_size = 10.0  # 10 second window
+        self.output_timestamps = []  # Track when outputs are generated
+        self.fps_lock = threading.Lock()  # Thread safety for FPS calculation
+
     def initialize(self):
         """Initialize all stages"""
         print(f"[{self.pipeline_name}] Initializing pipeline with {len(self.stages)} stages...")
@@ -367,8 +372,43 @@
 
         print(f"[{self.pipeline_name}] All stages initialized successfully")
 
+    def _record_output_timestamp(self):
+        """Record timestamp when output is generated for FPS calculation"""
+        with self.fps_lock:
+            current_time = time.time()
+            self.output_timestamps.append(current_time)
+
+            # Remove timestamps older than window
+            cutoff_time = current_time - self.fps_window_size
+            self.output_timestamps = [t for t in self.output_timestamps if t > cutoff_time]
+
+    def get_current_fps(self) -> float:
+        """Calculate current FPS based on output queue throughput"""
+        with self.fps_lock:
+            if len(self.output_timestamps) < 2:
+                return 0.0
+
+            current_time = time.time()
+            # Clean old timestamps
+            cutoff_time = current_time - self.fps_window_size
+            valid_timestamps = [t for t in self.output_timestamps if t > cutoff_time]
+
+            if len(valid_timestamps) < 2:
+                return 0.0
+
+            # Calculate FPS over the time window
+            time_span = valid_timestamps[-1] - valid_timestamps[0]
+            if time_span > 0:
+                return (len(valid_timestamps) - 1) / time_span
+
+            return 0.0
+
     def start(self):
         """Start the pipeline"""
+        # Clear previous FPS data when starting
+        with self.fps_lock:
+            self.output_timestamps.clear()
+
         print(f"[{self.pipeline_name}] Starting pipeline...")
 
         # Start all stages
@@ -479,6 +519,9 @@
                     self.pipeline_output_queue.put(current_data, block=False)
                     self.completed_counter += 1
 
+                    # Record output timestamp for FPS calculation
+                    self._record_output_timestamp()
+
                     # Call result callback
                     if self.result_callback:
                         self.result_callback(current_data)
@@ -488,6 +531,8 @@
                     try:
                         self.pipeline_output_queue.get_nowait()
                         self.pipeline_output_queue.put(current_data, block=False)
+                        # Record output timestamp even when queue was full
+                        self._record_output_timestamp()
                     except queue.Empty:
                         pass
                 else:
@@ -543,6 +588,7 @@
             'pipeline_errors': self.error_counter,
             'pipeline_input_queue_size': self.pipeline_input_queue.qsize(),
             'pipeline_output_queue_size': self.pipeline_output_queue.qsize(),
+            'current_fps': self.get_current_fps(),  # Add real-time FPS
             'stage_statistics': stage_stats
         }
 
diff --git a/cluster4npu_ui/ui/dialogs/deployment.py b/cluster4npu_ui/ui/dialogs/deployment.py
index 3a40024..abc20b3 100644
--- a/cluster4npu_ui/ui/dialogs/deployment.py
+++ b/cluster4npu_ui/ui/dialogs/deployment.py
@@ -190,6 +190,9 @@ class DeploymentWorker(QThread):
 
         # Set up both GUI and terminal result callbacks
         def combined_result_callback(result_dict):
+            # Add current FPS from pipeline to result_dict
+            result_dict['current_pipeline_fps'] = pipeline.get_current_fps()
+
             # Send to GUI terminal and results display
             terminal_output = self._format_terminal_results(result_dict)
             self.terminal_output.emit(terminal_output)
@@ -286,11 +289,13 @@
             if 'total_processing_time' in metadata:
                 processing_time = metadata['total_processing_time']
                 output_lines.append(f"   ⏱️ Processing Time: {processing_time:.3f}s")
-
-            # Add FPS calculation
-            if processing_time > 0:
-                fps = 1.0 / processing_time
-                output_lines.append(f"   🚄 Theoretical FPS: {fps:.2f}")
+
+            # Real-time FPS calculation based on output queue throughput
+            current_fps = result_dict.get('current_pipeline_fps', 0.0)
+            if current_fps > 0:
+                output_lines.append(f"   🚄 Pipeline FPS: {current_fps:.2f}")
+            else:
+                output_lines.append(f"   🚄 Pipeline FPS: Calculating...")
 
             # Additional metadata
             if metadata:
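
Note (illustrative, not part of the patches): the sliding-window throughput logic added in PATCH 2/2 can be sanity-checked in isolation. The standalone sketch below mirrors _record_output_timestamp()/get_current_fps() using hypothetical names (ThroughputMeter, record, fps) and simulates a ~5 Hz output stream, which should report roughly 5 FPS:

import threading
import time

class ThroughputMeter:
    """Sliding-window FPS meter; a sketch of the pipeline's approach, not the pipeline class itself."""

    def __init__(self, window_size: float = 10.0):
        self.window_size = window_size   # seconds of history to keep
        self.timestamps = []             # one entry per generated output
        self.lock = threading.Lock()     # guards timestamps across threads

    def record(self):
        # Analogue of _record_output_timestamp(): store now, drop stale entries
        with self.lock:
            now = time.time()
            self.timestamps.append(now)
            cutoff = now - self.window_size
            self.timestamps = [t for t in self.timestamps if t > cutoff]

    def fps(self) -> float:
        # Analogue of get_current_fps(): (N - 1) intervals over the observed span
        with self.lock:
            cutoff = time.time() - self.window_size
            valid = [t for t in self.timestamps if t > cutoff]
            if len(valid) < 2:
                return 0.0
            span = valid[-1] - valid[0]
            return (len(valid) - 1) / span if span > 0 else 0.0

if __name__ == "__main__":
    meter = ThroughputMeter(window_size=10.0)
    for _ in range(25):
        meter.record()
        time.sleep(0.2)        # simulate ~5 outputs per second
    print(f"measured: {meter.fps():.2f} FPS")  # expect ~5.0

Because the rate is derived from the intervals between outputs that were actually produced, it reflects end-to-end pipeline throughput rather than 1/processing_time of a single result, which is why the displayed values drop from the inflated 90+ "Theoretical FPS" to the realistic 4-9 FPS range described in the commit message.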