feat: Implement memory management and queue optimization
Major improvements:
- Add intelligent memory management for both input and output queues
- Implement frame dropping strategy to prevent memory overflow
- Set output queue limit to 50 results with FIFO cleanup
- Add input queue management with real-time frame dropping
- Filter async results from callbacks and display to reduce noise
- Improve system stability and prevent queue-related hangs
- Add comprehensive logging for dropped frames and results

Performance enhancements:
- Maintain real-time processing by prioritizing latest frames
- Prevent memory accumulation that previously caused system freezes
- Ensure consistent queue size reporting and FPS calculations

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
commit 0a946c5aaa
parent c9f294bb4c
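The output-queue policy summarized above (hard cap of 50 results, oldest evicted first) can be sketched independently of the pipeline classes touched below. This is a minimal illustration against a plain queue.Queue; the helper name put_with_fifo_cleanup and the module-level constant are placeholders for this sketch, not the project's actual API:

import queue

MAX_OUTPUT_QUEUE_SIZE = 50  # cap named in the commit message

def put_with_fifo_cleanup(q: queue.Queue, item, limit: int = MAX_OUTPUT_QUEUE_SIZE) -> int:
    """Insert item, evicting oldest entries so the queue stays under limit.

    Returns how many old entries were dropped (the kind of count the commit
    tracks for logging). Illustrative helper only.
    """
    dropped = 0
    # Pre-emptive FIFO cleanup: discard the oldest results until there is room.
    while q.qsize() >= limit:
        try:
            q.get_nowait()
            dropped += 1
        except queue.Empty:
            break
    try:
        q.put_nowait(item)
    except queue.Full:
        # Rare race: the queue refilled between cleanup and put; the new item is lost.
        dropped += 1
    return dropped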
@@ -526,11 +526,24 @@ class InferencePipeline:
                         current_data.metadata['start_timestamp']
                     )
 
+                    # Only put valid inference results into output queue
+                    if has_valid_inference:
+                        # Manage output queue size - maintain fixed upper limit for memory management
+                        MAX_OUTPUT_QUEUE_SIZE = 50  # Set maximum output queue size
+
+                        # If queue is getting full, remove old results to make space
+                        while self.pipeline_output_queue.qsize() >= MAX_OUTPUT_QUEUE_SIZE:
+                            try:
+                                dropped_result = self.pipeline_output_queue.get_nowait()
+                                # Track dropped results for debugging
+                                if not hasattr(self, '_dropped_results_count'):
+                                    self._dropped_results_count = 0
+                                self._dropped_results_count += 1
+                            except queue.Empty:
+                                break
+
                         try:
                             self.pipeline_output_queue.put(current_data, block=False)
 
-                            # Only count completed results if they contain valid inference
-                            if has_valid_inference:
                             self.completed_counter += 1
                             # Record output timestamp for FPS calculation
                             self._record_output_timestamp()
@@ -539,21 +552,17 @@ class InferencePipeline:
                             if self.completed_counter % 10 == 0:
                                 print(f"[{self.pipeline_name}] Processed {self.completed_counter} results")
                                 print(f"[{self.pipeline_name}] Queue sizes - Input: {self.pipeline_input_queue.qsize()}, Output: {self.pipeline_output_queue.qsize()}")
+                                # Show dropped results info if any
+                                if hasattr(self, '_dropped_results_count') and self._dropped_results_count > 0:
+                                    print(f"[{self.pipeline_name}] Dropped {self._dropped_results_count} old results for memory management")
 
-                            # Call result callback
+                            # Call result callback for valid inference results
                             if self.result_callback:
                                 self.result_callback(current_data)
 
                         except queue.Full:
-                            # Drop oldest and add new
-                            try:
-                                self.pipeline_output_queue.get_nowait()
-                                self.pipeline_output_queue.put(current_data, block=False)
-                                # Only record timestamp and count if valid inference result
-                                if has_valid_inference:
-                                    self._record_output_timestamp()
-                            except queue.Empty:
-                                pass
+                            # Fallback: should rarely happen due to pre-emptive cleaning above
+                            print(f"[{self.pipeline_name}] Warning: Output queue still full after cleanup")
                 else:
                     self.error_counter += 1
                     if self.error_callback:
@@ -567,10 +576,31 @@ class InferencePipeline:
         print(f"[{self.pipeline_name}] Coordinator stopped")
 
     def put_data(self, data: Any, timeout: float = 1.0) -> bool:
-        """Put data into pipeline"""
+        """Put data into pipeline with memory management"""
         try:
             self.pipeline_input_queue.put(data, timeout=timeout)
             return True
+        except queue.Full:
+            # Drop oldest frames to make space for new ones (for real-time processing)
+            try:
+                dropped_data = self.pipeline_input_queue.get_nowait()
+                self.pipeline_input_queue.put(data, block=False)
+
+                # Track dropped frames for debugging
+                if not hasattr(self, '_dropped_frames_count'):
+                    self._dropped_frames_count = 0
+                self._dropped_frames_count += 1
+
+                # Log occasionally to show frame dropping (every 50 drops)
+                if self._dropped_frames_count % 50 == 0:
+                    print(f"[{self.pipeline_name}] Dropped {self._dropped_frames_count} input frames for real-time processing")
+
+                return True
+            except queue.Empty:
+                # Rare case: queue became empty between full check and get
+                try:
+                    self.pipeline_input_queue.put(data, block=False)
+                    return True
                 except queue.Full:
                     return False
 
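The new put_data path above keeps producers non-blocking by trading the oldest queued frame for the newest one. A self-contained reproduction of that behavior outside the pipeline class, assuming a small queue.Queue and a hypothetical helper name put_latest:

import queue

def put_latest(q: queue.Queue, frame) -> bool:
    """Non-blocking put that favors the newest frame when the queue is full."""
    try:
        q.put_nowait(frame)
        return True
    except queue.Full:
        try:
            q.get_nowait()           # drop the oldest frame
            q.put_nowait(frame)      # make room for the newest one
            return True
        except (queue.Empty, queue.Full):
            return False             # lost a race with another producer/consumer

# With maxsize=3, only the most recent frames survive a burst of 10 puts.
q = queue.Queue(maxsize=3)
for frame_id in range(10):
    put_latest(q, frame_id)
print([q.get_nowait() for _ in range(q.qsize())])  # expected: [7, 8, 9]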
@@ -181,7 +181,7 @@ class WorkflowOrchestrator:
                 self.result_callback(result_dict)
 
         except Exception as e:
-            print(f"❌ Error handling result: {e}")
+            print(f"Error handling result: {e}")
 
     def _parse_resolution(self, resolution_str: Optional[str]) -> Optional[tuple[int, int]]:
         """
cluster4npu_ui/release_note.md (new file, 8 lines added)
@@ -0,0 +1,8 @@
+# Release Note for Cluster4NPU
+## v0.0.2 (2025/7/31)
+### Update
+-
+### Fix
+- Can't inference twice in the same app
+- FPS computation
+-
@@ -366,7 +366,7 @@ class DeploymentDialog(QDialog):
 
         # Topology tab
         self.topology_tab = self.create_topology_tab()
-        self.tab_widget.addTab(self.topology_tab, "Topology Analysis")
+        self.tab_widget.addTab(self.topology_tab, "Analysis")
 
         # Configuration tab
         self.config_tab = self.create_configuration_tab()