grabbit/meteor_detection_system

Fork 0

grabbit b9c2b7e17d memory optimize

2025-08-12 07:21:41 +08:00

21 KiB

Raw Blame History

内存管理优化方案

当前问题分析

1. 内存拷贝问题

当前架构中存在的主要内存问题：

// Current implementation: every event hand-off clones the entire frame buffer.
pub struct FrameCapturedEvent {
    pub frame_data: Vec<u8>, // 640x480 RGB = ~900KB per frame
}

// Cost analysis:
// - 30 FPS x ~900KB = ~27MB/s of memory copies
// - When the event bus broadcasts, every subscriber clones the data
// - 3 subscribers = ~81MB/s of memory traffic

2. 内存分配压力

每帧都需要新的内存分配
频繁的堆分配与释放给内存分配器带来压力，导致延迟峰值（Rust 没有 GC，这里指分配器开销）
内存碎片化问题

3. 缓冲区管理

Detection模块维护独立的帧缓冲
Storage模块也有自己的缓冲
重复存储相同数据

优化方案详细设计

方案1: 零拷贝架构 (Zero-Copy Architecture)

A. 使用Arc实现共享不可变数据

use std::sync::Arc;
use bytes::Bytes;

/// Frame-captured event whose pixel payload is shared through an `Arc`.
///
/// The struct derives `Clone`; cloning it clones the header fields and the
/// `Arc` handle in `frame_data`, not the `FrameData` behind it.
#[derive(Clone, Debug)]
pub struct FrameCapturedEvent {
    /// Identifier of the frame.
    pub frame_id: u64,
    /// UTC timestamp attached to the frame.
    pub timestamp: chrono::DateTime<chrono::Utc>,
    /// Capture metadata for this frame.
    pub metadata: FrameMetadata,
    pub frame_data: Arc<FrameData>, // shared handle; a clone only increments the reference count
}

/// Raw frame bytes bundled with the information needed to interpret them.
#[derive(Debug)]
pub struct FrameData {
    pub data: Bytes,        // `bytes` crate buffer; supports zero-copy slicing
    pub width: u32,
    pub height: u32,
    pub format: FrameFormat,
}

/// Per-frame capture metadata.
///
/// `Default` is derived because `FrameCapturedEvent::new_zero_copy` builds its
/// metadata with `FrameMetadata::default()`; without the derive that call does
/// not compile. The default is all-zero values with no temperature reading.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct FrameMetadata {
    /// Identifier of the camera that produced the frame.
    pub camera_id: u32,
    /// Exposure time (unit not visible here — TODO confirm).
    pub exposure_time: f32,
    /// Sensor gain (scale not visible here — TODO confirm).
    pub gain: f32,
    /// Temperature reading, if one is available (unit not visible here).
    pub temperature: Option<f32>,
}

/// Layout of the bytes stored in a frame buffer.
///
/// The enum is a plain tag with no payload, so it is `Copy` and comparable;
/// this lets it be passed by value and matched without moving it out of its
/// owner. Variant names are kept as-is for compatibility with existing users.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum FrameFormat {
    /// Packed RGB, sized as 3 bytes per pixel by the frame pool.
    RGB888,
    /// YUV 4:2:0, sized as 1.5 bytes per pixel by the frame pool.
    YUV420,
    /// JPEG-compressed frame (size is only estimated by the frame pool).
    JPEG,
    /// H.264-encoded frame (size is only estimated by the frame pool).
    H264Frame,
}

// Example implementation
impl FrameCapturedEvent {
    /// Builds an event around an owned buffer without copying the pixels.
    ///
    /// The `Vec` is converted into `Bytes` and placed behind an `Arc`. The
    /// format is always recorded as `RGB888`, the timestamp is taken at call
    /// time and the metadata is the default value.
    pub fn new_zero_copy(frame_id: u64, data: Vec<u8>, width: u32, height: u32) -> Self {
        let payload = FrameData {
            data: Bytes::from(data), // becomes `Bytes`, which can later be sliced without copying
            width,
            height,
            format: FrameFormat::RGB888,
        };
        let frame_data = Arc::new(payload);

        Self {
            frame_id,
            timestamp: chrono::Utc::now(),
            metadata: FrameMetadata::default(),
            frame_data,
        }
    }

    /// Borrows the frame bytes read-only.
    pub fn data(&self) -> &[u8] {
        &self.frame_data.data[..]
    }

    /// Returns the sub-range `start..end` of the frame as a zero-copy slice.
    pub fn slice(&self, start: usize, end: usize) -> Bytes {
        let range = start..end;
        self.frame_data.data.slice(range)
    }
}

B. 优化事件总线

use tokio::sync::broadcast;
use std::sync::Arc;

/// Event bus that broadcasts `Arc`-wrapped events to its subscribers.
pub struct OptimizedEventBus {
    // Sender kept behind an `Arc` so it can be shared without cloning the whole channel
    sender: Arc<broadcast::Sender<Arc<SystemEvent>>>,
    // Capacity that was passed to `broadcast::channel` when the bus was created.
    capacity: usize,
}

impl OptimizedEventBus {
    /// Creates a bus backed by a broadcast channel of the given capacity.
    ///
    /// The receiver returned by `broadcast::channel` is discarded; receivers
    /// are obtained through `subscribe`.
    pub fn new(capacity: usize) -> Self {
        let tx = broadcast::channel(capacity).0;
        Self {
            sender: Arc::new(tx),
            capacity,
        }
    }

    /// Wraps `event` in an `Arc` and sends it on the channel.
    ///
    /// # Errors
    /// Returns a "No subscribers" error when the send fails.
    pub fn publish(&self, event: SystemEvent) -> Result<()> {
        let shared = Arc::new(event);
        match self.sender.send(shared) {
            Ok(_) => Ok(()),
            Err(_) => Err(anyhow::anyhow!("No subscribers")),
        }
    }

    /// Returns a new receiver that yields `Arc`-wrapped events.
    pub fn subscribe(&self) -> broadcast::Receiver<Arc<SystemEvent>> {
        broadcast::Sender::subscribe(&self.sender)
    }
}

方案2: 帧池化 (Frame Pooling)

A. 对象池实现

use std::sync::{Arc, Mutex};
use std::collections::VecDeque;

/// Frame buffer pool that reuses allocations instead of allocating per frame.
pub struct FramePool {
    // Queue of idle buffers; the same `Arc` is handed to every `PooledFrame`
    // so a frame can push its buffer back when it is dropped.
    pool: Arc<Mutex<VecDeque<Vec<u8>>>>,
    // Size in bytes of each buffer, computed from width/height/format in `new`.
    frame_size: usize,
    // Requested pool size; used as the initial capacity of the queue.
    max_pool_size: usize,
    // Number of buffers `acquire` had to allocate because the pool was empty.
    allocated_count: Arc<AtomicUsize>,
}

impl FramePool {
    /// Creates an empty pool for frames of the given dimensions and format.
    ///
    /// No buffers are allocated up front; the queue only reserves room for
    /// `max_pool_size` entries.
    pub fn new(width: u32, height: u32, format: FrameFormat, max_pool_size: usize) -> Self {
        Self {
            pool: Arc::new(Mutex::new(VecDeque::with_capacity(max_pool_size))),
            frame_size: Self::calculate_frame_size(width, height, format),
            max_pool_size,
            allocated_count: Arc::new(AtomicUsize::new(0)),
        }
    }

    /// Takes a zeroed buffer from the pool, allocating a new one if the pool is empty.
    ///
    /// # Panics
    /// Panics if the pool mutex is poisoned.
    pub fn acquire(&self) -> PooledFrame {
        // Hold the lock only for the queue pop. Zeroing or allocating a
        // frame-sized buffer is comparatively slow and must not block other
        // threads that are acquiring or returning buffers.
        let recycled = self
            .pool
            .lock()
            .expect("frame pool mutex poisoned")
            .pop_front();

        let buffer = match recycled {
            Some(mut buf) => {
                // Reuse the existing allocation, reset to an all-zero frame.
                buf.clear();
                buf.resize(self.frame_size, 0);
                buf
            }
            None => {
                // Pool miss: allocate a fresh buffer and count it.
                self.allocated_count.fetch_add(1, Ordering::Relaxed);
                vec![0u8; self.frame_size]
            }
        };

        PooledFrame {
            buffer,
            pool: Arc::clone(&self.pool),
            frame_size: self.frame_size,
        }
    }

    /// Computes the buffer size in bytes for one frame.
    ///
    /// The arithmetic is done in `usize` with saturation: multiplying in `u32`
    /// (as `width * height * 3`) wraps in release builds once the product
    /// exceeds `u32::MAX`, yielding an undersized buffer.
    fn calculate_frame_size(width: u32, height: u32, format: FrameFormat) -> usize {
        let pixels = (width as usize).saturating_mul(height as usize);
        match format {
            FrameFormat::RGB888 => pixels.saturating_mul(3),
            FrameFormat::YUV420 => pixels.saturating_mul(3) / 2,
            FrameFormat::JPEG => pixels,          // estimate
            FrameFormat::H264Frame => pixels / 2, // estimate
        }
    }

    /// Returns a snapshot of the pool counters.
    ///
    /// # Panics
    /// Panics if the pool mutex is poisoned.
    pub fn stats(&self) -> PoolStats {
        let pooled = self.pool.lock().expect("frame pool mutex poisoned").len();
        PoolStats {
            pooled,
            allocated: self.allocated_count.load(Ordering::Relaxed),
            frame_size: self.frame_size,
        }
    }
}

/// Frame buffer checked out of a pool; its `Drop` impl hands the buffer back.
pub struct PooledFrame {
    // The checked-out buffer.
    buffer: Vec<u8>,
    // Queue of idle buffers this frame pushes its buffer onto when dropped.
    pool: Arc<Mutex<VecDeque<Vec<u8>>>>,
    // Frame size in bytes, copied from the pool by `FramePool::acquire`.
    frame_size: usize,
}

impl PooledFrame {
    /// Read-only view of the frame bytes.
    pub fn as_slice(&self) -> &[u8] {
        self.buffer.as_slice()
    }

    /// Mutable view of the frame bytes.
    pub fn as_mut_slice(&mut self) -> &mut [u8] {
        self.buffer.as_mut_slice()
    }
}

impl Drop for PooledFrame {
    /// Hands the buffer back to the pool, or frees it when the pool has no room.
    fn drop(&mut self) {
        // Never panic inside `drop`: if the mutex is poisoned another thread
        // has already panicked, and panicking again while unwinding aborts the
        // process. In that case the buffer is simply freed with `self`.
        if let Ok(mut idle) = self.pool.lock() {
            // NOTE(review): `VecDeque::with_capacity(n)` may reserve more than
            // `n` slots, so this bound can exceed the pool's `max_pool_size`.
            // Carrying the limit in `PooledFrame` would make it exact.
            if idle.len() < idle.capacity() {
                idle.push_back(std::mem::take(&mut self.buffer));
            }
        }
    }
}

/// Snapshot of pool counters, as produced by `FramePool::stats`.
#[derive(Debug)]
pub struct PoolStats {
    /// Number of idle buffers currently in the pool.
    pub pooled: usize,
    /// Number of buffers that had to be freshly allocated so far.
    pub allocated: usize,
    /// Size of each buffer in bytes.
    pub frame_size: usize,
}

B. Camera模块集成

// camera.rs, optimized version
/// Camera controller that captures into pooled buffers and publishes frame events.
pub struct OptimizedCameraController {
    // Camera settings; `width`/`height` are optional and default to 640x480 in the capture loop.
    config: CameraConfig,
    // Bus on which `FrameCaptured` events are published.
    event_bus: EventBus,
    // Pool the capture loop takes its scratch buffers from.
    frame_pool: FramePool,
    // Source of frame ids; incremented once per captured frame.
    frame_counter: AtomicU64,
}

impl OptimizedCameraController {
    /// Captures frames at roughly 30 FPS and publishes one `FrameCaptured`
    /// event per frame. Runs until capturing or publishing fails.
    ///
    /// # Errors
    /// Propagates the first error from `capture_to_buffer` or from the event
    /// bus `publish` call.
    pub async fn capture_loop(&mut self) -> Result<()> {
        // Pace with a fixed-period ticker. Sleeping 33 ms *after* the work
        // makes each cycle 33 ms plus the capture/publish time, so the real
        // rate drifts below 30 FPS.
        let mut ticker = tokio::time::interval(Duration::from_millis(33)); // ~30 FPS
        // If one cycle overruns, continue from "now" rather than bursting to catch up.
        ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);

        loop {
            // The first tick completes immediately; later ticks are 33 ms apart.
            ticker.tick().await;

            let data = {
                // Take a scratch buffer from the pool
                let mut pooled_frame = self.frame_pool.acquire();

                // Capture into the pooled buffer
                self.capture_to_buffer(pooled_frame.as_mut_slice()).await?;

                // NOTE(review): this copies the frame into a fresh allocation,
                // so the pool only saves the capture-side allocation. Sharing
                // the pooled buffer itself would need `PooledFrame` to be
                // usable as the event payload.
                Bytes::from(pooled_frame.as_slice().to_vec())
                // `pooled_frame` is dropped here, returning the buffer to the
                // pool before the event is published.
            };

            // Wrap the copy for sharing between subscribers
            let frame_data = Arc::new(FrameData {
                data,
                width: self.config.width.unwrap_or(640),
                height: self.config.height.unwrap_or(480),
                format: FrameFormat::RGB888,
            });

            // Build the event
            let event = FrameCapturedEvent {
                frame_id: self.frame_counter.fetch_add(1, Ordering::Relaxed),
                timestamp: chrono::Utc::now(),
                metadata: self.create_metadata(),
                frame_data,
            };

            // Publish the event
            self.event_bus.publish(SystemEvent::FrameCaptured(event))?;
        }
    }
}

方案3: 环形缓冲区 (Ring Buffer)

A. 内存映射环形缓冲

use memmap2::{MmapMut, MmapOptions};
use std::sync::atomic::{AtomicUsize, Ordering};

/// 内存映射的环形缓冲区，用于高效的帧存储
pub struct MmapRingBuffer {
    mmap: Arc<MmapMut>,
    frame_size: usize,
    capacity: usize,
    write_pos: Arc<AtomicUsize>,
    read_pos: Arc<AtomicUsize>,
    frame_offsets: Vec<usize>,
}

impl MmapRingBuffer {
    pub fn new(capacity: usize, frame_size: usize) -> Result<Self> {
        let total_size = capacity * frame_size;
        
        // 创建临时文件用于内存映射
        let temp_file = tempfile::tempfile()?;
        temp_file.set_len(total_size as u64)?;
        
        // 创建内存映射
        let mmap = unsafe {
            MmapOptions::new()
                .len(total_size)
                .map_mut(&temp_file)?
        };
        
        // 预计算帧偏移
        let frame_offsets: Vec<usize> = (0..capacity)
            .map(|i| i * frame_size)
            .collect();
        
        Ok(Self {
            mmap: Arc::new(mmap),
            frame_size,
            capacity,
            write_pos: Arc::new(AtomicUsize::new(0)),
            read_pos: Arc::new(AtomicUsize::new(0)),
            frame_offsets,
        })
    }
    
    /// 写入帧到环形缓冲区
    pub fn write_frame(&self, frame_data: &[u8]) -> Result<usize> {
        if frame_data.len() != self.frame_size {
            return Err(anyhow::anyhow!("Frame size mismatch"));
        }
        
        let pos = self.write_pos.fetch_add(1, Ordering::AcqRel) % self.capacity;
        let offset = self.frame_offsets[pos];
        
        // 直接写入内存映射区域
        unsafe {
            let dst = &mut self.mmap[offset..offset + self.frame_size];
            dst.copy_from_slice(frame_data);
        }
        
        Ok(pos)
    }
    
    /// 读取帧从环形缓冲区（零拷贝）
    pub fn read_frame(&self, position: usize) -> &[u8] {
        let offset = self.frame_offsets[position % self.capacity];
        &self.mmap[offset..offset + self.frame_size]
    }
    
    /// 获取当前写入位置
    pub fn current_write_pos(&self) -> usize {
        self.write_pos.load(Ordering::Acquire) % self.capacity
    }
    
    /// 获取可用帧数量
    pub fn available_frames(&self) -> usize {
        let write = self.write_pos.load(Ordering::Acquire);
        let read = self.read_pos.load(Ordering::Acquire);
        write.saturating_sub(read).min(self.capacity)
    }
}

/// Read-only view over a range of positions in a ring buffer.
pub struct RingBufferView {
    // Ring buffer the view reads from.
    buffer: Arc<MmapRingBuffer>,
    // First position of the view (inclusive).
    start_pos: usize,
    // End position of the view (exclusive).
    end_pos: usize,
}

impl RingBufferView {
    /// Creates a view over positions `start_pos..end_pos` of `buffer`.
    pub fn new(buffer: Arc<MmapRingBuffer>, start_pos: usize, end_pos: usize) -> Self {
        Self { buffer, start_pos, end_pos }
    }

    /// Yields the frame at each position of the view, in order.
    pub fn iter_frames(&self) -> impl Iterator<Item = &[u8]> {
        let ring = &self.buffer;
        let positions = self.start_pos..self.end_pos;
        positions.map(move |pos| ring.read_frame(pos))
    }
}

B. Detection模块集成

// detection.rs, optimized version
/// Detection controller that reads frames from a shared ring buffer.
pub struct OptimizedDetectionController {
    // Detection settings.
    config: DetectionConfig,
    // Bus on which `MeteorDetected` events are published.
    event_bus: EventBus,
    // Ring buffer the detection loop polls for new frames.
    ring_buffer: Arc<MmapRingBuffer>,
    // Frame metadata keyed by `usize` (presumably the ring position — TODO confirm).
    frame_metadata: Arc<RwLock<HashMap<usize, FrameMetadata>>>,
}

impl OptimizedDetectionController {
    /// Polls the ring buffer every 100 ms, analyses newly written frames and
    /// publishes a `MeteorDetected` event for each detection.
    ///
    /// # Errors
    /// Propagates the first error from frame analysis or event publishing.
    pub async fn detection_loop(&mut self) -> Result<()> {
        let mut last_processed_pos = 0;

        loop {
            let current_pos = self.ring_buffer.current_write_pos();

            // The write position is reduced modulo the ring capacity, so it
            // falls *below* the cursor whenever the writer wraps. Testing only
            // `current_pos > last_processed_pos` stalls detection after a wrap
            // until the writer passes the old cursor again.
            if current_pos != last_processed_pos {
                // After a wrap, restart from slot 0.
                // NOTE(review): frames between the old cursor and the end of
                // the ring are skipped in that case, because the capacity is
                // not exposed to this controller. A monotonic write counter on
                // the ring buffer would allow an exact catch-up.
                let start_pos = if current_pos > last_processed_pos {
                    last_processed_pos
                } else {
                    0
                };

                // Zero-copy view over the new frames
                let view = RingBufferView::new(
                    Arc::clone(&self.ring_buffer),
                    start_pos,
                    current_pos,
                );

                // Analyse the frame sequence
                if let Some(detection) = self.analyze_frames(view).await? {
                    // Publish the detection
                    self.event_bus.publish(SystemEvent::MeteorDetected(detection))?;
                }

                last_processed_pos = current_pos;
            }

            // Avoid busy-waiting
            tokio::time::sleep(Duration::from_millis(100)).await;
        }
    }

    /// Computes the mean brightness of every frame in `view`.
    ///
    /// The detection algorithm itself is still a placeholder, so this
    /// currently always returns `Ok(None)`.
    async fn analyze_frames(&self, view: RingBufferView) -> Result<Option<MeteorDetectedEvent>> {
        // Mean byte value per frame
        let brightness_values: Vec<f32> = view
            .iter_frames()
            .map(|frame| self.calculate_brightness_simd(frame))
            .collect();

        // Detection algorithm goes here...
        Ok(None)
    }

    /// Mean byte value of `frame`, summed with NEON; `0.0` for an empty frame.
    ///
    /// Every byte is included. The earlier widening chain
    /// (`vget_low_u8` then `vget_low_u16`) kept only the first 4 bytes of each
    /// 16-byte chunk and dropped the tail, yet divided by the full length.
    #[cfg(target_arch = "aarch64")]
    fn calculate_brightness_simd(&self, frame: &[u8]) -> f32 {
        use std::arch::aarch64::{vaddlvq_u8, vld1q_u8};

        if frame.is_empty() {
            return 0.0;
        }

        let chunks = frame.chunks_exact(16);
        let tail = chunks.remainder();

        // u64 accumulator: cannot overflow for any frame that fits in memory.
        let mut total: u64 = 0;
        for chunk in chunks {
            // SAFETY: `chunks_exact(16)` guarantees 16 readable bytes at
            // `chunk.as_ptr()`, and NEON is available on aarch64 targets.
            let chunk_sum = unsafe { vaddlvq_u8(vld1q_u8(chunk.as_ptr())) };
            total += u64::from(chunk_sum);
        }
        total += tail.iter().map(|&b| u64::from(b)).sum::<u64>();

        total as f32 / frame.len() as f32
    }

    /// Portable version of the mean-brightness computation for targets
    /// without the aarch64 intrinsics; `0.0` for an empty frame.
    #[cfg(not(target_arch = "aarch64"))]
    fn calculate_brightness_simd(&self, frame: &[u8]) -> f32 {
        if frame.is_empty() {
            return 0.0;
        }

        let total: u64 = frame.iter().map(|&b| u64::from(b)).sum();
        total as f32 / frame.len() as f32
    }
}

方案4: 分层内存管理

A. 内存层次结构

/// Tiered memory manager for frame data.
pub struct HierarchicalMemoryManager {
    // L1: hot data - the most recent frames, kept in memory
    hot_cache: Arc<RwLock<LruCache<u64, Arc<FrameData>>>>,
    
    // L2: warm data - stored in a memory-mapped file
    warm_storage: Arc<MmapRingBuffer>,
    
    // L3: cold data - stored compressed on disk
    cold_storage: Arc<ColdStorage>,
    
    // Statistics
    stats: Arc<MemoryStats>,
}

impl HierarchicalMemoryManager {
    /// Builds the three tiers from `config`: an LRU cache sized by
    /// `hot_cache_frames`, a ring buffer of `warm_storage_frames` slots of
    /// `frame_size` bytes, and cold storage at `cold_storage_path`.
    ///
    /// Fails if the ring buffer or the cold storage cannot be created.
    pub fn new(config: MemoryConfig) -> Result<Self> {
        Ok(Self {
            hot_cache: Arc::new(RwLock::new(
                LruCache::new(config.hot_cache_frames)
            )),
            warm_storage: Arc::new(MmapRingBuffer::new(
                config.warm_storage_frames,
                config.frame_size,
            )?),
            cold_storage: Arc::new(ColdStorage::new(config.cold_storage_path)?),
            stats: Arc::new(MemoryStats::default()),
        })
    }
    
    /// Stores a frame in the hot cache and schedules a write to warm storage.
    ///
    /// Always returns `Ok(())`: the warm-storage write runs in a detached
    /// task and its result is discarded.
    pub async fn store_frame(&self, frame_id: u64, data: Arc<FrameData>) -> Result<()> {
        // Update the hot cache; the write guard is released at the end of this scope
        {
            let mut cache = self.hot_cache.write().await;
            cache.put(frame_id, Arc::clone(&data));
        }
        
        // Write to warm storage asynchronously
        // NOTE(review): the task is detached and `.ok()` drops any error
        // (e.g. a frame-size mismatch). Tasks spawned for consecutive frames
        // may also run out of order, so ring order may not match capture
        // order — confirm this is acceptable.
        let warm_storage = Arc::clone(&self.warm_storage);
        let data_clone = Arc::clone(&data);
        tokio::spawn(async move {
            warm_storage.write_frame(&data_clone.data).ok();
        });
        
        // Update statistics with the stored byte count
        self.stats.record_store(data.data.len());
        
        Ok(())
    }
    
    /// Looks a frame up tier by tier (L1, then L2, then L3), promoting it to
    /// the faster tiers when it is found in a slower one.
    ///
    /// Fails if the frame has to be loaded from cold storage and that load fails.
    pub async fn get_frame(&self, frame_id: u64) -> Result<Arc<FrameData>> {
        // Check the L1 hot cache
        // `peek` is callable through the read guard (presumably it does not
        // update LRU recency — confirm against the `lru` crate docs).
        {
            let cache = self.hot_cache.read().await;
            if let Some(data) = cache.peek(&frame_id) {
                self.stats.record_hit(CacheLevel::L1);
                return Ok(Arc::clone(data));
            }
        }
        
        // Check the L2 warm storage
        // NOTE(review): `get_frame_by_id` is not among the `MmapRingBuffer`
        // methods shown in this document; it must be defined elsewhere.
        if let Some(data) = self.warm_storage.get_frame_by_id(frame_id) {
            self.stats.record_hit(CacheLevel::L2);
            let frame_data = Arc::new(FrameData::from_bytes(data));
            
            // Promote to L1
            self.promote_to_hot(frame_id, Arc::clone(&frame_data)).await;
            
            return Ok(frame_data);
        }
        
        // Load from the L3 cold storage
        let data = self.cold_storage.load_frame(frame_id).await?;
        self.stats.record_hit(CacheLevel::L3);
        
        // Promote to L1 and L2
        self.promote_to_hot(frame_id, Arc::clone(&data)).await;
        self.promote_to_warm(frame_id, &data).await;
        
        Ok(data)
    }
    
    /// Reacts to system memory usage: above 80% used, evicts to cold storage;
    /// above 60% used, trims the hot cache; otherwise does nothing.
    pub async fn handle_memory_pressure(&self) -> Result<()> {
        let memory_info = sys_info::mem_info()?;
        // Used memory as an integer percentage of total memory.
        let used_percent = (memory_info.total - memory_info.avail) * 100 / memory_info.total;
        
        if used_percent > 80 {
            // High memory pressure: move data down to the next tier
            self.evict_to_cold().await?;
        } else if used_percent > 60 {
            // Moderate pressure: trim the hot cache
            self.trim_hot_cache().await?;
        }
        
        Ok(())
    }
}

/// Atomic counters for the tiered memory manager; all start at zero via `Default`.
#[derive(Debug, Default)]
struct MemoryStats {
    // Hit counters, one per cache level (see `CacheLevel`).
    l1_hits: AtomicU64,
    l2_hits: AtomicU64,
    l3_hits: AtomicU64,
    // Total number of requests (no visible code updates it — TODO confirm).
    total_requests: AtomicU64,
    // Total bytes stored (presumably fed by `record_store` — TODO confirm).
    bytes_stored: AtomicU64,
}

/// Tier in which a frame lookup was satisfied.
///
/// The enum is a plain tag passed by value to the statistics recorder, so it
/// derives `Copy`, `Debug` and equality like any small fieldless enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CacheLevel {
    /// Hot in-memory cache.
    L1,
    /// Warm memory-mapped storage.
    L2,
    /// Cold on-disk storage.
    L3,
}

方案5: 内存监控与调优

A. 实时内存监控

use prometheus::{Gauge, Histogram, Counter};

/// Memory monitor exposing Prometheus metrics, with an optional background task.
pub struct MemoryMonitor {
    // Prometheus metrics
    // Used system memory in MB, refreshed by the monitoring task.
    memory_usage: Gauge,
    // Counter incremented with the allocator's reported bytes on every tick.
    allocation_rate: Counter,
    // NOTE(review): Rust has no garbage collector; confirm what this histogram is meant to record.
    gc_pause_time: Histogram,
    frame_pool_usage: Gauge,
    
    // Handle of the monitoring task; `None` until `start` has been called
    monitor_handle: Option<JoinHandle<()>>,
}

impl MemoryMonitor {
    /// Starts the background task that refreshes the metrics once per second.
    ///
    /// Calling `start` again replaces the running task: the previous one is
    /// aborted first. Overwriting the handle without aborting would detach
    /// the old task and leave two loops updating the same metrics forever.
    pub fn start(&mut self) -> Result<()> {
        if let Some(previous) = self.monitor_handle.take() {
            previous.abort();
        }

        let memory_usage = self.memory_usage.clone();
        let allocation_rate = self.allocation_rate.clone();

        let handle = tokio::spawn(async move {
            let mut interval = tokio::time::interval(Duration::from_secs(1));

            loop {
                interval.tick().await;

                // Refresh used memory; a failed read is skipped for this tick
                if let Ok(info) = sys_info::mem_info() {
                    let used_mb = (info.total - info.avail) / 1024;
                    memory_usage.set(used_mb as f64);
                }

                // Track allocations
                // NOTE(review): if `bytes_allocated` is a running total rather
                // than a per-interval delta, adding it on every tick
                // over-counts — confirm the allocator's semantics.
                let allocator_stats = ALLOCATOR.stats();
                allocation_rate.inc_by(allocator_stats.bytes_allocated);
            }
        });

        self.monitor_handle = Some(handle);
        Ok(())
    }

    /// Builds a memory report from the current metric values.
    pub fn generate_report(&self) -> MemoryReport {
        MemoryReport {
            current_usage_mb: self.memory_usage.get() as usize,
            // NOTE(review): this is the counter's cumulative value scaled to
            // MB, not a per-second rate as the field name suggests.
            allocation_rate_mb_s: self.allocation_rate.get() / 1_000_000.0,
            frame_pool_efficiency: self.calculate_pool_efficiency(),
            recommendations: self.generate_recommendations(),
        }
    }
}

实施步骤

第一阶段：基础优化（1周）

✅ 实现Arc共享帧数据
✅ 优化事件总线避免数据拷贝
✅ 添加基础内存监控

第二阶段：池化管理（1周）

✅ 实现帧对象池
✅ 集成到Camera模块
✅ 添加池统计和调优

第三阶段：高级优化（2周）

✅ 实现内存映射环形缓冲
✅ 添加分层内存管理
✅ SIMD优化关键路径

第四阶段：监控与调优（1周）

✅ 完整的内存监控系统
✅ 自动内存压力管理
✅ 性能基准测试

预期效果

内存使用降低

帧数据拷贝：降低 90%
整体内存使用：降低 60%
内存分配器压力（堆分配次数）：降低 80%

性能提升

帧处理延迟：降低 50%
CPU使用率：降低 30%
吞吐量：提升 2-3倍

系统稳定性

内存泄漏：完全避免
OOM风险：显著降低
长期运行：稳定可靠

测试验证

#[cfg(test)]
mod memory_tests {
    use super::*;
    use std::sync::Arc;
    use std::time::Instant;

    /// Contrasts deep clones with `Arc` clones of a frame-sized buffer.
    ///
    /// The pass/fail criterion is the sharing behaviour, which is
    /// deterministic. The timings are printed for information only, because
    /// a wall-clock ratio assertion is flaky on loaded or optimised builds.
    #[test]
    fn test_zero_copy_performance() {
        let frame_size = 640 * 480 * 3;
        let iterations = 1000;

        // Allocate once, outside the timed regions, so only the clones are measured.
        let owned = vec![0u8; frame_size];
        let shared = Arc::new(owned.clone());

        // Traditional approach: every clone is a deep copy with its own allocation
        let start = Instant::now();
        for _ in 0..iterations {
            let clone1 = owned.clone();
            let clone2 = owned.clone();
            assert_ne!(clone1.as_ptr(), owned.as_ptr());
            assert_ne!(clone2.as_ptr(), clone1.as_ptr());
        }
        let traditional_time = start.elapsed();

        // Zero-copy approach: every clone points at the same allocation
        let start = Instant::now();
        for _ in 0..iterations {
            let ref1 = Arc::clone(&shared);
            let ref2 = Arc::clone(&shared);
            assert!(Arc::ptr_eq(&ref1, &ref2));
            assert_eq!(Arc::strong_count(&shared), 3);
        }
        let zero_copy_time = start.elapsed();

        println!("Traditional: {:?}, Zero-copy: {:?}", 
                 traditional_time, zero_copy_time);
        // All temporary handles are gone again.
        assert_eq!(Arc::strong_count(&shared), 1);
    }

    /// A buffer returned to the pool is handed out again by the next `acquire`.
    #[test]
    fn test_frame_pool_efficiency() {
        let pool = FramePool::new(640, 480, FrameFormat::RGB888, 10);

        // `PooledFrame` has no `as_ptr`; take the address through the slice.
        let frame1 = pool.acquire();
        let addr1 = frame1.as_slice().as_ptr();
        drop(frame1);

        let frame2 = pool.acquire();
        let addr2 = frame2.as_slice().as_ptr();

        // Same address means the allocation was reused
        assert_eq!(addr1, addr2);
        // Only the first acquire had to allocate.
        assert_eq!(pool.stats().allocated, 1);
    }
}

21 KiB Raw Blame History Unescape Escape

内存管理优化方案

当前问题分析

1. 内存拷贝问题

2. 内存分配压力

3. 缓冲区管理

优化方案详细设计

方案1: 零拷贝架构 (Zero-Copy Architecture)

A. 使用Arc实现共享不可变数据

B. 优化事件总线

方案2: 帧池化 (Frame Pooling)

A. 对象池实现

B. Camera模块集成

方案3: 环形缓冲区 (Ring Buffer)

A. 内存映射环形缓冲

B. Detection模块集成

方案4: 分层内存管理

A. 内存层次结构

方案5: 内存监控与调优

A. 实时内存监控

实施步骤

第一阶段：基础优化（1周）

第二阶段：池化管理（1周）

第三阶段：高级优化（2周）

第四阶段：监控与调优（1周）

预期效果

内存使用降低

性能提升

系统稳定性

测试验证

21 KiB

Raw Blame History