grabbit ca7e92a1a1 🎉 Epic 3 Complete: Production Readiness & Observability
Successfully implemented comprehensive monitoring and alerting infrastructure for the Meteor platform across all three stories of Epic 3:

**Story 3.5: 核心业务指标监控 (Core Business Metrics Monitoring)**
- Instrumented NestJS web backend with CloudWatch metrics integration using prom-client
- Instrumented Go compute service with structured CloudWatch metrics reporting
- Created comprehensive Terraform infrastructure from scratch with modular design
- Built 5-row CloudWatch dashboard with application, error rate, business, and infrastructure metrics
- Added proper error categorization and provider performance tracking

**Story 3.6: 关键故障告警 (Critical System Alerts)**
- Implemented SNS-based alerting infrastructure via Terraform
- Created critical alarms for NestJS 5xx error rate (>1% threshold)
- Created Go service processing failure rate alarm (>5% threshold)
- Created SQS queue depth alarm (>1000 messages threshold)
- Added actionable alarm descriptions with investigation guidance
- Configured email notifications with manual confirmation workflow

**Cross-cutting Infrastructure:**
- Complete AWS infrastructure as code with Terraform (S3, SQS, CloudWatch, SNS, IAM, optional RDS/Fargate)
- Structured logging implementation across all services (NestJS, Go, Rust)
- Metrics collection following "Golden Four Signals" observability approach
- Configurable thresholds and deployment-ready monitoring solution

The platform now has production-grade observability with comprehensive metrics collection, centralized monitoring dashboards, and automated critical system alerting.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-03 23:42:01 +08:00

300 lines
8.4 KiB
Go

package validation
import (
	"context"
	"encoding/json"
	"fmt"
	"sort"
	"time"

	"meteor-compute-service/internal/models"

	"github.com/google/uuid"
)
// MVPValidationProvider is the validation provider used during the MVP phase.
// It carries only the static provider metadata that GetProviderInfo returns
// and that Validate copies into its results.
//
// The original author notes that this provider is meant to be replaced with
// more sophisticated algorithms in Epic 3.
type MVPValidationProvider struct {
// info is the provider metadata (name, version, description, algorithm).
info ProviderInfo
}
// NewMVPValidationProvider returns a ready-to-use MVP validation provider
// whose metadata is fixed: name "MVP Validation Provider", version "1.0.0"
// and algorithm identifier "mvp_pass_through".
func NewMVPValidationProvider() *MVPValidationProvider {
	provider := new(MVPValidationProvider)
	provider.info = ProviderInfo{
		Name:        "MVP Validation Provider",
		Version:     "1.0.0",
		Description: "Basic pass-through validation for MVP phase",
		Algorithm:   "mvp_pass_through",
	}
	return provider
}
// GetProviderInfo returns the metadata describing this validation provider.
// The ProviderInfo is returned by value, so the caller receives a copy of the
// struct stored on the provider.
func (v *MVPValidationProvider) GetProviderInfo() ProviderInfo {
return v.info
}
// Validate runs the MVP validation steps against rawEvent and aggregates them
// into a ValidationResult.
//
// Four steps are executed in order: data completeness, event type, file and
// metadata validation. The score is the fraction of steps that passed, and the
// event is considered valid when the score is at least 0.8. With four steps
// this means every step has to pass, since 3/4 = 0.75 is below the threshold.
// This is therefore not an unconditional pass-through.
//
// The per-step outcomes, together with a summary (total_steps, passed_steps,
// score, threshold), are JSON-encoded into the result's Details field.
//
// ctx is accepted for interface compatibility but is not consulted; all work
// is done synchronously in memory.
//
// An error is returned when rawEvent is nil or when the details cannot be
// marshalled to JSON; in both cases the returned result is nil.
func (v *MVPValidationProvider) Validate(ctx context.Context, rawEvent *models.RawEvent) (*models.ValidationResult, error) {
	// Minimum fraction of passed steps required for an event to be valid.
	const passThreshold = 0.8

	// The step helpers dereference rawEvent unconditionally, so reject nil
	// here instead of letting the caller crash with a nil-pointer panic.
	if rawEvent == nil {
		return nil, fmt.Errorf("validate: raw event is nil")
	}

	details := ValidationDetails{
		Algorithm: v.info.Algorithm,
		Version:   v.info.Version,
		ValidationSteps: []ValidationStep{
			v.validateDataCompleteness(rawEvent), // Step 1: required fields
			v.validateEventType(rawEvent),        // Step 2: supported event type
			v.validateFile(rawEvent),             // Step 3: file path, size, MIME type
			v.validateMetadata(rawEvent),         // Step 4: optional JSON metadata
		},
		Metadata: make(map[string]interface{}),
	}

	totalSteps := len(details.ValidationSteps)
	passedSteps := 0
	for _, step := range details.ValidationSteps {
		if step.Passed {
			passedSteps++
		}
	}

	score := float64(passedSteps) / float64(totalSteps)
	isValid := score >= passThreshold

	// Summary stored alongside the individual step results.
	details.Metadata["total_steps"] = totalSteps
	details.Metadata["passed_steps"] = passedSteps
	details.Metadata["score"] = score
	details.Metadata["threshold"] = passThreshold

	detailsJSON, err := json.Marshal(details)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal validation details: %w", err)
	}

	return &models.ValidationResult{
		IsValid:     isValid,
		Score:       score,
		Algorithm:   v.info.Algorithm,
		Details:     detailsJSON,
		ProcessedAt: time.Now().UTC(),
		Reason:      v.generateReason(isValid, passedSteps, totalSteps),
	}, nil
}
// validateDataCompleteness reports which mandatory fields of rawEvent are
// unset.
//
// ID, DeviceID and UserProfileID are compared against the zero UUID, FilePath
// and EventType against the empty string, and EventTimestamp against the zero
// time. Every unset field contributes one issue code; the step passes only
// when no issue was recorded. The issue codes and their count are stored in
// the step's Details under "issues" and "issues_count".
func (v *MVPValidationProvider) validateDataCompleteness(rawEvent *models.RawEvent) ValidationStep {
	var unsetID uuid.UUID // zero value, i.e. an identifier that was never assigned

	// Each entry pairs a "field is missing" condition with its issue code.
	// The order determines the order of the reported issues.
	checks := []struct {
		missing bool
		code    string
	}{
		{rawEvent.ID == unsetID, "missing_id"},
		{rawEvent.DeviceID == unsetID, "missing_device_id"},
		{rawEvent.UserProfileID == unsetID, "missing_user_profile_id"},
		{rawEvent.FilePath == "", "missing_file_path"},
		{rawEvent.EventType == "", "missing_event_type"},
		{rawEvent.EventTimestamp.IsZero(), "missing_event_timestamp"},
	}

	// Kept non-nil even when empty (a nil slice would JSON-encode as null).
	found := make([]string, 0, len(checks))
	for _, c := range checks {
		if c.missing {
			found = append(found, c.code)
		}
	}

	result := ValidationStep{
		Name:        "data_completeness",
		Description: "Checks if required fields are present and valid",
		Details: map[string]interface{}{
			"issues":       found,
			"issues_count": len(found),
		},
		Passed: len(found) == 0,
	}
	if !result.Passed {
		result.Error = fmt.Sprintf("Found %d data completeness issues", len(found))
	}
	return result
}
// validateEventType checks rawEvent.EventType against the event types this
// provider accepts: models.EventTypeMotion, models.EventTypeAlert and
// models.EventTypeMeteor. The comparison is an exact string match.
//
// The step's Details record the event type seen ("event_type"), the accepted
// list ("supported_types") and the outcome ("is_supported").
func (v *MVPValidationProvider) validateEventType(rawEvent *models.RawEvent) ValidationStep {
	allowed := []string{
		models.EventTypeMotion,
		models.EventTypeAlert,
		models.EventTypeMeteor,
	}

	// Scan until the first match; the loop condition stops once one is found.
	matched := false
	for i := 0; i < len(allowed) && !matched; i++ {
		matched = allowed[i] == rawEvent.EventType
	}

	result := ValidationStep{
		Name:        "event_type_validation",
		Description: "Validates that the event type is supported",
		Details: map[string]interface{}{
			"event_type":      rawEvent.EventType,
			"supported_types": allowed,
			"is_supported":    matched,
		},
		Passed: matched,
	}
	if !matched {
		result.Error = fmt.Sprintf("Unsupported event type: %s", rawEvent.EventType)
	}
	return result
}
// validateFile performs basic sanity checks on the file attached to rawEvent.
//
// Checks performed:
//   - FilePath must be at least 3 bytes long.
//   - FileSize, when provided, must be positive and at most 100 MiB.
//   - FileType, when provided, must exactly match one of the accepted MIME
//     types listed below.
//
// FileSize and FileType are optional pointers; a nil pointer skips the
// corresponding check. Observed values, the accepted MIME list and the issue
// codes are stored in the step's Details. The step passes when no issue was
// recorded.
func (v *MVPValidationProvider) validateFile(rawEvent *models.RawEvent) ValidationStep {
	const (
		minPathLen  = 3
		maxFileSize = 100 * 1024 * 1024 // 100 MiB upper bound
	)

	result := ValidationStep{
		Name:        "file_validation",
		Description: "Validates file information and properties",
		Details:     make(map[string]interface{}),
	}
	problems := []string{}

	if len(rawEvent.FilePath) < minPathLen {
		problems = append(problems, "file_path_too_short")
	}

	if size := rawEvent.FileSize; size != nil {
		result.Details["file_size"] = *size
		// The two conditions are mutually exclusive, so at most one applies.
		switch {
		case *size <= 0:
			problems = append(problems, "invalid_file_size")
		case *size > maxFileSize:
			problems = append(problems, "file_size_too_large")
		}
	}

	if mime := rawEvent.FileType; mime != nil {
		accepted := []string{
			"video/mp4",
			"video/quicktime",
			"video/x-msvideo",
			"image/jpeg",
			"image/png",
			"application/gzip",
			"application/x-tar",
		}
		result.Details["file_type"] = *mime
		result.Details["supported_mime_types"] = accepted

		known := false
		for i := 0; i < len(accepted) && !known; i++ {
			known = accepted[i] == *mime
		}
		if !known {
			problems = append(problems, "unsupported_file_type")
		}
	}

	result.Details["issues"] = problems
	result.Details["issues_count"] = len(problems)
	result.Passed = len(problems) == 0
	if !result.Passed {
		result.Error = fmt.Sprintf("Found %d file validation issues", len(problems))
	}
	return result
}
// validateMetadata checks the optional metadata payload of rawEvent.
//
// A nil Metadata is accepted and only noted in the Details
// ("metadata_present" = false). Otherwise the payload is decoded with
// json.Unmarshal into a map[string]interface{}:
//   - on a decode error the issue "invalid_json_metadata" is recorded along
//     with the error text ("json_error");
//   - on success the keys and the raw byte size are recorded, and payloads
//     larger than 10 KiB are flagged as "metadata_too_large".
//
// The size limit is only evaluated for payloads that decoded successfully.
func (v *MVPValidationProvider) validateMetadata(rawEvent *models.RawEvent) ValidationStep {
	const maxMetadataBytes = 10 * 1024

	result := ValidationStep{
		Name:        "metadata_validation",
		Description: "Validates event metadata structure and content",
		Details:     make(map[string]interface{}),
	}
	problems := []string{}

	raw := rawEvent.Metadata
	var decoded map[string]interface{}
	if raw == nil {
		// Metadata is optional; its absence is not an issue.
		result.Details["metadata_present"] = false
	} else if err := json.Unmarshal(raw, &decoded); err != nil {
		problems = append(problems, "invalid_json_metadata")
		result.Details["json_error"] = err.Error()
	} else {
		result.Details["metadata_keys"] = getKeys(decoded)
		result.Details["metadata_size"] = len(raw)
		if len(raw) > maxMetadataBytes {
			problems = append(problems, "metadata_too_large")
		}
	}

	result.Details["issues"] = problems
	result.Details["issues_count"] = len(problems)
	result.Passed = len(problems) == 0
	if !result.Passed {
		result.Error = fmt.Sprintf("Found %d metadata validation issues", len(problems))
	}
	return result
}
// generateReason builds the human-readable summary stored with a validation
// result. It reports how many of the steps passed; the failure message also
// mentions the 80% requirement.
func (v *MVPValidationProvider) generateReason(isValid bool, passedSteps, totalSteps int) string {
	const (
		passedFormat = "Event passed validation with %d/%d steps completed successfully"
		failedFormat = "Event failed validation with only %d/%d steps completed successfully (required: 80%%)"
	)

	if !isValid {
		return fmt.Sprintf(failedFormat, passedSteps, totalSteps)
	}
	return fmt.Sprintf(passedFormat, passedSteps, totalSteps)
}
// getKeys returns the keys of m in ascending lexicographic order.
//
// Go randomizes map iteration order, so the keys are sorted before being
// returned; this makes the result identical across calls for the same map.
// The returned slice is always non-nil, including for a nil or empty map.
func getKeys(m map[string]interface{}) []string {
	keys := make([]string, 0, len(m))
	for k := range m {
		keys = append(keys, k)
	}
	sort.Strings(keys)
	return keys
}