Successfully implemented comprehensive monitoring and alerting infrastructure for the Meteor platform across all three stories of Epic 3:

**Story 3.5: 核心业务指标监控 (Core Business Metrics Monitoring)**
- Instrumented NestJS web backend with CloudWatch metrics integration using prom-client
- Instrumented Go compute service with structured CloudWatch metrics reporting
- Created comprehensive Terraform infrastructure from scratch with modular design
- Built 5-row CloudWatch dashboard with application, error rate, business, and infrastructure metrics
- Added proper error categorization and provider performance tracking

**Story 3.6: 关键故障告警 (Critical System Alerts)**
- Implemented SNS-based alerting infrastructure via Terraform
- Created critical alarms for NestJS 5xx error rate (>1% threshold)
- Created Go service processing failure rate alarm (>5% threshold)
- Created SQS queue depth alarm (>1000 messages threshold)
- Added actionable alarm descriptions with investigation guidance
- Configured email notifications with manual confirmation workflow

**Cross-cutting Infrastructure:**
- Complete AWS infrastructure as code with Terraform (S3, SQS, CloudWatch, SNS, IAM, optional RDS/Fargate)
- Structured logging implementation across all services (NestJS, Go, Rust)
- Metrics collection following the "Four Golden Signals" observability approach
- Configurable thresholds and deployment-ready monitoring solution

The platform now has production-grade observability with comprehensive metrics collection, centralized monitoring dashboards, and automated critical system alerting.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
128 lines
3.2 KiB
Go
128 lines
3.2 KiB
Go
package config
|
|
|
|
import (
|
|
"os"
|
|
"strconv"
|
|
"time"
|
|
)
|
|
|
|
// Config holds the application configuration.
//
// Every field is populated by Load from the environment variable named in its
// comment; the listed default applies when that variable is unset or empty.
type Config struct {
	Port string // PORT; default "8080"

	// Database configuration.
	DatabaseURL      string        // DATABASE_URL; defaults to a local meteor_development Postgres URL
	DatabaseMaxConns int32         // DATABASE_MAX_CONNS; default 10
	DatabaseTimeout  time.Duration // DATABASE_TIMEOUT; default 30s

	// SQS configuration.
	SQSQueueURL          string // SQS_QUEUE_URL; defaults to a meteor-raw-events-queue URL in us-east-1
	SQSRegion            string // SQS_REGION; default "us-east-1"
	SQSMaxMessages       int32  // SQS_MAX_MESSAGES; default 10
	SQSWaitTimeSeconds   int32  // SQS_WAIT_TIME_SECONDS; default 20
	SQSVisibilityTimeout int32  // SQS_VISIBILITY_TIMEOUT; default 300

	// Processing configuration.
	ProcessingWorkers   int  // PROCESSING_WORKERS; default 5
	ProcessingBatchSize int  // PROCESSING_BATCH_SIZE; default 10
	IdempotencyEnabled  bool // IDEMPOTENCY_ENABLED; default true

	// Validation configuration.
	ValidationProvider string // VALIDATION_PROVIDER; default "mvp"
}
|
|
|
|
// Load loads configuration from environment variables with defaults
|
|
func Load() *Config {
|
|
port := os.Getenv("PORT")
|
|
if port == "" {
|
|
port = "8080"
|
|
}
|
|
|
|
databaseURL := os.Getenv("DATABASE_URL")
|
|
if databaseURL == "" {
|
|
databaseURL = "postgres://postgres:password@localhost:5432/meteor_development?sslmode=disable"
|
|
}
|
|
|
|
databaseMaxConns := parseInt32(os.Getenv("DATABASE_MAX_CONNS"), 10)
|
|
databaseTimeout := parseDuration(os.Getenv("DATABASE_TIMEOUT"), 30*time.Second)
|
|
|
|
sqsQueueURL := os.Getenv("SQS_QUEUE_URL")
|
|
if sqsQueueURL == "" {
|
|
sqsQueueURL = "https://sqs.us-east-1.amazonaws.com/123456789012/meteor-raw-events-queue"
|
|
}
|
|
|
|
sqsRegion := os.Getenv("SQS_REGION")
|
|
if sqsRegion == "" {
|
|
sqsRegion = "us-east-1"
|
|
}
|
|
|
|
sqsMaxMessages := parseInt32(os.Getenv("SQS_MAX_MESSAGES"), 10)
|
|
sqsWaitTimeSeconds := parseInt32(os.Getenv("SQS_WAIT_TIME_SECONDS"), 20)
|
|
sqsVisibilityTimeout := parseInt32(os.Getenv("SQS_VISIBILITY_TIMEOUT"), 300)
|
|
|
|
processingWorkers := parseInt(os.Getenv("PROCESSING_WORKERS"), 5)
|
|
processingBatchSize := parseInt(os.Getenv("PROCESSING_BATCH_SIZE"), 10)
|
|
idempotencyEnabled := parseBool(os.Getenv("IDEMPOTENCY_ENABLED"), true)
|
|
|
|
validationProvider := os.Getenv("VALIDATION_PROVIDER")
|
|
if validationProvider == "" {
|
|
validationProvider = "mvp" // Default to MVP provider for backward compatibility
|
|
}
|
|
|
|
return &Config{
|
|
Port: port,
|
|
DatabaseURL: databaseURL,
|
|
DatabaseMaxConns: databaseMaxConns,
|
|
DatabaseTimeout: databaseTimeout,
|
|
SQSQueueURL: sqsQueueURL,
|
|
SQSRegion: sqsRegion,
|
|
SQSMaxMessages: sqsMaxMessages,
|
|
SQSWaitTimeSeconds: sqsWaitTimeSeconds,
|
|
SQSVisibilityTimeout: sqsVisibilityTimeout,
|
|
ProcessingWorkers: processingWorkers,
|
|
ProcessingBatchSize: processingBatchSize,
|
|
IdempotencyEnabled: idempotencyEnabled,
|
|
ValidationProvider: validationProvider,
|
|
}
|
|
}
|
|
|
|
// Helper functions for parsing environment variables
|
|
// parseInt converts s to an int using strconv.Atoi.
//
// It returns defaultValue when s is empty or is not a valid base-10 integer.
// The input is not trimmed, so surrounding whitespace counts as invalid.
func parseInt(s string, defaultValue int) int {
	if s == "" {
		return defaultValue
	}
	val, err := strconv.Atoi(s)
	if err != nil {
		// Malformed input is deliberately tolerated: fall back to the default.
		return defaultValue
	}
	return val
}
|
|
|
|
// parseInt32 converts s to an int32 using strconv.ParseInt with base 10 and
// a 32-bit size limit.
//
// It returns defaultValue when s is empty, is not a valid base-10 integer, or
// does not fit in an int32.
func parseInt32(s string, defaultValue int32) int32 {
	if s == "" {
		return defaultValue
	}
	val, err := strconv.ParseInt(s, 10, 32)
	if err != nil {
		// Covers both syntax errors and out-of-range values.
		return defaultValue
	}
	return int32(val)
}
|
|
|
|
// parseBool converts s to a bool using strconv.ParseBool.
//
// It returns defaultValue when s is empty or is not one of the spellings that
// strconv.ParseBool accepts.
func parseBool(s string, defaultValue bool) bool {
	if s == "" {
		return defaultValue
	}
	val, err := strconv.ParseBool(s)
	if err != nil {
		// Unrecognized spellings are deliberately tolerated: use the default.
		return defaultValue
	}
	return val
}
|
|
|
|
// parseDuration converts s to a time.Duration using time.ParseDuration.
//
// It returns defaultValue when s is empty or is not a valid duration string.
// A bare number without a unit suffix (for example "30") is not valid, so it
// also yields defaultValue; the literal "0" is the one unit-less exception.
func parseDuration(s string, defaultValue time.Duration) time.Duration {
	if s == "" {
		return defaultValue
	}
	val, err := time.ParseDuration(s)
	if err != nil {
		// Malformed input is deliberately tolerated: fall back to the default.
		return defaultValue
	}
	return val
}