Successfully implemented comprehensive monitoring and alerting infrastructure for the Meteor platform across all three stories of Epic 3:

**Story 3.5: 核心业务指标监控 (Core Business Metrics Monitoring)**
- Instrumented NestJS web backend with CloudWatch metrics integration using prom-client
- Instrumented Go compute service with structured CloudWatch metrics reporting
- Created comprehensive Terraform infrastructure from scratch with modular design
- Built 5-row CloudWatch dashboard with application, error rate, business, and infrastructure metrics
- Added proper error categorization and provider performance tracking

**Story 3.6: 关键故障告警 (Critical System Alerts)**
- Implemented SNS-based alerting infrastructure via Terraform
- Created critical alarms for NestJS 5xx error rate (>1% threshold)
- Created Go service processing failure rate alarm (>5% threshold)
- Created SQS queue depth alarm (>1000 messages threshold)
- Added actionable alarm descriptions with investigation guidance
- Configured email notifications with manual confirmation workflow

**Cross-cutting Infrastructure:**
- Complete AWS infrastructure as code with Terraform (S3, SQS, CloudWatch, SNS, IAM, optional RDS/Fargate)
- Structured logging implementation across all services (NestJS, Go, Rust)
- Metrics collection following the "Four Golden Signals" observability approach
- Configurable thresholds and deployment-ready monitoring solution

The platform now has production-grade observability with comprehensive metrics collection, centralized monitoring dashboards, and automated critical system alerting.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
90 lines
2.1 KiB
HCL
90 lines
2.1 KiB
HCL
# Primary S3 bucket holding meteor event files and media.
# The bucket name and its Name tag are both built from local.name_prefix.
resource "aws_s3_bucket" "meteor_events" {
  bucket = "${local.name_prefix}-events"

  # Controlled by a variable so each environment decides whether a non-empty
  # bucket may be removed on destroy.
  force_destroy = var.s3_bucket_force_destroy

  # Project-wide tags first; the bucket-specific keys below win on conflict.
  tags = merge(
    local.common_tags,
    {
      Name        = "${local.name_prefix}-events"
      Description = "Storage for meteor event files and media"
    }
  )
}
|
|
|
|
# Versioning for the meteor events bucket, toggled by var.s3_bucket_versioning.
#
# The "off" state is "Suspended", not "Disabled": the AWS provider only accepts
# "Disabled" for buckets that have never had versioning turned on, so flipping
# the variable from true to false after an apply would otherwise fail. Once
# versioning has been enabled, S3 can only suspend it.
resource "aws_s3_bucket_versioning" "meteor_events" {
  bucket = aws_s3_bucket.meteor_events.id

  versioning_configuration {
    status = var.s3_bucket_versioning ? "Enabled" : "Suspended"
  }
}
|
|
|
|
# Default encryption at rest for the meteor events bucket.
# Objects written without an explicit encryption header fall back to this rule.
resource "aws_s3_bucket_server_side_encryption_configuration" "meteor_events" {
  bucket = aws_s3_bucket.meteor_events.id

  rule {
    # AES256 selects S3-managed keys (SSE-S3); no KMS key is referenced here.
    apply_server_side_encryption_by_default {
      sse_algorithm = "AES256"
    }
  }
}
|
|
|
|
# Public access block for the meteor events bucket: all four guards are on.
resource "aws_s3_bucket_public_access_block" "meteor_events" {
  bucket = aws_s3_bucket.meteor_events.id

  # ACL-based exposure: reject new public ACLs and ignore any existing ones.
  block_public_acls  = true
  ignore_public_acls = true

  # Policy-based exposure: reject public bucket policies and restrict access
  # to buckets that already have one.
  block_public_policy     = true
  restrict_public_buckets = true
}
|
|
|
|
# Lifecycle management for the meteor events bucket.
#
# Two independent rules, both applied to every object in the bucket:
#   1. event_files_lifecycle        - tier objects down over time, then expire.
#   2. incomplete_multipart_uploads - clean up abandoned multipart uploads.
#
# Each rule carries an explicit empty `filter {}`. Recent AWS provider versions
# require exactly one of `filter` or `prefix` per rule and warn (with a planned
# hard error) when neither is set; an empty filter keeps the original
# whole-bucket scope.
#
# NOTE(review): when versioning is enabled on this bucket, `expiration` only
# affects current object versions. No noncurrent-version rule is defined here,
# so confirm whether old versions are meant to be retained indefinitely.
resource "aws_s3_bucket_lifecycle_configuration" "meteor_events" {
  bucket = aws_s3_bucket.meteor_events.id

  rule {
    id     = "event_files_lifecycle"
    status = "Enabled"

    # Apply to all objects in the bucket.
    filter {}

    # Move to Infrequent Access after 30 days
    transition {
      days          = 30
      storage_class = "STANDARD_IA"
    }

    # Move to Glacier after 90 days
    transition {
      days          = 90
      storage_class = "GLACIER"
    }

    # Delete after 2555 days (7 x 365 days, i.e. roughly 7 years)
    expiration {
      days = 2555
    }
  }

  rule {
    id     = "incomplete_multipart_uploads"
    status = "Enabled"

    # Apply to all objects in the bucket.
    filter {}

    # Abort multipart uploads that have not completed within 7 days of starting.
    abort_incomplete_multipart_upload {
      days_after_initiation = 7
    }
  }
}
|
|
|
|
# Publishes object-created events from the meteor events bucket to the
# processing SQS queue.
resource "aws_s3_bucket_notification" "meteor_events" {
  bucket = aws_s3_bucket.meteor_events.id

  # Create the queue policy first.
  # NOTE(review): presumably that policy is what lets S3 send to the queue -
  # confirm against aws_sqs_queue_policy.meteor_processing_s3.
  depends_on = [aws_sqs_queue_policy.meteor_processing_s3]

  queue {
    # Only keys under "raw-events/" that end in ".json" produce a message.
    filter_prefix = "raw-events/"
    filter_suffix = ".json"

    events    = ["s3:ObjectCreated:*"]
    queue_arn = aws_sqs_queue.meteor_processing.arn
  }
}