Successfully implemented comprehensive monitoring and alerting infrastructure for the Meteor platform across all three stories of Epic 3:

**Story 3.5: 核心业务指标监控 (Core Business Metrics Monitoring)**
- Instrumented NestJS web backend with CloudWatch metrics integration using prom-client
- Instrumented Go compute service with structured CloudWatch metrics reporting
- Created comprehensive Terraform infrastructure from scratch with modular design
- Built 5-row CloudWatch dashboard with application, error rate, business, and infrastructure metrics
- Added proper error categorization and provider performance tracking

**Story 3.6: 关键故障告警 (Critical System Alerts)**
- Implemented SNS-based alerting infrastructure via Terraform
- Created critical alarms for NestJS 5xx error rate (>1% threshold)
- Created Go service processing failure rate alarm (>5% threshold)
- Created SQS queue depth alarm (>1000 messages threshold)
- Added actionable alarm descriptions with investigation guidance
- Configured email notifications with manual confirmation workflow

**Cross-cutting Infrastructure:**
- Complete AWS infrastructure as code with Terraform (S3, SQS, CloudWatch, SNS, IAM, optional RDS/Fargate)
- Structured logging implementation across all services (NestJS, Go, Rust)
- Metrics collection following the "Four Golden Signals" observability approach
- Configurable thresholds and deployment-ready monitoring solution

The platform now has production-grade observability with comprehensive metrics collection, centralized monitoring dashboards, and automated critical system alerting.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
135 lines
4.4 KiB
HCL
135 lines
4.4 KiB
HCL
# Publishes the `id` attribute of the meteor events S3 bucket resource.
output "s3_bucket_name" {
  description = "Name of the S3 bucket for meteor events"
  value       = aws_s3_bucket.meteor_events.id
}
|
|
|
|
# Publishes the `arn` attribute of the meteor events S3 bucket resource.
output "s3_bucket_arn" {
  description = "ARN of the S3 bucket for meteor events"
  value       = aws_s3_bucket.meteor_events.arn
}
|
|
|
|
# Publishes the `url` attribute of the meteor processing SQS queue.
output "sqs_queue_url" {
  description = "URL of the SQS queue for processing"
  value       = aws_sqs_queue.meteor_processing.url
}
|
|
|
|
# Publishes the `arn` attribute of the meteor processing SQS queue.
output "sqs_queue_arn" {
  description = "ARN of the SQS queue for processing"
  value       = aws_sqs_queue.meteor_processing.arn
}
|
|
|
|
# Publishes the `url` attribute of the processing dead letter queue.
output "sqs_dlq_url" {
  description = "URL of the SQS dead letter queue"
  value       = aws_sqs_queue.meteor_processing_dlq.url
}
|
|
|
|
# Console deep link to the dashboard, assembled from var.aws_region (used for
# both the host prefix and the `region` query parameter) and the dashboard's
# `dashboard_name` attribute.
output "cloudwatch_dashboard_url" {
  description = "URL to access the CloudWatch dashboard"
  value       = "https://${var.aws_region}.console.aws.amazon.com/cloudwatch/home?region=${var.aws_region}#dashboards:name=${aws_cloudwatch_dashboard.meteor_dashboard.dashboard_name}"
}
|
|
|
|
# Map of service key -> CloudWatch log group `name`, one entry each for the
# web backend and compute service log group resources.
output "cloudwatch_log_groups" {
  description = "CloudWatch log groups created"
  value = {
    web_backend     = aws_cloudwatch_log_group.web_backend.name
    compute_service = aws_cloudwatch_log_group.compute_service.name
  }
}
|
|
|
|
# Alerting outputs
|
|
# Publishes the `arn` attribute of the alerts SNS topic.
output "sns_alerts_topic_arn" {
  description = "ARN of the SNS topic for alerts"
  value       = aws_sns_topic.alerts.arn
}
|
|
|
|
# Map of short key -> `alarm_name` for the three CloudWatch metric alarms
# referenced here (NestJS 5xx error rate, Go service failure rate, SQS depth).
output "critical_alarms" {
  description = "Critical CloudWatch alarms created"
  value = {
    nestjs_error_rate  = aws_cloudwatch_metric_alarm.nestjs_5xx_error_rate.alarm_name
    go_service_failure = aws_cloudwatch_metric_alarm.go_service_failure_rate.alarm_name
    sqs_queue_depth    = aws_cloudwatch_metric_alarm.sqs_queue_depth.alarm_name
  }
}
|
|
|
|
# RDS outputs (when enabled)
|
|
# Endpoint of the first RDS instance when var.enable_rds is true, otherwise
# null. Marked sensitive, so Terraform redacts it in CLI output.
output "rds_endpoint" {
  description = "RDS instance endpoint"
  sensitive   = true
  value = (
    var.enable_rds
    ? aws_db_instance.meteor[0].endpoint
    : null
  )
}
|
|
|
|
# `db_name` of the first RDS instance when var.enable_rds is true, else null.
output "rds_database_name" {
  description = "RDS database name"
  value = (
    var.enable_rds
    ? aws_db_instance.meteor[0].db_name
    : null
  )
}
|
|
|
|
# ARN of the first rds_password Secrets Manager secret when var.enable_rds is
# true, else null.
output "rds_secret_arn" {
  description = "ARN of the secret containing RDS credentials"
  value = (
    var.enable_rds
    ? aws_secretsmanager_secret.rds_password[0].arn
    : null
  )
}
|
|
|
|
# IAM outputs
|
|
# Publishes the `arn` attribute of the meteor_app IAM policy.
output "iam_policy_arn" {
  description = "ARN of the IAM policy for application services"
  value       = aws_iam_policy.meteor_app.arn
}
|
|
|
|
# ARN of the first ecs_task IAM role when var.enable_fargate is true, else null.
output "ecs_task_role_arn" {
  description = "ARN of the ECS task role (when using Fargate)"
  value = (
    var.enable_fargate
    ? aws_iam_role.ecs_task[0].arn
    : null
  )
}
|
|
|
|
# ARN of the first ecs_task_execution IAM role when var.enable_fargate is true,
# else null.
output "ecs_execution_role_arn" {
  description = "ARN of the ECS execution role (when using Fargate)"
  value = (
    var.enable_fargate
    ? aws_iam_role.ecs_task_execution[0].arn
    : null
  )
}
|
|
|
|
# Inverse of the Fargate-gated outputs: null when var.enable_fargate is true,
# otherwise the ARN of the first app_credentials secret. Marked sensitive.
output "app_credentials_secret_arn" {
  description = "ARN of the secret containing app credentials (when not using Fargate)"
  sensitive   = true
  value = (
    var.enable_fargate
    ? null
    : aws_secretsmanager_secret.app_credentials[0].arn
  )
}
|
|
|
|
# VPC outputs (when using Fargate)
|
|
# `id` of the first `main` VPC when var.enable_fargate is true, else null.
output "vpc_id" {
  description = "ID of the VPC"
  value = (
    var.enable_fargate
    ? aws_vpc.main[0].id
    : null
  )
}
|
|
|
|
# Splat of every private subnet `id` when var.enable_fargate is true; null
# (not an empty list) when it is false.
output "private_subnet_ids" {
  description = "IDs of the private subnets"
  value = (
    var.enable_fargate
    ? aws_subnet.private[*].id
    : null
  )
}
|
|
|
|
# Splat of every public subnet `id` when var.enable_fargate is true; null
# (not an empty list) when it is false.
output "public_subnet_ids" {
  description = "IDs of the public subnets"
  value = (
    var.enable_fargate
    ? aws_subnet.public[*].id
    : null
  )
}
|
|
|
|
# `id` of the first ecs_tasks security group when var.enable_fargate is true,
# else null.
output "security_group_ecs_tasks" {
  description = "ID of the security group for ECS tasks"
  value = (
    var.enable_fargate
    ? aws_security_group.ecs_tasks[0].id
    : null
  )
}
|
|
|
|
# Environment configuration for applications
|
|
# Non-sensitive application settings as a map: region and environment come
# from input variables, bucket and queue values from the managed resources.
output "environment_variables" {
  description = "Environment variables for application configuration"
  value = {
    AWS_REGION         = var.aws_region
    AWS_S3_BUCKET_NAME = aws_s3_bucket.meteor_events.id
    AWS_SQS_QUEUE_URL  = aws_sqs_queue.meteor_processing.url
    ENVIRONMENT        = var.environment
  }
}
|
|
|
|
# Configuration snippet for docker-compose or deployment
|
|
# Environment map intended for docker-compose / container deployment.
# The whole output is marked sensitive because DATABASE_URL embeds the
# generated RDS password.
output "docker_environment" {
  description = "Environment variables formatted for Docker deployment"
  sensitive   = true
  value = {
    AWS_REGION         = var.aws_region
    AWS_S3_BUCKET_NAME = aws_s3_bucket.meteor_events.id
    AWS_SQS_QUEUE_URL  = aws_sqs_queue.meteor_processing.url

    # Null when RDS is disabled. The host part uses `address`, not `endpoint`:
    # aws_db_instance.endpoint is already "address:port", so appending
    # ":${port}" to it produced "host:5432:5432". The password is
    # percent-encoded so reserved URI characters (@ : / ? # ...) in the
    # generated value cannot break the connection string.
    DATABASE_URL = var.enable_rds ? "postgresql://${aws_db_instance.meteor[0].username}:${urlencode(random_password.rds_password[0].result)}@${aws_db_instance.meteor[0].address}:${aws_db_instance.meteor[0].port}/${aws_db_instance.meteor[0].db_name}" : null
  }
}