Successfully implemented comprehensive monitoring and alerting infrastructure for the Meteor platform across all three stories of Epic 3: **Story 3.5: 核心业务指标监控 (Core Business Metrics Monitoring)** - Instrumented NestJS web backend with CloudWatch metrics integration using prom-client - Instrumented Go compute service with structured CloudWatch metrics reporting - Created comprehensive Terraform infrastructure from scratch with modular design - Built 5-row CloudWatch dashboard with application, error rate, business, and infrastructure metrics - Added proper error categorization and provider performance tracking **Story 3.6: 关键故障告警 (Critical System Alerts)** - Implemented SNS-based alerting infrastructure via Terraform - Created critical alarms for NestJS 5xx error rate (>1% threshold) - Created Go service processing failure rate alarm (>5% threshold) - Created SQS queue depth alarm (>1000 messages threshold) - Added actionable alarm descriptions with investigation guidance - Configured email notifications with manual confirmation workflow **Cross-cutting Infrastructure:** - Complete AWS infrastructure as code with Terraform (S3, SQS, CloudWatch, SNS, IAM, optional RDS/Fargate) - Structured logging implementation across all services (NestJS, Go, Rust) - Metrics collection following the "Four Golden Signals" observability approach - Configurable thresholds and deployment-ready monitoring solution The platform now has production-grade observability with comprehensive metrics collection, centralized monitoring dashboards, and automated critical system alerting. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
174 lines
4.3 KiB
HCL
174 lines
4.3 KiB
HCL
# VPC for the meteor application.
# Instantiated only when var.enable_fargate is true; otherwise count is 0 and
# nothing is created.
resource "aws_vpc" "main" {
  count = var.enable_fargate ? 1 : 0

  cidr_block = "10.0.0.0/16"

  # Both VPC DNS options are switched on.
  enable_dns_support   = true
  enable_dns_hostnames = true

  tags = merge(local.common_tags, {
    Name = format("%s-vpc", local.name_prefix)
  })
}
|
|
|
|
# Internet Gateway attached to the Fargate VPC.
# Shares the same var.enable_fargate toggle as the VPC it references.
resource "aws_internet_gateway" "main" {
  count = var.enable_fargate ? 1 : 0

  vpc_id = aws_vpc.main[0].id

  tags = merge(local.common_tags, {
    Name = format("%s-igw", local.name_prefix)
  })
}
|
|
|
|
# Availability zone lookup, filtered to zones whose state is "available".
# The subnet resources index into the returned `names` list.
data "aws_availability_zones" "available" {
  state = "available"
}
|
|
|
|
# Public subnets: two are created when var.enable_fargate is true, none otherwise.
resource "aws_subnet" "public" {
  count = var.enable_fargate ? 2 : 0

  vpc_id = aws_vpc.main[0].id

  # Index 0 -> 10.0.1.0/24, index 1 -> 10.0.2.0/24.
  cidr_block = format("10.0.%d.0/24", count.index + 1)

  # Each subnet takes the availability zone at its own index in the lookup.
  availability_zone = data.aws_availability_zones.available.names[count.index]

  # Instances launched here receive a public IP automatically.
  map_public_ip_on_launch = true

  tags = merge(local.common_tags, {
    Name = format("%s-public-subnet-%d", local.name_prefix, count.index + 1)
    Type = "Public"
  })
}
|
|
|
|
# Private subnets: two are created when var.enable_fargate is true, none otherwise.
resource "aws_subnet" "private" {
  count = var.enable_fargate ? 2 : 0

  vpc_id = aws_vpc.main[0].id

  # Index 0 -> 10.0.10.0/24, index 1 -> 10.0.11.0/24.
  cidr_block = format("10.0.%d.0/24", count.index + 10)

  # Each subnet takes the availability zone at its own index in the lookup.
  availability_zone = data.aws_availability_zones.available.names[count.index]

  tags = merge(local.common_tags, {
    Name = format("%s-private-subnet-%d", local.name_prefix, count.index + 1)
    Type = "Private"
  })
}
|
|
|
|
# Elastic IPs for the NAT Gateways (one per gateway, two in total when enabled).
resource "aws_eip" "nat" {
  count = var.enable_fargate ? 2 : 0

  domain = "vpc"

  tags = merge(local.common_tags, {
    Name = format("%s-nat-eip-%d", local.name_prefix, count.index + 1)
  })

  # Explicit ordering: allocate the addresses only after the Internet Gateway.
  depends_on = [aws_internet_gateway.main]
}
|
|
|
|
# NAT Gateways: two are created when var.enable_fargate is true, none otherwise.
resource "aws_nat_gateway" "main" {
  count = var.enable_fargate ? 2 : 0

  # Gateway N is placed in public subnet N and uses Elastic IP N.
  subnet_id     = aws_subnet.public[count.index].id
  allocation_id = aws_eip.nat[count.index].id

  tags = merge(local.common_tags, {
    Name = format("%s-nat-%d", local.name_prefix, count.index + 1)
  })

  # Explicit ordering: create the gateways only after the Internet Gateway.
  depends_on = [aws_internet_gateway.main]
}
|
|
|
|
# Route table shared by the public subnets.
resource "aws_route_table" "public" {
  count = var.enable_fargate ? 1 : 0

  vpc_id = aws_vpc.main[0].id

  # Default route: all IPv4 traffic goes out through the Internet Gateway.
  route {
    gateway_id = aws_internet_gateway.main[0].id
    cidr_block = "0.0.0.0/0"
  }

  tags = merge(local.common_tags, {
    Name = format("%s-public-rt", local.name_prefix)
  })
}
|
|
|
|
# Associates each public subnet with the single public route table.
resource "aws_route_table_association" "public" {
  count = var.enable_fargate ? 2 : 0

  route_table_id = aws_route_table.public[0].id
  subnet_id      = aws_subnet.public[count.index].id
}
|
|
|
|
# Route tables for the private subnets: one table per NAT Gateway.
resource "aws_route_table" "private" {
  count = var.enable_fargate ? 2 : 0

  vpc_id = aws_vpc.main[0].id

  # Default route: all IPv4 traffic goes out through the NAT Gateway that has
  # the same index as this route table.
  route {
    nat_gateway_id = aws_nat_gateway.main[count.index].id
    cidr_block     = "0.0.0.0/0"
  }

  tags = merge(local.common_tags, {
    Name = format("%s-private-rt-%d", local.name_prefix, count.index + 1)
  })
}
|
|
|
|
# Associates each private subnet with the private route table of the same index.
resource "aws_route_table_association" "private" {
  count = var.enable_fargate ? 2 : 0

  route_table_id = aws_route_table.private[count.index].id
  subnet_id      = aws_subnet.private[count.index].id
}
|
|
|
|
# Security Group for ECS Tasks.
# Created only when var.enable_fargate is true.
resource "aws_security_group" "ecs_tasks" {
  count       = var.enable_fargate ? 1 : 0
  name        = "${local.name_prefix}-ecs-tasks"
  description = "Security group for ECS tasks"
  vpc_id      = aws_vpc.main[0].id

  # Application port 3000. The rule is documented as load-balancer traffic, so
  # the source is limited to addresses inside this VPC instead of the whole
  # internet (previously 0.0.0.0/0), which let any host reach the task port
  # directly.
  # NOTE(review): if a dedicated load balancer security group exists elsewhere
  # in the configuration, prefer referencing it via `security_groups` here.
  ingress {
    from_port   = 3000
    to_port     = 3000
    protocol    = "tcp"
    cidr_blocks = [aws_vpc.main[0].cidr_block]
    description = "HTTP from Load Balancer"
  }

  # Unrestricted outbound: protocol "-1" with ports 0-0 matches all traffic.
  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
    description = "All outbound traffic"
  }

  tags = merge(local.common_tags, {
    Name = "${local.name_prefix}-ecs-tasks"
  })
}
|
|
|
|
# VPC Endpoints for AWS services (to reduce NAT Gateway costs).
# This one targets S3 in the region reported by data.aws_region.current.
resource "aws_vpc_endpoint" "s3" {
  count = var.enable_fargate ? 1 : 0

  service_name = format("com.amazonaws.%s.s3", data.aws_region.current.name)
  vpc_id       = aws_vpc.main[0].id

  tags = merge(local.common_tags, {
    Name = format("%s-s3-endpoint", local.name_prefix)
  })
}
|
|
|
|
# Attaches the S3 VPC endpoint to each of the two private route tables.
resource "aws_vpc_endpoint_route_table_association" "s3_private" {
  count = var.enable_fargate ? 2 : 0

  route_table_id  = aws_route_table.private[count.index].id
  vpc_endpoint_id = aws_vpc_endpoint.s3[0].id
}