
そのままでは動かないので参考までに。
DatadogでAPIの外形監視を行い、監視が失敗した場合(応答がない、ステータスコードが200以外、または応答時間が2000ms以上)に、Datadog -> Lambda -> SSM経由でEC2上のNginxを再起動します。
もくじ
Datadog側 Terraform
terraform.tfvars
# Datadog credentials consumed by the datadog provider.
datadog_api_key = "xxxxx"
datadog_app_key = "yyyyy"

# Health-check URL for SampleApp
health_check_url_sample_app = "https://example.net/login"

# Slack notification target
slack_channel_name = "slack-datadog通知"
variables.tf
# Datadog API key; read by the datadog provider as var.datadog_api_key.
variable "datadog_api_key" {
  type      = string
  sensitive = true
}

# Datadog application key; read by the datadog provider as var.datadog_app_key.
variable "datadog_app_key" {
  type      = string
  sensitive = true
}

# URL probed by the Synthetics API test.
variable "health_check_url_sample_app" {
  type = string
}

# Notification handle placed after "@" in the Synthetics test message.
variable "slack_channel_name" {
  type = string
}
provider.tf
# Terraform core / provider requirements and remote state location.
terraform {
  required_version = "= 1.1.1"

  required_providers {
    # NOTE(review): no version constraint is given for the datadog provider,
    # so `terraform init` picks whatever is newest; consider pinning it.
    datadog = {
      source = "DataDog/datadog"
    }
    aws = {
      source  = "hashicorp/aws"
      version = "4.10.0"
    }
  }

  backend "s3" {
    profile = "terraform-local-deployer"
    region  = "ap-northeast-1"
    bucket  = "sampleapp-terraform-xxxxx"
    # The key must be unique per environment
    key = "datadog/terraform.tfstate"
  }
}
# Datadog provider; both keys come from input variables.
provider "datadog" {
  app_key = var.datadog_app_key
  api_key = var.datadog_api_key
}
synthetics_test.tf
# Synthetics HTTP API test against the SampleApp health-check URL.
# A failing test notifies the Slack handle and calls the Nginx-restart webhook.
resource "datadog_synthetics_test" "sample_app" {
  name    = "An API test on ${var.health_check_url_sample_app} [SampleApp]"
  type    = "api"
  subtype = "http"
  status  = "live"
  tags    = ["env:production", "service:proxy"]

  locations = ["aws:ap-northeast-1"]

  request_definition {
    method = "GET"
    url    = var.health_check_url_sample_app
  }

  # The test passes only when the response is HTTP 200 ...
  assertion {
    type     = "statusCode"
    operator = "is"
    target   = "200"
  }

  # ... and the response time is below 2000 ms.
  assertion {
    type     = "responseTime"
    operator = "lessThan"
    target   = "2000"
  }

  options_list {
    # Check every minute
    tick_every = 60

    # Retry a failed run twice, 300 ms apart, before the test is marked failed.
    retry {
      count    = 2
      interval = 300
    }

    # Re-notify every 60 minutes while the test keeps failing.
    monitor_options {
      renotify_interval = 60
    }
  }

  # The webhook mention is wrapped in {{#is_alert}} so the Nginx restart is
  # triggered only by alert notifications. Without the guard, the recovery
  # notification would call the webhook too and restart a healthy Nginx.
  message = "API TEST on ${var.health_check_url_sample_app} @${var.slack_channel_name} {{#is_alert}}@webhook-${datadog_webhook.restart_nginx_ec2.name}{{/is_alert}} [SampleApp]"
}
webhook.tf
# Datadog webhook that posts the event details to the Lambda function URL.
resource "datadog_webhook" "restart_nginx_ec2" {
  name      = "restart-nginx-ec2-production"
  encode_as = "json"
  url       = "https://xxxxx.lambda-url.ap-northeast-1.on.aws/"

  # The $NAME tokens are Datadog webhook template variables, not Terraform
  # interpolation; the heredoc body is sent as-is.
  payload = <<EOF
{
"body": "$EVENT_MSG",
"last_updated": "$LAST_UPDATED",
"event_type": "$EVENT_TYPE",
"title": "$EVENT_TITLE",
"date": "$DATE",
"org": {
"id": "$ORG_ID",
"name": "$ORG_NAME"
},
"id": "$ID"
}
EOF
}
AWS側 Terraform
# IAM role, policy and attachment used by the Lambda function to operate EC2 via SSM.
resource "aws_iam_role" "lambda_ssm_role" {
  assume_role_policy = file("${path.module}/policy/assume-lambda.json")
  name               = "lambda-ssm-role-${var.ENV_VALUE_ENVIRONMENT}"
}

resource "aws_iam_policy" "lambda_ssm_role_policy" {
  policy = file("${path.module}/policy/lambda-ssm.json")
  name   = "lambda-ssm-policy-${var.ENV_VALUE_ENVIRONMENT}"
}

# Bind the policy above to the Lambda role.
resource "aws_iam_role_policy_attachment" "lambda_ssm_role_attach_policy" {
  policy_arn = aws_iam_policy.lambda_ssm_role_policy.arn
  role       = aws_iam_role.lambda_ssm_role.name
}
# IAM role, policy and attachment for the EC2 instance.
resource "aws_iam_role" "sample_app_ec2_role" {
  assume_role_policy = file("${path.module}/policy/assume-ec2.json")
  name               = "sample_app-ec2-role-${var.ENV_VALUE_ENVIRONMENT}"
}

resource "aws_iam_policy" "sample_app_ec2_role_policy" {
  policy = file("${path.module}/policy/sample-app-ec2.json")
  name   = "sample_app-ec2-policy-${var.ENV_VALUE_ENVIRONMENT}"
}

# Bind the policy above to the EC2 role.
resource "aws_iam_role_policy_attachment" "sample_app_ec2_role_attach_policy" {
  policy_arn = aws_iam_policy.sample_app_ec2_role_policy.arn
  role       = aws_iam_role.sample_app_ec2_role.name
}
# Instance profile that exposes the EC2 role to the instance.
resource "aws_iam_instance_profile" "ec2_profile" {
  role = aws_iam_role.sample_app_ec2_role.name
  name = "sample_app-ec2-profile-${var.ENV_VALUE_ENVIRONMENT}"
}
assume-lambda.json
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Action": "sts:AssumeRole",
      "Principal": {
        "Service": "lambda.amazonaws.com"
      }
    }
  ]
}
lambda-ssm.json
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Action": "ssm:SendCommand",
      "Resource": [
        "arn:aws:ssm:*:*:document/AWS-RunShellScript",
        "arn:aws:ec2:*:*:instance/*"
      ]
    },
    {
      "Effect": "Allow",
      "Action": [
        "logs:CreateLogGroup",
        "logs:CreateLogStream",
        "logs:PutLogEvents"
      ],
      "Resource": "arn:aws:logs:*:*:*"
    }
  ]
}
assume-ec2.json
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Action": "sts:AssumeRole",
      "Principal": {
        "Service": "ec2.amazonaws.com"
      }
    }
  ]
}
sample-app-ec2.json
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Action": [
        "logs:*"
      ],
      "Resource": "*"
    },
    {
      "Effect": "Allow",
      "Action": [
        "ssm:DescribeAssociation",
        "ssm:GetDeployablePatchSnapshotForInstance",
        "ssm:GetDocument",
        "ssm:DescribeDocument",
        "ssm:GetManifest",
        "ssm:GetParameters",
        "ssm:ListAssociations",
        "ssm:ListInstanceAssociations",
        "ssm:PutInventory",
        "ssm:PutComplianceItems",
        "ssm:PutConfigurePackageResult",
        "ssm:UpdateAssociationStatus",
        "ssm:UpdateInstanceAssociationStatus",
        "ssm:UpdateInstanceInformation"
      ],
      "Resource": "*"
    },
    {
      "Effect": "Allow",
      "Action": [
        "ssmmessages:CreateControlChannel",
        "ssmmessages:CreateDataChannel",
        "ssmmessages:OpenControlChannel",
        "ssmmessages:OpenDataChannel"
      ],
      "Resource": "*"
    },
    {
      "Effect": "Allow",
      "Action": [
        "ec2messages:AcknowledgeMessage",
        "ec2messages:DeleteMessage",
        "ec2messages:FailMessage",
        "ec2messages:GetEndpoint",
        "ec2messages:GetMessages",
        "ec2messages:SendReply"
      ],
      "Resource": "*"
    },
    {
      "Effect": "Allow",
      "Action": [
        "cloudwatch:PutMetricData"
      ],
      "Resource": "*"
    },
    {
      "Effect": "Allow",
      "Action": [
        "ec2:DescribeInstanceStatus"
      ],
      "Resource": "*"
    }
  ]
}
lambda.tf
# Function that restarts Nginx on EC2: zip the handler source directory for upload.
data "archive_file" "restart_nginx_ec2" {
  type        = "zip"
  output_path = "${path.module}/../../upload/${var.ENV_VALUE_ENVIRONMENT}/${var.lambda_repository_name}/lambda/functions/restartNginxEc2.zip"
  source_dir  = "${path.module}/../../src/${var.ENV_VALUE_ENVIRONMENT}/${var.lambda_repository_name}/lambda/functions/restartNginxEc2"
}
# Lambda function that restarts Nginx on the target EC2 instance through SSM.
# References are written as plain expressions; the interpolation-only form
# ("${...}") is deprecated since Terraform 0.12.
resource "aws_lambda_function" "restart_nginx_ec2" {
  function_name = "restart-nginx-ec2-${var.ENV_VALUE_ENVIRONMENT}"
  role          = var.lambda_ssm_role_arn

  # Deployment package built by data.archive_file.restart_nginx_ec2; the hash
  # makes Terraform redeploy whenever the zipped source changes.
  filename         = data.archive_file.restart_nginx_ec2.output_path
  source_code_hash = data.archive_file.restart_nginx_ec2.output_base64sha256

  handler     = "function.lambda_handler"
  runtime     = "python3.9"
  memory_size = 128
  timeout     = 60

  environment {
    variables = {
      # ID of the target EC2 instance
      EC2_INSTANCE_ID = var.lambda_restart_nginx_ec2_target_ec2_id
    }
  }

  tags = {
    # Evaluates to true for every environment except "develop".
    is_datadog_enable = (var.ENV_VALUE_ENVIRONMENT != "develop")
    role              = var.TAG_ROLE_LAMBDA
  }
}
# HTTPS endpoint (function URL) for the restart Lambda.
# NOTE(review): authorization_type "NONE" means the URL performs no IAM
# authentication, so anyone who learns the URL can trigger an Nginx restart.
# Consider validating a shared secret header in the handler — TODO confirm
# what the caller is able to send.
resource "aws_lambda_function_url" "restart_nginx_ec2" {
  authorization_type = "NONE"
  function_name      = aws_lambda_function.restart_nginx_ec2.function_name
}
Lambdaリポジトリ
{リポジトリ名}/lambda/functions/restartNginxEc2/function.py
import boto3
import logging
import os
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# ID of the EC2 instance whose Nginx is restarted. Read once at import time;
# a missing variable raises KeyError before the handler ever runs.
EC2_INSTANCE_ID = os.environ['EC2_INSTANCE_ID']


def lambda_handler(event, context):
    """Ask SSM Run Command to restart Nginx on the configured EC2 instance.

    Args:
        event: Invocation payload; not read by this handler.
        context: Lambda context object; not read by this handler.

    Returns:
        dict: ``{'message': 'script completed.'}`` when SSM accepted the
        command, ``{'message': 'script failed.'}`` when the SendCommand call
        did not answer with HTTP 200.

    Note:
        ``send_command`` only submits the command; this handler does not wait
        for, or inspect, the result of ``systemctl restart nginx`` itself.
    """
    ssm = boto3.client('ssm')
    res = ssm.send_command(
        InstanceIds=[EC2_INSTANCE_ID],
        DocumentName="AWS-RunShellScript",
        Parameters={
            "commands": [
                "systemctl restart nginx"
            ],
            "executionTimeout": ["60"]
        },
    )

    if res['ResponseMetadata']['HTTPStatusCode'] != 200:
        logger.error('FAILED to execute ssm.send_command().')
        return {
            'message': "script failed."
        }

    logger.info(
        'ssm.send_command() accepted: command_id=%s',
        res.get('Command', {}).get('CommandId'),
    )
    # The dict must start on the same line as `return`; a bare `return`
    # followed by a dict literal on the next line returns None.
    return {
        'message': "script completed."
    }




