From e86f9c60639b5dc46020b0ca5ae480a74cf16535 Mon Sep 17 00:00:00 2001 From: Yared Taye Date: Thu, 13 Jul 2023 23:01:09 -0400 Subject: [PATCH 01/19] test_events_log --- ...ws_events_log_performance_test_matrix.json | 23 +++++ ...windows_events_log_stress_test_matrix.json | 30 ++++++ generator/test_case_generator.go | 2 + terraform/performance/main.tf | 2 +- .../windows/windows_events/agent_config.json | 98 +++++++++++++++++++ .../windows/windows_events/parameters.yml | 70 +++++++++++++ .../windows/windows_events/agent_config.json | 90 +++++++++++++++++ .../windows/windows_events/parameters.yml | 70 +++++++++++++ .../performance/performance_stats.go | 43 ++++++++ .../performance/performance_validator.go | 1 - 10 files changed, 427 insertions(+), 2 deletions(-) create mode 100644 generator/resources/ec2_windows_events_log_performance_test_matrix.json create mode 100644 generator/resources/ec2_windows_events_log_stress_test_matrix.json create mode 100644 test/performance/windows/windows_events/agent_config.json create mode 100644 test/performance/windows/windows_events/parameters.yml create mode 100644 test/stress/windows/windows_events/agent_config.json create mode 100644 test/stress/windows/windows_events/parameters.yml diff --git a/generator/resources/ec2_windows_events_log_performance_test_matrix.json b/generator/resources/ec2_windows_events_log_performance_test_matrix.json new file mode 100644 index 000000000..2e1ae193e --- /dev/null +++ b/generator/resources/ec2_windows_events_log_performance_test_matrix.json @@ -0,0 +1,23 @@ +[ + { + "os": "win-2022", + "ami": "cloudwatch-agent-integration-test-win-2022*", + "arc": "amd64", + "valuesPerMinute": 100, + "family": "windows" + }, + { + "os": "win-2022", + "ami": "cloudwatch-agent-integration-test-win-2022*", + "arc": "amd64", + "valuesPerMinute": 1000, + "family": "windows" + }, + { + "os": "win-2022", + "ami": "cloudwatch-agent-integration-test-win-2022*", + "arc": "amd64", + "valuesPerMinute": 5000, + "family": "windows" + } +] \ No newline at end of file diff --git a/generator/resources/ec2_windows_events_log_stress_test_matrix.json b/generator/resources/ec2_windows_events_log_stress_test_matrix.json new file mode 100644 index 000000000..effa360e0 --- /dev/null +++ b/generator/resources/ec2_windows_events_log_stress_test_matrix.json @@ -0,0 +1,30 @@ +[ + { + "os": "win-2022", + "ami": "cloudwatch-agent-integration-test-win-2022*", + "arc": "amd64", + "valuesPerMinute": 1000, + "family": "windows" + }, + { + "os": "win-2022", + "ami": "cloudwatch-agent-integration-test-win-2022*", + "arc": "amd64", + "valuesPerMinute": 5000, + "family": "windows" + }, + { + "os": "win-2022", + "ami": "cloudwatch-agent-integration-test-win-2022*", + "arc": "amd64", + "valuesPerMinute": 10000, + "family": "windows" + }, + { + "os": "win-2022", + "ami": "cloudwatch-agent-integration-test-win-2022*", + "arc": "amd64", + "valuesPerMinute": 50000, + "family": "windows" + } +] \ No newline at end of file diff --git a/generator/test_case_generator.go b/generator/test_case_generator.go index fe240389d..223ed43fe 100644 --- a/generator/test_case_generator.go +++ b/generator/test_case_generator.go @@ -125,6 +125,7 @@ var testTypeToTestConfig = map[string][]testConfig{ "ec2_windows_performance": { {testDir: "../../test/performance/windows/logs"}, {testDir: "../../test/performance/windows/system"}, + {testDir: "../../test/performance/windows/windows_events"}, }, "ec2_stress": { {testDir: "../../test/stress/emf"}, @@ -136,6 +137,7 @@ var testTypeToTestConfig = 
map[string][]testConfig{ "ec2_windows_stress": { {testDir: "../../test/stress/windows/logs"}, {testDir: "../../test/stress/windows/system"}, + {testDir: "../../test/stress/windows/windows_events"}, }, "ecs_fargate": { {testDir: "./test/ecs/ecs_metadata"}, diff --git a/terraform/performance/main.tf b/terraform/performance/main.tf index 2a6182c3a..18a5e5a52 100644 --- a/terraform/performance/main.tf +++ b/terraform/performance/main.tf @@ -152,4 +152,4 @@ data "aws_ami" "latest" { data "aws_dynamodb_table" "performance-dynamodb-table" { name = module.common.performance-dynamodb-table -} +} \ No newline at end of file diff --git a/test/performance/windows/windows_events/agent_config.json b/test/performance/windows/windows_events/agent_config.json new file mode 100644 index 000000000..a28359cde --- /dev/null +++ b/test/performance/windows/windows_events/agent_config.json @@ -0,0 +1,98 @@ +{ + "agent": { + "metrics_collection_interval": 1 + }, + "metrics": { + "namespace": "CloudWatchAgentEventsLogsPerformance", + "append_dimensions": { + "InstanceId": "${aws:InstanceId}" + }, + "metrics_collected": { + "Memory": { + "measurement": [ + { + "name": "Available Bytes", + "rename": "Available_Bytes" + } + ], + "metrics_collection_interval": 1 + }, + "Network Interface": { + "resources": [ + "*" + ], + "measurement": [ + { + "name": "Bytes Sent/sec", + "rename": "Bytes_Sent_Per_Sec" + }, + { + "name": "Packets Sent/sec", + "rename": "Packets_Sent_Per_Sec" + } + ], + "metrics_collection_interval": 1 + }, + "procstat": [ + { + "exe": "amazon-cloudwatch-agent.exe", + "measurement": [ + "cpu_usage", + "memory_rss", + "memory_vms", + "write_bytes" + ], + "metrics_collection_interval": 1 + } + ] + } + }, + "logs": { + "logs_collected": { + "windows_events": { + "collect_list": [ + { + "event_format": "xml", + "event_levels": [ + "VERBOSE", + "INFORMATION", + "WARNING", + "ERROR", + "CRITICAL" + ], + "event_name": "System", + "log_group_name": "EC2_EventLogs", + "log_stream_name": "{instance_id}" + }, + { + "event_format": "xml", + "event_levels": [ + "VERBOSE", + "INFORMATION", + "WARNING", + "ERROR", + "CRITICAL" + ], + "event_name": "Security", + "log_group_name": "EC2_EventLogs", + "log_stream_name": "{instance_id}" + }, + { + "event_format": "xml", + "event_levels": [ + "VERBOSE", + "INFORMATION", + "WARNING", + "ERROR", + "CRITICAL" + ], + "event_name": "Application", + "log_group_name": "EC2_EventLogs", + "log_stream_name": "{instance_id}" + } + ] + } + }, + "force_flush_interval": 60 + } +} diff --git a/test/performance/windows/windows_events/parameters.yml b/test/performance/windows/windows_events/parameters.yml new file mode 100644 index 000000000..35b711883 --- /dev/null +++ b/test/performance/windows/windows_events/parameters.yml @@ -0,0 +1,70 @@ +receivers: ["logs"] + +test_case: "logs_windows_events_performance" +validate_type: "performance" +data_type: "logs" +# Number of logs being written +number_monitored_logs: 100 +# Number of metrics to be sent or number of log lines being written each minute +values_per_minute: "" +# Number of seconds the agent should run and collect the metrics. 
In this case, 5 minutes +agent_collection_period: 300 + +commit_hash: +commit_date: + +cloudwatch_agent_config: "" + +# Metric that the test needs to validate; moreover, the stress validation already has +# InstanceID dimension; therefore, does not need to validate it +# https://github.com/aws/amazon-cloudwatch-agent-test/pull/109/files#diff-47c87373e751dd9fd5ce504e44b320765c8b84d6cde524a4e8a32cfa34674165R124-R135 +metric_namespace: "CloudWatchAgentEventsLogsPerformance" +metric_validation: + - metric_name: "procstat cpu_usage" + metric_sample_count: 300 + metric_dimension: + - name: "exe" + value: "amazon-cloudwatch-agent.exe" + - name: "process_name" + value: "amazon-cloudwatch-agent.exe" + - metric_name: "procstat memory_rss" + metric_sample_count: 300 + metric_dimension: + - name: "exe" + value: "amazon-cloudwatch-agent.exe" + - name: "process_name" + value: "amazon-cloudwatch-agent.exe" + - metric_name: "procstat memory_vms" + metric_sample_count: 300 + metric_dimension: + - name: "exe" + value: "amazon-cloudwatch-agent.exe" + - name: "process_name" + value: "amazon-cloudwatch-agent.exe" + - metric_name: "procstat write_bytes" + metric_sample_count: 300 + metric_dimension: + - name: "exe" + value: "amazon-cloudwatch-agent.exe" + - name: "process_name" + value: "amazon-cloudwatch-agent.exe" + + # Validations for Network Interface + - metric_name: "Bytes_Sent_Per_Sec" + metric_dimension: + - name: "objectname" + value: "Network Interface" + - name: "instance" + value: "Amazon Elastic Network Adapter" + - metric_name: "Packets_Sent_Per_Sec" + metric_dimension: + - name: "objectname" + value: "Network Interface" + - name: "instance" + value: "Amazon Elastic Network Adapter" + # Validate Memory metrics + - metric_name: "Available_Bytes" + metric_sample_count: 60 + metric_dimension: + - name: "objectname" + value: "Memory" \ No newline at end of file diff --git a/test/stress/windows/windows_events/agent_config.json b/test/stress/windows/windows_events/agent_config.json new file mode 100644 index 000000000..f79a3f898 --- /dev/null +++ b/test/stress/windows/windows_events/agent_config.json @@ -0,0 +1,90 @@ +{ + "agent": { + "debug": true + }, + "metrics": { + "namespace": "CloudWatchAgentEventsLogsStress", + "metrics_collected": { + "net": { + "resources": [ + "eth0" + ], + "measurement": [ + { + "name": "Bytes Sent/sec", + "rename": "Bytes_Sent_Per_Sec" + }, + { + "name": "Packets Sent/sec", + "rename": "Packets_Sent_Per_Sec" + } + ], + "metrics_collection_interval": 1 + }, + "procstat": [ + { + "exe": "cloudwatch-agent", + "measurement": [ + "cpu_usage", + "memory_rss", + "memory_vms", + "write_bytes" + ], + "metrics_collection_interval": 1 + } + ] + }, + "append_dimensions": { + "InstanceId": "${aws:InstanceId}" + }, + "force_flush_interval": 10 + }, + "logs": { + "logs_collected": { + "windows_events": { + "collect_list": [ + { + "event_format": "xml", + "event_levels": [ + "VERBOSE", + "INFORMATION", + "WARNING", + "ERROR", + "CRITICAL" + ], + "event_name": "System", + "log_group_name": "EC2_EventLogs", + "log_stream_name": "{instance_id}" + }, + { + "event_format": "xml", + "event_levels": [ + "VERBOSE", + "INFORMATION", + "WARNING", + "ERROR", + "CRITICAL" + ], + "event_name": "Security", + "log_group_name": "EC2_EventLogs", + "log_stream_name": "{instance_id}" + }, + { + "event_format": "xml", + "event_levels": [ + "VERBOSE", + "INFORMATION", + "WARNING", + "ERROR", + "CRITICAL" + ], + "event_name": "Application", + "log_group_name": "EC2_EventLogs", + "log_stream_name": 
"{instance_id}" + } + ] + } + }, + "force_flush_interval": 60 + } +} diff --git a/test/stress/windows/windows_events/parameters.yml b/test/stress/windows/windows_events/parameters.yml new file mode 100644 index 000000000..81540b314 --- /dev/null +++ b/test/stress/windows/windows_events/parameters.yml @@ -0,0 +1,70 @@ +receivers: ["logs"] + +test_case: "logs_win_events_stress" +validate_type: "stress" +data_type: "logs" +# Number of logs being written +number_monitored_logs: 100 +# Number of metrics to be sent or number of log lines being written each minute +values_per_minute: "" +# Number of seconds the agent should run and collect the metrics. In this case, 5 minutes +agent_collection_period: 300 + +commit_hash: +commit_date: + +cloudwatch_agent_config: "" + +# Metric that the test needs to validate; moreover, the stress validation already has +# InstanceID dimension; therefore, does not need to validate it +# https://github.com/aws/amazon-cloudwatch-agent-test/pull/109/files#diff-47c87373e751dd9fd5ce504e44b320765c8b84d6cde524a4e8a32cfa34674165R124-R135 +metric_namespace: "CloudWatchAgentEventsLogsStress" +metric_validation: + - metric_name: "procstat cpu_usage" + metric_sample_count: 300 + metric_dimension: + - name: "exe" + value: "amazon-cloudwatch-agent.exe" + - name: "process_name" + value: "amazon-cloudwatch-agent.exe" + - metric_name: "procstat memory_rss" + metric_sample_count: 300 + metric_dimension: + - name: "exe" + value: "amazon-cloudwatch-agent.exe" + - name: "process_name" + value: "amazon-cloudwatch-agent.exe" + - metric_name: "procstat memory_vms" + metric_sample_count: 300 + metric_dimension: + - name: "exe" + value: "amazon-cloudwatch-agent.exe" + - name: "process_name" + value: "amazon-cloudwatch-agent.exe" + - metric_name: "procstat write_bytes" + metric_sample_count: 300 + metric_dimension: + - name: "exe" + value: "amazon-cloudwatch-agent.exe" + - name: "process_name" + value: "amazon-cloudwatch-agent.exe" + + # Validations for Network Interface + - metric_name: "Bytes_Sent_Per_Sec" + metric_dimension: + - name: "objectname" + value: "Network Interface" + - name: "instance" + value: "Amazon Elastic Network Adapter" + - metric_name: "Packets_Sent_Per_Sec" + metric_dimension: + - name: "objectname" + value: "Network Interface" + - name: "instance" + value: "Amazon Elastic Network Adapter" + # Validate Memory metrics + - metric_name: "Available_Bytes" + metric_sample_count: 60 + metric_dimension: + - name: "objectname" + value: "Memory" \ No newline at end of file diff --git a/validator/validators/performance/performance_stats.go b/validator/validators/performance/performance_stats.go index 5af5ce7f9..762d92467 100644 --- a/validator/validators/performance/performance_stats.go +++ b/validator/validators/performance/performance_stats.go @@ -4,6 +4,7 @@ package performance import ( + "github.com/aws/aws-sdk-go-v2/service/cloudwatch/types" "log" "math" "sort" @@ -61,3 +62,45 @@ func CalculateMetricStatisticsBasedOnDataAndPeriod(data []float64, dataPeriod fl Period: int(dataPeriod / float64(length)), } } + +func CalculateMetricStatisticsWindows(datapoints []types.Datapoint, dataPeriod float64) Stats { + var data []float64 + for _, datapoint := range datapoints { + data = append(data, *datapoint.Average) + } + length := len(data) + if length == 0 { + return Stats{} + } + sort.Float64s(data) + + min := data[0] + max := data[length-1] + + sum := 0.0 + for _, value := range data { + sum += value + } + + avg := sum / float64(length) + + if length < 99 { + log.Println("Note: 
less than 99 values given, p99 value will be equal the max value") + } + p99Index := int(float64(length)*.99) - 1 + p99Val := data[p99Index] + + stdDevSum := 0.0 + for _, value := range data { + stdDevSum += math.Pow(avg-value, 2) + } + + return Stats{ + Average: avg, + Max: max, + Min: min, + P99: p99Val, + Std: math.Sqrt(stdDevSum / float64(length)), + Period: int(dataPeriod / float64(length)), + } +} diff --git a/validator/validators/performance/performance_validator.go b/validator/validators/performance/performance_validator.go index b1e98f032..614a48a1d 100644 --- a/validator/validators/performance/performance_validator.go +++ b/validator/validators/performance/performance_validator.go @@ -12,7 +12,6 @@ import ( "github.com/aws/aws-sdk-go-v2/service/cloudwatch/types" "github.com/aws/aws-sdk-go/aws" - "github.com/cenkalti/backoff/v4" "golang.org/x/exp/maps" "golang.org/x/exp/slices" From 2f680c9d3fb69e37dfc795df1d8a702cbb42759f Mon Sep 17 00:00:00 2001 From: Yared Taye Date: Fri, 14 Jul 2023 09:22:52 -0400 Subject: [PATCH 02/19] revert change --- .../performance/performance_stats.go | 43 ------------------- 1 file changed, 43 deletions(-) diff --git a/validator/validators/performance/performance_stats.go b/validator/validators/performance/performance_stats.go index 762d92467..5af5ce7f9 100644 --- a/validator/validators/performance/performance_stats.go +++ b/validator/validators/performance/performance_stats.go @@ -4,7 +4,6 @@ package performance import ( - "github.com/aws/aws-sdk-go-v2/service/cloudwatch/types" "log" "math" "sort" @@ -62,45 +61,3 @@ func CalculateMetricStatisticsBasedOnDataAndPeriod(data []float64, dataPeriod fl Period: int(dataPeriod / float64(length)), } } - -func CalculateMetricStatisticsWindows(datapoints []types.Datapoint, dataPeriod float64) Stats { - var data []float64 - for _, datapoint := range datapoints { - data = append(data, *datapoint.Average) - } - length := len(data) - if length == 0 { - return Stats{} - } - sort.Float64s(data) - - min := data[0] - max := data[length-1] - - sum := 0.0 - for _, value := range data { - sum += value - } - - avg := sum / float64(length) - - if length < 99 { - log.Println("Note: less than 99 values given, p99 value will be equal the max value") - } - p99Index := int(float64(length)*.99) - 1 - p99Val := data[p99Index] - - stdDevSum := 0.0 - for _, value := range data { - stdDevSum += math.Pow(avg-value, 2) - } - - return Stats{ - Average: avg, - Max: max, - Min: min, - P99: p99Val, - Std: math.Sqrt(stdDevSum / float64(length)), - Period: int(dataPeriod / float64(length)), - } -} From b1f02cca28dbf271a637faf7107b2bbacbd88e59 Mon Sep 17 00:00:00 2001 From: Yared Taye Date: Fri, 14 Jul 2023 14:24:47 -0400 Subject: [PATCH 03/19] change parameters.yml --- test/stress/windows/windows_events/parameters.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/stress/windows/windows_events/parameters.yml b/test/stress/windows/windows_events/parameters.yml index 81540b314..a4509c029 100644 --- a/test/stress/windows/windows_events/parameters.yml +++ b/test/stress/windows/windows_events/parameters.yml @@ -1,6 +1,6 @@ -receivers: ["logs"] +receivers: ["windows_events"] -test_case: "logs_win_events_stress" +test_case: "logs_windows_events_stress" validate_type: "stress" data_type: "logs" # Number of logs being written From c5c6e810d277d95713cccfdc286ca76cedd53f85 Mon Sep 17 00:00:00 2001 From: Yared Taye Date: Sun, 16 Jul 2023 17:05:52 -0400 Subject: [PATCH 04/19] fix directory --- 
.../windows/windows_events/agent_config.json | 2 +- .../windows/windows_events/parameters.yml | 4 +- .../windows_events/agent_config.json | 98 +++++++++++++++++++ .../performance/windows_events/parameters.yml | 70 +++++++++++++ .../windows/windows_events/agent_config.json | 2 +- .../windows/windows_events/parameters.yml | 2 +- .../performance/performance_stats.go | 1 + 7 files changed, 174 insertions(+), 5 deletions(-) create mode 100644 test/performance/windows_events/agent_config.json create mode 100644 test/performance/windows_events/parameters.yml diff --git a/test/performance/windows/windows_events/agent_config.json b/test/performance/windows/windows_events/agent_config.json index a28359cde..8a617ff07 100644 --- a/test/performance/windows/windows_events/agent_config.json +++ b/test/performance/windows/windows_events/agent_config.json @@ -3,7 +3,7 @@ "metrics_collection_interval": 1 }, "metrics": { - "namespace": "CloudWatchAgentEventsLogsPerformance", + "namespace": "CloudWatchAgentPerformance", "append_dimensions": { "InstanceId": "${aws:InstanceId}" }, diff --git a/test/performance/windows/windows_events/parameters.yml b/test/performance/windows/windows_events/parameters.yml index 35b711883..988efb736 100644 --- a/test/performance/windows/windows_events/parameters.yml +++ b/test/performance/windows/windows_events/parameters.yml @@ -1,4 +1,4 @@ -receivers: ["logs"] +receivers: ["windows_events"] test_case: "logs_windows_events_performance" validate_type: "performance" @@ -18,7 +18,7 @@ cloudwatch_agent_config: "" # Metric that the test needs to validate; moreover, the stress validation already has # InstanceID dimension; therefore, does not need to validate it # https://github.com/aws/amazon-cloudwatch-agent-test/pull/109/files#diff-47c87373e751dd9fd5ce504e44b320765c8b84d6cde524a4e8a32cfa34674165R124-R135 -metric_namespace: "CloudWatchAgentEventsLogsPerformance" +metric_namespace: "CloudWatchAgentPerformance" metric_validation: - metric_name: "procstat cpu_usage" metric_sample_count: 300 diff --git a/test/performance/windows_events/agent_config.json b/test/performance/windows_events/agent_config.json new file mode 100644 index 000000000..a28359cde --- /dev/null +++ b/test/performance/windows_events/agent_config.json @@ -0,0 +1,98 @@ +{ + "agent": { + "metrics_collection_interval": 1 + }, + "metrics": { + "namespace": "CloudWatchAgentEventsLogsPerformance", + "append_dimensions": { + "InstanceId": "${aws:InstanceId}" + }, + "metrics_collected": { + "Memory": { + "measurement": [ + { + "name": "Available Bytes", + "rename": "Available_Bytes" + } + ], + "metrics_collection_interval": 1 + }, + "Network Interface": { + "resources": [ + "*" + ], + "measurement": [ + { + "name": "Bytes Sent/sec", + "rename": "Bytes_Sent_Per_Sec" + }, + { + "name": "Packets Sent/sec", + "rename": "Packets_Sent_Per_Sec" + } + ], + "metrics_collection_interval": 1 + }, + "procstat": [ + { + "exe": "amazon-cloudwatch-agent.exe", + "measurement": [ + "cpu_usage", + "memory_rss", + "memory_vms", + "write_bytes" + ], + "metrics_collection_interval": 1 + } + ] + } + }, + "logs": { + "logs_collected": { + "windows_events": { + "collect_list": [ + { + "event_format": "xml", + "event_levels": [ + "VERBOSE", + "INFORMATION", + "WARNING", + "ERROR", + "CRITICAL" + ], + "event_name": "System", + "log_group_name": "EC2_EventLogs", + "log_stream_name": "{instance_id}" + }, + { + "event_format": "xml", + "event_levels": [ + "VERBOSE", + "INFORMATION", + "WARNING", + "ERROR", + "CRITICAL" + ], + "event_name": "Security", + 
"log_group_name": "EC2_EventLogs", + "log_stream_name": "{instance_id}" + }, + { + "event_format": "xml", + "event_levels": [ + "VERBOSE", + "INFORMATION", + "WARNING", + "ERROR", + "CRITICAL" + ], + "event_name": "Application", + "log_group_name": "EC2_EventLogs", + "log_stream_name": "{instance_id}" + } + ] + } + }, + "force_flush_interval": 60 + } +} diff --git a/test/performance/windows_events/parameters.yml b/test/performance/windows_events/parameters.yml new file mode 100644 index 000000000..d65211ec7 --- /dev/null +++ b/test/performance/windows_events/parameters.yml @@ -0,0 +1,70 @@ +receivers: ["windows_events"] + +test_case: "logs_windows_events_performance" +validate_type: "performance" +data_type: "logs" +# Number of logs being written +number_monitored_logs: 100 +# Number of metrics to be sent or number of log lines being written each minute +values_per_minute: "" +# Number of seconds the agent should run and collect the metrics. In this case, 5 minutes +agent_collection_period: 300 + +commit_hash: +commit_date: + +cloudwatch_agent_config: "" + +# Metric that the test needs to validate; moreover, the stress validation already has +# InstanceID dimension; therefore, does not need to validate it +# https://github.com/aws/amazon-cloudwatch-agent-test/pull/109/files#diff-47c87373e751dd9fd5ce504e44b320765c8b84d6cde524a4e8a32cfa34674165R124-R135 +metric_namespace: "CloudWatchAgentEventsLogsPerformance" +metric_validation: + - metric_name: "procstat cpu_usage" + metric_sample_count: 300 + metric_dimension: + - name: "exe" + value: "amazon-cloudwatch-agent.exe" + - name: "process_name" + value: "amazon-cloudwatch-agent.exe" + - metric_name: "procstat memory_rss" + metric_sample_count: 300 + metric_dimension: + - name: "exe" + value: "amazon-cloudwatch-agent.exe" + - name: "process_name" + value: "amazon-cloudwatch-agent.exe" + - metric_name: "procstat memory_vms" + metric_sample_count: 300 + metric_dimension: + - name: "exe" + value: "amazon-cloudwatch-agent.exe" + - name: "process_name" + value: "amazon-cloudwatch-agent.exe" + - metric_name: "procstat write_bytes" + metric_sample_count: 300 + metric_dimension: + - name: "exe" + value: "amazon-cloudwatch-agent.exe" + - name: "process_name" + value: "amazon-cloudwatch-agent.exe" + + # Validations for Network Interface + - metric_name: "Bytes_Sent_Per_Sec" + metric_dimension: + - name: "objectname" + value: "Network Interface" + - name: "instance" + value: "Amazon Elastic Network Adapter" + - metric_name: "Packets_Sent_Per_Sec" + metric_dimension: + - name: "objectname" + value: "Network Interface" + - name: "instance" + value: "Amazon Elastic Network Adapter" + # Validate Memory metrics + - metric_name: "Available_Bytes" + metric_sample_count: 60 + metric_dimension: + - name: "objectname" + value: "Memory" \ No newline at end of file diff --git a/test/stress/windows/windows_events/agent_config.json b/test/stress/windows/windows_events/agent_config.json index f79a3f898..e1882ff23 100644 --- a/test/stress/windows/windows_events/agent_config.json +++ b/test/stress/windows/windows_events/agent_config.json @@ -3,7 +3,7 @@ "debug": true }, "metrics": { - "namespace": "CloudWatchAgentEventsLogsStress", + "namespace": "CloudWatchAgentStress", "metrics_collected": { "net": { "resources": [ diff --git a/test/stress/windows/windows_events/parameters.yml b/test/stress/windows/windows_events/parameters.yml index a4509c029..e9a302d65 100644 --- a/test/stress/windows/windows_events/parameters.yml +++ b/test/stress/windows/windows_events/parameters.yml 
@@ -18,7 +18,7 @@ cloudwatch_agent_config: "" # Metric that the test needs to validate; moreover, the stress validation already has # InstanceID dimension; therefore, does not need to validate it # https://github.com/aws/amazon-cloudwatch-agent-test/pull/109/files#diff-47c87373e751dd9fd5ce504e44b320765c8b84d6cde524a4e8a32cfa34674165R124-R135 -metric_namespace: "CloudWatchAgentEventsLogsStress" +metric_namespace: "CloudWatchAgentStress" metric_validation: - metric_name: "procstat cpu_usage" metric_sample_count: 300 diff --git a/validator/validators/performance/performance_stats.go b/validator/validators/performance/performance_stats.go index 5af5ce7f9..869abb676 100644 --- a/validator/validators/performance/performance_stats.go +++ b/validator/validators/performance/performance_stats.go @@ -4,6 +4,7 @@ package performance import ( + "github.com/aws/aws-sdk-go-v2/service/cloudwatch/types" "log" "math" "sort" From 85f8b814d4cef38727f30aed4bb6f8897a7a378d Mon Sep 17 00:00:00 2001 From: Yared Taye Date: Sun, 16 Jul 2023 17:16:29 -0400 Subject: [PATCH 05/19] add windows performance validation --- generator/test_case_generator.go | 503 ++++++++++-------- .../windows_events/agent_config.json | 98 ---- .../performance/windows_events/parameters.yml | 70 --- .../performance/performance_stats.go | 1 - 4 files changed, 289 insertions(+), 383 deletions(-) delete mode 100644 test/performance/windows_events/agent_config.json delete mode 100644 test/performance/windows_events/parameters.yml diff --git a/generator/test_case_generator.go b/generator/test_case_generator.go index 223ed43fe..614a48a1d 100644 --- a/generator/test_case_generator.go +++ b/generator/test_case_generator.go @@ -1,260 +1,335 @@ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: MIT -package main +package performance import ( - "encoding/json" "fmt" - "io" + "github.com/aws/aws-sdk-go-v2/service/cloudwatch" "log" - "os" + "strings" + "time" - "github.com/mitchellh/mapstructure" -) - -type matrixRow struct { - TestDir string `json:"test_dir"` - Os string `json:"os"` - Family string `json:"family"` - TestType string `json:"testType"` - Arc string `json:"arc"` - InstanceType string `json:"instanceType"` - Ami string `json:"ami"` - BinaryName string `json:"binaryName"` - Username string `json:"username"` - InstallAgentCommand string `json:"installAgentCommand"` - AgentStartCommand string `json:"agentStartCommand"` - CaCertPath string `json:"caCertPath"` - ValuesPerMinute int `json:"values_per_minute"` // Number of metrics to be sent or number of log lines to write - K8sVersion string `json:"k8s_version"` - TerraformDir string `json:"terraform_dir"` - UseSSM bool `json:"useSSM"` - ExcludedTests string `json:"excludedTests"` -} + "github.com/aws/aws-sdk-go-v2/service/cloudwatch/types" + "github.com/aws/aws-sdk-go/aws" + "golang.org/x/exp/maps" + "golang.org/x/exp/slices" -type testConfig struct { - // this gives more flexibility to define terraform dir when there should be a different set of terraform files - // e.g. 
statsd can have a multiple terraform module sets for difference test scenarios (ecs, eks or ec2) - testDir string - terraformDir string - // define target matrix field as set(s) - // empty map means a testConfig will be created with a test entry for each entry from *_test_matrix.json - targets map[string]map[string]struct{} -} + "github.com/aws/amazon-cloudwatch-agent-test/util/awsservice" + "github.com/aws/amazon-cloudwatch-agent-test/validator/models" + "github.com/aws/amazon-cloudwatch-agent-test/validator/validators/basic" +) const ( - testTypeKeyEc2Linux = "ec2_linux" + ServiceName = "AmazonCloudWatchAgent" + DynamoDBDataBase = "CWAPerformanceMetrics" ) -// you can't have a const map in golang -var testTypeToTestConfig = map[string][]testConfig{ - "ec2_gpu": { - {testDir: "./test/nvidia_gpu"}, - }, - testTypeKeyEc2Linux: { - {testDir: "./test/ca_bundle"}, - {testDir: "./test/cloudwatchlogs"}, - { - testDir: "./test/metrics_number_dimension", - targets: map[string]map[string]struct{}{"os": {"al2": {}}}, - }, - {testDir: "./test/metric_value_benchmark"}, - {testDir: "./test/run_as_user"}, - {testDir: "./test/collection_interval"}, - {testDir: "./test/metric_dimension"}, - {testDir: "./test/restart"}, - {testDir: "./test/multi_config"}, - { - testDir: "./test/acceptance", - targets: map[string]map[string]struct{}{"os": {"ubuntu-20.04": {}}}, - }, - // skipping FIPS test as the test cannot be verified - // neither ssh nor SSM works after a reboot once FIPS is enabled - //{ - // testDir: "./test/fips", - // targets: map[string]map[string]struct{}{"os": {"rhel8": {}}}, - //}, - { - testDir: "./test/lvm", - targets: map[string]map[string]struct{}{"os": {"al2": {}}}, - }, - { - testDir: "./test/proxy", - targets: map[string]map[string]struct{}{"os": {"al2": {}}}, - }, - { - testDir: "./test/ssl_cert", - targets: map[string]map[string]struct{}{"os": {"al2": {}}}, - }, - { - testDir: "./test/userdata", - terraformDir: "terraform/ec2/userdata", - targets: map[string]map[string]struct{}{"os": {"ol9": {}}}, - }, - { - testDir: "./test/assume_role", - terraformDir: "terraform/ec2/creds", - targets: map[string]map[string]struct{}{"os": {"al2": {}}}, - }, - }, - /* - You can only place 1 mac instance on a dedicate host a single time. 
- Therefore, limit down the scope for testing in Mac since EC2 can be done with Linux - and Mac under the hood share similar plugins with Linux - */ - "ec2_mac": { - {testDir: "../../../test/feature/mac"}, - {testDir: "../../../test/run_as_user"}, - }, - "ec2_windows": { - {testDir: "../../../test/feature/windows"}, - {testDir: "../../../test/restart"}, - {testDir: "../../../test/acceptance"}, - {testDir: "../../../test/multi_config"}, - // assume role test doesn't add much value, and it already being tested with linux - //{testDir: "../../../test/assume_role"}, - }, - "ec2_performance": { - {testDir: "../../test/performance/emf"}, - {testDir: "../../test/performance/logs"}, - {testDir: "../../test/performance/system"}, - {testDir: "../../test/performance/statsd"}, - {testDir: "../../test/performance/collectd"}, - }, - "ec2_windows_performance": { - {testDir: "../../test/performance/windows/logs"}, - {testDir: "../../test/performance/windows/system"}, - {testDir: "../../test/performance/windows/windows_events"}, - }, - "ec2_stress": { - {testDir: "../../test/stress/emf"}, - {testDir: "../../test/stress/logs"}, - {testDir: "../../test/stress/system"}, - {testDir: "../../test/stress/statsd"}, - {testDir: "../../test/stress/collectd"}, - }, - "ec2_windows_stress": { - {testDir: "../../test/stress/windows/logs"}, - {testDir: "../../test/stress/windows/system"}, - {testDir: "../../test/stress/windows/windows_events"}, - }, - "ecs_fargate": { - {testDir: "./test/ecs/ecs_metadata"}, - }, - "ecs_ec2_daemon": { - {testDir: "./test/metric_value_benchmark"}, - {testDir: "./test/statsd"}, - {testDir: "./test/emf"}, - }, - "eks_daemon": { - { - testDir: "./test/metric_value_benchmark", - targets: map[string]map[string]struct{}{"arc": {"amd64": {}}}, - }, - { - testDir: "./test/statsd", terraformDir: "terraform/eks/daemon/statsd", - targets: map[string]map[string]struct{}{"arc": {"amd64": {}}}, - }, - { - testDir: "./test/emf", terraformDir: "terraform/eks/daemon/emf", - targets: map[string]map[string]struct{}{"arc": {"amd64": {}}}, - }, - { - testDir: "./test/fluent", terraformDir: "terraform/eks/daemon/fluent/d", - targets: map[string]map[string]struct{}{"arc": {"amd64": {}}}, - }, - {testDir: "./test/fluent", terraformDir: "terraform/eks/daemon/fluent/bit"}, - }, - "eks_deployment": { - {testDir: "./test/metric_value_benchmark"}, - }, -} +var ( + // The default unit for these metrics is byte. However, we want to convert to MB for easier understanding + metricsConvertToMB = []string{"mem_total", "procstat_memory_rss", "procstat_memory_swap", "procstat_memory_data", "procstat_memory_vms", "procstat_write_bytes", "procstat_bytes_sent", "memory_rss", "memory_vms", "write_bytes", "Bytes_Sent_Per_Sec", "Available_Bytes"} +) -func copyAllEC2LinuxTestForOnpremTesting() { - /* Some tests need to be fixed in order to run in both environment, so for now for PoC, run one that works. 
- testTypeToTestConfig["ec2_linux_onprem"] = testTypeToTestConfig[testTypeKeyEc2Linux] - */ - testTypeToTestConfig["ec2_linux_onprem"] = []testConfig{ - { - testDir: "./test/lvm", - targets: map[string]map[string]struct{}{"os": {"al2": {}}}, - }, - } +type PerformanceValidator struct { + vConfig models.ValidateConfig + models.ValidatorFactory } -func main() { - copyAllEC2LinuxTestForOnpremTesting() +var _ models.ValidatorFactory = (*PerformanceValidator)(nil) - for testType, testConfigs := range testTypeToTestConfig { - testMatrix := genMatrix(testType, testConfigs) - writeTestMatrixFile(testType, testMatrix) +func NewPerformanceValidator(vConfig models.ValidateConfig) models.ValidatorFactory { + return &PerformanceValidator{ + vConfig: vConfig, + ValidatorFactory: basic.NewBasicValidator(vConfig), } } -func genMatrix(testType string, testConfigs []testConfig) []matrixRow { - openTestMatrix, err := os.Open(fmt.Sprintf("generator/resources/%v_test_matrix.json", testType)) +func (s *PerformanceValidator) CheckData(startTime, endTime time.Time) error { + perfInfo := PerformanceInformation{} + if s.vConfig.GetOSFamily() == "windows" { + stat, err := s.GetWindowsPerformanceMetrics(startTime, endTime) + if err != nil { + return err + } + perfInfo, err = s.CalculateWindowsMetricStatsAndPackMetrics(stat) + if err != nil { + return err + } + } else { + metrics, err := s.GetPerformanceMetrics(startTime, endTime) + if err != nil { + return err + } + perfInfo, err = s.CalculateMetricStatsAndPackMetrics(metrics) + if err != nil { + return err + } + } + err := s.SendPacketToDatabase(perfInfo) if err != nil { - log.Panicf("can't read file %v_test_matrix.json err %v", testType, err) + return err } - defer openTestMatrix.Close() + return nil +} + +func (s *PerformanceValidator) SendPacketToDatabase(perfInfo PerformanceInformation) error { + var ( + dataType = s.vConfig.GetDataType() + receiver = s.vConfig.GetPluginsConfig()[0] //Assuming one plugin at a time + commitHash, commitDate = s.vConfig.GetCommitInformation() + agentCollectionPeriod = fmt.Sprint(s.vConfig.GetAgentCollectionPeriod().Seconds()) + // The secondary global index that is used for checking if there are item has already been exist in the table + // The performance validator will query based on the UseCaseHash to confirm if the current commit with the use case + // has been exist or not? If yes, merge it. 
If not, sending it to the database + // https://github.com/aws/amazon-cloudwatch-agent-test/blob/e07fe7adb1b1d75244d8984507d3f83a7237c3d3/terraform/setup/main.tf#L46-L53 + kCheckingAttribute = []string{"CommitHash", "UseCase"} + vCheckingAttribute = []string{fmt.Sprint(commitHash), receiver} + ) - byteValueTestMatrix, _ := io.ReadAll(openTestMatrix) + err := backoff.Retry(func() error { + existingPerfInfo, err := awsservice.GetItemInDatabase(DynamoDBDataBase, "UseCaseHash", kCheckingAttribute, vCheckingAttribute, perfInfo) + if err != nil { + return err + } - var testMatrix []map[string]interface{} - err = json.Unmarshal(byteValueTestMatrix, &testMatrix) - if err != nil { - log.Panicf("can't unmarshall file %v_test_matrix.json err %v", testType, err) - } + // Get the latest performance information from the database and update by merging the existing one + // and finally replace the packet in the database + maps.Copy(existingPerfInfo["Results"].(map[string]interface{}), perfInfo["Results"].(map[string]interface{})) + + finalPerfInfo := packIntoPerformanceInformation(existingPerfInfo["UniqueID"].(string), receiver, dataType, agentCollectionPeriod, commitHash, commitDate, existingPerfInfo["Results"]) - testMatrixComplete := make([]matrixRow, 0, len(testMatrix)) - for _, test := range testMatrix { - for _, testConfig := range testConfigs { - row := matrixRow{TestDir: testConfig.testDir, TestType: testType, TerraformDir: testConfig.terraformDir} - err = mapstructure.Decode(test, &row) - if err != nil { - log.Panicf("can't decode map test %v to metric line struct with error %v", testConfig, err) + err = awsservice.ReplaceItemInDatabase(DynamoDBDataBase, finalPerfInfo) + + if err != nil { + return err + } + return nil + }, awsservice.StandardExponentialBackoff) + + return err +} +func (s *PerformanceValidator) CalculateMetricStatsAndPackMetrics(metrics []types.MetricDataResult) (PerformanceInformation, error) { + var ( + receiver = s.vConfig.GetPluginsConfig()[0] //Assuming one plugin at a time + commitHash, commitDate = s.vConfig.GetCommitInformation() + dataType = s.vConfig.GetDataType() + dataRate = fmt.Sprint(s.vConfig.GetDataRate()) + uniqueID = s.vConfig.GetUniqueID() + agentCollectionPeriod = s.vConfig.GetAgentCollectionPeriod().Seconds() + ) + performanceMetricResults := make(map[string]Stats) + + for _, metric := range metrics { + metricLabel := strings.Split(*metric.Label, " ") + metricName := metricLabel[len(metricLabel)-1] + metricValues := metric.Values + //Convert every bytes to MB + if slices.Contains(metricsConvertToMB, metricName) { + for i, val := range metricValues { + metricValues[i] = val / (1024 * 1024) } + } + log.Printf("Start calculate metric statictics for metric %s %v \n", metricName, metricValues) + if !isAllValuesGreaterThanOrEqualToZero(metricValues) { + return nil, fmt.Errorf("\n values are not all greater than or equal to zero for metric %s with values: %v", metricName, metricValues) + } + metricStats := CalculateMetricStatisticsBasedOnDataAndPeriod(metricValues, agentCollectionPeriod) + log.Printf("Finished calculate metric statictics for metric %s: %v \n", metricName, metricStats) + performanceMetricResults[metricName] = metricStats + } + + return packIntoPerformanceInformation(uniqueID, receiver, dataType, fmt.Sprint(agentCollectionPeriod), commitHash, commitDate, map[string]interface{}{dataRate: performanceMetricResults}), nil +} - if testConfig.targets == nil || shouldAddTest(&row, testConfig.targets) { - testMatrixComplete = append(testMatrixComplete, row) 
+func (s *PerformanceValidator) CalculateWindowsMetricStatsAndPackMetrics(statistic []*cloudwatch.GetMetricStatisticsOutput) (PerformanceInformation, error) { + var ( + receiver = s.vConfig.GetPluginsConfig()[0] //Assuming one plugin at a time + commitHash, commitDate = s.vConfig.GetCommitInformation() + dataType = s.vConfig.GetDataType() + dataRate = fmt.Sprint(s.vConfig.GetDataRate()) + uniqueID = s.vConfig.GetUniqueID() + agentCollectionPeriod = s.vConfig.GetAgentCollectionPeriod().Seconds() + ) + performanceMetricResults := make(map[string]Stats) + + for _, metric := range statistic { + metricLabel := strings.Split(*metric.Label, " ") + metricName := metricLabel[len(metricLabel)-1] + metricValues := metric.Datapoints + //Convert every bytes to MB + if slices.Contains(metricsConvertToMB, metricName) { + for i, val := range metricValues { + *metricValues[i].Average = *val.Average / (1024 * 1024) } } + log.Printf("Start calculate metric statictics for metric %s \n", metricName) + if !isAllStatisticsGreaterThanOrEqualToZero(metricValues) { + return nil, fmt.Errorf("\n values are not all greater than or equal to zero for metric %s with values: %v", metricName, metricValues) + } + // GetMetricStatistics provides these statistics, however this will require maintaining multiple data arrays + // and can be difficult for code readability. This way follows the same calculation pattern as Linux + // and simplify the logics. + var data []float64 + for _, datapoint := range metric.Datapoints { + data = append(data, *datapoint.Average) + } + metricStats := CalculateMetricStatisticsBasedOnDataAndPeriod(data, agentCollectionPeriod) + log.Printf("Finished calculate metric statictics for metric %s: %+v \n", metricName, metricStats) + performanceMetricResults[metricName] = metricStats } - return testMatrixComplete + + return packIntoPerformanceInformation(uniqueID, receiver, dataType, fmt.Sprint(agentCollectionPeriod), commitHash, commitDate, map[string]interface{}{dataRate: performanceMetricResults}), nil } -// not so robust way to determine a matrix entry should be included to complete test matrix, but it serves the purpose -// struct (matrixRow) field should be added as elif to support more. 
could use reflection with some tradeoffs -func shouldAddTest(row *matrixRow, targets map[string]map[string]struct{}) bool { - for key, set := range targets { - var rowVal string - if key == "arc" { - rowVal = row.Arc - } else if key == "os" { - rowVal = row.Os +func (s *PerformanceValidator) GetPerformanceMetrics(startTime, endTime time.Time) ([]types.MetricDataResult, error) { + var ( + metricNamespace = s.vConfig.GetMetricNamespace() + validationMetric = s.vConfig.GetMetricValidation() + ec2InstanceId = awsservice.GetInstanceId() + performanceMetricDataQueries = []types.MetricDataQuery{} + ) + log.Printf("Start getting performance metrics from CloudWatch") + for _, metric := range validationMetric { + metricDimensions := []types.Dimension{ + { + Name: aws.String("InstanceId"), + Value: aws.String(ec2InstanceId), + }, + } + for _, dimension := range metric.MetricDimension { + metricDimensions = append(metricDimensions, types.Dimension{ + Name: aws.String(dimension.Name), + Value: aws.String(dimension.Value), + }) + } + performanceMetricDataQueries = append(performanceMetricDataQueries, s.buildPerformanceMetricQueries(metric.MetricName, metricNamespace, metricDimensions)) + } + + for _, stat := range validationMetric { + metricDimensions := []types.Dimension{ + { + Name: aws.String("InstanceId"), + Value: aws.String(ec2InstanceId), + }, } + for _, dimension := range stat.MetricDimension { + metricDimensions = append(metricDimensions, types.Dimension{ + Name: aws.String(dimension.Name), + Value: aws.String(dimension.Value), + }) + } + } + metrics, err := awsservice.GetMetricData(performanceMetricDataQueries, startTime, endTime) - if rowVal == "" { - continue + if err != nil { + return nil, err + } + + return metrics.MetricDataResults, nil +} + +func (s *PerformanceValidator) GetWindowsPerformanceMetrics(startTime, endTime time.Time) ([]*cloudwatch.GetMetricStatisticsOutput, error) { + var ( + metricNamespace = s.vConfig.GetMetricNamespace() + validationMetric = s.vConfig.GetMetricValidation() + ec2InstanceId = awsservice.GetInstanceId() + ) + log.Printf("Start getting performance metrics from CloudWatch") + + var statistics = []*cloudwatch.GetMetricStatisticsOutput{} + for _, stat := range validationMetric { + metricDimensions := []types.Dimension{ + { + Name: aws.String("InstanceId"), + Value: aws.String(ec2InstanceId), + }, + } + for _, dimension := range stat.MetricDimension { + metricDimensions = append(metricDimensions, types.Dimension{ + Name: aws.String(dimension.Name), + Value: aws.String(dimension.Value), + }) + } + log.Printf("Trying to get Metric %s for GetMetricStatistic ", stat.MetricName) + statList := []types.Statistic{ + types.StatisticAverage, + } + // Windows procstat metrics always append a space and GetMetricData does not support space character + // Only workaround is to use GetMetricStatistics and retrieve the datapoints on a secondly period + statistic, err := awsservice.GetMetricStatistics(stat.MetricName, metricNamespace, metricDimensions, startTime, endTime, 1, statList, nil) + if err != nil { + return nil, err + } + statistics = append(statistics, statistic) + log.Printf("Statistics for Metric: %s", stat.MetricName) + for _, datapoint := range statistic.Datapoints { + log.Printf("Average: %f", *(datapoint.Average)) } - _, ok := set[rowVal] - if !ok { + } + + return statistics, nil +} + +func (s *PerformanceValidator) buildPerformanceMetricQueries(metricName, metricNamespace string, metricDimensions []types.Dimension) types.MetricDataQuery { + metricInformation := 
types.Metric{ + Namespace: aws.String(metricNamespace), + MetricName: aws.String(metricName), + Dimensions: metricDimensions, + } + + metricDataQuery := types.MetricDataQuery{ + MetricStat: &types.MetricStat{ + Metric: &metricInformation, + Period: aws.Int32(10), + Stat: aws.String(string(models.AVERAGE)), + }, + Id: aws.String(strings.ToLower(metricName)), + } + return metricDataQuery +} + +// packIntoPerformanceInformation will package all the information into the required format of MongoDb Database +// https://github.com/aws/amazon-cloudwatch-agent-test/blob/e07fe7adb1b1d75244d8984507d3f83a7237c3d3/terraform/setup/main.tf#L8-L63 +func packIntoPerformanceInformation(uniqueID, receiver, dataType, collectionPeriod, commitHash string, commitDate int64, result interface{}) PerformanceInformation { + instanceAMI := awsservice.GetImageId() + instanceType := awsservice.GetInstanceType() + + return PerformanceInformation{ + "UniqueID": uniqueID, + "Service": ServiceName, + "UseCase": receiver, + "CommitDate": commitDate, + "CommitHash": commitHash, + "DataType": dataType, + "Results": result, + "CollectionPeriod": collectionPeriod, + "InstanceAMI": instanceAMI, + "InstanceType": instanceType, + } +} + +func isAllValuesGreaterThanOrEqualToZero(values []float64) bool { + if len(values) == 0 { + return false + } + for _, value := range values { + if value < 0 { return false } } return true } -func writeTestMatrixFile(testType string, testMatrix []matrixRow) { - bytes, err := json.MarshalIndent(testMatrix, "", " ") - if err != nil { - log.Panicf("Can't marshal json for target os %v, err %v", testType, err) +func isAllStatisticsGreaterThanOrEqualToZero(datapoints []types.Datapoint) bool { + if len(datapoints) == 0 { + return false } - err = os.WriteFile(fmt.Sprintf("generator/resources/%v_complete_test_matrix.json", testType), bytes, os.ModePerm) - if err != nil { - log.Panicf("Can't write json to file for target os %v, err %v", testType, err) + for _, datapoint := range datapoints { + if *datapoint.Average < 0 { + return false + } } + return true } diff --git a/test/performance/windows_events/agent_config.json b/test/performance/windows_events/agent_config.json deleted file mode 100644 index a28359cde..000000000 --- a/test/performance/windows_events/agent_config.json +++ /dev/null @@ -1,98 +0,0 @@ -{ - "agent": { - "metrics_collection_interval": 1 - }, - "metrics": { - "namespace": "CloudWatchAgentEventsLogsPerformance", - "append_dimensions": { - "InstanceId": "${aws:InstanceId}" - }, - "metrics_collected": { - "Memory": { - "measurement": [ - { - "name": "Available Bytes", - "rename": "Available_Bytes" - } - ], - "metrics_collection_interval": 1 - }, - "Network Interface": { - "resources": [ - "*" - ], - "measurement": [ - { - "name": "Bytes Sent/sec", - "rename": "Bytes_Sent_Per_Sec" - }, - { - "name": "Packets Sent/sec", - "rename": "Packets_Sent_Per_Sec" - } - ], - "metrics_collection_interval": 1 - }, - "procstat": [ - { - "exe": "amazon-cloudwatch-agent.exe", - "measurement": [ - "cpu_usage", - "memory_rss", - "memory_vms", - "write_bytes" - ], - "metrics_collection_interval": 1 - } - ] - } - }, - "logs": { - "logs_collected": { - "windows_events": { - "collect_list": [ - { - "event_format": "xml", - "event_levels": [ - "VERBOSE", - "INFORMATION", - "WARNING", - "ERROR", - "CRITICAL" - ], - "event_name": "System", - "log_group_name": "EC2_EventLogs", - "log_stream_name": "{instance_id}" - }, - { - "event_format": "xml", - "event_levels": [ - "VERBOSE", - "INFORMATION", - "WARNING", - "ERROR", 
- "CRITICAL" - ], - "event_name": "Security", - "log_group_name": "EC2_EventLogs", - "log_stream_name": "{instance_id}" - }, - { - "event_format": "xml", - "event_levels": [ - "VERBOSE", - "INFORMATION", - "WARNING", - "ERROR", - "CRITICAL" - ], - "event_name": "Application", - "log_group_name": "EC2_EventLogs", - "log_stream_name": "{instance_id}" - } - ] - } - }, - "force_flush_interval": 60 - } -} diff --git a/test/performance/windows_events/parameters.yml b/test/performance/windows_events/parameters.yml deleted file mode 100644 index d65211ec7..000000000 --- a/test/performance/windows_events/parameters.yml +++ /dev/null @@ -1,70 +0,0 @@ -receivers: ["windows_events"] - -test_case: "logs_windows_events_performance" -validate_type: "performance" -data_type: "logs" -# Number of logs being written -number_monitored_logs: 100 -# Number of metrics to be sent or number of log lines being written each minute -values_per_minute: "" -# Number of seconds the agent should run and collect the metrics. In this case, 5 minutes -agent_collection_period: 300 - -commit_hash: -commit_date: - -cloudwatch_agent_config: "" - -# Metric that the test needs to validate; moreover, the stress validation already has -# InstanceID dimension; therefore, does not need to validate it -# https://github.com/aws/amazon-cloudwatch-agent-test/pull/109/files#diff-47c87373e751dd9fd5ce504e44b320765c8b84d6cde524a4e8a32cfa34674165R124-R135 -metric_namespace: "CloudWatchAgentEventsLogsPerformance" -metric_validation: - - metric_name: "procstat cpu_usage" - metric_sample_count: 300 - metric_dimension: - - name: "exe" - value: "amazon-cloudwatch-agent.exe" - - name: "process_name" - value: "amazon-cloudwatch-agent.exe" - - metric_name: "procstat memory_rss" - metric_sample_count: 300 - metric_dimension: - - name: "exe" - value: "amazon-cloudwatch-agent.exe" - - name: "process_name" - value: "amazon-cloudwatch-agent.exe" - - metric_name: "procstat memory_vms" - metric_sample_count: 300 - metric_dimension: - - name: "exe" - value: "amazon-cloudwatch-agent.exe" - - name: "process_name" - value: "amazon-cloudwatch-agent.exe" - - metric_name: "procstat write_bytes" - metric_sample_count: 300 - metric_dimension: - - name: "exe" - value: "amazon-cloudwatch-agent.exe" - - name: "process_name" - value: "amazon-cloudwatch-agent.exe" - - # Validations for Network Interface - - metric_name: "Bytes_Sent_Per_Sec" - metric_dimension: - - name: "objectname" - value: "Network Interface" - - name: "instance" - value: "Amazon Elastic Network Adapter" - - metric_name: "Packets_Sent_Per_Sec" - metric_dimension: - - name: "objectname" - value: "Network Interface" - - name: "instance" - value: "Amazon Elastic Network Adapter" - # Validate Memory metrics - - metric_name: "Available_Bytes" - metric_sample_count: 60 - metric_dimension: - - name: "objectname" - value: "Memory" \ No newline at end of file diff --git a/validator/validators/performance/performance_stats.go b/validator/validators/performance/performance_stats.go index 869abb676..5af5ce7f9 100644 --- a/validator/validators/performance/performance_stats.go +++ b/validator/validators/performance/performance_stats.go @@ -4,7 +4,6 @@ package performance import ( - "github.com/aws/aws-sdk-go-v2/service/cloudwatch/types" "log" "math" "sort" From 33baafc04e20ebe2aa93d1dd7f9bbac691fc0b3f Mon Sep 17 00:00:00 2001 From: Yared Taye Date: Sun, 16 Jul 2023 17:24:01 -0400 Subject: [PATCH 06/19] add backoff import --- validator/validators/performance/performance_validator.go | 1 + 1 file changed, 1 
insertion(+) diff --git a/validator/validators/performance/performance_validator.go b/validator/validators/performance/performance_validator.go index 614a48a1d..b1e98f032 100644 --- a/validator/validators/performance/performance_validator.go +++ b/validator/validators/performance/performance_validator.go @@ -12,6 +12,7 @@ import ( "github.com/aws/aws-sdk-go-v2/service/cloudwatch/types" "github.com/aws/aws-sdk-go/aws" + "github.com/cenkalti/backoff/v4" "golang.org/x/exp/maps" "golang.org/x/exp/slices" From e1621013f77dfb49d740ab344706c0a325fa1455 Mon Sep 17 00:00:00 2001 From: Yared Taye Date: Sun, 16 Jul 2023 20:41:04 -0400 Subject: [PATCH 07/19] add windows_events reciever --- validator/models/validation_config.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validator/models/validation_config.go b/validator/models/validation_config.go index ed9a96fb6..314fa3356 100644 --- a/validator/models/validation_config.go +++ b/validator/models/validation_config.go @@ -15,7 +15,7 @@ import ( "gopkg.in/yaml.v3" ) -var supportedReceivers = []string{"logs", "statsd", "collectd", "system", "emf"} +var supportedReceivers = []string{"logs", "statsd", "collectd", "system", "emf", "windows_events"} type ValidateConfig interface { GetPluginsConfig() []string From cce00bfc0838bc1877c03072d612967f88e13db8 Mon Sep 17 00:00:00 2001 From: Yared Taye Date: Sun, 16 Jul 2023 23:37:02 -0400 Subject: [PATCH 08/19] change parameters.yml --- test/performance/windows/windows_events/parameters.yml | 2 +- test/stress/windows/windows_events/parameters.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/performance/windows/windows_events/parameters.yml b/test/performance/windows/windows_events/parameters.yml index 988efb736..f0bb6ad9c 100644 --- a/test/performance/windows/windows_events/parameters.yml +++ b/test/performance/windows/windows_events/parameters.yml @@ -1,4 +1,4 @@ -receivers: ["windows_events"] +receivers: ["logs"] test_case: "logs_windows_events_performance" validate_type: "performance" diff --git a/test/stress/windows/windows_events/parameters.yml b/test/stress/windows/windows_events/parameters.yml index e9a302d65..3062d76d9 100644 --- a/test/stress/windows/windows_events/parameters.yml +++ b/test/stress/windows/windows_events/parameters.yml @@ -1,4 +1,4 @@ -receivers: ["windows_events"] +receivers: ["logs"] test_case: "logs_windows_events_stress" validate_type: "stress" From 8bd03ebdb80b376424e78645b1d46e0c0518abad Mon Sep 17 00:00:00 2001 From: Yared Taye Date: Mon, 17 Jul 2023 00:00:28 -0400 Subject: [PATCH 09/19] change from files to windows_events to test --- util/common/logs.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/common/logs.go b/util/common/logs.go index 92a4e9ca1..11e51bf79 100644 --- a/util/common/logs.go +++ b/util/common/logs.go @@ -125,7 +125,7 @@ func getLogFilePaths(configPath string) ([]string, error) { return nil, err } - logFiles := cfgFileData["logs"].(map[string]interface{})["logs_collected"].(map[string]interface{})["files"].(map[string]interface{})["collect_list"].([]interface{}) + logFiles := cfgFileData["logs"].(map[string]interface{})["logs_collected"].(map[string]interface{})["windows_events"].(map[string]interface{})["collect_list"].([]interface{}) var filePaths []string for _, process := range logFiles { filePaths = append(filePaths, process.(map[string]interface{})["file_path"].(string)) From 1f8c5e8edf3ca502a7c569eb42852dec9952047d Mon Sep 17 00:00:00 2001 From: Yared Taye Date: Mon, 17 Jul 2023 
00:24:46 -0400 Subject: [PATCH 10/19] change log generation logic --- util/common/logs.go | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/util/common/logs.go b/util/common/logs.go index 11e51bf79..79e371029 100644 --- a/util/common/logs.go +++ b/util/common/logs.go @@ -125,7 +125,17 @@ func getLogFilePaths(configPath string) ([]string, error) { return nil, err } - logFiles := cfgFileData["logs"].(map[string]interface{})["logs_collected"].(map[string]interface{})["windows_events"].(map[string]interface{})["collect_list"].([]interface{}) + _, ok := cfgFileData["logs"].(map[string]interface{})["logs_collected"].(map[string]interface{})["files"] + + if !ok { + event_names := cfgFileData["logs"].(map[string]interface{})["logs_collected"].(map[string]interface{})["windows_events"].(map[string]interface{})["collect_list"].([]interface{}) + var eventName []string + for _, process := range event_names { + eventName = append(eventName, process.(map[string]interface{})["event_name"].(string)) + } + return eventName, nil + } + logFiles := cfgFileData["logs"].(map[string]interface{})["logs_collected"].(map[string]interface{})["files"].(map[string]interface{})["collect_list"].([]interface{}) var filePaths []string for _, process := range logFiles { filePaths = append(filePaths, process.(map[string]interface{})["file_path"].(string)) From 774ea8bb280cabd6c1539bf12e8d353bea7318b3 Mon Sep 17 00:00:00 2001 From: Yared Taye Date: Mon, 17 Jul 2023 01:29:54 -0400 Subject: [PATCH 11/19] change parameters.yml --- .../windows/windows_events/agent_config.json | 12 ++-- .../windows/windows_events/parameters.yml | 64 ++++++++++++++++++- .../windows/windows_events/agent_config.json | 12 ++-- .../windows/windows_events/parameters.yml | 64 ++++++++++++++++++- util/common/logs.go | 10 --- 5 files changed, 138 insertions(+), 24 deletions(-) diff --git a/test/performance/windows/windows_events/agent_config.json b/test/performance/windows/windows_events/agent_config.json index 8a617ff07..c474c7863 100644 --- a/test/performance/windows/windows_events/agent_config.json +++ b/test/performance/windows/windows_events/agent_config.json @@ -61,8 +61,8 @@ "CRITICAL" ], "event_name": "System", - "log_group_name": "EC2_EventLogs", - "log_stream_name": "{instance_id}" + "log_group_name": "{instance_id}", + "log_stream_name": "System" }, { "event_format": "xml", @@ -74,8 +74,8 @@ "CRITICAL" ], "event_name": "Security", - "log_group_name": "EC2_EventLogs", - "log_stream_name": "{instance_id}" + "log_group_name": "{instance_id}", + "log_stream_name": "Security" }, { "event_format": "xml", @@ -87,8 +87,8 @@ "CRITICAL" ], "event_name": "Application", - "log_group_name": "EC2_EventLogs", - "log_stream_name": "{instance_id}" + "log_group_name": "{instance_id}", + "log_stream_name": "Application" } ] } diff --git a/test/performance/windows/windows_events/parameters.yml b/test/performance/windows/windows_events/parameters.yml index f0bb6ad9c..8ff6fd147 100644 --- a/test/performance/windows/windows_events/parameters.yml +++ b/test/performance/windows/windows_events/parameters.yml @@ -67,4 +67,66 @@ metric_validation: metric_sample_count: 60 metric_dimension: - name: "objectname" - value: "Memory" \ No newline at end of file + value: "Memory" + +log_validation: + - log_value: "Security Informational log" + log_level: "Information" + log_lines: 1 + log_stream: "Security" + log_source: "WindowsEvents" + - log_value: "Security Warning log" + log_level: "Warning" + log_lines: 1 + log_stream: "Security" + log_source: 
"WindowsEvents" + - log_value: "Security Error log" + log_level: "Error" + log_lines: 1 + log_stream: "Security" + log_source: "WindowsEvents" + - log_value: "Security Critical log" + log_level: "Critical" + log_lines: 1 + log_stream: "Security" + log_source: "WindowsEvents" + - log_value: "System information log" + log_level: "Information" + log_lines: 1 + log_stream: "System" + log_source: "WindowsEvents" + - log_value: "System warning log" + log_level: "Warning" + log_lines: 1 + log_stream: "System" + log_source: "WindowsEvents" + - log_value: "System error log" + log_level: "Error" + log_lines: 1 + log_stream: "System" + log_source: "WindowsEvents" + - log_value: "System Critical log" + log_level: "Critical" + log_lines: 1 + log_stream: "System" + log_source: "WindowsEvents" + - log_value: "Application information log" + log_level: "Information" + log_lines: 1 + log_stream: "Application" + log_source: "WindowsEvents" + - log_value: "Application warning log" + log_level: "Warning" + log_lines: 1 + log_stream: "Application" + log_source: "WindowsEvents" + - log_value: "Application error log" + log_level: "Error" + log_lines: 1 + log_stream: "Application" + log_source: "WindowsEvents" + - log_value: "Application critical log" + log_level: "Critical" + log_lines: 1 + log_stream: "Application" + log_source: "WindowsEvents" \ No newline at end of file diff --git a/test/stress/windows/windows_events/agent_config.json b/test/stress/windows/windows_events/agent_config.json index e1882ff23..2034cdf57 100644 --- a/test/stress/windows/windows_events/agent_config.json +++ b/test/stress/windows/windows_events/agent_config.json @@ -53,8 +53,8 @@ "CRITICAL" ], "event_name": "System", - "log_group_name": "EC2_EventLogs", - "log_stream_name": "{instance_id}" + "log_group_name": "{instance_id}", + "log_stream_name": "System" }, { "event_format": "xml", @@ -66,8 +66,8 @@ "CRITICAL" ], "event_name": "Security", - "log_group_name": "EC2_EventLogs", - "log_stream_name": "{instance_id}" + "log_group_name": "{instance_id}", + "log_stream_name": "Security" }, { "event_format": "xml", @@ -79,8 +79,8 @@ "CRITICAL" ], "event_name": "Application", - "log_group_name": "EC2_EventLogs", - "log_stream_name": "{instance_id}" + "log_group_name": "{instance_id}", + "log_stream_name": "Application" } ] } diff --git a/test/stress/windows/windows_events/parameters.yml b/test/stress/windows/windows_events/parameters.yml index 3062d76d9..20bcc7609 100644 --- a/test/stress/windows/windows_events/parameters.yml +++ b/test/stress/windows/windows_events/parameters.yml @@ -67,4 +67,66 @@ metric_validation: metric_sample_count: 60 metric_dimension: - name: "objectname" - value: "Memory" \ No newline at end of file + value: "Memory" + +log_validation: + - log_value: "Security Informational log" + log_level: "Information" + log_lines: 1 + log_stream: "Security" + log_source: "WindowsEvents" + - log_value: "Security Warning log" + log_level: "Warning" + log_lines: 1 + log_stream: "Security" + log_source: "WindowsEvents" + - log_value: "Security Error log" + log_level: "Error" + log_lines: 1 + log_stream: "Security" + log_source: "WindowsEvents" + - log_value: "Security Critical log" + log_level: "Critical" + log_lines: 1 + log_stream: "Security" + log_source: "WindowsEvents" + - log_value: "System information log" + log_level: "Information" + log_lines: 1 + log_stream: "System" + log_source: "WindowsEvents" + - log_value: "System warning log" + log_level: "Warning" + log_lines: 1 + log_stream: "System" + log_source: "WindowsEvents" + - 
log_value: "System error log" + log_level: "Error" + log_lines: 1 + log_stream: "System" + log_source: "WindowsEvents" + - log_value: "System Critical log" + log_level: "Critical" + log_lines: 1 + log_stream: "System" + log_source: "WindowsEvents" + - log_value: "Application information log" + log_level: "Information" + log_lines: 1 + log_stream: "Application" + log_source: "WindowsEvents" + - log_value: "Application warning log" + log_level: "Warning" + log_lines: 1 + log_stream: "Application" + log_source: "WindowsEvents" + - log_value: "Application error log" + log_level: "Error" + log_lines: 1 + log_stream: "Application" + log_source: "WindowsEvents" + - log_value: "Application critical log" + log_level: "Critical" + log_lines: 1 + log_stream: "Application" + log_source: "WindowsEvents" \ No newline at end of file diff --git a/util/common/logs.go b/util/common/logs.go index 79e371029..92a4e9ca1 100644 --- a/util/common/logs.go +++ b/util/common/logs.go @@ -125,16 +125,6 @@ func getLogFilePaths(configPath string) ([]string, error) { return nil, err } - _, ok := cfgFileData["logs"].(map[string]interface{})["logs_collected"].(map[string]interface{})["files"] - - if !ok { - event_names := cfgFileData["logs"].(map[string]interface{})["logs_collected"].(map[string]interface{})["windows_events"].(map[string]interface{})["collect_list"].([]interface{}) - var eventName []string - for _, process := range event_names { - eventName = append(eventName, process.(map[string]interface{})["event_name"].(string)) - } - return eventName, nil - } logFiles := cfgFileData["logs"].(map[string]interface{})["logs_collected"].(map[string]interface{})["files"].(map[string]interface{})["collect_list"].([]interface{}) var filePaths []string for _, process := range logFiles { From bb847467ecc1ec75f587106150790480dcdaf49e Mon Sep 17 00:00:00 2001 From: Yared Taye Date: Mon, 17 Jul 2023 01:47:42 -0400 Subject: [PATCH 12/19] change configs --- .../windows/windows_events/agent_config.json | 10 ++++++++++ test/stress/windows/windows_events/agent_config.json | 10 ++++++++++ util/common/logs.go | 1 + 3 files changed, 21 insertions(+) diff --git a/test/performance/windows/windows_events/agent_config.json b/test/performance/windows/windows_events/agent_config.json index c474c7863..ec5973f4c 100644 --- a/test/performance/windows/windows_events/agent_config.json +++ b/test/performance/windows/windows_events/agent_config.json @@ -91,6 +91,16 @@ "log_stream_name": "Application" } ] + }, + "files": { + "collect_list": [ + { + "file_path": "C:/Users/Administrator/AppData/Local/Temp/test1.log", + "log_group_name": "{instance_id}", + "log_stream_name": "test1.log", + "timezone": "UTC" + } + ] } }, "force_flush_interval": 60 diff --git a/test/stress/windows/windows_events/agent_config.json b/test/stress/windows/windows_events/agent_config.json index 2034cdf57..10a631944 100644 --- a/test/stress/windows/windows_events/agent_config.json +++ b/test/stress/windows/windows_events/agent_config.json @@ -83,6 +83,16 @@ "log_stream_name": "Application" } ] + }, + "files": { + "collect_list": [ + { + "file_path": "C:/Users/Administrator/AppData/Local/Temp/test1.log", + "log_group_name": "{instance_id}", + "log_stream_name": "test1.log", + "timezone": "UTC" + } + ] } }, "force_flush_interval": 60 diff --git a/util/common/logs.go b/util/common/logs.go index 92a4e9ca1..b74e6da29 100644 --- a/util/common/logs.go +++ b/util/common/logs.go @@ -32,6 +32,7 @@ func GenerateLogs(configFilePath string, duration time.Duration, sendingInterval } func 
GenerateWindowsEvents(validationLog []models.LogValidation) error { + log.Printf("Windows event creation") var multiErr error for _, vLog := range validationLog { if vLog.LogSource == "WindowsEvents" && vLog.LogLevel != "" { From 782676285846059023cb50f1c06ef210a33250f9 Mon Sep 17 00:00:00 2001 From: Yared Taye Date: Mon, 17 Jul 2023 08:56:11 -0400 Subject: [PATCH 13/19] change logging for windows --- .../validators/stress/stress_validator.go | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/validator/validators/stress/stress_validator.go b/validator/validators/stress/stress_validator.go index 440e821fb..3469e3e5b 100644 --- a/validator/validators/stress/stress_validator.go +++ b/validator/validators/stress/stress_validator.go @@ -419,24 +419,18 @@ func (s *StressValidator) ValidateStressMetricWindows(metricName, metricNamespac boundAndPeriod = s.vConfig.GetAgentCollectionPeriod().Seconds() receiver = s.vConfig.GetPluginsConfig()[0] //Assuming one plugin at a time ) - log.Printf("Start to collect and validate metric %s with the namespace %s, start time %v and end time %v \n", metricName, metricNamespace, startTime, endTime) - metrics, err := awsservice.GetMetricStatistics( - metricName, - metricNamespace, - metricDimensions, - startTime, - endTime, - int32(boundAndPeriod), - []types.Statistic{types.StatisticMaximum}, - nil, - ) + stressMetricQueries := s.buildStressMetricQueries(metricName, metricNamespace, metricDimensions) + + log.Printf("Start to collect and validate windows metric %s with the namespace %s, start time %v and end time %v \n", metricName, metricNamespace, startTime, endTime) + + metrics, err := awsservice.GetMetricData(stressMetricQueries, startTime, endTime) if err != nil { return err } if len(metrics.Datapoints) == 0 || metrics.Datapoints[0].Maximum == nil { - return fmt.Errorf("\n getting metric %s failed with the namespace %s and dimension %v", metricName, metricNamespace, util.LogCloudWatchDimension(metricDimensions)) + return fmt.Errorf("\n getting windows metric %s failed with the namespace %s and dimension %v", metricName, metricNamespace, util.LogCloudWatchDimension(metricDimensions)) } if _, ok := windowsMetricPluginBoundValue[dataRate][receiver]; !ok { From 5e507b67e56f81298969eedcd1c85c2f35322776 Mon Sep 17 00:00:00 2001 From: Yared Taye Date: Mon, 17 Jul 2023 09:19:34 -0400 Subject: [PATCH 14/19] fix metrics for windows stress validation --- .../validators/stress/stress_validator.go | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/validator/validators/stress/stress_validator.go b/validator/validators/stress/stress_validator.go index 3469e3e5b..440e821fb 100644 --- a/validator/validators/stress/stress_validator.go +++ b/validator/validators/stress/stress_validator.go @@ -419,18 +419,24 @@ func (s *StressValidator) ValidateStressMetricWindows(metricName, metricNamespac boundAndPeriod = s.vConfig.GetAgentCollectionPeriod().Seconds() receiver = s.vConfig.GetPluginsConfig()[0] //Assuming one plugin at a time ) + log.Printf("Start to collect and validate metric %s with the namespace %s, start time %v and end time %v \n", metricName, metricNamespace, startTime, endTime) - stressMetricQueries := s.buildStressMetricQueries(metricName, metricNamespace, metricDimensions) - - log.Printf("Start to collect and validate windows metric %s with the namespace %s, start time %v and end time %v \n", metricName, metricNamespace, startTime, endTime) - - metrics, err := 
awsservice.GetMetricData(stressMetricQueries, startTime, endTime) + metrics, err := awsservice.GetMetricStatistics( + metricName, + metricNamespace, + metricDimensions, + startTime, + endTime, + int32(boundAndPeriod), + []types.Statistic{types.StatisticMaximum}, + nil, + ) if err != nil { return err } if len(metrics.Datapoints) == 0 || metrics.Datapoints[0].Maximum == nil { - return fmt.Errorf("\n getting windows metric %s failed with the namespace %s and dimension %v", metricName, metricNamespace, util.LogCloudWatchDimension(metricDimensions)) + return fmt.Errorf("\n getting metric %s failed with the namespace %s and dimension %v", metricName, metricNamespace, util.LogCloudWatchDimension(metricDimensions)) } if _, ok := windowsMetricPluginBoundValue[dataRate][receiver]; !ok { From b462488ffe68d79f47bafc113887f8baee63d8e0 Mon Sep 17 00:00:00 2001 From: Yared Taye Date: Mon, 17 Jul 2023 11:52:01 -0400 Subject: [PATCH 15/19] change namespace --- .../validators/stress/stress_validator.go | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/validator/validators/stress/stress_validator.go b/validator/validators/stress/stress_validator.go index 440e821fb..dfe1ced15 100644 --- a/validator/validators/stress/stress_validator.go +++ b/validator/validators/stress/stress_validator.go @@ -455,8 +455,52 @@ func (s *StressValidator) ValidateStressMetricWindows(metricName, metricNamespac if metricValue < 0 || metricValue > upperBoundValue { return fmt.Errorf("\n metric %s with value %f is larger than %f limit", metricName, metricValue, upperBoundValue) + } var ( + dataRate = fmt.Sprint(s.vConfig.GetDataRate()) + boundAndPeriod = s.vConfig.GetAgentCollectionPeriod().Seconds() + receiver = s.vConfig.GetPluginsConfig()[0] //Assuming one plugin at a time + ) + + stressMetricQueries := s.buildStressMetricQueries(metricName, metricNamespace, metricDimensions) + + log.Printf("Start to collect and validate windows metric %s with the namespace %s, start time %v and end time %v \n", metricName, metricNamespace, startTime, endTime) + + metrics, err := awsservice.GetMetricData(stressMetricQueries, startTime, endTime) + if err != nil { + return err } + if len(metrics.MetricDataResults) == 0 || len(metrics.MetricDataResults[0].Values) == 0 { + return fmt.Errorf("\n getting windows metric %s failed with the namespace %s and dimension %v", metricName, metricNamespace, util.LogCloudWatchDimension(metricDimensions)) + } + + if _, ok := metricPluginBoundValue[dataRate][receiver]; !ok { + return fmt.Errorf("\n plugin %s does not have data rate", receiver) + } + + if _, ok := metricPluginBoundValue[dataRate][receiver][metricName]; !ok { + return fmt.Errorf("\n metric %s does not have bound", receiver) + } + + // Assuming each plugin are testing one at a time + // Validate if the corresponding metrics are within the acceptable range [acceptable value +- 30%] + metricValue := metrics.MetricDataResults[0].Values[0] + upperBoundValue := metricPluginBoundValue[dataRate][receiver][metricName] * (1 + metricErrorBound) + log.Printf("Metric %s within the namespace %s has value of %f and the upper bound is %f \n", metricName, metricNamespace, metricValue, upperBoundValue) + + if metricValue < 0 || metricValue > upperBoundValue { + return fmt.Errorf("\n metric %s with value %f is larger than %f limit", metricName, metricValue, upperBoundValue) + } + + // Validate if the metrics are not dropping any metrics and able to backfill within the same minute (e.g if the memory_rss metric is having collection_interval 1 + // , 
it will need to have 60 sample counts - 1 datapoint / second) + if ok := awsservice.ValidateSampleCount(metricName, metricNamespace, metricDimensions, startTime, endTime, metricSampleCount-5, metricSampleCount, int32(boundAndPeriod)); !ok { + return fmt.Errorf("\n metric %s is not within sample count bound [ %d, %d]", metricName, metricSampleCount-5, metricSampleCount) + } + + return nil + + // Validate if the metrics are not dropping any metrics and able to backfill within the same minute (e.g if the memory_rss metric is having collection_interval 1 // , it will need to have 60 sample counts - 1 datapoint / second) if ok := awsservice.ValidateSampleCount(metricName, metricNamespace, metricDimensions, startTime, endTime, metricSampleCount-5, metricSampleCount, int32(boundAndPeriod)); !ok { From 92e89db7df99874badef672b01d564e8c4b7a312 Mon Sep 17 00:00:00 2001 From: Yared Taye Date: Mon, 17 Jul 2023 12:33:59 -0400 Subject: [PATCH 16/19] fix stress validator change --- .../validators/stress/stress_validator.go | 44 ------------------- 1 file changed, 44 deletions(-) diff --git a/validator/validators/stress/stress_validator.go b/validator/validators/stress/stress_validator.go index dfe1ced15..440e821fb 100644 --- a/validator/validators/stress/stress_validator.go +++ b/validator/validators/stress/stress_validator.go @@ -455,52 +455,8 @@ func (s *StressValidator) ValidateStressMetricWindows(metricName, metricNamespac if metricValue < 0 || metricValue > upperBoundValue { return fmt.Errorf("\n metric %s with value %f is larger than %f limit", metricName, metricValue, upperBoundValue) - } var ( - dataRate = fmt.Sprint(s.vConfig.GetDataRate()) - boundAndPeriod = s.vConfig.GetAgentCollectionPeriod().Seconds() - receiver = s.vConfig.GetPluginsConfig()[0] //Assuming one plugin at a time - ) - - stressMetricQueries := s.buildStressMetricQueries(metricName, metricNamespace, metricDimensions) - - log.Printf("Start to collect and validate windows metric %s with the namespace %s, start time %v and end time %v \n", metricName, metricNamespace, startTime, endTime) - - metrics, err := awsservice.GetMetricData(stressMetricQueries, startTime, endTime) - if err != nil { - return err } - if len(metrics.MetricDataResults) == 0 || len(metrics.MetricDataResults[0].Values) == 0 { - return fmt.Errorf("\n getting windows metric %s failed with the namespace %s and dimension %v", metricName, metricNamespace, util.LogCloudWatchDimension(metricDimensions)) - } - - if _, ok := metricPluginBoundValue[dataRate][receiver]; !ok { - return fmt.Errorf("\n plugin %s does not have data rate", receiver) - } - - if _, ok := metricPluginBoundValue[dataRate][receiver][metricName]; !ok { - return fmt.Errorf("\n metric %s does not have bound", receiver) - } - - // Assuming each plugin are testing one at a time - // Validate if the corresponding metrics are within the acceptable range [acceptable value +- 30%] - metricValue := metrics.MetricDataResults[0].Values[0] - upperBoundValue := metricPluginBoundValue[dataRate][receiver][metricName] * (1 + metricErrorBound) - log.Printf("Metric %s within the namespace %s has value of %f and the upper bound is %f \n", metricName, metricNamespace, metricValue, upperBoundValue) - - if metricValue < 0 || metricValue > upperBoundValue { - return fmt.Errorf("\n metric %s with value %f is larger than %f limit", metricName, metricValue, upperBoundValue) - } - - // Validate if the metrics are not dropping any metrics and able to backfill within the same minute (e.g if the memory_rss metric is having 
collection_interval 1 - // , it will need to have 60 sample counts - 1 datapoint / second) - if ok := awsservice.ValidateSampleCount(metricName, metricNamespace, metricDimensions, startTime, endTime, metricSampleCount-5, metricSampleCount, int32(boundAndPeriod)); !ok { - return fmt.Errorf("\n metric %s is not within sample count bound [ %d, %d]", metricName, metricSampleCount-5, metricSampleCount) - } - - return nil - - // Validate if the metrics are not dropping any metrics and able to backfill within the same minute (e.g if the memory_rss metric is having collection_interval 1 // , it will need to have 60 sample counts - 1 datapoint / second) if ok := awsservice.ValidateSampleCount(metricName, metricNamespace, metricDimensions, startTime, endTime, metricSampleCount-5, metricSampleCount, int32(boundAndPeriod)); !ok { From 874bbbd3234221c7682bcf75a8d2ddbe85b053cb Mon Sep 17 00:00:00 2001 From: Yared Taye Date: Tue, 25 Jul 2023 23:05:04 -0400 Subject: [PATCH 17/19] fix broken changes during rebase --- generator/test_case_generator.go | 503 +++++++++++++------------------ 1 file changed, 214 insertions(+), 289 deletions(-) diff --git a/generator/test_case_generator.go b/generator/test_case_generator.go index 614a48a1d..223ed43fe 100644 --- a/generator/test_case_generator.go +++ b/generator/test_case_generator.go @@ -1,335 +1,260 @@ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: MIT -package performance +package main import ( + "encoding/json" "fmt" - "github.com/aws/aws-sdk-go-v2/service/cloudwatch" + "io" "log" - "strings" - "time" + "os" - "github.com/aws/aws-sdk-go-v2/service/cloudwatch/types" - "github.com/aws/aws-sdk-go/aws" - "golang.org/x/exp/maps" - "golang.org/x/exp/slices" - - "github.com/aws/amazon-cloudwatch-agent-test/util/awsservice" - "github.com/aws/amazon-cloudwatch-agent-test/validator/models" - "github.com/aws/amazon-cloudwatch-agent-test/validator/validators/basic" -) - -const ( - ServiceName = "AmazonCloudWatchAgent" - DynamoDBDataBase = "CWAPerformanceMetrics" -) - -var ( - // The default unit for these metrics is byte. 
However, we want to convert to MB for easier understanding - metricsConvertToMB = []string{"mem_total", "procstat_memory_rss", "procstat_memory_swap", "procstat_memory_data", "procstat_memory_vms", "procstat_write_bytes", "procstat_bytes_sent", "memory_rss", "memory_vms", "write_bytes", "Bytes_Sent_Per_Sec", "Available_Bytes"} + "github.com/mitchellh/mapstructure" ) -type PerformanceValidator struct { - vConfig models.ValidateConfig - models.ValidatorFactory +type matrixRow struct { + TestDir string `json:"test_dir"` + Os string `json:"os"` + Family string `json:"family"` + TestType string `json:"testType"` + Arc string `json:"arc"` + InstanceType string `json:"instanceType"` + Ami string `json:"ami"` + BinaryName string `json:"binaryName"` + Username string `json:"username"` + InstallAgentCommand string `json:"installAgentCommand"` + AgentStartCommand string `json:"agentStartCommand"` + CaCertPath string `json:"caCertPath"` + ValuesPerMinute int `json:"values_per_minute"` // Number of metrics to be sent or number of log lines to write + K8sVersion string `json:"k8s_version"` + TerraformDir string `json:"terraform_dir"` + UseSSM bool `json:"useSSM"` + ExcludedTests string `json:"excludedTests"` } -var _ models.ValidatorFactory = (*PerformanceValidator)(nil) - -func NewPerformanceValidator(vConfig models.ValidateConfig) models.ValidatorFactory { - return &PerformanceValidator{ - vConfig: vConfig, - ValidatorFactory: basic.NewBasicValidator(vConfig), - } +type testConfig struct { + // this gives more flexibility to define terraform dir when there should be a different set of terraform files + // e.g. statsd can have a multiple terraform module sets for difference test scenarios (ecs, eks or ec2) + testDir string + terraformDir string + // define target matrix field as set(s) + // empty map means a testConfig will be created with a test entry for each entry from *_test_matrix.json + targets map[string]map[string]struct{} } -func (s *PerformanceValidator) CheckData(startTime, endTime time.Time) error { - perfInfo := PerformanceInformation{} - if s.vConfig.GetOSFamily() == "windows" { - stat, err := s.GetWindowsPerformanceMetrics(startTime, endTime) - if err != nil { - return err - } - perfInfo, err = s.CalculateWindowsMetricStatsAndPackMetrics(stat) - if err != nil { - return err - } - } else { - metrics, err := s.GetPerformanceMetrics(startTime, endTime) - if err != nil { - return err - } - - perfInfo, err = s.CalculateMetricStatsAndPackMetrics(metrics) - if err != nil { - return err - } - } - err := s.SendPacketToDatabase(perfInfo) - if err != nil { - return err - } - - return nil -} - -func (s *PerformanceValidator) SendPacketToDatabase(perfInfo PerformanceInformation) error { - var ( - dataType = s.vConfig.GetDataType() - receiver = s.vConfig.GetPluginsConfig()[0] //Assuming one plugin at a time - commitHash, commitDate = s.vConfig.GetCommitInformation() - agentCollectionPeriod = fmt.Sprint(s.vConfig.GetAgentCollectionPeriod().Seconds()) - // The secondary global index that is used for checking if there are item has already been exist in the table - // The performance validator will query based on the UseCaseHash to confirm if the current commit with the use case - // has been exist or not? If yes, merge it. 
If not, sending it to the database - // https://github.com/aws/amazon-cloudwatch-agent-test/blob/e07fe7adb1b1d75244d8984507d3f83a7237c3d3/terraform/setup/main.tf#L46-L53 - kCheckingAttribute = []string{"CommitHash", "UseCase"} - vCheckingAttribute = []string{fmt.Sprint(commitHash), receiver} - ) - - err := backoff.Retry(func() error { - existingPerfInfo, err := awsservice.GetItemInDatabase(DynamoDBDataBase, "UseCaseHash", kCheckingAttribute, vCheckingAttribute, perfInfo) - if err != nil { - return err - } - - // Get the latest performance information from the database and update by merging the existing one - // and finally replace the packet in the database - maps.Copy(existingPerfInfo["Results"].(map[string]interface{}), perfInfo["Results"].(map[string]interface{})) - - finalPerfInfo := packIntoPerformanceInformation(existingPerfInfo["UniqueID"].(string), receiver, dataType, agentCollectionPeriod, commitHash, commitDate, existingPerfInfo["Results"]) - - err = awsservice.ReplaceItemInDatabase(DynamoDBDataBase, finalPerfInfo) - - if err != nil { - return err - } - return nil - }, awsservice.StandardExponentialBackoff) +const ( + testTypeKeyEc2Linux = "ec2_linux" +) - return err +// you can't have a const map in golang +var testTypeToTestConfig = map[string][]testConfig{ + "ec2_gpu": { + {testDir: "./test/nvidia_gpu"}, + }, + testTypeKeyEc2Linux: { + {testDir: "./test/ca_bundle"}, + {testDir: "./test/cloudwatchlogs"}, + { + testDir: "./test/metrics_number_dimension", + targets: map[string]map[string]struct{}{"os": {"al2": {}}}, + }, + {testDir: "./test/metric_value_benchmark"}, + {testDir: "./test/run_as_user"}, + {testDir: "./test/collection_interval"}, + {testDir: "./test/metric_dimension"}, + {testDir: "./test/restart"}, + {testDir: "./test/multi_config"}, + { + testDir: "./test/acceptance", + targets: map[string]map[string]struct{}{"os": {"ubuntu-20.04": {}}}, + }, + // skipping FIPS test as the test cannot be verified + // neither ssh nor SSM works after a reboot once FIPS is enabled + //{ + // testDir: "./test/fips", + // targets: map[string]map[string]struct{}{"os": {"rhel8": {}}}, + //}, + { + testDir: "./test/lvm", + targets: map[string]map[string]struct{}{"os": {"al2": {}}}, + }, + { + testDir: "./test/proxy", + targets: map[string]map[string]struct{}{"os": {"al2": {}}}, + }, + { + testDir: "./test/ssl_cert", + targets: map[string]map[string]struct{}{"os": {"al2": {}}}, + }, + { + testDir: "./test/userdata", + terraformDir: "terraform/ec2/userdata", + targets: map[string]map[string]struct{}{"os": {"ol9": {}}}, + }, + { + testDir: "./test/assume_role", + terraformDir: "terraform/ec2/creds", + targets: map[string]map[string]struct{}{"os": {"al2": {}}}, + }, + }, + /* + You can only place 1 mac instance on a dedicate host a single time. 
+ Therefore, limit down the scope for testing in Mac since EC2 can be done with Linux + and Mac under the hood share similar plugins with Linux + */ + "ec2_mac": { + {testDir: "../../../test/feature/mac"}, + {testDir: "../../../test/run_as_user"}, + }, + "ec2_windows": { + {testDir: "../../../test/feature/windows"}, + {testDir: "../../../test/restart"}, + {testDir: "../../../test/acceptance"}, + {testDir: "../../../test/multi_config"}, + // assume role test doesn't add much value, and it already being tested with linux + //{testDir: "../../../test/assume_role"}, + }, + "ec2_performance": { + {testDir: "../../test/performance/emf"}, + {testDir: "../../test/performance/logs"}, + {testDir: "../../test/performance/system"}, + {testDir: "../../test/performance/statsd"}, + {testDir: "../../test/performance/collectd"}, + }, + "ec2_windows_performance": { + {testDir: "../../test/performance/windows/logs"}, + {testDir: "../../test/performance/windows/system"}, + {testDir: "../../test/performance/windows/windows_events"}, + }, + "ec2_stress": { + {testDir: "../../test/stress/emf"}, + {testDir: "../../test/stress/logs"}, + {testDir: "../../test/stress/system"}, + {testDir: "../../test/stress/statsd"}, + {testDir: "../../test/stress/collectd"}, + }, + "ec2_windows_stress": { + {testDir: "../../test/stress/windows/logs"}, + {testDir: "../../test/stress/windows/system"}, + {testDir: "../../test/stress/windows/windows_events"}, + }, + "ecs_fargate": { + {testDir: "./test/ecs/ecs_metadata"}, + }, + "ecs_ec2_daemon": { + {testDir: "./test/metric_value_benchmark"}, + {testDir: "./test/statsd"}, + {testDir: "./test/emf"}, + }, + "eks_daemon": { + { + testDir: "./test/metric_value_benchmark", + targets: map[string]map[string]struct{}{"arc": {"amd64": {}}}, + }, + { + testDir: "./test/statsd", terraformDir: "terraform/eks/daemon/statsd", + targets: map[string]map[string]struct{}{"arc": {"amd64": {}}}, + }, + { + testDir: "./test/emf", terraformDir: "terraform/eks/daemon/emf", + targets: map[string]map[string]struct{}{"arc": {"amd64": {}}}, + }, + { + testDir: "./test/fluent", terraformDir: "terraform/eks/daemon/fluent/d", + targets: map[string]map[string]struct{}{"arc": {"amd64": {}}}, + }, + {testDir: "./test/fluent", terraformDir: "terraform/eks/daemon/fluent/bit"}, + }, + "eks_deployment": { + {testDir: "./test/metric_value_benchmark"}, + }, } -func (s *PerformanceValidator) CalculateMetricStatsAndPackMetrics(metrics []types.MetricDataResult) (PerformanceInformation, error) { - var ( - receiver = s.vConfig.GetPluginsConfig()[0] //Assuming one plugin at a time - commitHash, commitDate = s.vConfig.GetCommitInformation() - dataType = s.vConfig.GetDataType() - dataRate = fmt.Sprint(s.vConfig.GetDataRate()) - uniqueID = s.vConfig.GetUniqueID() - agentCollectionPeriod = s.vConfig.GetAgentCollectionPeriod().Seconds() - ) - performanceMetricResults := make(map[string]Stats) - for _, metric := range metrics { - metricLabel := strings.Split(*metric.Label, " ") - metricName := metricLabel[len(metricLabel)-1] - metricValues := metric.Values - //Convert every bytes to MB - if slices.Contains(metricsConvertToMB, metricName) { - for i, val := range metricValues { - metricValues[i] = val / (1024 * 1024) - } - } - log.Printf("Start calculate metric statictics for metric %s %v \n", metricName, metricValues) - if !isAllValuesGreaterThanOrEqualToZero(metricValues) { - return nil, fmt.Errorf("\n values are not all greater than or equal to zero for metric %s with values: %v", metricName, metricValues) - } - metricStats := 
CalculateMetricStatisticsBasedOnDataAndPeriod(metricValues, agentCollectionPeriod) - log.Printf("Finished calculate metric statictics for metric %s: %v \n", metricName, metricStats) - performanceMetricResults[metricName] = metricStats +func copyAllEC2LinuxTestForOnpremTesting() { + /* Some tests need to be fixed in order to run in both environment, so for now for PoC, run one that works. + testTypeToTestConfig["ec2_linux_onprem"] = testTypeToTestConfig[testTypeKeyEc2Linux] + */ + testTypeToTestConfig["ec2_linux_onprem"] = []testConfig{ + { + testDir: "./test/lvm", + targets: map[string]map[string]struct{}{"os": {"al2": {}}}, + }, } - - return packIntoPerformanceInformation(uniqueID, receiver, dataType, fmt.Sprint(agentCollectionPeriod), commitHash, commitDate, map[string]interface{}{dataRate: performanceMetricResults}), nil } -func (s *PerformanceValidator) CalculateWindowsMetricStatsAndPackMetrics(statistic []*cloudwatch.GetMetricStatisticsOutput) (PerformanceInformation, error) { - var ( - receiver = s.vConfig.GetPluginsConfig()[0] //Assuming one plugin at a time - commitHash, commitDate = s.vConfig.GetCommitInformation() - dataType = s.vConfig.GetDataType() - dataRate = fmt.Sprint(s.vConfig.GetDataRate()) - uniqueID = s.vConfig.GetUniqueID() - agentCollectionPeriod = s.vConfig.GetAgentCollectionPeriod().Seconds() - ) - performanceMetricResults := make(map[string]Stats) +func main() { + copyAllEC2LinuxTestForOnpremTesting() - for _, metric := range statistic { - metricLabel := strings.Split(*metric.Label, " ") - metricName := metricLabel[len(metricLabel)-1] - metricValues := metric.Datapoints - //Convert every bytes to MB - if slices.Contains(metricsConvertToMB, metricName) { - for i, val := range metricValues { - *metricValues[i].Average = *val.Average / (1024 * 1024) - } - } - log.Printf("Start calculate metric statictics for metric %s \n", metricName) - if !isAllStatisticsGreaterThanOrEqualToZero(metricValues) { - return nil, fmt.Errorf("\n values are not all greater than or equal to zero for metric %s with values: %v", metricName, metricValues) - } - // GetMetricStatistics provides these statistics, however this will require maintaining multiple data arrays - // and can be difficult for code readability. This way follows the same calculation pattern as Linux - // and simplify the logics. 
- var data []float64 - for _, datapoint := range metric.Datapoints { - data = append(data, *datapoint.Average) - } - metricStats := CalculateMetricStatisticsBasedOnDataAndPeriod(data, agentCollectionPeriod) - log.Printf("Finished calculate metric statictics for metric %s: %+v \n", metricName, metricStats) - performanceMetricResults[metricName] = metricStats + for testType, testConfigs := range testTypeToTestConfig { + testMatrix := genMatrix(testType, testConfigs) + writeTestMatrixFile(testType, testMatrix) } - - return packIntoPerformanceInformation(uniqueID, receiver, dataType, fmt.Sprint(agentCollectionPeriod), commitHash, commitDate, map[string]interface{}{dataRate: performanceMetricResults}), nil } -func (s *PerformanceValidator) GetPerformanceMetrics(startTime, endTime time.Time) ([]types.MetricDataResult, error) { - var ( - metricNamespace = s.vConfig.GetMetricNamespace() - validationMetric = s.vConfig.GetMetricValidation() - ec2InstanceId = awsservice.GetInstanceId() - performanceMetricDataQueries = []types.MetricDataQuery{} - ) - log.Printf("Start getting performance metrics from CloudWatch") - for _, metric := range validationMetric { - metricDimensions := []types.Dimension{ - { - Name: aws.String("InstanceId"), - Value: aws.String(ec2InstanceId), - }, - } - for _, dimension := range metric.MetricDimension { - metricDimensions = append(metricDimensions, types.Dimension{ - Name: aws.String(dimension.Name), - Value: aws.String(dimension.Value), - }) - } - performanceMetricDataQueries = append(performanceMetricDataQueries, s.buildPerformanceMetricQueries(metric.MetricName, metricNamespace, metricDimensions)) - } - - for _, stat := range validationMetric { - metricDimensions := []types.Dimension{ - { - Name: aws.String("InstanceId"), - Value: aws.String(ec2InstanceId), - }, - } - for _, dimension := range stat.MetricDimension { - metricDimensions = append(metricDimensions, types.Dimension{ - Name: aws.String(dimension.Name), - Value: aws.String(dimension.Value), - }) - } - } - metrics, err := awsservice.GetMetricData(performanceMetricDataQueries, startTime, endTime) +func genMatrix(testType string, testConfigs []testConfig) []matrixRow { + openTestMatrix, err := os.Open(fmt.Sprintf("generator/resources/%v_test_matrix.json", testType)) if err != nil { - return nil, err + log.Panicf("can't read file %v_test_matrix.json err %v", testType, err) } - return metrics.MetricDataResults, nil -} + defer openTestMatrix.Close() -func (s *PerformanceValidator) GetWindowsPerformanceMetrics(startTime, endTime time.Time) ([]*cloudwatch.GetMetricStatisticsOutput, error) { - var ( - metricNamespace = s.vConfig.GetMetricNamespace() - validationMetric = s.vConfig.GetMetricValidation() - ec2InstanceId = awsservice.GetInstanceId() - ) - log.Printf("Start getting performance metrics from CloudWatch") + byteValueTestMatrix, _ := io.ReadAll(openTestMatrix) - var statistics = []*cloudwatch.GetMetricStatisticsOutput{} - for _, stat := range validationMetric { - metricDimensions := []types.Dimension{ - { - Name: aws.String("InstanceId"), - Value: aws.String(ec2InstanceId), - }, - } - for _, dimension := range stat.MetricDimension { - metricDimensions = append(metricDimensions, types.Dimension{ - Name: aws.String(dimension.Name), - Value: aws.String(dimension.Value), - }) - } - log.Printf("Trying to get Metric %s for GetMetricStatistic ", stat.MetricName) - statList := []types.Statistic{ - types.StatisticAverage, - } - // Windows procstat metrics always append a space and GetMetricData does not support space 
character - // Only workaround is to use GetMetricStatistics and retrieve the datapoints on a secondly period - statistic, err := awsservice.GetMetricStatistics(stat.MetricName, metricNamespace, metricDimensions, startTime, endTime, 1, statList, nil) - if err != nil { - return nil, err - } - statistics = append(statistics, statistic) - log.Printf("Statistics for Metric: %s", stat.MetricName) - for _, datapoint := range statistic.Datapoints { - log.Printf("Average: %f", *(datapoint.Average)) - } + var testMatrix []map[string]interface{} + err = json.Unmarshal(byteValueTestMatrix, &testMatrix) + if err != nil { + log.Panicf("can't unmarshall file %v_test_matrix.json err %v", testType, err) } - return statistics, nil -} - -func (s *PerformanceValidator) buildPerformanceMetricQueries(metricName, metricNamespace string, metricDimensions []types.Dimension) types.MetricDataQuery { - metricInformation := types.Metric{ - Namespace: aws.String(metricNamespace), - MetricName: aws.String(metricName), - Dimensions: metricDimensions, - } + testMatrixComplete := make([]matrixRow, 0, len(testMatrix)) + for _, test := range testMatrix { + for _, testConfig := range testConfigs { + row := matrixRow{TestDir: testConfig.testDir, TestType: testType, TerraformDir: testConfig.terraformDir} + err = mapstructure.Decode(test, &row) + if err != nil { + log.Panicf("can't decode map test %v to metric line struct with error %v", testConfig, err) + } - metricDataQuery := types.MetricDataQuery{ - MetricStat: &types.MetricStat{ - Metric: &metricInformation, - Period: aws.Int32(10), - Stat: aws.String(string(models.AVERAGE)), - }, - Id: aws.String(strings.ToLower(metricName)), + if testConfig.targets == nil || shouldAddTest(&row, testConfig.targets) { + testMatrixComplete = append(testMatrixComplete, row) + } + } } - return metricDataQuery + return testMatrixComplete } -// packIntoPerformanceInformation will package all the information into the required format of MongoDb Database -// https://github.com/aws/amazon-cloudwatch-agent-test/blob/e07fe7adb1b1d75244d8984507d3f83a7237c3d3/terraform/setup/main.tf#L8-L63 -func packIntoPerformanceInformation(uniqueID, receiver, dataType, collectionPeriod, commitHash string, commitDate int64, result interface{}) PerformanceInformation { - instanceAMI := awsservice.GetImageId() - instanceType := awsservice.GetInstanceType() - - return PerformanceInformation{ - "UniqueID": uniqueID, - "Service": ServiceName, - "UseCase": receiver, - "CommitDate": commitDate, - "CommitHash": commitHash, - "DataType": dataType, - "Results": result, - "CollectionPeriod": collectionPeriod, - "InstanceAMI": instanceAMI, - "InstanceType": instanceType, - } -} +// not so robust way to determine a matrix entry should be included to complete test matrix, but it serves the purpose +// struct (matrixRow) field should be added as elif to support more. 
could use reflection with some tradeoffs +func shouldAddTest(row *matrixRow, targets map[string]map[string]struct{}) bool { + for key, set := range targets { + var rowVal string + if key == "arc" { + rowVal = row.Arc + } else if key == "os" { + rowVal = row.Os + } -func isAllValuesGreaterThanOrEqualToZero(values []float64) bool { - if len(values) == 0 { - return false - } - for _, value := range values { - if value < 0 { + if rowVal == "" { + continue + } + _, ok := set[rowVal] + if !ok { return false } } return true } -func isAllStatisticsGreaterThanOrEqualToZero(datapoints []types.Datapoint) bool { - if len(datapoints) == 0 { - return false +func writeTestMatrixFile(testType string, testMatrix []matrixRow) { + bytes, err := json.MarshalIndent(testMatrix, "", " ") + if err != nil { + log.Panicf("Can't marshal json for target os %v, err %v", testType, err) } - for _, datapoint := range datapoints { - if *datapoint.Average < 0 { - return false - } + err = os.WriteFile(fmt.Sprintf("generator/resources/%v_complete_test_matrix.json", testType), bytes, os.ModePerm) + if err != nil { + log.Panicf("Can't write json to file for target os %v, err %v", testType, err) } - return true } From 9097eaa256fcb0397cb11751e7e859f52a74dbeb Mon Sep 17 00:00:00 2001 From: Yared Taye Date: Wed, 26 Jul 2023 08:26:05 -0400 Subject: [PATCH 18/19] add os family --- test/performance/windows/windows_events/parameters.yml | 1 + test/stress/windows/windows_events/parameters.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/test/performance/windows/windows_events/parameters.yml b/test/performance/windows/windows_events/parameters.yml index 8ff6fd147..8def65b79 100644 --- a/test/performance/windows/windows_events/parameters.yml +++ b/test/performance/windows/windows_events/parameters.yml @@ -13,6 +13,7 @@ agent_collection_period: 300 commit_hash: commit_date: +os_family: "" cloudwatch_agent_config: "" # Metric that the test needs to validate; moreover, the stress validation already has diff --git a/test/stress/windows/windows_events/parameters.yml b/test/stress/windows/windows_events/parameters.yml index 20bcc7609..9f19bca52 100644 --- a/test/stress/windows/windows_events/parameters.yml +++ b/test/stress/windows/windows_events/parameters.yml @@ -13,6 +13,7 @@ agent_collection_period: 300 commit_hash: commit_date: +os_family: "" cloudwatch_agent_config: "" # Metric that the test needs to validate; moreover, the stress validation already has From d31beecf35fcdf7d6340cc5dc4ced6ab7812df52 Mon Sep 17 00:00:00 2001 From: Yared Taye Date: Thu, 27 Jul 2023 10:25:50 -0400 Subject: [PATCH 19/19] remove test matrix since windows setup exists --- ...ws_events_log_performance_test_matrix.json | 23 -------------- ...windows_events_log_stress_test_matrix.json | 30 ------------------- 2 files changed, 53 deletions(-) delete mode 100644 generator/resources/ec2_windows_events_log_performance_test_matrix.json delete mode 100644 generator/resources/ec2_windows_events_log_stress_test_matrix.json diff --git a/generator/resources/ec2_windows_events_log_performance_test_matrix.json b/generator/resources/ec2_windows_events_log_performance_test_matrix.json deleted file mode 100644 index 2e1ae193e..000000000 --- a/generator/resources/ec2_windows_events_log_performance_test_matrix.json +++ /dev/null @@ -1,23 +0,0 @@ -[ - { - "os": "win-2022", - "ami": "cloudwatch-agent-integration-test-win-2022*", - "arc": "amd64", - "valuesPerMinute": 100, - "family": "windows" - }, - { - "os": "win-2022", - "ami": 
"cloudwatch-agent-integration-test-win-2022*", - "arc": "amd64", - "valuesPerMinute": 1000, - "family": "windows" - }, - { - "os": "win-2022", - "ami": "cloudwatch-agent-integration-test-win-2022*", - "arc": "amd64", - "valuesPerMinute": 5000, - "family": "windows" - } -] \ No newline at end of file diff --git a/generator/resources/ec2_windows_events_log_stress_test_matrix.json b/generator/resources/ec2_windows_events_log_stress_test_matrix.json deleted file mode 100644 index effa360e0..000000000 --- a/generator/resources/ec2_windows_events_log_stress_test_matrix.json +++ /dev/null @@ -1,30 +0,0 @@ -[ - { - "os": "win-2022", - "ami": "cloudwatch-agent-integration-test-win-2022*", - "arc": "amd64", - "valuesPerMinute": 1000, - "family": "windows" - }, - { - "os": "win-2022", - "ami": "cloudwatch-agent-integration-test-win-2022*", - "arc": "amd64", - "valuesPerMinute": 5000, - "family": "windows" - }, - { - "os": "win-2022", - "ami": "cloudwatch-agent-integration-test-win-2022*", - "arc": "amd64", - "valuesPerMinute": 10000, - "family": "windows" - }, - { - "os": "win-2022", - "ami": "cloudwatch-agent-integration-test-win-2022*", - "arc": "amd64", - "valuesPerMinute": 50000, - "family": "windows" - } -] \ No newline at end of file