Skip to content

Commit

Permalink
Moved configuration to run in a separate container
Browse files Browse the repository at this point in the history
* Adding the S3 access key and the S3/Glue data source is now done in a dedicated short-lived container
* Added missing license headers

Signed-off-by: Norman Jordan <[email protected]>
  • Loading branch information
normanj-bitquill committed Jan 13, 2025
1 parent ca30131 commit 60eb7bc
Show file tree
Hide file tree
Showing 10 changed files with 120 additions and 72 deletions.
2 changes: 2 additions & 0 deletions docker/integ-test/.env
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,5 @@ OPENSEARCH_NODE_MEMORY=512m
OPENSEARCH_ADMIN_PASSWORD=C0rrecthorsebatterystaple.
OPENSEARCH_PORT=9200
OPENSEARCH_DASHBOARDS_PORT=5601
S3_ACCESS_KEY=Vt7jnvi5BICr1rkfsheT
S3_SECRET_KEY=5NK3StGvoGCLUWvbaGN0LBUf9N6sjE94PEzLdqwO
73 changes: 73 additions & 0 deletions docker/integ-test/configuration-updater/apply-configuration.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#!/bin/sh

# Copyright OpenSearch Contributors
# SPDX-License-Identifier: Apache-2.0

# One-shot configuration script, run in a short-lived container after
# OpenSearch Dashboards is healthy:
#   1. (Re)creates the Minio "integ-test" bucket and the S3 access key.
#   2. Registers the S3/Glue datasource in OpenSearch Dashboards.
#   3. Applies the Spark execution-engine cluster settings to OpenSearch.
#
# Required environment variables:
#   S3_ACCESS_KEY / S3_SECRET_KEY - Minio service-account credentials to create
#   OPENSEARCH_ADMIN_PASSWORD     - admin password for OpenSearch/Dashboards
#
# curl flags used throughout:
#   -s  silence progress output (the original used -q, which actually means
#       "do not read ~/.curlrc", not "quiet")
#   -f  exit non-zero on HTTP errors, so the status checks below actually
#       detect failed requests instead of only transport-level failures

# Login to Minio; the session cookie is reused by the requests below.
curl -s -f \
  -c /tmp/minio-cookies.txt \
  -H 'Content-Type: application/json' \
  -d '{"accessKey": "minioadmin", "secretKey": "minioadmin"}' \
  http://minio-S3:9001/api/v1/login

# Delete the test bucket (best-effort: it may not exist on a fresh volume)
curl -s -b /tmp/minio-cookies.txt \
  -X DELETE \
  http://minio-S3:9001/api/v1/buckets/test

# Create the integ-test bucket
curl -s -f \
  -b /tmp/minio-cookies.txt \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"name": "integ-test", "versioning": {"enabled": true, "excludePrefixes": [], "excludeFolders": false}, "locking": true}' \
  http://minio-S3:9001/api/v1/buckets

# Create the access key the Spark containers will use for S3 access
curl -s -f \
  -b /tmp/minio-cookies.txt \
  -X POST \
  -H 'Content-Type: application/json' \
  -d "{\"policy\": \"\", \"accessKey\": \"${S3_ACCESS_KEY}\", \"secretKey\": \"${S3_SECRET_KEY}\", \"description\": \"\", \"comment\": \"\", \"name\": \"\", \"expiry\": null}" \
  http://minio-S3:9001/api/v1/service-account-credentials

# Login to OpenSearch Dashboards; the session cookie is reused below.
echo ">>> Login to OpenSearch dashboards"
if curl -s -f \
     -c /tmp/opensearch-cookies.txt \
     -X POST \
     -H 'Content-Type: application/json' \
     -H 'Osd-Version: 2.18.0' \
     -H 'Osd-Xsrf: fetch' \
     -d "{\"username\": \"admin\", \"password\": \"${OPENSEARCH_ADMIN_PASSWORD}\"}" \
     'http://opensearch-dashboards:5601/auth/login?dataSourceId='
then
  echo "   >>> Login successful"
else
  echo "   >>> Login failed"
fi

# Create the S3/Glue datasource used by the integration tests
echo ">>> Creating datasource"
if curl -s -f \
     -b /tmp/opensearch-cookies.txt \
     -X POST \
     -H 'Content-Type: application/json' \
     -H 'Osd-Version: 2.18.0' \
     -H 'Osd-Xsrf: fetch' \
     -d "{\"name\": \"mys3\", \"allowedRoles\": [], \"connector\": \"s3glue\", \"properties\": {\"glue.auth.type\": \"iam_role\", \"glue.auth.role_arn\": \"arn:aws:iam::123456789012:role/S3Access\", \"glue.indexstore.opensearch.uri\": \"http://opensearch:9200\", \"glue.indexstore.opensearch.auth\": \"basicauth\", \"glue.indexstore.opensearch.auth.username\": \"admin\", \"glue.indexstore.opensearch.auth.password\": \"${OPENSEARCH_ADMIN_PASSWORD}\"}}" \
     http://opensearch-dashboards:5601/api/directquery/dataconnections
then
  echo "   >>> S3 datasource created"
else
  echo "   >>> Failed to create S3 datasource"
fi

# Point the Spark execution engine at the (fake) EMR serverless application
echo ">>> Setting cluster settings"
if curl -s -f \
     -u "admin:${OPENSEARCH_ADMIN_PASSWORD}" \
     -X PUT \
     -H 'Content-Type: application/json' \
     -d '{"persistent": {"plugins.query.executionengine.spark.config": "{\"applicationId\":\"integ-test\",\"executionRoleARN\":\"arn:aws:iam::xxxxx:role/emr-job-execution-role\",\"region\":\"us-west-2\", \"sparkSubmitParameters\": \"--conf spark.dynamicAllocation.enabled=false\"}"}}' \
     http://opensearch:9200/_cluster/settings
then
  echo "   >>> Successfully set cluster settings"
else
  echo "   >>> Failed to set cluster settings"
fi
20 changes: 20 additions & 0 deletions docker/integ-test/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ services:
condition: service_healthy
opensearch-dashboards:
condition: service_healthy
configuration-updater:
condition: service_completed_successfully

spark-worker:
image: bitnami/spark:${SPARK_VERSION:-3.5.3}
Expand Down Expand Up @@ -204,6 +206,24 @@ services:
networks:
- opensearch-net

configuration-updater:
image: alpine/curl:latest
entrypoint: /bin/sh
command: /apply-configuration.sh
environment:
- S3_ACCESS_KEY=${S3_ACCESS_KEY}
- S3_SECRET_KEY=${S3_SECRET_KEY}
- OPENSEARCH_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD}
volumes:
- type: bind
source: configuration-updater/apply-configuration.sh
target: /apply-configuration.sh
depends_on:
opensearch-dashboards:
condition: service_healthy
networks:
- opensearch-net

volumes:
opensearch-data:
minio-data:
Expand Down
3 changes: 3 additions & 0 deletions docker/integ-test/metastore/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Copyright OpenSearch Contributors
# SPDX-License-Identifier: Apache-2.0

FROM openjdk:21-jdk-bookworm

WORKDIR /opt
Expand Down
3 changes: 3 additions & 0 deletions docker/integ-test/opensearch/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Copyright OpenSearch Contributors
# SPDX-License-Identifier: Apache-2.0

FROM opensearchproject/opensearch:latest

USER root
Expand Down
3 changes: 3 additions & 0 deletions docker/integ-test/opensearch/docker-command-runner.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
#!/bin/bash

# Copyright OpenSearch Contributors
# SPDX-License-Identifier: Apache-2.0

function process_files {
for cmd_file in `ls -1`; do
echo "$cmd_file" | grep -q 'cmd$'
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package com.amazonaws.services.emrserverless;

import com.amazonaws.ClientConfigurationFactory;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.spark.emrserverless;

import com.amazonaws.AmazonWebServiceClient;
Expand Down
3 changes: 3 additions & 0 deletions docker/integ-test/spark-submit/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Copyright OpenSearch Contributors
# SPDX-License-Identifier: Apache-2.0

FROM bitnami/spark:3.5.3
ARG FLINT_JAR
ARG PPL_JAR
Expand Down
75 changes: 3 additions & 72 deletions docker/integ-test/spark/spark-master-entrypoint.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
#!/bin/bash

# Copyright OpenSearch Contributors
# SPDX-License-Identifier: Apache-2.0

# Add passwd and shadow entries so that su works
grep -q '^spark:' /etc/passwd
if [ "$?" -ne "0" ]; then
Expand All @@ -13,76 +16,4 @@ fi
apt update
apt install -y curl

S3_ACCESS_KEY=`grep '^ACCESS_KEY=' /opt/bitnami/spark/s3.credentials | sed -e 's/^.*=//'`
S3_SECRET_KEY=`grep '^SECRET_KEY=' /opt/bitnami/spark/s3.credentials | sed -e 's/^.*=//'`

# Login to Minio
curl -q \
-c /tmp/minio-cookies.txt \
-H 'Content-Type: application/json' \
-d '{"accessKey": "minioadmin", "secretKey": "minioadmin"}' \
http://minio-S3:9001/api/v1/login
# Delete the test bucket
curl -b /tmp/minio-cookies.txt \
-X DELETE \
http://minio-S3:9001/api/v1/buckets/test
# Create the integ-test bucket
curl -q \
-b /tmp/minio-cookies.txt \
-X POST \
-H 'Content-Type: application/json' \
-d '{"name": "integ-test", "versioning": {"enabled": true, "excludePrefixes": [], "excludeFolders": false}, "locking": true}' \
http://minio-S3:9001/api/v1/buckets
# Create the access key
curl -q \
-b /tmp/minio-cookies.txt \
-X POST \
-H 'Content-Type: application/json' \
-d "{\"policy\": \"\", \"accessKey\": \"${S3_ACCESS_KEY}\", \"secretKey\": \"${S3_SECRET_KEY}\", \"description\": \"\", \"comment\": \"\", \"name\": \"\", \"expiry\": null}" \
http://minio-S3:9001/api/v1/service-account-credentials

# Login to OpenSearch Dashboards
echo ">>> Login to OpenSearch dashboards"
curl -q \
-c /tmp/opensearch-cookies.txt \
-X POST \
-H 'Content-Type: application/json' \
-H 'Osd-Version: 2.18.0' \
-H 'Osd-Xsrf: fetch' \
-d "{\"username\": \"admin\", \"password\": \"${OPENSEARCH_ADMIN_PASSWORD}\"}" \
'http://opensearch-dashboards:5601/auth/login?dataSourceId='
if [ "$?" -eq "0" ]; then
echo " >>> Login successful"
else
echo " >>> Login failed"
fi
# Create the S3/Glue datasource
echo ">>> Creating datasource"
curl -q \
-b /tmp/opensearch-cookies.txt \
-X POST \
-H 'Content-Type: application/json' \
-H 'Osd-Version: 2.18.0' \
-H 'Osd-Xsrf: fetch' \
-d '{"name": "mys3", "allowedRoles": [], "connector": "s3glue", "properties": {"glue.auth.type": "iam_role", "glue.auth.role_arn": "arn:aws:iam::123456789012:role/S3Access", "glue.indexstore.opensearch.uri": "http://opensearch:9200", "glue.indexstore.opensearch.auth": "basicauth", "glue.indexstore.opensearch.auth.username": "admin", "glue.indexstore.opensearch.auth.password": "C0rrecthorsebatterystaple."}}' \
http://opensearch-dashboards:5601/api/directquery/dataconnections
if [ "$?" -eq "0" ]; then
echo " >>> S3 datasource created"
else
echo " >>> Failed to create S3 datasource"
fi

echo ">>> Setting cluster settings"
curl -v \
-u "admin:${OPENSEARCH_ADMIN_PASSWORD}" \
-X PUT \
-H 'Content-Type: application/json' \
-d '{"persistent": {"plugins.query.executionengine.spark.config": "{\"applicationId\":\"integ-test\",\"executionRoleARN\":\"arn:aws:iam::xxxxx:role/emr-job-execution-role\",\"region\":\"us-west-2\", \"sparkSubmitParameters\": \"--conf spark.dynamicAllocation.enabled=false\"}"}}' \
http://opensearch:9200/_cluster/settings
if [ "$?" -eq "0" ]; then
echo " >>> Successfully set cluster settings"
else
echo " >>> Failed to set cluster settings"
fi

su spark -c '/opt/bitnami/scripts/spark/entrypoint.sh /opt/bitnami/scripts/spark/run.sh'

0 comments on commit 60eb7bc

Please sign in to comment.