From 1152ab3283358d2c51ee5894d97980ed254b9120 Mon Sep 17 00:00:00 2001 From: Sean Dawson <dawsons@ansto.gov.au> Date: Thu, 21 Nov 2024 09:03:45 +1100 Subject: [PATCH 1/6] Redact anything potentially sensitive from config logging - It might be overkill to redact a lot of the keys, but its probably better to be safe than sorry --- bluesky_kafka/consume.py | 3 +- bluesky_kafka/logging_utils.py | 125 ++++++++++++++++++++++ bluesky_kafka/produce.py | 3 +- bluesky_kafka/tests/test_logging_utils.py | 29 +++++ 4 files changed, 158 insertions(+), 2 deletions(-) create mode 100644 bluesky_kafka/logging_utils.py create mode 100644 bluesky_kafka/tests/test_logging_utils.py diff --git a/bluesky_kafka/consume.py b/bluesky_kafka/consume.py index 6ded6f0..8fb31c0 100644 --- a/bluesky_kafka/consume.py +++ b/bluesky_kafka/consume.py @@ -1,5 +1,6 @@ import logging +from bluesky_kafka.logging_utils import redact_config import msgpack import msgpack_numpy as mpn @@ -120,7 +121,7 @@ def __init__( logger.debug( "BlueskyConsumer configuration:\n%s", - self._consumer_config, + redact_config(self._consumer_config), ) logger.debug("subscribing to Kafka topic(s): %s", topics) diff --git a/bluesky_kafka/logging_utils.py b/bluesky_kafka/logging_utils.py new file mode 100644 index 0000000..076284b --- /dev/null +++ b/bluesky_kafka/logging_utils.py @@ -0,0 +1,125 @@ +# See https://docs.confluent.io/platform/current/installation/configuration/producer-configs.html and +# https://docs.confluent.io/platform/current/installation/configuration/consumer-configs.html +CONFIG_KEY_WHITELIST = { + # -- Common Keys + "bootstrap.servers", + # "ssl.key.password", + # "ssl.keystore.certificate.chain", + # "ssl.keystore.key", + # "ssl.keystore.location", + # "ssl.keystore.password", + # "ssl.truststore.certificates", + # "ssl.truststore.location", + # "ssl.truststore.password", + "client.dns.lookup", + "client.id", + "connections.max.idle.ms", + "receive.buffer.bytes", + "request.timeout.ms", + # "sasl.client.callback.handler.class", + # "sasl.jaas.config", + # "sasl.kerberos.service.name", + # "sasl.login.callback.handler.class", + # "sasl.login.class", + # "sasl.mechanism", + # "sasl.oauthbearer.jwks.endpoint.url", + # "sasl.oauthbearer.token.endpoint.url", + "security.protocol", + "send.buffer.bytes", + "socket.connection.setup.timeout.max.ms", + "socket.connection.setup.timeout.ms", + # "ssl.enabled.protocols", + # "ssl.keystore.type", + # "ssl.protocol", + # "ssl.provider", + # "ssl.truststore.type", + "auto.include.jmx.reporter", + "enable.metrics.push", + "interceptor.classes", + "metadata.max.age.ms", + "metric.reporters", + "metrics.num.samples", + "metrics.recording.level", + "metrics.sample.window.ms", + "reconnect.backoff.max.ms", + "reconnect.backoff.ms", + "retry.backoff.max.ms", + "retry.backoff.ms", + # "sasl.kerberos.kinit.cmd", + # "sasl.kerberos.min.time.before.relogin", + # "sasl.kerberos.ticket.renew.jitter", + # "sasl.kerberos.ticket.renew.window.factor", + # "sasl.login.connect.timeout.ms", + # "sasl.login.read.timeout.ms", + # "sasl.login.refresh.buffer.seconds", + # "sasl.login.refresh.min.period.seconds", + # "sasl.login.refresh.window.factor", + # "sasl.login.refresh.window.jitter", + # "sasl.login.retry.backoff.max.ms", + # "sasl.login.retry.backoff.ms", + # "sasl.oauthbearer.clock.skew.seconds", + # "sasl.oauthbearer.expected.audience", + # "sasl.oauthbearer.expected.issuer", + # "sasl.oauthbearer.jwks.endpoint.refresh.ms", + # "sasl.oauthbearer.jwks.endpoint.retry.backoff.max.ms", + # "sasl.oauthbearer.jwks.endpoint.retry.backoff.ms", + # "sasl.oauthbearer.scope.claim.name", + # "sasl.oauthbearer.sub.claim.name", + # "security.providers", + # "ssl.cipher.suites", + # "ssl.endpoint.identification.algorithm", + # "ssl.engine.factory.class", + # "ssl.keymanager.algorithm", + # "ssl.secure.random.implementation", + # "ssl.trustmanager.algorithm", + # -- Consumer Keys + "key.deserializer", + "value.deserializer", + "fetch.min.bytes", + "group.id", + "group.protocol", + "heartbeat.interval.ms", + "max.partition.fetch.bytes", + "session.timeout.ms", + "allow.auto.create.topics", + "auto.offset.reset", + "default.api.timeout.ms", + "enable.auto.commit", + "exclude.internal.topics", + "fetch.max.bytes", + "group.instance.id", + "group.remote.assignor", + "isolation.level", + "max.poll.interval.ms", + "max.poll.records", + "partition.assignment.strategy", + "auto.commit.interval.ms", + "check.crcs", + "client.rack", + "fetch.max.wait.ms", + # -- Producer Keys + "key.serializer", + "value.serializer", + "buffer.memory", + "compression.type", + "retries", + "batch.size", + "delivery.timeout.ms", + "linger.ms", + "max.block.ms", + "max.request.size", + "partitioner.class", + "partitioner.ignore.keys", + "acks", + "enable.idempotence", + "max.in.flight.requests.per.connection", + "metadata.max.idle.ms", + "partitioner.adaptive.partitioning.enable", + "partitioner.availability.timeout.ms", + "transaction.timeout.ms", + "transactional.id", +} + + +def redact_config(config): + return {k: v if k in CONFIG_KEY_WHITELIST else "****" for k, v in config.items()} diff --git a/bluesky_kafka/produce.py b/bluesky_kafka/produce.py index 1b284b0..26975f0 100644 --- a/bluesky_kafka/produce.py +++ b/bluesky_kafka/produce.py @@ -1,5 +1,6 @@ import logging +from bluesky_kafka.logging_utils import redact_config import msgpack import msgpack_numpy as mpn @@ -119,7 +120,7 @@ def __init__( else: self._producer_config["bootstrap.servers"] = ",".join(bootstrap_servers) - logger.debug("producer configuration: %s", self._producer_config) + logger.debug("producer configuration: %s", redact_config(self._producer_config)) if on_delivery is None: self.on_delivery = default_delivery_report diff --git a/bluesky_kafka/tests/test_logging_utils.py b/bluesky_kafka/tests/test_logging_utils.py new file mode 100644 index 0000000..4f23ecd --- /dev/null +++ b/bluesky_kafka/tests/test_logging_utils.py @@ -0,0 +1,29 @@ +from bluesky_kafka.logging_utils import redact_config + + +def test_redact_config(): + example_config = { + "bootstrap.servers": "some-kafka-server:9092", + "sasl.mechanisms": "PLAIN", + "sasl.username": "brokerUser", + "security.protocol": "SASL_SSL", + "ssl.ca.location": "/opt/kafka/config/certs/kafka-tls-ca", + "sasl.password": "SECRET PASSWORD", + "acks": "all", # Producer key + "enable.idempotence": "false", + "group.id": "some-group", # Consumer key + } + + reacted_example_config = redact_config(example_config) + + assert reacted_example_config == { + "bootstrap.servers": "some-kafka-server:9092", + "sasl.mechanisms": "****", + "sasl.username": "****", + "security.protocol": "SASL_SSL", + "ssl.ca.location": "****", + "sasl.password": "****", + "acks": "all", + "enable.idempotence": "false", + "group.id": "some-group", + } From c4962e6913ca5c8c243185e48290fa28485393e6 Mon Sep 17 00:00:00 2001 From: Sean Dawson <dawsons@ansto.gov.au> Date: Thu, 21 Nov 2024 09:12:42 +1100 Subject: [PATCH 2/6] Use consistent react function - I found another place where the password is getting redacted so I thought it might be good to use the same function to do it everywhere --- bluesky_kafka/consume.py | 4 +--- bluesky_kafka/produce.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/bluesky_kafka/consume.py b/bluesky_kafka/consume.py index 8fb31c0..bf5797e 100644 --- a/bluesky_kafka/consume.py +++ b/bluesky_kafka/consume.py @@ -130,9 +130,7 @@ def __init__( self.closed = False def __str__(self): - safe_config = dict(self._consumer_config) - if "sasl.password" in safe_config: - safe_config["sasl.password"] = "****" + safe_config = redact_config(self._consumer_config) return ( f"{type(self)}(" f"topics={self._topics}, " diff --git a/bluesky_kafka/produce.py b/bluesky_kafka/produce.py index 26975f0..6d4be0a 100644 --- a/bluesky_kafka/produce.py +++ b/bluesky_kafka/produce.py @@ -131,9 +131,7 @@ def __init__( self._serializer = serializer def __str__(self): - safe_config = dict(self._producer_config) - if "sasl.password" in safe_config: - safe_config["sasl.password"] = "****" + safe_config = redact_config(self._producer_config) return ( f"{type(self)}(" f"topic='{self.topic}', " From 021d77c10b4bf1d9b81681b046189e0e9b9ebfa5 Mon Sep 17 00:00:00 2001 From: Sean Dawson <dawsons@ansto.gov.au> Date: Thu, 21 Nov 2024 09:12:57 +1100 Subject: [PATCH 3/6] Add some documentation to redact_config --- bluesky_kafka/logging_utils.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/bluesky_kafka/logging_utils.py b/bluesky_kafka/logging_utils.py index 076284b..793b5ae 100644 --- a/bluesky_kafka/logging_utils.py +++ b/bluesky_kafka/logging_utils.py @@ -1,16 +1,9 @@ # See https://docs.confluent.io/platform/current/installation/configuration/producer-configs.html and # https://docs.confluent.io/platform/current/installation/configuration/consumer-configs.html CONFIG_KEY_WHITELIST = { + ## # -- Common Keys "bootstrap.servers", - # "ssl.key.password", - # "ssl.keystore.certificate.chain", - # "ssl.keystore.key", - # "ssl.keystore.location", - # "ssl.keystore.password", - # "ssl.truststore.certificates", - # "ssl.truststore.location", - # "ssl.truststore.password", "client.dns.lookup", "client.id", "connections.max.idle.ms", @@ -72,7 +65,8 @@ # "ssl.keymanager.algorithm", # "ssl.secure.random.implementation", # "ssl.trustmanager.algorithm", - # -- Consumer Keys + ## + # -- Consumer Only Keys "key.deserializer", "value.deserializer", "fetch.min.bytes", @@ -97,7 +91,8 @@ "check.crcs", "client.rack", "fetch.max.wait.ms", - # -- Producer Keys + ## + # -- Producer Only Keys "key.serializer", "value.serializer", "buffer.memory", @@ -122,4 +117,13 @@ def redact_config(config): + """ + Takes a consumer/producer config dictionary and makes sure that anything potentially sensitive + is masked out by asterisks so it can be safely logged. + + Parameters + ---------- + config : dict + Dictionary of configuration information used to construct a consumer or producer. + """ return {k: v if k in CONFIG_KEY_WHITELIST else "****" for k, v in config.items()} From 162ff49c6511a6bb2ab0602a34e9075cf719d64d Mon Sep 17 00:00:00 2001 From: Sean Dawson <dawsons@ansto.gov.au> Date: Thu, 21 Nov 2024 09:16:42 +1100 Subject: [PATCH 4/6] docker-compose is no longer its own binary - It lives under docker --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 47100f4..ac77e03 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -19,7 +19,7 @@ jobs: - uses: actions/checkout@v2 - name: start Kafka and Zookeeper - run: docker-compose -f scripts/bitnami-kafka-docker-compose.yml up -d + run: docker compose -f scripts/bitnami-kafka-docker-compose.yml up -d - name: is Kafka running? run: docker ps -a From 07c7a5367495af7b54a92e278060b0bf5377e812 Mon Sep 17 00:00:00 2001 From: Sean Dawson <dawsons@ansto.gov.au> Date: Thu, 21 Nov 2024 09:18:42 +1100 Subject: [PATCH 5/6] Test more python versions --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index ac77e03..0a071dd 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -11,7 +11,7 @@ jobs: strategy: matrix: - python-version: ["3.8", "3.9", "3.10"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] fail-fast: false steps: From 2892e89432c2889a6c0b3bdfdc67a6e796eeb931 Mon Sep 17 00:00:00 2001 From: Sean Dawson <dawsons@ansto.gov.au> Date: Thu, 21 Nov 2024 09:27:32 +1100 Subject: [PATCH 6/6] Update test script to match CI --- scripts/start_kafka.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/start_kafka.sh b/scripts/start_kafka.sh index df2bafa..9f201cf 100644 --- a/scripts/start_kafka.sh +++ b/scripts/start_kafka.sh @@ -1 +1 @@ -sudo docker-compose -f bitnami-kafka-docker-compose.yml up \ No newline at end of file +sudo docker compose -f bitnami-kafka-docker-compose.yml up \ No newline at end of file