Skip to content

Commit

Permalink
Add leaderelection module
Browse files Browse the repository at this point in the history
Add leaderelection module, based off of the leaderelection module in
kubernetes-client/python. The module has been altered slightly to
support asyncio.

Fixes #297
  • Loading branch information
JacobHenner committed Jan 6, 2025
1 parent 3ab6408 commit cce9f8b
Show file tree
Hide file tree
Showing 8 changed files with 925 additions and 0 deletions.
77 changes: 77 additions & 0 deletions examples/leaderelection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Copyright 2021 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio
import os
import uuid

from kubernetes_asyncio import config
from kubernetes_asyncio.client import api_client
from kubernetes_asyncio.leaderelection import electionconfig, leaderelection
from kubernetes_asyncio.leaderelection.resourcelock.configmaplock import (
ConfigMapLock,
)


async def main():
    """Run one round of leader election against a ConfigMap-based lock.

    Loads the kube-config named by the ``KUBECONFIG`` environment variable,
    builds an election configuration for a freshly generated candidate
    identity and blocks until the election round is over.
    """

    # Callback executed once this candidate has been elected leader.
    # ``print`` never suspends, so the CancelledError branch only becomes
    # reachable once real awaitable work is placed inside the ``try``.
    async def on_elected():
        try:
            print("I am leader")
        except asyncio.CancelledError:
            print(
                "Start function cancelled - lost leader election after becoming leader"
            )

    # Callback executed after leadership has been lost. Passing ``None``
    # instead makes the election config fall back to its default callback.
    async def on_deposed():
        print("I am no longer leader")

    # Authenticate using the kube-config file.
    # NOTE(review): an empty path is passed when KUBECONFIG is unset; confirm
    # that this is what the config loader expects.
    kubeconfig_path = os.environ.get("KUBECONFIG", "")
    await config.load_kube_config(config_file=kubeconfig_path)

    # --- Parameters a user is expected to adapt ---------------------------
    identity = uuid.uuid4()  # unique identifier of this candidate
    resource_name = "examplepython"  # name of the lock object to create
    resource_namespace = "default"  # Kubernetes namespace of the lock

    async with api_client.ApiClient() as client:
        lock = ConfigMapLock(resource_name, resource_namespace, identity, client)
        settings = electionconfig.Config(
            lock,
            lease_duration=17,
            renew_deadline=15,
            retry_period=5,
            onstarted_leading=on_elected,
            onstopped_leading=on_deposed,
        )

        # Enter leader election; returns once leadership was held and lost.
        elector = leaderelection.LeaderElection(settings)
        await elector.run()

        # A user may start another election round here or simply exit.
        print("Exited leader election")


if __name__ == "__main__":
    asyncio.run(main())
Empty file.
69 changes: 69 additions & 0 deletions kubernetes_asyncio/leaderelection/electionconfig.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Copyright 2021 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging

# Import-time side effect: importing this module configures the root logger
# at INFO level (basicConfig does nothing if the root logger already has
# handlers).
logging.basicConfig(level=logging.INFO)


class Config:
    """Validated settings for a single leader-election candidate.

    All arguments are checked on construction and a ``ValueError`` is raised
    for the first violated rule; nothing is stored for an invalid config
    beyond what was validated before the failure.

    Args:
        lock: Resource lock object used to hold the election record. Must not
            be ``None``. Its ``identity`` attribute is read by the default
            ``onstopped_leading`` callback.
        lease_duration: Lease length; must be strictly greater than
            ``renew_deadline`` and at least 1.
        renew_deadline: Time the leader keeps retrying a renewal; must be
            strictly greater than ``retry_period * jitter_factor`` and at
            least 1.
        retry_period: Pause between attempts; must be at least 1.
        onstarted_leading: Callback started when leadership is acquired.
            Must not be ``None``.
        onstopped_leading: Callback run when leadership is lost, or ``None``
            to use :meth:`on_stoppedleading_callback`.

    Raises:
        ValueError: If any of the rules above is violated.
    """

    def __init__(
        self,
        lock,
        lease_duration,
        renew_deadline,
        retry_period,
        onstarted_leading,
        onstopped_leading,
    ):
        # Safety margin applied to retry_period when validating renew_deadline.
        self.jitter_factor = 1.2

        if lock is None:
            raise ValueError("lock cannot be None")
        self.lock = lock

        if lease_duration <= renew_deadline:
            raise ValueError("lease_duration must be greater than renew_deadline")

        if renew_deadline <= self.jitter_factor * retry_period:
            raise ValueError(
                "renew_deadline must be greater than retry_period*jitter_factor"
            )

        # The checks below reject anything smaller than 1, so 1 itself is a
        # valid value; the messages say so explicitly.
        if lease_duration < 1:
            raise ValueError("lease_duration must be at least one")

        if renew_deadline < 1:
            raise ValueError("renew_deadline must be at least one")

        if retry_period < 1:
            raise ValueError("retry_period must be at least one")

        self.lease_duration = lease_duration
        self.renew_deadline = renew_deadline
        self.retry_period = retry_period

        if onstarted_leading is None:
            raise ValueError("callback onstarted_leading cannot be None")
        self.onstarted_leading = onstarted_leading

        if onstopped_leading is None:
            self.onstopped_leading = self.on_stoppedleading_callback
        else:
            self.onstopped_leading = onstopped_leading

    async def on_stoppedleading_callback(self):
        """Default callback for when the current candidate stops leading.

        Declared ``async`` because the election loop awaits the result of
        ``onstopped_leading()``; a plain function would return ``None`` and
        make that ``await`` fail with ``TypeError``.
        """
        logging.info("{} stopped leading".format(self.lock.identity))

Check warning on line 69 in kubernetes_asyncio/leaderelection/electionconfig.py

View check run for this annotation

Codecov / codecov/patch

kubernetes_asyncio/leaderelection/electionconfig.py#L69

Added line #L69 was not covered by tests
242 changes: 242 additions & 0 deletions kubernetes_asyncio/leaderelection/leaderelection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
# Copyright 2021 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio
import datetime
import json
import logging
import sys
import time
from http import HTTPStatus

from .leaderelectionrecord import LeaderElectionRecord

# Import-time side effect: importing this module configures the root logger
# at INFO level (basicConfig does nothing if the root logger already has
# handlers).
logging.basicConfig(level=logging.INFO)

"""
This package implements leader election using an annotation in a Kubernetes
object. The onstarted_leading coroutine is run as a task, which is cancelled if
the leader lock is obtained and then lost.
At first all candidates are considered followers. The one to create a lock or
update an existing lock first becomes the leader and remains so as long as it
keeps renewing its lease.
"""


class LeaderElection:
    """Drive leader election for one candidate on top of a resource lock.

    The candidate repeatedly tries to create or update the lock object
    described by ``election_config.lock``. Once it holds the lock it starts
    ``onstarted_leading`` as a task and keeps renewing the lease. When no
    renewal succeeds within ``renew_deadline`` seconds, the task is cancelled
    and ``onstopped_leading`` is invoked.
    """

    def __init__(self, election_config):
        """Store the election configuration and reset the observed state.

        Args:
            election_config: Object exposing ``lock``, ``lease_duration``,
                ``renew_deadline``, ``retry_period``, ``onstarted_leading``
                and ``onstopped_leading``.

        Raises:
            SystemExit: If ``election_config`` is ``None``.
        """
        if election_config is None:
            # NOTE(review): exiting the interpreter from library code is
            # harsh, but it is existing behavior callers may depend on.
            sys.exit("argument config not passed")

        # Latest record observed in the created lock object
        self.observed_record = None

        # The configuration set for this candidate
        self.election_config = election_config

        # Latest update time of the lock
        self.observed_time_milliseconds = 0

    # Point of entry to Leader election
    async def run(self):
        """Acquire the lock, lead until the lease is lost, then clean up.

        ``onstarted_leading`` is run as a task while the lease is renewed.
        The task is cancelled as soon as the renew loop ends for any reason
        (lease lost, an exception, or cancellation of ``run`` itself), so it
        never outlives the leadership. ``onstopped_leading`` is only invoked
        when the renew loop ends normally, i.e. the lease was lost.
        """
        import inspect

        # Try to create/ acquire a lock
        if await self.acquire():
            logging.info(
                "{} successfully acquired lease".format(
                    self.election_config.lock.identity
                )
            )

            task = asyncio.create_task(self.election_config.onstarted_leading())

            try:
                await self.renew_loop()
            finally:
                # Leader lock lost (or the renew loop was interrupted) -
                # cancel the onstarted_leading coroutine if it's still
                # running. This permits onstarted_leading to clean up state
                # that might not be accessible to onstopped_leading.
                task.cancel()

            # Failed to update lease, run onstopped_leading callback. This is
            # preserved in order to continue to provide an interface similar
            # to the one provided by `kubernetes-client/python`. The callback
            # may be a coroutine function or a plain function, so its result
            # is only awaited when it is awaitable.
            stopped = self.election_config.onstopped_leading()
            if inspect.isawaitable(stopped):
                await stopped

    async def acquire(self):
        """Retry until the lock is acquired.

        Returns:
            ``True`` once the lock is held. The loop has no other exit, so
            this coroutine never returns ``False``.
        """
        # Follower
        logging.info("{} is a follower".format(self.election_config.lock.identity))
        retry_period = self.election_config.retry_period

        while True:
            succeeded = await self.try_acquire_or_renew()

            if succeeded:
                return True

            await asyncio.sleep(retry_period)

    async def renew_loop(self):
        """Keep renewing the lease; return once a renewal deadline is missed.

        Each round retries ``try_acquire_or_renew`` every ``retry_period``
        seconds until it succeeds or ``renew_deadline`` seconds have passed.
        After a successful renewal the loop sleeps ``retry_period`` seconds
        and starts the next round.
        """
        # Leader
        logging.info(
            "Leader has entered renew loop and will try to update lease continuously"
        )

        retry_period = self.election_config.retry_period
        # Deadlines are tracked in milliseconds since the epoch.
        renew_deadline = self.election_config.renew_deadline * 1000

        while True:
            timeout = int(time.time() * 1000) + renew_deadline
            succeeded = False

            while int(time.time() * 1000) < timeout:
                succeeded = await self.try_acquire_or_renew()

                if succeeded:
                    break
                await asyncio.sleep(retry_period)

            if succeeded:
                await asyncio.sleep(retry_period)
                continue

            # failed to renew, return
            return

    async def try_acquire_or_renew(self):
        """Make one attempt to create, take over or renew the lock.

        Returns:
            ``True`` if this candidate holds the lock after the attempt,
            ``False`` otherwise (lookup error, failed create/update, or the
            lease of another holder has not expired yet).
        """
        now_timestamp = time.time()
        now = datetime.datetime.fromtimestamp(now_timestamp)

        # Check if lock is created
        lock_status, old_election_record = await self.election_config.lock.get(
            self.election_config.lock.name, self.election_config.lock.namespace
        )

        # create a default Election record for this candidate
        leader_election_record = LeaderElectionRecord(
            self.election_config.lock.identity,
            str(self.election_config.lease_duration),
            str(now),
            str(now),
        )

        # A lock is not created with that name, try to create one
        if not lock_status:
            # On failure the second value returned by lock.get() is an error
            # object whose JSON body carries the HTTP status code; anything
            # but 404 is a genuine lookup error.
            if json.loads(old_election_record.body)["code"] != HTTPStatus.NOT_FOUND:
                logging.info(
                    "Error retrieving resource lock {} as {}".format(
                        self.election_config.lock.name, old_election_record.reason
                    )
                )
                return False

            logging.info(
                "{} is trying to create a lock".format(
                    leader_election_record.holder_identity
                )
            )
            create_status = await self.election_config.lock.create(
                name=self.election_config.lock.name,
                namespace=self.election_config.lock.namespace,
                election_record=leader_election_record,
            )

            if not create_status:
                logging.info(
                    "{} Failed to create lock".format(
                        leader_election_record.holder_identity
                    )
                )
                return False

            self.observed_record = leader_election_record
            self.observed_time_milliseconds = int(time.time() * 1000)
            return True

        # A lock exists with that name
        # Validate old_election_record
        if old_election_record is None:
            # try to update lock with proper annotation and election record
            return await self.update_lock(leader_election_record)

        if (
            old_election_record.holder_identity is None
            or old_election_record.lease_duration is None
            or old_election_record.acquire_time is None
            or old_election_record.renew_time is None
        ):
            # try to update lock with proper annotation and election record
            return await self.update_lock(leader_election_record)

        # Report transitions
        if (
            self.observed_record
            and self.observed_record.holder_identity
            != old_election_record.holder_identity
        ):
            logging.info(
                "Leader has switched to {}".format(old_election_record.holder_identity)
            )

        # Remember when the stored record last changed; lease expiry is
        # measured from this local observation time.
        if (
            self.observed_record is None
            or old_election_record.__dict__ != self.observed_record.__dict__
        ):
            self.observed_record = old_election_record
            self.observed_time_milliseconds = int(time.time() * 1000)

        # If This candidate is not the leader and lease duration is yet to finish
        if (
            self.election_config.lock.identity != self.observed_record.holder_identity
            and self.observed_time_milliseconds
            + self.election_config.lease_duration * 1000
            > int(now_timestamp * 1000)
        ):
            logging.info(
                "yet to finish lease_duration, lease held by {} and has not expired".format(
                    old_election_record.holder_identity
                )
            )
            return False

        # If this candidate is the Leader
        if self.election_config.lock.identity == self.observed_record.holder_identity:
            # Leader updates renewTime, but keeps acquire_time unchanged
            leader_election_record.acquire_time = self.observed_record.acquire_time

        return await self.update_lock(leader_election_record)

    async def update_lock(self, leader_election_record):
        """Write ``leader_election_record`` to the lock object.

        Args:
            leader_election_record: Election record to store in the lock.

        Returns:
            ``True`` if the update succeeded (the record then becomes the
            observed record), ``False`` otherwise.
        """
        # Update object with latest election record
        update_status = await self.election_config.lock.update(
            self.election_config.lock.name,
            self.election_config.lock.namespace,
            leader_election_record,
        )

        if not update_status:
            logging.info(
                "{} failed to acquire lease".format(
                    leader_election_record.holder_identity
                )
            )
            return False

        self.observed_record = leader_election_record
        self.observed_time_milliseconds = int(time.time() * 1000)
        logging.info(
            "leader {} has successfully acquired lease".format(
                leader_election_record.holder_identity
            )
        )
        return True
Loading

0 comments on commit cce9f8b

Please sign in to comment.