Skip to content

Commit

Permalink
Add leaderelection module
Browse files Browse the repository at this point in the history
Add leaderelection module, based off of the leaderelection module in
kubernetes-client/python. The module has been altered slightly to
support asyncio.

Fixes #297
  • Loading branch information
JacobHenner committed Jan 5, 2025
1 parent 3ab6408 commit c7de87d
Show file tree
Hide file tree
Showing 8 changed files with 921 additions and 0 deletions.
Empty file.
68 changes: 68 additions & 0 deletions kubernetes_asyncio/leaderelection/electionconfig.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Copyright 2021 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import sys

logging.basicConfig(level=logging.INFO)


class Config:
    """Validated settings for a single leader-election candidate.

    The constructor checks every argument and terminates via ``sys.exit``
    (i.e. raises ``SystemExit``) with a descriptive message on the first
    invalid one.

    Attributes:
        jitter_factor: Multiplier applied to ``retry_period`` when validating
            ``renew_deadline`` (fixed at 1.2).
        lock: Resource lock object; must expose ``identity``.
        lease_duration: How long a lease is considered valid.
        renew_deadline: How long the leader keeps retrying a renewal.
        retry_period: Pause between acquire/renew attempts.
        onstarted_leading: Coroutine function run once leadership is gained.
        onstopped_leading: Callback run once leadership is lost.

    The three durations are consumed as seconds by ``LeaderElection``.
    """

    # Validate config, exit if an error is detected
    def __init__(
        self,
        lock,
        lease_duration,
        renew_deadline,
        retry_period,
        onstarted_leading,
        onstopped_leading,
    ):
        self.jitter_factor = 1.2

        if lock is None:
            sys.exit("lock cannot be None")
        self.lock = lock

        if lease_duration <= renew_deadline:
            sys.exit("lease_duration must be greater than renew_deadline")

        if renew_deadline <= self.jitter_factor * retry_period:
            sys.exit("renew_deadline must be greater than retry_period*jitter_factor")

        # The checks below accept exactly 1, so the messages say "at least".
        if lease_duration < 1:
            sys.exit("lease_duration must be at least one")

        if renew_deadline < 1:
            sys.exit("renew_deadline must be at least one")

        if retry_period < 1:
            sys.exit("retry_period must be at least one")

        self.lease_duration = lease_duration
        self.renew_deadline = renew_deadline
        self.retry_period = retry_period

        if onstarted_leading is None:
            sys.exit("callback onstarted_leading cannot be None")
        self.onstarted_leading = onstarted_leading

        # Fall back to the built-in callback when the caller supplies none.
        if onstopped_leading is None:
            self.onstopped_leading = self.on_stoppedleading_callback
        else:
            self.onstopped_leading = onstopped_leading

    # Default callback for when the current candidate, if a leader, stops
    # leading. It is a coroutine function because the election loop awaits
    # the result of onstopped_leading(); a plain function returning None
    # would make that await fail with TypeError.
    async def on_stoppedleading_callback(self):
        logging.info("{} stopped leading".format(self.lock.identity))

Check warning on line 68 in kubernetes_asyncio/leaderelection/electionconfig.py

View check run for this annotation

Codecov / codecov/patch

kubernetes_asyncio/leaderelection/electionconfig.py#L68

Added line #L68 was not covered by tests
77 changes: 77 additions & 0 deletions kubernetes_asyncio/leaderelection/example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Copyright 2021 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio
import os
import uuid

Check warning on line 17 in kubernetes_asyncio/leaderelection/example.py

View check run for this annotation

Codecov / codecov/patch

kubernetes_asyncio/leaderelection/example.py#L15-L17

Added lines #L15 - L17 were not covered by tests

from kubernetes_asyncio import config
from kubernetes_asyncio.client import api_client
from kubernetes_asyncio.leaderelection import electionconfig, leaderelection
from kubernetes_asyncio.leaderelection.resourcelock.configmaplock import (

Check warning on line 22 in kubernetes_asyncio/leaderelection/example.py

View check run for this annotation

Codecov / codecov/patch

kubernetes_asyncio/leaderelection/example.py#L19-L22

Added lines #L19 - L22 were not covered by tests
ConfigMapLock,
)


async def main():
    """Run one round of leader election using a ConfigMap-backed lock.

    Loads the kube-config, builds an election ``Config`` with example
    callbacks, and awaits ``LeaderElection.run()`` until leadership is lost.
    """
    # Authenticate using config file. Pass None (not an empty path) when
    # KUBECONFIG is unset or empty so the loader can apply its own default
    # location instead of being handed a path that matches no file.
    await config.load_kube_config(config_file=os.environ.get("KUBECONFIG") or None)

    # Parameters required from the user

    # A unique identifier for this candidate
    candidate_id = uuid.uuid4()

    # Name of the lock object to be created
    lock_name = "examplepython"

    # Kubernetes namespace
    lock_namespace = "default"

    # The function that a user wants to run once a candidate is elected as a
    # leader. Cancellation is supported (when a held leader lock is lost).
    async def example_start_func():
        try:
            print("I am leader")
        except asyncio.CancelledError:
            print(
                "Start function cancelled - lost leader election after becoming leader"
            )

    async def example_end_func():
        print("I am no longer leader")

    # A user can choose not to provide any callbacks for what to do when a
    # candidate fails to lead - onStoppedLeading()
    # In that case, a default callback function will be used

    async with api_client.ApiClient() as apic:
        # Create config
        leader_election_config = electionconfig.Config(
            ConfigMapLock(lock_name, lock_namespace, candidate_id, apic),
            lease_duration=17,
            renew_deadline=15,
            retry_period=5,
            onstarted_leading=example_start_func,
            onstopped_leading=example_end_func,
        )

        # Enter leader election
        await leaderelection.LeaderElection(leader_election_config).run()
        # User can choose to do another round of election or simply exit
        print("Exited leader election")

Check warning on line 73 in kubernetes_asyncio/leaderelection/example.py

View check run for this annotation

Codecov / codecov/patch

kubernetes_asyncio/leaderelection/example.py#L73

Added line #L73 was not covered by tests


if __name__ == "__main__":
asyncio.run(main())

Check warning on line 77 in kubernetes_asyncio/leaderelection/example.py

View check run for this annotation

Codecov / codecov/patch

kubernetes_asyncio/leaderelection/example.py#L76-L77

Added lines #L76 - L77 were not covered by tests
239 changes: 239 additions & 0 deletions kubernetes_asyncio/leaderelection/leaderelection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
# Copyright 2021 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio
import datetime
import json
import logging
import sys
import time
from http import HTTPStatus

from .leaderelectionrecord import LeaderElectionRecord

logging.basicConfig(level=logging.INFO)

"""
This package implements leader election using an annotation in a Kubernetes
object. The onstarted_leading coroutine is run as a task, which is cancelled if
the leader lock is obtained and then lost.
At first all candidates are considered followers. The one to create a lock or
update an existing lock first becomes the leader and remains so for as long as
it keeps renewing its lease.
"""


class LeaderElection:
    """Drive leader election for one candidate against a resource lock.

    The candidate repeatedly tries to create or take over the lock described
    by ``election_config.lock``. Once it holds the lock it starts
    ``onstarted_leading`` as a task and keeps renewing the lease; when a
    renewal cannot be completed within ``renew_deadline`` the task is
    cancelled and ``onstopped_leading`` is invoked.
    """

    def __init__(self, election_config):
        """Store the configuration; exits via ``sys.exit`` if it is None."""
        if election_config is None:
            sys.exit("argument config not passed")

        # Latest record observed in the created lock object
        self.observed_record = None

        # The configuration set for this candidate
        self.election_config = election_config

        # Latest update time of the lock
        self.observed_time_milliseconds = 0

    # Point of entry to Leader election
    async def run(self):
        """Acquire the lock, lead until the lease is lost, then clean up.

        The ``onstarted_leading`` task is always cancelled and awaited once
        the renew loop ends, including when the loop raises or this coroutine
        is cancelled. ``onstopped_leading`` is called only after the renew
        loop returns normally; it may be a coroutine function or a plain
        function.
        """
        # Function-scope import: only this method needs it.
        import inspect

        # Try to create/ acquire a lock
        if await self.acquire():
            logging.info(
                "{} successfully acquired lease".format(
                    self.election_config.lock.identity
                )
            )

            task = asyncio.create_task(self.election_config.onstarted_leading())

            try:
                await self.renew_loop()
            finally:
                # Leader lock lost (or the loop failed) - stop the
                # onstarted_leading coroutine if it's still running.
                await self._stop_leading_task(task)

            # Failed to update lease, run OnStoppedLeading callback. Only
            # await when the callback actually produced an awaitable, so a
            # synchronous callback does not raise TypeError here.
            outcome = self.election_config.onstopped_leading()
            if inspect.isawaitable(outcome):
                await outcome

    @staticmethod
    async def _stop_leading_task(task):
        """Cancel ``task``, wait for it to finish, and log any failure."""
        task.cancel()
        # return_exceptions=True keeps the task's own CancelledError or
        # failure from propagating, while a cancellation aimed at the
        # caller still interrupts this await.
        await asyncio.gather(task, return_exceptions=True)
        if not task.cancelled() and task.exception() is not None:
            logging.error(
                "onstarted_leading raised {!r}".format(task.exception())
            )

    async def acquire(self):
        """Retry until the lock is obtained; returns True once it is."""
        # Follower
        logging.info("{} is a follower".format(self.election_config.lock.identity))
        retry_period = self.election_config.retry_period

        while True:
            succeeded = await self.try_acquire_or_renew()

            if succeeded:
                return True

            await asyncio.sleep(retry_period)

    async def renew_loop(self):
        """Renew the lease periodically; return when a renewal round fails.

        Each round retries for up to ``renew_deadline`` seconds, sleeping
        ``retry_period`` seconds between attempts.
        """
        # Leader
        logging.info(
            "Leader has entered renew loop and will try to update lease continuously"
        )

        retry_period = self.election_config.retry_period
        # Deadline arithmetic below is done in milliseconds.
        renew_deadline = self.election_config.renew_deadline * 1000

        while True:
            timeout = int(time.time() * 1000) + renew_deadline
            succeeded = False

            while int(time.time() * 1000) < timeout:
                succeeded = await self.try_acquire_or_renew()

                if succeeded:
                    break
                await asyncio.sleep(retry_period)

            if succeeded:
                await asyncio.sleep(retry_period)
                continue

            # failed to renew, return
            return

    async def try_acquire_or_renew(self):
        """Make one attempt to create, take over, or renew the lock.

        Returns:
            True if this candidate holds the lock after the attempt, False
            otherwise (lookup error, create/update failure, or the lease is
            still held by another candidate).
        """
        now_timestamp = time.time()
        now = datetime.datetime.fromtimestamp(now_timestamp)

        # Check if lock is created
        lock_status, old_election_record = await self.election_config.lock.get(
            self.election_config.lock.name, self.election_config.lock.namespace
        )

        # create a default Election record for this candidate
        leader_election_record = LeaderElectionRecord(
            self.election_config.lock.identity,
            str(self.election_config.lease_duration),
            str(now),
            str(now),
        )

        # A lock is not created with that name, try to create one
        if not lock_status:
            # On failure the second value is the error response; anything
            # other than "not found" is treated as a retrieval error.
            if json.loads(old_election_record.body)["code"] != HTTPStatus.NOT_FOUND:
                logging.info(
                    "Error retrieving resource lock {} as {}".format(
                        self.election_config.lock.name, old_election_record.reason
                    )
                )
                return False

            logging.info(
                "{} is trying to create a lock".format(
                    leader_election_record.holder_identity
                )
            )
            create_status = await self.election_config.lock.create(
                name=self.election_config.lock.name,
                namespace=self.election_config.lock.namespace,
                election_record=leader_election_record,
            )

            if create_status is False:
                logging.info(
                    "{} Failed to create lock".format(
                        leader_election_record.holder_identity
                    )
                )
                return False

            self.observed_record = leader_election_record
            self.observed_time_milliseconds = int(time.time() * 1000)
            return True

        # A lock exists with that name
        # Validate old_election_record
        if old_election_record is None:
            # try to update lock with proper annotation and election record
            return await self.update_lock(leader_election_record)

        if (
            old_election_record.holder_identity is None
            or old_election_record.lease_duration is None
            or old_election_record.acquire_time is None
            or old_election_record.renew_time is None
        ):
            # try to update lock with proper annotation and election record
            return await self.update_lock(leader_election_record)

        # Report transitions
        if (
            self.observed_record
            and self.observed_record.holder_identity
            != old_election_record.holder_identity
        ):
            logging.info(
                "Leader has switched to {}".format(old_election_record.holder_identity)
            )

        # Remember the record and when it was first seen; the local
        # observation time is what the expiry check below is based on.
        if (
            self.observed_record is None
            or old_election_record.__dict__ != self.observed_record.__dict__
        ):
            self.observed_record = old_election_record
            self.observed_time_milliseconds = int(time.time() * 1000)

        # If This candidate is not the leader and lease duration is yet to finish
        if (
            self.election_config.lock.identity != self.observed_record.holder_identity
            and self.observed_time_milliseconds
            + self.election_config.lease_duration * 1000
            > int(now_timestamp * 1000)
        ):
            logging.info(
                "yet to finish lease_duration, lease held by {} and has not expired".format(
                    old_election_record.holder_identity
                )
            )
            return False

        # If this candidate is the Leader
        if self.election_config.lock.identity == self.observed_record.holder_identity:
            # Leader updates renewTime, but keeps acquire_time unchanged
            leader_election_record.acquire_time = self.observed_record.acquire_time

        return await self.update_lock(leader_election_record)

    async def update_lock(self, leader_election_record):
        """Write ``leader_election_record`` to the lock.

        Returns:
            True if the update succeeded (the record then becomes the
            observed record), False otherwise.
        """
        # Update object with latest election record
        update_status = await self.election_config.lock.update(
            self.election_config.lock.name,
            self.election_config.lock.namespace,
            leader_election_record,
        )

        if update_status is False:
            logging.info(
                "{} failed to acquire lease".format(
                    leader_election_record.holder_identity
                )
            )
            return False

        self.observed_record = leader_election_record
        self.observed_time_milliseconds = int(time.time() * 1000)
        logging.info(
            "leader {} has successfully acquired lease".format(
                leader_election_record.holder_identity
            )
        )
        return True
Loading

0 comments on commit c7de87d

Please sign in to comment.