Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a ranges axis #67

Merged
merged 4 commits into from
Nov 27, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions bucket/axisutils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# SPDX-License-Identifier: MIT
# Copyright (c) 2023-2024 Vypercore. All Rights Reserved

from typing import Callable

from pydantic import validate_call


Expand Down Expand Up @@ -131,3 +133,77 @@ def read_write():

def polarity():
return {"Negative": 1, "Positive": 0}

@validate_call
def ranges(
min_val: int = 0,
max_val: int | None = None,
num_ranges: int = None,
separate_min: bool = False,
separate_max: bool = False,
formatter: Callable[[int], str] = str,
):
"""
Creates an axis with a specified number of ranges from min to max values.
Decimal values will be used if max_val < 2**20, else they will be hexadecimal

eg. max_val=100, num_ranges=5, separate_max=True:
-> {
"0 -> 19": [0, 19],
"20 -> 39": [20, 39],
"40 -> 59": [40, 59],
"60 -> 79": [60, 79],
"80 -> 99": [80, 99],
"100": 100
}


Parameters:
min_val: Min value for range (default: 0)
max_val: Max value for range
num_ranges: Number of ranges to be split into
separate_min: Split out min val as separate bucket (default: False)
separate_max: Split out max val as separate bucket (default: False)
formatter: Formatter for the name of each bucket. (default: str)

Returns: Dict of {bucket_name: value}

"""

assert max_val is not None, "Max_val must be provided"
assert (
min_val < max_val
), f"min_val ({min_val} must be lower than max_val ({max_val}))"

# assert each range is 1+ in size
total_range = max_val - min_val
total_range -= 1 if separate_min else 0
total_range -= 1 if separate_max else 0
assert (
(total_range / num_ranges) > 1.0
), f"Total range is too small to have {num_ranges} ranges. Need at least 1 value per range."

ranges = {}
if separate_min:
name = formatter(min_val)
ranges[name] = min_val
min_val += 1
if separate_max:
name = formatter(max_val)
ranges[name] = max_val
max_val -= 1

step = (total_range + 1) // num_ranges
remainder = (total_range + 1) % num_ranges

start = min_val
for _ in range(num_ranges):
end = start + step - 1
if remainder > 0:
end += 1
remainder -= 1
name = f"{formatter(start)} -> {formatter(end)}"
ranges[name] = [start, end]
start = end + 1

return ranges