Skip to content

Commit

Permalink
Revert "Put filters into separate config key"
Browse files Browse the repository at this point in the history
This reverts commit 1bebecf.
  • Loading branch information
victorlin committed Apr 12, 2024
1 parent 1bebecf commit 5483a02
Show file tree
Hide file tree
Showing 30 changed files with 511 additions and 721 deletions.
132 changes: 39 additions & 93 deletions generate-subsampling-config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,40 +12,12 @@
Path(SUBSAMPLING_CONFIG_DIR).mkdir(exist_ok=True)


class Filters:
    """Filter options that can be attached to a sample.

    Each annotated attribute below is an optional setting. Options that are
    not passed to the constructor are stored as ``None`` and are omitted
    from the dictionary built by :meth:`to_dict`.
    """

    min_date: Optional[str]
    max_date: Optional[str]
    excludes: Optional[List[str]]
    # TODO: add more options

    def __init__(self, **kwargs):
        """Store the given options as instance attributes.

        Args:
            **kwargs: Any of the annotated option names (``min_date``,
                ``max_date``, ``excludes``).

        Raises:
            Exception: If a keyword is not one of the annotated options.
        """
        # Read the declared options from the class, not the instance.
        allowed = type(self).__annotations__

        # Validate what the caller actually passed. Checking the annotation
        # names against themselves could never detect an unknown option.
        for option in kwargs:
            if option not in allowed:
                raise Exception(f'Option {option!r} not allowed.')

        # Initialize instance attributes; anything not supplied is None.
        # TODO: Check types
        for option in allowed:
            setattr(self, option, kwargs.get(option))

    def to_dict(self):
        """Return the options that are set (truthy) as a plain dict.

        Note that the ``excludes`` attribute is emitted under the key
        ``'exclude'``.
        """
        options = dict()

        if self.min_date:
            options['min_date'] = self.min_date

        if self.max_date:
            options['max_date'] = self.max_date

        if self.excludes:
            options['exclude'] = self.excludes

        return options


class Sample:
filters: Optional[Filters]
group_by: Optional[List[str]]
weight: Optional[int]
min_date: Optional[str]
max_date: Optional[str]
excludes: Optional[List[str]]
disable_probabilistic_sampling: Optional[bool]
priorities: Optional[Any]

Expand All @@ -63,18 +35,24 @@ def __init__(self, name, **kwargs):
def to_dict(self):
options = dict()

if self.filters:
options['filters'] = self.filters.to_dict()

if self.group_by:
options['group_by'] = self.group_by

if self.weight:
options['weight'] = self.weight

if self.min_date:
options['min_date'] = self.min_date

if self.max_date:
options['max_date'] = self.max_date

if self.disable_probabilistic_sampling:
options['disable_probabilistic_sampling'] = True

if self.excludes:
options['exclude'] = self.excludes

return options


Expand Down Expand Up @@ -205,7 +183,7 @@ def write_region_time_builds():
'month',
],
weight=weights[location],
filters=Filters(excludes=excludes[location]),
excludes=excludes[location],
))
else:
# Early sequences
Expand All @@ -218,10 +196,8 @@ def write_region_time_builds():
'month',
],
weight=(WEIGHT_EARLY * weights[location]),
filters=Filters(
max_date=time,
excludes=excludes[location],
),
max_date=time,
excludes=excludes[location],
))

# Recent sequences
Expand All @@ -233,10 +209,8 @@ def write_region_time_builds():
GROUP_BY_RECENT_TEMPORAL_RESOLUTION[time],
],
weight=(WEIGHT_RECENT * weights[location]),
filters=Filters(
min_date=time,
excludes=excludes[location],
),
min_date=time,
excludes=excludes[location],
))

# Asia gets special treatment because two countries must be weighted differently.
Expand Down Expand Up @@ -276,9 +250,7 @@ def write_region_time_builds():
'month',
],
weight=(WEIGHT_FOCAL * weights[location]),
filters=Filters(
excludes=excludes[location],
),
excludes=excludes[location],
))

# Contextual sequences from the rest of the world
Expand All @@ -290,9 +262,7 @@ def write_region_time_builds():
'month',
],
weight=WEIGHT_CONTEXTUAL,
filters=Filters(
excludes=['region=Asia'],
),
excludes=['region=Asia'],
))

else:
Expand All @@ -307,10 +277,8 @@ def write_region_time_builds():
'month',
],
weight=(WEIGHT_EARLY * WEIGHT_FOCAL * weights[location]),
filters=Filters(
max_date=time,
excludes=excludes[location],
),
max_date=time,
excludes=excludes[location],
))

# Early contextual sequences from the rest of the world
Expand All @@ -322,10 +290,8 @@ def write_region_time_builds():
'month',
],
weight=(WEIGHT_EARLY * WEIGHT_CONTEXTUAL),
filters=Filters(
max_date=time,
excludes=['region=Asia'],
),
max_date=time,
excludes=['region=Asia'],
))

# Recent focal sequences for region
Expand All @@ -340,10 +306,8 @@ def write_region_time_builds():
'month',
],
weight=(WEIGHT_RECENT * WEIGHT_FOCAL * weights[location]),
filters=Filters(
min_date=time,
excludes=excludes[location],
),
min_date=time,
excludes=excludes[location],
))

# Recent contextual sequences from the rest of the world
Expand All @@ -356,10 +320,8 @@ def write_region_time_builds():
'month',
],
weight=(WEIGHT_RECENT * WEIGHT_CONTEXTUAL),
filters=Filters(
min_date=time,
excludes=['region=Asia'],
),
min_date=time,
excludes=['region=Asia'],
))

# Everything else is a "standard" region with dynamic geographical/temporal grouping.
Expand All @@ -376,9 +338,7 @@ def write_region_time_builds():
'month',
],
weight=WEIGHT_FOCAL,
filters=Filters(
excludes=[f'region!={region}'],
),
excludes=[f'region!={region}'],
))

# Contextual sequences from the rest of the world
Expand All @@ -390,9 +350,7 @@ def write_region_time_builds():
'month',
],
weight=WEIGHT_CONTEXTUAL,
filters=Filters(
excludes=[f'region={region}'],
),
excludes=[f'region={region}'],
))
else:
# Early focal sequences for region
Expand All @@ -404,10 +362,8 @@ def write_region_time_builds():
'month',
],
weight=(WEIGHT_EARLY * WEIGHT_FOCAL),
filters=Filters(
max_date=time,
excludes=[f'region!={region}'],
),
max_date=time,
excludes=[f'region!={region}'],
))

# Early contextual sequences from the rest of the world
Expand All @@ -419,10 +375,8 @@ def write_region_time_builds():
'month',
],
weight=(WEIGHT_EARLY * WEIGHT_CONTEXTUAL),
filters=Filters(
max_date=time,
excludes=[f'region={region}'],
),
max_date=time,
excludes=[f'region={region}'],
))

# Recent focal sequences for region
Expand All @@ -433,10 +387,8 @@ def write_region_time_builds():
GROUP_BY_RECENT_TEMPORAL_RESOLUTION[time],
],
weight=(WEIGHT_RECENT * WEIGHT_FOCAL),
filters=Filters(
min_date=time,
excludes=[f'region!={region}'],
),
min_date=time,
excludes=[f'region!={region}'],
))

# Recent contextual sequences from the rest of the world
Expand All @@ -447,10 +399,8 @@ def write_region_time_builds():
GROUP_BY_RECENT_TEMPORAL_RESOLUTION[time],
],
weight=(WEIGHT_RECENT * WEIGHT_CONTEXTUAL),
filters=Filters(
min_date=time,
excludes=[f'region={region}'],
),
min_date=time,
excludes=[f'region={region}'],
))

config.to_file(filename)
Expand Down Expand Up @@ -478,9 +428,7 @@ def write_ci_build():
],
weight=2,
disable_probabilistic_sampling=True,
filters=Filters(
excludes=['region!=Europe'],
),
excludes=['region!=Europe'],
))
config.add(Sample(
name='global',
Expand All @@ -490,9 +438,7 @@ def write_ci_build():
],
weight=1,
disable_probabilistic_sampling=True,
filters=Filters(
excludes=['region=Europe'],
),
excludes=['region=Europe'],
# TODO: add Priority(type=proximity, focus=region)
))
filename = Path(SUBSAMPLING_CONFIG_DIR, f"nextstrain_ci_sampling.yaml")
Expand Down
28 changes: 12 additions & 16 deletions subsampling/africa_1m.yaml
Original file line number Diff line number Diff line change
@@ -1,40 +1,36 @@
size: 4000
samples:
focal_early:
filters:
max_date: 1M
exclude:
- region!=Africa
group_by:
- country
- year
- month
weight: 4
max_date: 1M
exclude:
- region!=Africa
context_early:
filters:
max_date: 1M
exclude:
- region=Africa
group_by:
- country
- year
- month
weight: 1
max_date: 1M
exclude:
- region=Africa
focal_recent:
filters:
min_date: 1M
exclude:
- region!=Africa
group_by:
- country
- week
weight: 16
min_date: 1M
exclude:
- region!=Africa
context_recent:
filters:
min_date: 1M
exclude:
- region=Africa
group_by:
- country
- week
weight: 4
min_date: 1M
exclude:
- region=Africa
28 changes: 12 additions & 16 deletions subsampling/africa_2m.yaml
Original file line number Diff line number Diff line change
@@ -1,40 +1,36 @@
size: 4000
samples:
focal_early:
filters:
max_date: 2M
exclude:
- region!=Africa
group_by:
- country
- year
- month
weight: 4
max_date: 2M
exclude:
- region!=Africa
context_early:
filters:
max_date: 2M
exclude:
- region=Africa
group_by:
- country
- year
- month
weight: 1
max_date: 2M
exclude:
- region=Africa
focal_recent:
filters:
min_date: 2M
exclude:
- region!=Africa
group_by:
- country
- week
weight: 16
min_date: 2M
exclude:
- region!=Africa
context_recent:
filters:
min_date: 2M
exclude:
- region=Africa
group_by:
- country
- week
weight: 4
min_date: 2M
exclude:
- region=Africa
Loading

0 comments on commit 5483a02

Please sign in to comment.