From da037ad5feddaa566edf59f8dce6284d8fd28414 Mon Sep 17 00:00:00 2001 From: Iris Rademacher Date: Mon, 5 Feb 2024 13:04:59 -0500 Subject: [PATCH 1/5] initial config change --- Makefile | 3 +-- pyproject.toml | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index cf0df384f83..d2be20d58e4 100644 --- a/Makefile +++ b/Makefile @@ -39,8 +39,7 @@ check-all: check-hail check-services .PHONY: check-hail-fast check-hail-fast: - ruff check hail/python/hail - ruff check hail/python/hailtop + ruff check hail ruff format hail --diff $(PYTHON) -m pyright hail/python/hailtop diff --git a/pyproject.toml b/pyproject.toml index f49fda6fca8..91336eb992a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,22 @@ force-exclude = true known-first-party = ["auth", "batch", "ci", "gear", "hailtop", "monitoring", "website", "web_common"] [tool.ruff.per-file-ignores] -"hail/python/hail/**/*" = ["I", "PL", "RUF"] +"hail/python/hail/backend/**/*" = ["I", "PL", "RUF"] +"hail/python/hail/docs/**/*" = ["I", "PL", "RUF"] +"hail/python/hail/experimental/**/*" = ["I", "PL", "RUF"] +"hail/python/hail/expr/**/*" = ["I", "PL", "RUF"] +"hail/python/hail/fs/**/*" = ["I", "PL", "RUF"] +"hail/python/hail/genetics/**/*" = ["I", "PL", "RUF"] +"hail/python/hail/ggplot/**/*" = ["I", "PL", "RUF"] +"hail/python/hail/ir/**/*" = ["I", "PL", "RUF"] +"hail/python/hail/linalg/**/*" = ["I", "PL", "RUF"] +"hail/python/hail/methods/**/*" = ["I", "PL", "RUF"] +"hail/python/hail/nd/**/*" = ["I", "PL", "RUF"] +"hail/python/hail/plot/**/*" = ["I", "PL", "RUF"] +"hail/python/hail/stats/**/*" = ["I", "PL", "RUF"] +"hail/python/hail/typecheck/**/*" = ["I", "PL", "RUF"] +"hail/python/hail/utils/**/*" = ["I", "PL", "RUF"] +"hail/python/hail/vds/**/*" = ["I", "PL", "RUF"] "hail/python/hailtop/**/*" = ["I"] "hail/python/test/**/*" = ["ALL"] "hail/python/cluster-tests/**/*" = ["ALL"] From 0766712c7f82ae442d8b87bea36a15ca261c3a40 Mon Sep 17 00:00:00 2001 From: Iris Rademacher Date: Mon, 5 Feb 2024 14:47:32 -0500 Subject: [PATCH 2/5] base hail folder --- hail/python/hail/__init__.py | 89 +++++++++++------------ hail/python/hail/context.py | 27 +++---- hail/python/hail/matrixtable.py | 51 +++++++------- hail/python/hail/table.py | 121 ++++++++++++++++---------------- 4 files changed, 146 insertions(+), 142 deletions(-) diff --git a/hail/python/hail/__init__.py b/hail/python/hail/__init__.py index cf4e994fa10..e687b4db5bb 100644 --- a/hail/python/hail/__init__.py +++ b/hail/python/hail/__init__.py @@ -1,6 +1,7 @@ +import sys from typing import Optional + import pkg_resources -import sys if sys.version_info < (3, 9): raise EnvironmentError( @@ -31,64 +32,66 @@ # F403 'from .expr import *' used; unable to detect undefined names # F401 '.expr.*' imported but unused # E402 module level import not at top of file -from .expr import * # noqa: E402, F403 -from .expr import aggregators # noqa: E402 from hail.utils import ( # noqa: E402 - Struct, + ANY_REGION, Interval, + Struct, + copy_log, hadoop_copy, - hadoop_open, - hadoop_ls, - hadoop_stat, hadoop_exists, - hadoop_is_file, hadoop_is_dir, + hadoop_is_file, + hadoop_ls, + hadoop_open, hadoop_scheme_supported, - copy_log, - ANY_REGION, + hadoop_stat, ) -from .table import Table, GroupedTable, asc, desc # noqa: E402 -from .matrixtable import MatrixTable, GroupedMatrixTable # noqa: E402 -from .genetics import * # noqa: F403, E402 -from .methods import * # noqa: F403, E402 -from . import expr # noqa: E402 -from . import genetics # noqa: E402 -from . import methods # noqa: E402 -from . import stats # noqa: E402 -from . import linalg # noqa: E402 -from . import plot # noqa: E402 -from . import ggplot # noqa: E402 -from . import experimental # noqa: E402 -from . import ir # noqa: E402 -from . import backend # noqa: E402 -from . import nd # noqa: E402 -from . import utils # noqa: E402 -from . import vds # noqa: E402 +from . import ( # noqa: E402 + backend, + experimental, + expr, + genetics, + ggplot, + ir, + linalg, + methods, + nd, + plot, + stats, + utils, + vds, +) from .context import ( # noqa: E402 - init, - init_local, - init_batch, - stop, - spark_context, - tmp_dir, - default_reference, - get_reference, - set_global_seed, - reset_global_randomness, - _set_flags, + TemporaryDirectory, + TemporaryFilename, + _async_current_backend, _get_flags, + _set_flags, _with_flags, - _async_current_backend, - current_backend, - debug_info, citation, cite_hail, cite_hail_bibtex, + current_backend, + debug_info, + default_reference, + get_reference, + init, + init_batch, + init_local, + reset_global_randomness, + set_global_seed, + spark_context, + stop, + tmp_dir, version, - TemporaryFilename, - TemporaryDirectory, ) +from .expr import * # noqa: E402, F403 +from .expr import aggregators # noqa: E402 +from .genetics import * # noqa: F403, E402 +from .matrixtable import GroupedMatrixTable, MatrixTable # noqa: E402 +from .methods import * # noqa: F403, E402 +from .table import GroupedTable, Table, asc, desc # noqa: E402 agg = aggregators scan = aggregators.aggregators.ScanFunctions({name: getattr(agg, name) for name in agg.__all__}) diff --git a/hail/python/hail/context.py b/hail/python/hail/context.py index 429bba68452..5734123a871 100644 --- a/hail/python/hail/context.py +++ b/hail/python/hail/context.py @@ -1,27 +1,28 @@ -from typing import Optional, Union, Tuple, Type, List, Dict -from types import TracebackType -import warnings -import sys import os +import sys +import warnings from contextlib import contextmanager -from urllib.parse import urlparse, urlunparse from random import Random +from types import TracebackType +from typing import Dict, List, Optional, Tuple, Type, Union +from urllib.parse import urlparse, urlunparse import pkg_resources from pyspark import SparkContext import hail +from hail.backend import Backend from hail.genetics.reference_genome import ReferenceGenome, reference_genome_type -from hail.typecheck import nullable, typecheck, typecheck_method, enumeration, dictof, oneof, sized_tupleof, sequenceof +from hail.typecheck import dictof, enumeration, nullable, oneof, sequenceof, sized_tupleof, typecheck, typecheck_method from hail.utils import get_env_or_default -from hail.utils.java import Env, warning, choose_backend -from hail.backend import Backend +from hail.utils.java import Env, choose_backend, warning +from hailtop.aiocloud.aiogoogle import GCSRequesterPaysConfiguration, get_gcs_requester_pays_configuration +from hailtop.fs.fs import FS from hailtop.hail_event_loop import hail_event_loop from hailtop.utils import secret_alnum_string -from hailtop.fs.fs import FS -from hailtop.aiocloud.aiogoogle import GCSRequesterPaysConfiguration, get_gcs_requester_pays_configuration -from .builtin_references import BUILTIN_REFERENCES + from .backend.backend import local_jar_information +from .builtin_references import BUILTIN_REFERENCES def _get_tmpdir(tmpdir): @@ -598,8 +599,8 @@ def init_local( _optimizer_iterations=None, gcs_requester_pays_configuration: Optional[GCSRequesterPaysConfiguration] = None, ): - from hail.backend.py4j_backend import connect_logger from hail.backend.local_backend import LocalBackend + from hail.backend.py4j_backend import connect_logger log = _get_log(log) tmpdir = _get_tmpdir(tmpdir) @@ -961,8 +962,8 @@ def _with_flags(**flags): def debug_info(): - from hail.backend.spark_backend import SparkBackend from hail.backend.backend import local_jar_information + from hail.backend.spark_backend import SparkBackend spark_conf = None if isinstance(Env.backend(), SparkBackend): diff --git a/hail/python/hail/matrixtable.py b/hail/python/hail/matrixtable.py index 728196cbf57..ed0072d3b90 100644 --- a/hail/python/hail/matrixtable.py +++ b/hail/python/hail/matrixtable.py @@ -1,43 +1,44 @@ import itertools -from typing import Iterable, Optional, Dict, Tuple, Any, List +import warnings from collections import Counter +from typing import Any, Dict, Iterable, List, Optional, Tuple + import hail as hl +from hail import ir from hail.expr.expressions import ( Expression, + ExpressionException, + Indices, StructExpression, - expr_struct, - expr_any, - expr_bool, + TupleExpression, analyze, - Indices, - construct_reference, construct_expr, + construct_reference, + expr_any, + expr_bool, + expr_struct, extract_refs_by_indices, - ExpressionException, - TupleExpression, unify_all, ) -from hail.expr.types import types_match, tarray, tset from hail.expr.matrix_type import tmatrix -import hail.ir as ir -from hail.table import Table, ExprContainer, TableIndexKeyError +from hail.expr.types import tarray, tset, types_match +from hail.table import ExprContainer, Table, TableIndexKeyError from hail.typecheck import ( - typecheck, - typecheck_method, - dictof, - anytype, anyfunc, + anytype, + dictof, + enumeration, + lazy, nullable, - sequenceof, - oneof, numeric, - lazy, - enumeration, + oneof, + sequenceof, + typecheck, + typecheck_method, ) -from hail.utils import storage_level, default_handler, deduplicate -from hail.utils.java import warning, Env, info -from hail.utils.misc import wrap_to_tuple, get_key_by_exprs, get_select_exprs, check_annotate_exprs, process_joins -import warnings +from hail.utils import deduplicate, default_handler, storage_level +from hail.utils.java import Env, info, warning +from hail.utils.misc import check_annotate_exprs, get_key_by_exprs, get_select_exprs, process_joins, wrap_to_tuple class GroupedMatrixTable(ExprContainer): @@ -3372,7 +3373,7 @@ def _annotate_all( itertools.chain(row_exprs.keys(), col_exprs.keys(), entry_exprs.keys(), global_exprs.keys()) ): if field_name in self._fields: - raise RuntimeError(f'field {repr(field_name)} already in matrix table, cannot use _annotate_all') + raise RuntimeError(f'field {field_name!r} already in matrix table, cannot use _annotate_all') base, cleanup = self._process_joins(*all_exprs) mir = base._mir @@ -4067,7 +4068,7 @@ def union_cols( other = other.rename(renames) info( 'Table.union_cols: renamed the following fields on the right to avoid name conflicts:' - + ''.join(f'\n {repr(k)} -> {repr(v)}' for k, v in renames.items()) + + ''.join(f'\n {k!r} -> {v!r}' for k, v in renames.items()) ) return MatrixTable(ir.MatrixUnionCols(self._mir, other._mir, row_join_type)) diff --git a/hail/python/hail/table.py b/hail/python/hail/table.py index d06fce97421..e34c468b42c 100644 --- a/hail/python/hail/table.py +++ b/hail/python/hail/table.py @@ -1,74 +1,75 @@ import collections import itertools -import pandas -import numpy as np -import pyspark import pprint import shutil -from typing import Optional, Dict, Callable, Sequence, Union, List, overload +from typing import Callable, ClassVar, Dict, List, Optional, Sequence, Union, overload + +import numpy as np +import pandas +import pyspark +import hail as hl +from hail import ir from hail.expr.expressions import ( - Expression, - StructExpression, BooleanExpression, - expr_struct, - expr_any, - expr_bool, - analyze, - Indices, - construct_reference, - to_expr, - construct_expr, - extract_refs_by_indices, - ExpressionException, - TupleExpression, - unify_all, - NumericExpression, - StringExpression, CallExpression, CollectionExpression, DictExpression, + Expression, + ExpressionException, + Indices, IntervalExpression, LocusExpression, NDArrayExpression, - expr_stream, + NumericExpression, + StringExpression, + StructExpression, + TupleExpression, + analyze, + construct_expr, + construct_reference, + expr_any, expr_array, + expr_bool, + expr_stream, + expr_struct, + extract_refs_by_indices, + to_expr, + unify_all, ) -from hail.expr.types import hail_type, tstruct, types_match, tarray, tset, dtypes_from_pandas from hail.expr.table_type import ttable -import hail.ir as ir +from hail.expr.types import dtypes_from_pandas, hail_type, tarray, tset, tstruct, types_match from hail.typecheck import ( - typecheck, - typecheck_method, - dictof, - anytype, anyfunc, + anytype, + dictof, + enumeration, + func_spec, + lazy, nullable, - sequenceof, - oneof, numeric, - lazy, - enumeration, + oneof, + sequenceof, table_key_type, - func_spec, + typecheck, + typecheck_method, ) from hail.utils import deduplicate from hail.utils.interval import Interval -from hail.utils.placement_tree import PlacementTree from hail.utils.java import Env, info, warning from hail.utils.misc import ( - wrap_to_tuple, - storage_level, - plural, - get_nice_field_error, - get_nice_attr_error, - get_key_by_exprs, + check_annotate_exprs, check_keys, + get_key_by_exprs, + get_nice_attr_error, + get_nice_field_error, get_select_exprs, - check_annotate_exprs, + plural, process_joins, + storage_level, + wrap_to_tuple, ) -import hail as hl +from hail.utils.placement_tree import PlacementTree table_type = lazy() @@ -118,7 +119,7 @@ def desc(col): class ExprContainer: # this can only grow as big as the object dir, so no need to worry about memory leak - _warned_about = set() + _warned_about: ClassVar = set() def __init__(self): self._fields: Dict[str, Expression] = {} @@ -137,8 +138,8 @@ def _set_field(self, key, value): if key not in ExprContainer._warned_about: ExprContainer._warned_about.add(key) warning( - f"Name collision: field {repr(key)} already in object dict. " - f"\n This field must be referenced with __getitem__ syntax: obj[{repr(key)}]" + f"Name collision: field {key!r} already in object dict. " + f"\n This field must be referenced with __getitem__ syntax: obj[{key!r}]" ) else: self.__dict__[key] = value @@ -276,9 +277,7 @@ def aggregate(self, **named_exprs) -> 'Table': Aggregated table. """ for name, expr in named_exprs.items(): - analyze( - f'GroupedTable.aggregate: ({repr(name)})', expr, self._parent._global_indices, {self._parent._row_axis} - ) + analyze(f'GroupedTable.aggregate: ({name!r})', expr, self._parent._global_indices, {self._parent._row_axis}) if not named_exprs.keys().isdisjoint(set(self._key_expr)): intersection = set(named_exprs.keys()) & set(self._key_expr) raise ValueError( @@ -2242,8 +2241,7 @@ def format_line(values, widths, right_align): s += format_line(type_strs[start:end], block_column_width, block_right_align) s += hline for row in rows: - row = row[start:end] - s += format_line(row, block_column_width, block_right_align) + s += format_line(row[start:end], block_column_width, block_right_align) s += hline if has_more: @@ -3324,7 +3322,7 @@ def join( right: 'Table', how='inner', _mangle: Callable[[str, int], str] = lambda s, i: f'{s}_{i}', - _join_key: int = None, + _join_key: Optional[int] = None, ) -> 'Table': """Join two tables together. @@ -3407,7 +3405,7 @@ def join( right = right.rename(renames) info( 'Table.join: renamed the following fields on the right to avoid name conflicts:' - + ''.join(f'\n {repr(k)} -> {repr(v)}' for k, v in renames.items()) + + ''.join(f'\n {k!r} -> {v!r}' for k, v in renames.items()) ) return Table(ir.TableJoin(self._tir, right._tir, how, _join_key)) @@ -3541,7 +3539,7 @@ def expand_types(self) -> 'Table': t = t.order_by(*t.key) def _expand(e): - if isinstance(e, CollectionExpression) or isinstance(e, DictExpression): + if isinstance(e, (CollectionExpression, DictExpression)): return hl.map(lambda x: _expand(x), hl.array(e)) elif isinstance(e, StructExpression): return hl.struct(**{k: _expand(v) for (k, v) in e.items()}) @@ -3680,16 +3678,17 @@ def order_by(self, *exprs) -> 'Table': lifted_exprs = [] for e in exprs: sort_type = 'A' + _e = e if isinstance(e, Ascending): - e = e.col + _e = e.col elif isinstance(e, Descending): - e = e.col + _e = e.col sort_type = 'D' - if isinstance(e, str): - expr = self[e] + if isinstance(_e, str): + expr = self[_e] else: - expr = e + expr = _e lifted_exprs.append((expr, sort_type)) sort_fields = [] @@ -3905,9 +3904,9 @@ def to_matrix_table(self, row_key, col_key, row_fields=[], col_fields=[], n_part row_field_set = set(self.row) for k, v in c.items(): if k not in row_field_set: - raise ValueError(f"'to_matrix_table': field {repr(k)} is not a row field") + raise ValueError(f"'to_matrix_table': field {k!r} is not a row field") if v > 1: - raise ValueError(f"'to_matrix_table': field {repr(k)} appeared in {v} field groups") + raise ValueError(f"'to_matrix_table': field {k!r} appeared in {v} field groups") if len(row_key) == 0: raise ValueError("'to_matrix_table': require at least one row key field") @@ -4431,7 +4430,7 @@ def collect_by_key(self, name: str = 'values') -> 'Table': :class:`.Table` """ - import hail.methods.misc as misc + from hail.methods import misc misc.require_key(self, 'collect_by_key') @@ -4481,7 +4480,7 @@ def distinct(self) -> 'Table': :class:`.Table` """ - import hail.methods.misc as misc + from hail.methods import misc misc.require_key(self, 'distinct') From 7e74e5ffbdee04ed1d5cf4945ff01432d5b8f448 Mon Sep 17 00:00:00 2001 From: Iris Rademacher Date: Mon, 5 Feb 2024 14:49:47 -0500 Subject: [PATCH 3/5] backend --- hail/python/hail/backend/backend.py | 16 ++++---- hail/python/hail/backend/local_backend.py | 18 ++++----- hail/python/hail/backend/py4j_backend.py | 12 +++--- hail/python/hail/backend/service_backend.py | 41 ++++++++++----------- hail/python/hail/backend/spark_backend.py | 10 ++--- pyproject.toml | 1 - 6 files changed, 47 insertions(+), 51 deletions(-) diff --git a/hail/python/hail/backend/backend.py b/hail/python/hail/backend/backend.py index 0b8c74766c2..d454ef49ba4 100644 --- a/hail/python/hail/backend/backend.py +++ b/hail/python/hail/backend/backend.py @@ -1,20 +1,21 @@ -from typing import Mapping, List, Union, TypeVar, Tuple, Dict, Optional, Any, AbstractSet import abc -from enum import Enum -from dataclasses import dataclass import warnings +import zipfile +from dataclasses import dataclass +from enum import Enum +from typing import AbstractSet, Any, ClassVar, Dict, List, Mapping, Optional, Tuple, TypeVar, Union + import orjson import pkg_resources -import zipfile from hailtop.config.user_config import unchecked_configuration_of from hailtop.fs.fs import FS from ..builtin_references import BUILTIN_REFERENCE_RESOURCE_PATHS from ..expr import Expression -from ..expr.table_type import ttable -from ..expr.matrix_type import tmatrix from ..expr.blockmatrix_type import tblockmatrix +from ..expr.matrix_type import tmatrix +from ..expr.table_type import ttable from ..expr.types import HailType, dtype, tvoid from ..ir import BaseIR, finalize_randomness from ..ir.renderer import CSERenderer @@ -23,7 +24,6 @@ from ..table import Table from ..utils.java import FatalError - Dataset = TypeVar('Dataset', Table, MatrixTable, BlockMatrix) @@ -139,7 +139,7 @@ class FromFASTAFilePayload(ActionPayload): class Backend(abc.ABC): # Must match knownFlags in HailFeatureFlags.scala - _flags_env_vars_and_defaults: Dict[str, Tuple[str, Optional[str]]] = { + _flags_env_vars_and_defaults: ClassVar[Dict[str, Tuple[str, Optional[str]]]] = { "no_whole_stage_codegen": ("HAIL_DEV_NO_WHOLE_STAGE_CODEGEN", None), "no_ir_logging": ("HAIL_DEV_NO_IR_LOG", None), "lower": ("HAIL_DEV_LOWER", None), diff --git a/hail/python/hail/backend/local_backend.py b/hail/python/hail/backend/local_backend.py index 595bd04e42e..ec4a2ea8e64 100644 --- a/hail/python/hail/backend/local_backend.py +++ b/hail/python/hail/backend/local_backend.py @@ -1,19 +1,19 @@ -from typing import Optional, Union, Tuple, List import os import sys +from typing import List, Optional, Tuple, Union -from py4j.java_gateway import JavaGateway, GatewayParameters, launch_gateway +from py4j.java_gateway import GatewayParameters, JavaGateway, launch_gateway -from hail.ir.renderer import CSERenderer from hail.ir import finalize_randomness -from .py4j_backend import Py4JBackend, uninstall_exception_handler -from .backend import local_jar_information +from hail.ir.renderer import CSERenderer +from hailtop.aiotools.validators import validate_file +from hailtop.fs.router_fs import RouterFS +from hailtop.utils import find_spark_home + from ..expr import Expression from ..expr.types import HailType - -from hailtop.utils import find_spark_home -from hailtop.fs.router_fs import RouterFS -from hailtop.aiotools.validators import validate_file +from .backend import local_jar_information +from .py4j_backend import Py4JBackend, uninstall_exception_handler class LocalBackend(Py4JBackend): diff --git a/hail/python/hail/backend/py4j_backend.py b/hail/python/hail/backend/py4j_backend.py index 9fcb9d61579..dcc9c8d9eae 100644 --- a/hail/python/hail/backend/py4j_backend.py +++ b/hail/python/hail/backend/py4j_backend.py @@ -1,24 +1,23 @@ -from typing import Mapping, Set, Tuple import abc +import http.client import socket import socketserver import sys from threading import Thread +from typing import Mapping, Set, Tuple import orjson -import requests import py4j +import requests from py4j.java_gateway import JavaObject, JVMView import hail from hail.expr import construct_expr from hail.ir import JavaIR -from hail.utils.java import FatalError, Env, scala_package_object +from hail.utils.java import Env, FatalError, scala_package_object -from .backend import ActionTag, Backend, fatal_error_from_java_error_triplet from ..hail_logging import Logger - -import http.client +from .backend import ActionTag, Backend, fatal_error_from_java_error_triplet # This defaults to 65536 and fails if a header is longer than _MAXLINE # The timing json that we output can exceed 65536 bytes so we raise the limit @@ -43,7 +42,6 @@ def install_exception_handler(): def uninstall_exception_handler(): global _installed - global _original if _installed: _installed = False py4j.protocol.get_return_value = _original diff --git a/hail/python/hail/backend/service_backend.py b/hail/python/hail/backend/service_backend.py index 1fb95c30115..184bf945b58 100644 --- a/hail/python/hail/backend/service_backend.py +++ b/hail/python/hail/backend/service_backend.py @@ -1,40 +1,39 @@ -from typing import Dict, Optional, Awaitable, Mapping, Any, List, Union, Tuple, TypeVar, Set import abc import asyncio -from dataclasses import dataclass +import logging import math import struct -from hail.expr.expressions.base_expression import Expression -import orjson -import logging -from contextlib import AsyncExitStack import warnings +from contextlib import AsyncExitStack +from dataclasses import dataclass +from typing import Any, Awaitable, Dict, List, Mapping, Optional, Set, Tuple, TypeVar, Union -from hail.context import TemporaryDirectory, TemporaryFilename, tmp_dir, revision, version -from hail.utils import FatalError -from hail.expr.types import HailType +import orjson + +import hailtop.aiotools.fs as afs +from hail.context import TemporaryDirectory, TemporaryFilename, revision, tmp_dir, version from hail.experimental import read_expression, write_expression +from hail.expr.expressions.base_expression import Expression +from hail.expr.types import HailType from hail.ir import finalize_randomness from hail.ir.renderer import CSERenderer - +from hail.utils import FatalError from hailtop import yamlx -from hailtop.config import ConfigVariable, configuration_of, get_remote_tmpdir -from hailtop.hail_event_loop import hail_event_loop -from hailtop.utils import async_to_blocking, Timings, am_i_interactive, retry_transient_errors -from hailtop.utils.rich_progress_bar import BatchProgressBar -from hailtop.batch_client.aioclient import Batch, BatchClient -from hailtop.aiotools.router_fs import RouterAsyncFS from hailtop.aiocloud.aiogoogle import GCSRequesterPaysConfiguration, get_gcs_requester_pays_configuration -import hailtop.aiotools.fs as afs +from hailtop.aiotools.fs.exceptions import UnexpectedEOFError +from hailtop.aiotools.router_fs import RouterAsyncFS +from hailtop.aiotools.validators import validate_file +from hailtop.batch_client.aioclient import Batch, BatchClient +from hailtop.config import ConfigVariable, configuration_of, get_remote_tmpdir from hailtop.fs.fs import FS from hailtop.fs.router_fs import RouterFS -from hailtop.aiotools.fs.exceptions import UnexpectedEOFError +from hailtop.hail_event_loop import hail_event_loop +from hailtop.utils import Timings, am_i_interactive, async_to_blocking, retry_transient_errors +from hailtop.utils.rich_progress_bar import BatchProgressBar -from .backend import Backend, fatal_error_from_java_error_triplet, ActionTag, ActionPayload, ExecutePayload from ..builtin_references import BUILTIN_REFERENCES from ..utils import ANY_REGION -from hailtop.aiotools.validators import validate_file - +from .backend import ActionPayload, ActionTag, Backend, ExecutePayload, fatal_error_from_java_error_triplet ReferenceGenomeConfig = Dict[str, Any] diff --git a/hail/python/hail/backend/spark_backend.py b/hail/python/hail/backend/spark_backend.py index 79e41d1c9ab..a5cdc7efff0 100644 --- a/hail/python/hail/backend/spark_backend.py +++ b/hail/python/hail/backend/spark_backend.py @@ -1,10 +1,10 @@ -import sys import os -import pyspark -import pyspark.sql +import sys +from typing import Optional import orjson -from typing import Optional +import pyspark +import pyspark.sql from hail.expr.table_type import ttable from hail.fs.hadoop_fs import HadoopFS @@ -13,8 +13,8 @@ from hailtop.aiotools.router_fs import RouterAsyncFS from hailtop.aiotools.validators import validate_file -from .py4j_backend import Py4JBackend from .backend import local_jar_information +from .py4j_backend import Py4JBackend def append_to_comma_separated_list(conf: pyspark.SparkConf, k: str, *new_values: str): diff --git a/pyproject.toml b/pyproject.toml index 91336eb992a..169c40fc0fe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,6 @@ force-exclude = true known-first-party = ["auth", "batch", "ci", "gear", "hailtop", "monitoring", "website", "web_common"] [tool.ruff.per-file-ignores] -"hail/python/hail/backend/**/*" = ["I", "PL", "RUF"] "hail/python/hail/docs/**/*" = ["I", "PL", "RUF"] "hail/python/hail/experimental/**/*" = ["I", "PL", "RUF"] "hail/python/hail/expr/**/*" = ["I", "PL", "RUF"] From b99a7dc5c7d9ace9824bae71cdbe432fc76618ec Mon Sep 17 00:00:00 2001 From: Iris Rademacher Date: Mon, 5 Feb 2024 14:50:37 -0500 Subject: [PATCH 4/5] docs --- hail/python/hail/docs/conf.py | 2 +- pyproject.toml | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/hail/python/hail/docs/conf.py b/hail/python/hail/docs/conf.py index c3be204f3ba..a7bfbf2c136 100644 --- a/hail/python/hail/docs/conf.py +++ b/hail/python/hail/docs/conf.py @@ -17,10 +17,10 @@ # documentation root, use os.path.abspath to make it absolute, like shown here. # +import datetime import os import re import sys -import datetime sys.path.insert(0, os.path.abspath('./_ext')) diff --git a/pyproject.toml b/pyproject.toml index 169c40fc0fe..75e2126c3f2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,6 @@ force-exclude = true known-first-party = ["auth", "batch", "ci", "gear", "hailtop", "monitoring", "website", "web_common"] [tool.ruff.per-file-ignores] -"hail/python/hail/docs/**/*" = ["I", "PL", "RUF"] "hail/python/hail/experimental/**/*" = ["I", "PL", "RUF"] "hail/python/hail/expr/**/*" = ["I", "PL", "RUF"] "hail/python/hail/fs/**/*" = ["I", "PL", "RUF"] From 720c0a4f4fce3521eead970fdc2f8f95620a788f Mon Sep 17 00:00:00 2001 From: Iris Rademacher Date: Mon, 5 Feb 2024 14:58:40 -0500 Subject: [PATCH 5/5] experimental --- hail/python/hail/experimental/__init__.py | 28 ++++++------ hail/python/hail/experimental/datasets.py | 21 +++++---- hail/python/hail/experimental/db.py | 28 ++++++------ hail/python/hail/experimental/expressions.py | 6 +-- .../filtering_allele_frequency.py | 6 +-- hail/python/hail/experimental/function.py | 9 ++-- .../hail/experimental/haplotype_freq_em.py | 6 +-- hail/python/hail/experimental/import_gtf.py | 7 +-- hail/python/hail/experimental/interact.py | 3 +- .../hail/experimental/ld_score_regression.py | 6 +-- hail/python/hail/experimental/ldscore.py | 4 +- hail/python/hail/experimental/ldscsim.py | 44 +++++++++---------- hail/python/hail/experimental/lens.py | 1 + hail/python/hail/experimental/loop.py | 4 +- hail/python/hail/experimental/pca.py | 4 +- .../experimental/phase_by_transmission.py | 7 +-- hail/python/hail/experimental/plots.py | 9 ++-- .../hail/experimental/sparse_mt/__init__.py | 2 +- .../hail/experimental/sparse_mt/densify.py | 2 +- .../hail/experimental/table_ndarray_utils.py | 2 +- hail/python/hail/experimental/tidyr.py | 6 +-- hail/python/hail/experimental/time.py | 2 +- .../hail/experimental/write_multiple.py | 12 ++--- pyproject.toml | 1 - 24 files changed, 111 insertions(+), 109 deletions(-) diff --git a/hail/python/hail/experimental/__init__.py b/hail/python/hail/experimental/__init__.py index bb055dbc52e..730336b053b 100644 --- a/hail/python/hail/experimental/__init__.py +++ b/hail/python/hail/experimental/__init__.py @@ -1,24 +1,24 @@ -from .ldscore import ld_score -from .ld_score_regression import ld_score_regression -from .expressions import write_expression, read_expression -from .filtering_allele_frequency import filtering_allele_frequency -from .haplotype_freq_em import haplotype_freq_em -from .plots import hail_metadata, plot_roc_curve -from .phase_by_transmission import phase_by_transmission, phase_trio_matrix_by_transmission, explode_trio_matrix from .datasets import load_dataset -from .import_gtf import import_gtf, get_gene_intervals -from .write_multiple import write_matrix_tables, block_matrices_tofiles, export_block_matrices, write_block_matrices +from .db import DB from .export_entries_by_col import export_entries_by_col -from .sparse_mt import sparse_split_multi, densify +from .expressions import read_expression, write_expression +from .filtering_allele_frequency import filtering_allele_frequency +from .full_outer_join_mt import full_outer_join_mt from .function import define_function +from .haplotype_freq_em import haplotype_freq_em +from .import_gtf import get_gene_intervals, import_gtf +from .ld_score_regression import ld_score_regression +from .ldscore import ld_score from .ldscsim import simulate_phenotypes -from .full_outer_join_mt import full_outer_join_mt -from .tidyr import gather, separate, spread -from .db import DB from .loop import loop -from .time import strftime, strptime from .pca import pc_project +from .phase_by_transmission import explode_trio_matrix, phase_by_transmission, phase_trio_matrix_by_transmission +from .plots import hail_metadata, plot_roc_curve +from .sparse_mt import densify, sparse_split_multi from .table_ndarray_utils import mt_to_table_of_ndarray +from .tidyr import gather, separate, spread +from .time import strftime, strptime +from .write_multiple import block_matrices_tofiles, export_block_matrices, write_block_matrices, write_matrix_tables __all__ = [ 'ld_score', diff --git a/hail/python/hail/experimental/datasets.py b/hail/python/hail/experimental/datasets.py index b36fd5fb87a..4a9d1eb349c 100644 --- a/hail/python/hail/experimental/datasets.py +++ b/hail/python/hail/experimental/datasets.py @@ -2,9 +2,10 @@ import os from typing import Optional, Union -import hail as hl import pkg_resources +import hail as hl + def _read_dataset(path: str) -> Union[hl.Table, hl.MatrixTable, hl.linalg.BlockMatrix]: if path.endswith('.ht'): @@ -60,7 +61,7 @@ def load_dataset( if region not in valid_regions: raise ValueError( f'Specify valid region parameter,' - f' received: region={repr(region)}.\n' + f' received: region={region!r}.\n' f'Valid region values are {valid_regions}.' ) @@ -68,7 +69,7 @@ def load_dataset( if cloud not in valid_clouds: raise ValueError( f'Specify valid cloud parameter,' - f' received: cloud={repr(cloud)}.\n' + f' received: cloud={cloud!r}.\n' f'Valid cloud platforms are {valid_clouds}.' ) @@ -84,29 +85,27 @@ def load_dataset( versions = set(dataset['version'] for dataset in datasets[name]['versions']) if version not in versions: raise ValueError( - f'Version {repr(version)} not available for dataset' f' {repr(name)}.\n' f'Available versions: {versions}.' + f'Version {version!r} not available for dataset' f' {name!r}.\n' f'Available versions: {versions}.' ) reference_genomes = set(dataset['reference_genome'] for dataset in datasets[name]['versions']) if reference_genome not in reference_genomes: raise ValueError( - f'Reference genome build {repr(reference_genome)} not' - f' available for dataset {repr(name)}.\n' + f'Reference genome build {reference_genome!r} not' + f' available for dataset {name!r}.\n' f'Available reference genome builds:' f' {reference_genomes}.' ) clouds = set(k for dataset in datasets[name]['versions'] for k in dataset['url'].keys()) if cloud not in clouds: - raise ValueError( - f'Cloud platform {repr(cloud)} not available for dataset {name}.\nAvailable platforms: {clouds}.' - ) + raise ValueError(f'Cloud platform {cloud!r} not available for dataset {name}.\nAvailable platforms: {clouds}.') regions = set(k for dataset in datasets[name]['versions'] for k in dataset['url'][cloud].keys()) if region not in regions: raise ValueError( - f'Region {repr(region)} not available for dataset' - f' {repr(name)} on cloud platform {repr(cloud)}.\n' + f'Region {region!r} not available for dataset' + f' {name!r} on cloud platform {cloud!r}.\n' f'Available regions: {regions}.' ) diff --git a/hail/python/hail/experimental/db.py b/hail/python/hail/experimental/db.py index 188a66861b4..cd4d6716e93 100644 --- a/hail/python/hail/experimental/db.py +++ b/hail/python/hail/experimental/db.py @@ -1,18 +1,19 @@ import json import os import warnings -from typing import Iterable, List, Optional, Set, Tuple, Union +from typing import ClassVar, Iterable, List, Optional, Set, Tuple, Union -import hail as hl import pkg_resources + +import hail as hl from hailtop.utils import external_requests_client_session, retry_response_returning_functions -from .lens import MatrixRows, TableRows from ..expr import StructExpression from ..matrixtable import MatrixTable, matrix_table_type from ..table import Table, table_type from ..typecheck import oneof, typecheck_method from ..utils.java import Env, info +from .lens import MatrixRows, TableRows class DatasetVersion: @@ -306,10 +307,10 @@ class DB: >>> db = hl.experimental.DB(region='us', cloud='gcp') """ - _valid_key_properties = {'gene', 'unique'} - _valid_regions = {'us', 'eu'} - _valid_clouds = {'gcp', 'aws'} - _valid_combinations = {('us', 'aws'), ('us', 'gcp'), ('eu', 'gcp')} + _valid_key_properties: ClassVar = {'gene', 'unique'} + _valid_regions: ClassVar = {'us', 'eu'} + _valid_clouds: ClassVar = {'gcp', 'aws'} + _valid_combinations: ClassVar = {('us', 'aws'), ('us', 'gcp'), ('eu', 'gcp')} def __init__( self, *, region: str = 'us', cloud: str = 'gcp', url: Optional[str] = None, config: Optional[dict] = None @@ -317,19 +318,19 @@ def __init__( if region not in DB._valid_regions: raise ValueError( f'Specify valid region parameter,' - f' received: region={repr(region)}.\n' + f' received: region={region!r}.\n' f'Valid regions are {DB._valid_regions}.' ) if cloud not in DB._valid_clouds: raise ValueError( f'Specify valid cloud parameter,' - f' received: cloud={repr(cloud)}.\n' + f' received: cloud={cloud!r}.\n' f'Valid cloud platforms are {DB._valid_clouds}.' ) if (region, cloud) not in DB._valid_combinations: raise ValueError( - f'The {repr(region)} region is not available for' - f' the {repr(cloud)} cloud platform. ' + f'The {region!r} region is not available for' + f' the {cloud!r} cloud platform. ' f'Valid region, cloud combinations are' f' {DB._valid_combinations}.' ) @@ -348,9 +349,8 @@ def __init__( response = retry_response_returning_functions(session.get, url) config = response.json() assert isinstance(config, dict) - else: - if not isinstance(config, dict): - raise ValueError(f'expected a dict mapping dataset names to ' f'configurations, but found {config}') + elif not isinstance(config, dict): + raise ValueError(f'expected a dict mapping dataset names to ' f'configurations, but found {config}') config = {k: v for k, v in config.items() if 'annotation_db' in v} self.region = region self.cloud = cloud diff --git a/hail/python/hail/experimental/expressions.py b/hail/python/hail/experimental/expressions.py index 3d6a080a972..f2b4f2a87f9 100644 --- a/hail/python/hail/experimental/expressions.py +++ b/hail/python/hail/experimental/expressions.py @@ -1,8 +1,8 @@ import hail as hl -from hail.expr.expressions import expr_any, analyze -from hail.expr.types import hail_type +from hail.expr.expressions import analyze, expr_any from hail.expr.table_type import ttable -from hail.typecheck import typecheck, nullable +from hail.expr.types import hail_type +from hail.typecheck import nullable, typecheck @typecheck(expr=expr_any, path=str, overwrite=bool) diff --git a/hail/python/hail/experimental/filtering_allele_frequency.py b/hail/python/hail/experimental/filtering_allele_frequency.py index 9df76230324..a3b3d9ef92f 100644 --- a/hail/python/hail/experimental/filtering_allele_frequency.py +++ b/hail/python/hail/experimental/filtering_allele_frequency.py @@ -1,7 +1,7 @@ -from hail.expr.expressions import expr_float64, expr_int32, Float64Expression +from hail.expr.expressions import Float64Expression, expr_float64, expr_int32 +from hail.expr.functions import _func from hail.expr.types import tfloat64 from hail.typecheck import typecheck -from hail.expr.functions import _func @typecheck(ac=expr_int32, an=expr_int32, ci=expr_float64) @@ -12,7 +12,7 @@ def filtering_allele_frequency(ac, an, ci) -> Float64Expression: The filtering allele frequency is the highest true population allele frequency for which the upper bound of the `ci` (confidence interval) of allele count - under a Poisson distribution is still less than the variant’s observed + under a Poisson distribution is still less than the variant's observed `ac` (allele count) in the reference sample, given an `an` (allele number). This function defines a "filtering AF" that represents diff --git a/hail/python/hail/experimental/function.py b/hail/python/hail/experimental/function.py index dd76602cbe8..5f3f3c47625 100644 --- a/hail/python/hail/experimental/function.py +++ b/hail/python/hail/experimental/function.py @@ -1,8 +1,9 @@ -from typing import Optional, Tuple, Sequence, Callable -from hail.expr.expressions import construct_expr, expr_any, unify_all, Expression -from hail.expr.types import hail_type, HailType +from typing import Callable, Optional, Sequence, Tuple + +from hail.expr.expressions import Expression, construct_expr, expr_any, unify_all +from hail.expr.types import HailType, hail_type from hail.ir import Apply, Ref -from hail.typecheck import typecheck, nullable, tupleof, anytype +from hail.typecheck import anytype, nullable, tupleof, typecheck from hail.utils.java import Env diff --git a/hail/python/hail/experimental/haplotype_freq_em.py b/hail/python/hail/experimental/haplotype_freq_em.py index 363d84811e3..f66d9fb5bc7 100644 --- a/hail/python/hail/experimental/haplotype_freq_em.py +++ b/hail/python/hail/experimental/haplotype_freq_em.py @@ -1,7 +1,7 @@ -from hail.expr.expressions import expr_int32, expr_array, ArrayExpression -from hail.expr.types import tfloat64, tarray -from hail.typecheck import typecheck +from hail.expr.expressions import ArrayExpression, expr_array, expr_int32 from hail.expr.functions import _func +from hail.expr.types import tarray, tfloat64 +from hail.typecheck import typecheck @typecheck(gt_counts=expr_array(expr_int32)) diff --git a/hail/python/hail/experimental/import_gtf.py b/hail/python/hail/experimental/import_gtf.py index dde7c39e568..87e07ad2ba2 100644 --- a/hail/python/hail/experimental/import_gtf.py +++ b/hail/python/hail/experimental/import_gtf.py @@ -1,10 +1,11 @@ -import operator import functools +import operator + import hail as hl from hail.genetics.reference_genome import reference_genome_type -from hail.typecheck import typecheck, nullable, sequenceof -from hail.utils.java import info +from hail.typecheck import nullable, sequenceof, typecheck from hail.utils import new_temp_file +from hail.utils.java import info @typecheck( diff --git a/hail/python/hail/experimental/interact.py b/hail/python/hail/experimental/interact.py index b2878762a1a..af2bd736f17 100644 --- a/hail/python/hail/experimental/interact.py +++ b/hail/python/hail/experimental/interact.py @@ -2,7 +2,6 @@ from ipywidgets import widgets import hail as hl - from hail.expr.types import summary_type __all__ = [ @@ -347,7 +346,7 @@ def append_struct_frames(t, frames): frames.append(widgets.HTML('Fields:')) acc = widgets.Accordion([recursive_build(x) for x in t.values()]) for i, (name, fd) in enumerate(t.items()): - acc.set_title(i, f'{repr(name)} ({summary_type(fd)})') + acc.set_title(i, f'{name!r} ({summary_type(fd)})') acc.selected_index = None frames.append(acc) diff --git a/hail/python/hail/experimental/ld_score_regression.py b/hail/python/hail/experimental/ld_score_regression.py index b3f55fd0ed5..049f6442028 100644 --- a/hail/python/hail/experimental/ld_score_regression.py +++ b/hail/python/hail/experimental/ld_score_regression.py @@ -1,7 +1,7 @@ import hail as hl -from hail.expr.expressions import expr_float64, expr_numeric, analyze -from hail.typecheck import typecheck, oneof, sequenceof, nullable -from hail.utils import wrap_to_list, new_temp_file +from hail.expr.expressions import analyze, expr_float64, expr_numeric +from hail.typecheck import nullable, oneof, sequenceof, typecheck +from hail.utils import new_temp_file, wrap_to_list @typecheck( diff --git a/hail/python/hail/experimental/ldscore.py b/hail/python/hail/experimental/ldscore.py index 8df25f09d05..08971dc2ca7 100644 --- a/hail/python/hail/experimental/ldscore.py +++ b/hail/python/hail/experimental/ldscore.py @@ -1,7 +1,7 @@ import hail as hl +from hail.expr.expressions import expr_float64, expr_locus, expr_numeric from hail.linalg import BlockMatrix -from hail.typecheck import typecheck, nullable, sequenceof, oneof -from hail.expr.expressions import expr_float64, expr_numeric, expr_locus +from hail.typecheck import nullable, oneof, sequenceof, typecheck from hail.utils import new_temp_file, wrap_to_list diff --git a/hail/python/hail/experimental/ldscsim.py b/hail/python/hail/experimental/ldscsim.py index 72698580b60..92b6f32e93c 100644 --- a/hail/python/hail/experimental/ldscsim.py +++ b/hail/python/hail/experimental/ldscsim.py @@ -20,15 +20,16 @@ @author: nbaya """ +import numpy as np +import pandas as pd +from scipy import stats + import hail as hl -from hail.typecheck import typecheck, oneof, nullable -from hail.expr.expressions import expr_float64, expr_int32, expr_array, expr_call +from hail.expr.expressions import expr_array, expr_call, expr_float64, expr_int32 from hail.matrixtable import MatrixTable from hail.table import Table +from hail.typecheck import nullable, oneof, typecheck from hail.utils.java import Env -import numpy as np -import pandas as pd -import scipy.stats as stats @typecheck( @@ -177,7 +178,7 @@ def make_betas(mt, h2, pi=None, annot=None, rg=None): h2 = h2 if isinstance(h2, list) else [h2] annot_sum = mt.aggregate_rows(hl.agg.sum(annot)) mt = mt.annotate_rows(beta=hl.literal(h2).map(lambda x: hl.rand_norm(0, hl.sqrt(annot * x / (annot_sum * M))))) - elif len(h2) > 1 and (pi == [None] or pi == [1]): # multi-trait correlated infinitesimal + elif len(h2) > 1 and (pi in ([None], [1])): # multi-trait correlated infinitesimal mt, rg = multitrait_inf(mt=mt, h2=h2, rg=rg) elif len(h2) == 2 and len(pi) > 1 and len(rg) == 1: # two trait correlated spike & slab print('multitrait ss') @@ -551,21 +552,20 @@ def calculate_phenotypes(mt, genotype, beta, h2, popstrat=None, popstrat_var=Non y_no_noise=hl.agg.array_agg(lambda beta: hl.agg.sum(beta * mt['norm_gt']), mt['beta_' + uid]) ) mt = mt.annotate_cols(y=mt.y_no_noise + hl.literal(h2).map(lambda x: hl.rand_norm(0, hl.sqrt(1 - x)))) + elif exact_h2 and min([h2[0], 1 - h2[0]]) != 0: + print('exact h2') + mt = mt.annotate_cols(**{'y_no_noise_' + uid: hl.agg.sum(mt['beta_' + uid] * mt['norm_gt'])}) + y_no_noise_stdev = mt.aggregate_cols(hl.agg.stats(mt['y_no_noise_' + uid]).stdev) + mt = mt.annotate_cols( + y_no_noise=hl.sqrt(h2[0]) * mt['y_no_noise_' + uid] / y_no_noise_stdev + ) # normalize genetic component of phenotype to have variance of exactly h2 + mt = mt.annotate_cols(**{'noise_' + uid: hl.rand_norm(0, hl.sqrt(1 - h2[0]))}) + noise_stdev = mt.aggregate_cols(hl.agg.stats(mt['noise_' + uid]).stdev) + mt = mt.annotate_cols(noise=hl.sqrt(1 - h2[0]) * mt['noise_' + uid] / noise_stdev) + mt = mt.annotate_cols(y=mt.y_no_noise + hl.sqrt(1 - h2[0]) * mt['noise_' + uid] / noise_stdev) else: - if exact_h2 and min([h2[0], 1 - h2[0]]) != 0: - print('exact h2') - mt = mt.annotate_cols(**{'y_no_noise_' + uid: hl.agg.sum(mt['beta_' + uid] * mt['norm_gt'])}) - y_no_noise_stdev = mt.aggregate_cols(hl.agg.stats(mt['y_no_noise_' + uid]).stdev) - mt = mt.annotate_cols( - y_no_noise=hl.sqrt(h2[0]) * mt['y_no_noise_' + uid] / y_no_noise_stdev - ) # normalize genetic component of phenotype to have variance of exactly h2 - mt = mt.annotate_cols(**{'noise_' + uid: hl.rand_norm(0, hl.sqrt(1 - h2[0]))}) - noise_stdev = mt.aggregate_cols(hl.agg.stats(mt['noise_' + uid]).stdev) - mt = mt.annotate_cols(noise=hl.sqrt(1 - h2[0]) * mt['noise_' + uid] / noise_stdev) - mt = mt.annotate_cols(y=mt.y_no_noise + hl.sqrt(1 - h2[0]) * mt['noise_' + uid] / noise_stdev) - else: - mt = mt.annotate_cols(y_no_noise=hl.agg.sum(mt['beta_' + uid] * mt['norm_gt'])) - mt = mt.annotate_cols(y=mt.y_no_noise + hl.rand_norm(0, hl.sqrt(1 - h2[0]))) + mt = mt.annotate_cols(y_no_noise=hl.agg.sum(mt['beta_' + uid] * mt['norm_gt'])) + mt = mt.annotate_cols(y=mt.y_no_noise + hl.rand_norm(0, hl.sqrt(1 - h2[0]))) if popstrat is not None: var_factor = ( 1 @@ -660,7 +660,7 @@ def agg_fields(tb, coef_dict=None, str_expr=None, axis='rows'): :class:`.MatrixTable` or :class:`.Table` containing aggregation field. """ assert str_expr is not None or coef_dict is not None, "str_expr and coef_dict cannot both be None" - assert axis == 'rows' or axis == 'cols', "axis must be 'rows' or 'cols'" + assert axis in {'rows', 'cols'}, "axis must be 'rows' or 'cols'" coef_dict = get_coef_dict(tb=tb, str_expr=str_expr, ref_coef_dict=coef_dict, axis=axis) axis_field = 'annot' if axis == 'rows' else 'cov' annotate_fn = ( @@ -701,7 +701,7 @@ def get_coef_dict(tb, str_expr=None, ref_coef_dict=None, axis='rows'): `coef_dict` value, the row (or col) field name is specified by `coef_dict` key. """ assert str_expr is not None or ref_coef_dict is not None, "str_expr and ref_coef_dict cannot both be None" - assert axis == 'rows' or axis == 'cols', "axis must be 'rows' or 'cols'" + assert axis in {'rows', 'cols'}, "axis must be 'rows' or 'cols'" fields_to_search = tb.row if axis == 'rows' or isinstance(tb, Table) else tb.col # when axis='rows' we're searching for annotations, axis='cols' searching for covariates axis_field = 'annotation' if axis == 'rows' else 'covariate' diff --git a/hail/python/hail/experimental/lens.py b/hail/python/hail/experimental/lens.py index c2155db66cd..15ddb7a80ba 100644 --- a/hail/python/hail/experimental/lens.py +++ b/hail/python/hail/experimental/lens.py @@ -1,4 +1,5 @@ import abc + import hail as hl diff --git a/hail/python/hail/experimental/loop.py b/hail/python/hail/experimental/loop.py index c921625c631..443f966281a 100644 --- a/hail/python/hail/experimental/loop.py +++ b/hail/python/hail/experimental/loop.py @@ -1,7 +1,7 @@ from typing import Callable -import hail.ir as ir -from hail.expr.expressions import construct_variable, construct_expr, expr_any, to_expr, unify_all +from hail import ir +from hail.expr.expressions import construct_expr, construct_variable, expr_any, to_expr, unify_all from hail.expr.types import hail_type from hail.typecheck import anytype, typecheck from hail.utils.java import Env diff --git a/hail/python/hail/experimental/pca.py b/hail/python/hail/experimental/pca.py index 72948bddbd9..99fdfb42e60 100644 --- a/hail/python/hail/experimental/pca.py +++ b/hail/python/hail/experimental/pca.py @@ -1,12 +1,12 @@ import hail as hl -from hail.typecheck import typecheck from hail.expr.expressions import ( + expr_array, expr_call, expr_numeric, - expr_array, raise_unless_entry_indexed, raise_unless_row_indexed, ) +from hail.typecheck import typecheck @typecheck(call_expr=expr_call, loadings_expr=expr_array(expr_numeric), af_expr=expr_numeric) diff --git a/hail/python/hail/experimental/phase_by_transmission.py b/hail/python/hail/experimental/phase_by_transmission.py index 56afebecd95..dafe8ef92c6 100644 --- a/hail/python/hail/experimental/phase_by_transmission.py +++ b/hail/python/hail/experimental/phase_by_transmission.py @@ -1,8 +1,9 @@ -import hail as hl -from hail.typecheck import typecheck, sequenceof -from hail.expr.expressions import expr_str, expr_call, expr_locus, expr_array from typing import List +import hail as hl +from hail.expr.expressions import expr_array, expr_call, expr_locus, expr_str +from hail.typecheck import sequenceof, typecheck + @typecheck( locus=expr_locus(), diff --git a/hail/python/hail/experimental/plots.py b/hail/python/hail/experimental/plots.py index 8ef36aa2003..c2c3d97eff3 100644 --- a/hail/python/hail/experimental/plots.py +++ b/hail/python/hail/experimental/plots.py @@ -1,15 +1,16 @@ import json + import numpy as np import pandas as pd - -import hail as hl from bokeh.layouts import gridplot -from bokeh.models import Title, ColumnDataSource, HoverTool, Div, Tabs, TabPanel +from bokeh.models import ColumnDataSource, Div, HoverTool, TabPanel, Tabs, Title from bokeh.palettes import Spectral8 from bokeh.plotting import figure from bokeh.transform import factor_cmap + +import hail as hl from hail.typecheck import typecheck -from hail.utils.hadoop_utils import hadoop_open, hadoop_ls +from hail.utils.hadoop_utils import hadoop_ls, hadoop_open from hail.utils.java import warning diff --git a/hail/python/hail/experimental/sparse_mt/__init__.py b/hail/python/hail/experimental/sparse_mt/__init__.py index 809103319a7..de9571354a0 100644 --- a/hail/python/hail/experimental/sparse_mt/__init__.py +++ b/hail/python/hail/experimental/sparse_mt/__init__.py @@ -1,5 +1,5 @@ -from .sparse_split_multi import sparse_split_multi from .densify import densify +from .sparse_split_multi import sparse_split_multi __all__ = [ 'sparse_split_multi', diff --git a/hail/python/hail/experimental/sparse_mt/densify.py b/hail/python/hail/experimental/sparse_mt/densify.py index 19096b19ec5..5e6c7cfb204 100644 --- a/hail/python/hail/experimental/sparse_mt/densify.py +++ b/hail/python/hail/experimental/sparse_mt/densify.py @@ -21,7 +21,7 @@ def densify(sparse_mt): roughly costing as much as reading a matrix table created by importing a dense project VCF. """ - if list(sparse_mt.row_key)[0] != 'locus' or not isinstance(sparse_mt.locus.dtype, hl.tlocus): + if next(iter(sparse_mt.row_key)) != 'locus' or not isinstance(sparse_mt.locus.dtype, hl.tlocus): raise ValueError("first row key field must be named 'locus' and have type 'locus'") if 'END' not in sparse_mt.entry or sparse_mt.END.dtype != hl.tint32: raise ValueError("'densify' requires 'END' entry field of type 'int32'") diff --git a/hail/python/hail/experimental/table_ndarray_utils.py b/hail/python/hail/experimental/table_ndarray_utils.py index ac624a45511..8bea3efb20b 100644 --- a/hail/python/hail/experimental/table_ndarray_utils.py +++ b/hail/python/hail/experimental/table_ndarray_utils.py @@ -1,5 +1,5 @@ import hail as hl -from hail.expr import raise_unless_entry_indexed, matrix_table_source +from hail.expr import matrix_table_source, raise_unless_entry_indexed from hail.utils.java import Env diff --git a/hail/python/hail/experimental/tidyr.py b/hail/python/hail/experimental/tidyr.py index bb3b62f1c15..f1c34142b31 100644 --- a/hail/python/hail/experimental/tidyr.py +++ b/hail/python/hail/experimental/tidyr.py @@ -1,7 +1,7 @@ import hail as hl from hail import Table -from hail.typecheck import typecheck, nullable, oneof, sequenceof -from hail.utils import wrap_to_list, new_temp_file +from hail.typecheck import nullable, oneof, sequenceof, typecheck +from hail.utils import new_temp_file, wrap_to_list @typecheck(ht=Table, key=str, value=str, fields=str) @@ -82,7 +82,7 @@ def spread(ht, field, value, key=None) -> Table: field_vals = list(ht.aggregate(hl.agg.collect_as_set(ht[field]))) ht = ht.group_by(*key).aggregate( - **{rv: hl.agg.take(ht[rv], 1)[0] for rv in ht.row_value if rv not in set(key + [field, value])}, + **{rv: hl.agg.take(ht[rv], 1)[0] for rv in ht.row_value if rv not in set([*key, field, value])}, **{ fv: hl.agg.filter( ht[field] == fv, diff --git a/hail/python/hail/experimental/time.py b/hail/python/hail/experimental/time.py index 946ca480bc5..85d763a842e 100644 --- a/hail/python/hail/experimental/time.py +++ b/hail/python/hail/experimental/time.py @@ -1,5 +1,5 @@ import hail as hl -from hail.expr.expressions import expr_str, expr_int64 +from hail.expr.expressions import expr_int64, expr_str from hail.expr.functions import _func from hail.typecheck import typecheck diff --git a/hail/python/hail/experimental/write_multiple.py b/hail/python/hail/experimental/write_multiple.py index 123090e9bff..b6761eb0e80 100644 --- a/hail/python/hail/experimental/write_multiple.py +++ b/hail/python/hail/experimental/write_multiple.py @@ -1,16 +1,16 @@ from typing import List, Optional from hail import MatrixTable -from hail.linalg import BlockMatrix from hail.ir import ( - MatrixMultiWrite, - MatrixNativeMultiWriter, - BlockMatrixMultiWrite, BlockMatrixBinaryMultiWriter, - BlockMatrixTextMultiWriter, + BlockMatrixMultiWrite, BlockMatrixNativeMultiWriter, + BlockMatrixTextMultiWriter, + MatrixMultiWrite, + MatrixNativeMultiWriter, ) -from hail.typecheck import nullable, sequenceof, typecheck, enumeration +from hail.linalg import BlockMatrix +from hail.typecheck import enumeration, nullable, sequenceof, typecheck from hail.utils.java import Env diff --git a/pyproject.toml b/pyproject.toml index 75e2126c3f2..1d2f20f5ca8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,6 @@ force-exclude = true known-first-party = ["auth", "batch", "ci", "gear", "hailtop", "monitoring", "website", "web_common"] [tool.ruff.per-file-ignores] -"hail/python/hail/experimental/**/*" = ["I", "PL", "RUF"] "hail/python/hail/expr/**/*" = ["I", "PL", "RUF"] "hail/python/hail/fs/**/*" = ["I", "PL", "RUF"] "hail/python/hail/genetics/**/*" = ["I", "PL", "RUF"]