From 3e24e148a15332a3b8b6745936a4ff4ccfa9e5c6 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Sat, 2 Mar 2024 19:09:24 -0500 Subject: [PATCH 1/8] fix format fails --- lenskit/__init__.py | 1 - lenskit/math/solve.py | 1 - 2 files changed, 2 deletions(-) diff --git a/lenskit/__init__.py b/lenskit/__init__.py index c43368426..4722caab2 100644 --- a/lenskit/__init__.py +++ b/lenskit/__init__.py @@ -8,7 +8,6 @@ Toolkit for recommender systems research, teaching, and more. """ - from importlib.metadata import PackageNotFoundError, version from lenskit.algorithms import * # noqa: F401,F403 diff --git a/lenskit/math/solve.py b/lenskit/math/solve.py index 75311732b..96997285b 100644 --- a/lenskit/math/solve.py +++ b/lenskit/math/solve.py @@ -8,7 +8,6 @@ Efficient solver routines. """ - import numpy as np import cffi From afa3d47a5464267985f6ca51077ef4e146f22fc2 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Sat, 2 Mar 2024 19:11:43 -0500 Subject: [PATCH 2/8] fix lint formatting errors --- lenskit/algorithms/__init__.py | 4 ++-- lenskit/algorithms/als.py | 29 ++++++++++++++++------------- lenskit/algorithms/basic.py | 5 ++--- lenskit/algorithms/funksvd.py | 6 +++--- lenskit/algorithms/item_knn.py | 13 +++++++------ lenskit/algorithms/ranking.py | 4 ++-- lenskit/algorithms/svd.py | 6 +++--- lenskit/algorithms/user_knn.py | 7 +++---- lenskit/batch/_recommend.py | 4 ++-- lenskit/batch/_train.py | 4 ++-- lenskit/crossfold.py | 2 +- lenskit/data/matrix.py | 4 ++-- lenskit/datasets/fetch.py | 8 ++++---- lenskit/datasets/movielens.py | 5 +++-- lenskit/math/solve.py | 3 +-- lenskit/metrics/topn.py | 1 + lenskit/sharing/__init__.py | 10 +++++----- lenskit/sharing/binpickle.py | 6 +++--- lenskit/sharing/shm.py | 6 +++--- lenskit/topn.py | 6 +++--- lenskit/util/__init__.py | 6 +++--- lenskit/util/accum.py | 1 + lenskit/util/debug.py | 5 +++-- lenskit/util/log.py | 3 +-- lenskit/util/parallel.py | 16 ++++++++-------- lenskit/util/random.py | 1 - lenskit/util/test.py | 4 ++-- 27 files changed, 86 insertions(+), 83 deletions(-) diff --git a/lenskit/algorithms/__init__.py b/lenskit/algorithms/__init__.py index d503ac438..ecc8e1299 100644 --- a/lenskit/algorithms/__init__.py +++ b/lenskit/algorithms/__init__.py @@ -13,8 +13,8 @@ classes (:py:mod:`abc`) representing different algorithm capabilities. """ -from abc import ABCMeta, abstractmethod import inspect +from abc import ABCMeta, abstractmethod __all__ = ["Algorithm", "Recommender", "Predictor", "CandidateSelector"] @@ -217,8 +217,8 @@ def rated_items(ratings): Utility function for converting a series or array into an array of item IDs. Useful in implementations of :py:meth:`candidates`. """ - import pandas as pd import numpy as np + import pandas as pd if isinstance(ratings, pd.Series): return ratings.index.values diff --git a/lenskit/algorithms/als.py b/lenskit/algorithms/als.py index c3b7790ea..aba14e73a 100644 --- a/lenskit/algorithms/als.py +++ b/lenskit/algorithms/als.py @@ -8,16 +8,15 @@ from collections import namedtuple import numpy as np -from numba import njit, prange - from csr import CSR +from numba import njit, prange from seedbank import numpy_rng -from .bias import Bias -from .mf_common import MFPredictor -from ..data import sparse_ratings from .. import util +from ..data import sparse_ratings from ..math.solve import _dposv +from .bias import Bias +from .mf_common import MFPredictor _logger = logging.getLogger(__name__) @@ -563,22 +562,26 @@ class ImplicitMF(MFPredictor): initially defaulted to ``True``, but with a warning. 
In 0.14 it defaults to ``False``. Args: - features(int): the number of features to train - iterations(int): the number of iterations to train - reg(float): the regularization factor - weight(float): the scaling weight for positive samples (:math:`\\alpha` in :cite:p:`Hu2008-li`). + features(int): + The number of features to train + iterations(int): + The number of iterations to train + reg(float): + The regularization factor + weight(float): + The scaling weight for positive samples (:math:`\\alpha` in :cite:p:`Hu2008-li`). use_ratings(bool): Whether to use the `rating` column, if present. Defaults to ``False``; when ``True``, - the values from the ``rating`` column are used, and multipled by ``weight``; if ``False``, - ImplicitMF treats every rated user-item pair as having a rating of 1. + the values from the ``rating`` column are used, and multipled by ``weight``; if + ``False``, ImplicitMF treats every rated user-item pair as having a rating of 1. method(str): the training method. ``'cg'`` (the default) Conjugate gradient method :cite:p:`Takacs2011-ix`. ``'lu'`` - A direct implementation of the original implicit-feedback ALS concept :cite:p:`Hu2008-li` - using LU-decomposition to solve for the optimized matrices. + A direct implementation of the original implicit-feedback ALS concept + :cite:p:`Hu2008-li` using LU-decomposition to solve for the optimized matrices. rng_spec: Random number generator or state (see :func:`lenskit.util.random.rng`). diff --git a/lenskit/algorithms/basic.py b/lenskit/algorithms/basic.py index d66f0ffc3..bff2e0265 100644 --- a/lenskit/algorithms/basic.py +++ b/lenskit/algorithms/basic.py @@ -11,13 +11,12 @@ import logging from collections.abc import Iterable, Sequence -import pandas as pd import numpy as np +import pandas as pd from ..data import sparse_ratings -from . import Predictor, Recommender, CandidateSelector from ..util import derivable_rng - +from . import CandidateSelector, Predictor, Recommender from .bias import Bias # noqa: F401 from .ranking import TopN # noqa: F401 diff --git a/lenskit/algorithms/funksvd.py b/lenskit/algorithms/funksvd.py index 410cd9315..0e49fbf6a 100644 --- a/lenskit/algorithms/funksvd.py +++ b/lenskit/algorithms/funksvd.py @@ -11,9 +11,9 @@ import logging import time -import pandas as pd -import numpy as np import numba as n +import numpy as np +import pandas as pd from seedbank import numpy_rng try: @@ -21,9 +21,9 @@ except ImportError: from numba import jitclass +from .. import util from .bias import Bias from .mf_common import MFPredictor -from .. import util _logger = logging.getLogger(__name__) diff --git a/lenskit/algorithms/item_knn.py b/lenskit/algorithms/item_knn.py index e377175dd..c1ff6834e 100644 --- a/lenskit/algorithms/item_knn.py +++ b/lenskit/algorithms/item_knn.py @@ -8,24 +8,25 @@ Item-based k-NN collaborative filtering. 
""" -from sys import intern import logging import warnings +from sys import intern -import pandas as pd +import csr.kernel as csrk import numpy as np +import pandas as pd import scipy.sparse as sps import scipy.sparse.linalg as spla -from csr import CSR, create_from_sizes, create_empty -import csr.kernel as csrk +from csr import CSR, create_empty, create_from_sizes from numba import njit, prange from numba.typed import List -from lenskit import util, DataWarning, ConfigWarning +from lenskit import ConfigWarning, DataWarning, util from lenskit.data import sparse_ratings from lenskit.sharing import in_share_context -from lenskit.util.parallel import is_mp_worker from lenskit.util.accum import kvp_minheap_insert, kvp_minheap_sort +from lenskit.util.parallel import is_mp_worker + from . import Predictor _logger = logging.getLogger(__name__) diff --git a/lenskit/algorithms/ranking.py b/lenskit/algorithms/ranking.py index a5ae9167d..bbe8f222b 100644 --- a/lenskit/algorithms/ranking.py +++ b/lenskit/algorithms/ranking.py @@ -12,8 +12,8 @@ import numpy as np -from . import Recommender, Predictor from ..util import derivable_rng +from . import Predictor, Recommender _log = logging.getLogger(__name__) @@ -118,7 +118,7 @@ class PlackettLuce(Recommender): """ def __init__(self, predictor, selector=None, *, rng_spec=None): - from .basic import UnratedItemCandidateSelector, Popular + from .basic import Popular, UnratedItemCandidateSelector if isinstance(predictor, TopN): _log.warn("wrapping Top-N in PlackettLuce, candidate selector probably redundant") diff --git a/lenskit/algorithms/svd.py b/lenskit/algorithms/svd.py index 622f90747..eb7d1ec1d 100644 --- a/lenskit/algorithms/svd.py +++ b/lenskit/algorithms/svd.py @@ -6,8 +6,8 @@ import logging -import pandas as pd import numpy as np +import pandas as pd try: from sklearn.decomposition import TruncatedSVD @@ -17,10 +17,10 @@ TruncatedSVD = None SKL_AVAILABLE = False -from . import Predictor -from .bias import Bias from ..data import sparse_ratings from ..util import Stopwatch +from . import Predictor +from .bias import Bias _log = logging.getLogger(__name__) diff --git a/lenskit/algorithms/user_knn.py b/lenskit/algorithms/user_knn.py index e8e7dc104..f05583674 100644 --- a/lenskit/algorithms/user_knn.py +++ b/lenskit/algorithms/user_knn.py @@ -8,18 +8,17 @@ User-based k-NN collaborative filtering. """ -from sys import intern import logging +from sys import intern -import pandas as pd import numpy as np - +import pandas as pd from numba import njit from .. import util from ..data import sparse_ratings -from . import Predictor from ..util.accum import kvp_minheap_insert +from . import Predictor _logger = logging.getLogger(__name__) diff --git a/lenskit/batch/_recommend.py b/lenskit/batch/_recommend.py index aa50924bf..3031eb5df 100644 --- a/lenskit/batch/_recommend.py +++ b/lenskit/batch/_recommend.py @@ -7,11 +7,11 @@ import logging import warnings -import pandas as pd import numpy as np +import pandas as pd -from ..algorithms import Recommender from .. 
import util +from ..algorithms import Recommender from ..sharing import PersistedModel _logger = logging.getLogger(__name__) diff --git a/lenskit/batch/_train.py b/lenskit/batch/_train.py index 346b96e8b..b64a46609 100644 --- a/lenskit/batch/_train.py +++ b/lenskit/batch/_train.py @@ -6,9 +6,9 @@ import logging -from lenskit.sharing import persist, persist_binpickle -from lenskit.util.parallel import run_sp +from lenskit.sharing import persist_binpickle from lenskit.util import Stopwatch +from lenskit.util.parallel import run_sp _log = logging.getLogger(__name__) diff --git a/lenskit/crossfold.py b/lenskit/crossfold.py index 0afa94c84..e13c0e3dd 100644 --- a/lenskit/crossfold.py +++ b/lenskit/crossfold.py @@ -8,9 +8,9 @@ Data set cross-folding. """ -from collections import namedtuple import logging from abc import ABC, abstractmethod +from collections import namedtuple import numpy as np import pandas as pd diff --git a/lenskit/data/matrix.py b/lenskit/data/matrix.py index fed054ed3..287662b06 100644 --- a/lenskit/data/matrix.py +++ b/lenskit/data/matrix.py @@ -8,12 +8,12 @@ Data manipulation routines. """ -from collections import namedtuple import logging +from collections import namedtuple -import scipy.sparse as sps import numpy as np import pandas as pd +import scipy.sparse as sps from csr import CSR _log = logging.getLogger(__name__) diff --git a/lenskit/datasets/fetch.py b/lenskit/datasets/fetch.py index 68399904d..26433ad33 100644 --- a/lenskit/datasets/fetch.py +++ b/lenskit/datasets/fetch.py @@ -4,12 +4,12 @@ # Licensed under the MIT license, see LICENSE.md for details. # SPDX-License-Identifier: MIT -import sys -from zipfile import ZipFile -from urllib.request import urlopen import argparse -from pathlib import Path import logging +import sys +from pathlib import Path +from urllib.request import urlopen +from zipfile import ZipFile _log = logging.getLogger("lenskit.datasets.fetch") diff --git a/lenskit/datasets/movielens.py b/lenskit/datasets/movielens.py index dd61c9ce6..9c6afa5bf 100644 --- a/lenskit/datasets/movielens.py +++ b/lenskit/datasets/movielens.py @@ -8,12 +8,13 @@ Code to import commonly-used RecSys data sets into LensKit-compatible data frames. """ +import logging import os.path from pathlib import Path -import logging -import pandas as pd import numpy as np +import pandas as pd + from lenskit.util import cached _log = logging.getLogger(__name__) diff --git a/lenskit/math/solve.py b/lenskit/math/solve.py index 96997285b..608773cb4 100644 --- a/lenskit/math/solve.py +++ b/lenskit/math/solve.py @@ -8,10 +8,9 @@ Efficient solver routines. """ -import numpy as np - import cffi import numba as n +import numpy as np from numba.extending import get_cython_function_address __ffi = cffi.FFI() diff --git a/lenskit/metrics/topn.py b/lenskit/metrics/topn.py index f6f9988f4..680559f6e 100644 --- a/lenskit/metrics/topn.py +++ b/lenskit/metrics/topn.py @@ -10,6 +10,7 @@ import logging import warnings + import numpy as np import pandas as pd diff --git a/lenskit/sharing/__init__.py b/lenskit/sharing/__init__.py index 9d795c92e..289430d5d 100644 --- a/lenskit/sharing/__init__.py +++ b/lenskit/sharing/__init__.py @@ -8,12 +8,12 @@ Support for sharing and saving models and data structures. 
""" +import logging import os +import threading import warnings -from abc import abstractmethod, ABC +from abc import ABC, abstractmethod from contextlib import contextmanager -import threading -import logging _log = logging.getLogger(__name__) @@ -135,5 +135,5 @@ def persist(model, *, method=None): return method(model) -from .binpickle import persist_binpickle, BPKPersisted # noqa: E402,F401 -from .shm import persist_shm, SHMPersisted, SHM_AVAILABLE # noqa: E402,F401 +from .binpickle import BPKPersisted, persist_binpickle # noqa: E402,F401 +from .shm import SHM_AVAILABLE, SHMPersisted, persist_shm # noqa: E402,F401 diff --git a/lenskit/sharing/binpickle.py b/lenskit/sharing/binpickle.py index a17a72362..c6d89c353 100644 --- a/lenskit/sharing/binpickle.py +++ b/lenskit/sharing/binpickle.py @@ -4,15 +4,15 @@ # Licensed under the MIT license, see LICENSE.md for details. # SPDX-License-Identifier: MIT +import gc +import logging import os import pathlib import tempfile -import logging -import gc import binpickle -from . import sharing_mode, PersistedModel +from . import PersistedModel, sharing_mode _log = logging.getLogger(__name__) diff --git a/lenskit/sharing/shm.py b/lenskit/sharing/shm.py index ecdc41df8..64e266fd7 100644 --- a/lenskit/sharing/shm.py +++ b/lenskit/sharing/shm.py @@ -4,12 +4,12 @@ # Licensed under the MIT license, see LICENSE.md for details. # SPDX-License-Identifier: MIT -import sys import logging +import multiprocessing.shared_memory as shm import pickle -from . import sharing_mode, PersistedModel +import sys -import multiprocessing.shared_memory as shm +from . import PersistedModel, sharing_mode SHM_AVAILABLE = sys.platform != "win32" diff --git a/lenskit/topn.py b/lenskit/topn.py index 6223b45d0..9aa0ea295 100644 --- a/lenskit/topn.py +++ b/lenskit/topn.py @@ -4,13 +4,13 @@ # Licensed under the MIT license, see LICENSE.md for details. 
# SPDX-License-Identifier: MIT -import logging import functools as ft +import logging import numpy as np import pandas as pd -from .metrics.topn import * +from .metrics.topn import * # noqa: F403 from .util import Stopwatch _log = logging.getLogger(__name__) @@ -20,7 +20,7 @@ def _length(df, *args, **kwargs): return float(len(df)) -@bulk_impl(_length) +@bulk_impl(_length) # noqa: F405 def _bulk_length(df, *args): return df.groupby("LKRecID")["item"].count() diff --git a/lenskit/util/__init__.py b/lenskit/util/__init__.py index 01c14f704..0ebe9619e 100644 --- a/lenskit/util/__init__.py +++ b/lenskit/util/__init__.py @@ -9,14 +9,14 @@ """ import logging -from textwrap import dedent from copy import deepcopy +from textwrap import dedent from ..algorithms import Algorithm from .log import log_to_notebook, log_to_stderr # noqa: F401 -from .timing import Stopwatch # noqa: F401 -from .random import derivable_rng from .parallel import proc_count # noqa: F401 +from .random import derivable_rng +from .timing import Stopwatch # noqa: F401 try: import resource diff --git a/lenskit/util/accum.py b/lenskit/util/accum.py index 1cc419cc5..fda816f79 100644 --- a/lenskit/util/accum.py +++ b/lenskit/util/accum.py @@ -9,6 +9,7 @@ """ from numba import njit + from .array import swap diff --git a/lenskit/util/debug.py b/lenskit/util/debug.py index dd5d1ff42..06516b62a 100644 --- a/lenskit/util/debug.py +++ b/lenskit/util/debug.py @@ -17,10 +17,11 @@ Turn on verbose logging """ -import sys import logging -from typing import Optional +import sys from dataclasses import dataclass +from typing import Optional + import numba import threadpoolctl diff --git a/lenskit/util/log.py b/lenskit/util/log.py index dade6fc27..1e9c1cf31 100644 --- a/lenskit/util/log.py +++ b/lenskit/util/log.py @@ -8,10 +8,9 @@ Logging utilities. """ -import sys import logging +import sys from logging.handlers import QueueListener -import multiprocessing as mp _log = logging.getLogger(__name__) _lts_initialized = False diff --git a/lenskit/util/parallel.py b/lenskit/util/parallel.py index f54c95278..89b367b6d 100644 --- a/lenskit/util/parallel.py +++ b/lenskit/util/parallel.py @@ -8,21 +8,21 @@ Utilities for parallel processing. 
""" -import os -import multiprocessing as mp -from multiprocessing.queues import SimpleQueue +import faulthandler import functools as ft import logging import logging.handlers -import faulthandler -from concurrent.futures import ProcessPoolExecutor -from abc import ABC, abstractmethod +import multiprocessing as mp +import os import pickle -from threadpoolctl import threadpool_limits +from abc import ABC, abstractmethod +from concurrent.futures import ProcessPoolExecutor +from multiprocessing.queues import SimpleQueue import seedbank +from threadpoolctl import threadpool_limits -from lenskit.sharing import persist, PersistedModel +from lenskit.sharing import PersistedModel, persist from lenskit.util.log import log_queue _log = logging.getLogger(__name__) diff --git a/lenskit/util/random.py b/lenskit/util/random.py index f27416575..b6bb6d322 100644 --- a/lenskit/util/random.py +++ b/lenskit/util/random.py @@ -9,7 +9,6 @@ """ import numpy as np - import seedbank diff --git a/lenskit/util/test.py b/lenskit/util/test.py index 703a7c940..a9699ca00 100644 --- a/lenskit/util/test.py +++ b/lenskit/util/test.py @@ -14,11 +14,11 @@ import pytest -from lenskit.datasets import MovieLens, ML100K -from lenskit.crossfold import simple_test_pair from lenskit.algorithms.basic import PopScore from lenskit.algorithms.ranking import PlackettLuce from lenskit.batch import recommend +from lenskit.crossfold import simple_test_pair +from lenskit.datasets import ML100K, MovieLens ml_test = MovieLens("data/ml-latest-small") ml100k = ML100K("data/ml-100k") From 385a9b6fb8421004dc621f6c433200dbcd918af9 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Sat, 2 Mar 2024 19:16:43 -0500 Subject: [PATCH 3/8] update conf.py --- docs/conf.py | 106 ++++++++++++++++++++++++++++++++++++++++++++++--- pyproject.toml | 1 - 2 files changed, 100 insertions(+), 7 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 4a05e78bf..7a5c0d2e2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -6,42 +6,136 @@ import os import sys +from importlib import import_module # noqa: F401 + sys.path.insert(0, os.path.abspath("..")) +import sphinx_rtd_theme # noqa: F401 + import lenskit +# -- Project information ----------------------------------------------------- + project = "LensKit" -copyright = "2023 Michael Ekstrand" +copyright = "2018–2024 Drexel University, Boise State University, and collaborators" author = "Michael D. Ekstrand" +# The short X.Y version +version = ".".join(lenskit.__version__.split(".")[:2]) +# The full version, including alpha/beta/rc tags release = lenskit.__version__ + extensions = [ + "myst_nb", "sphinx.ext.napoleon", "sphinx.ext.autodoc", "sphinx.ext.autosummary", "sphinx.ext.intersphinx", + "sphinx.ext.mathjax", "sphinxext.opengraph", + "sphinxcontrib.bibtex", + "sphinx_rtd_theme", ] +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +# source_suffix = ['.rst', '.md'] source_suffix = ".rst" +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path . +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] + +# The name of the Pygments (syntax highlighting) style to use. 
pygments_style = "sphinx" highlight_language = "python3" -html_theme = "furo" + +html_theme = "sphinx_rtd_theme" html_theme_options = { + # 'github_user': 'lenskit', + # 'github_repo': 'lkpy', + # 'travis_button': False, + # 'canonical_url': 'https://lkpy.lenskit.org/', + # 'font_family': 'Charter, serif' + # 'font_family': '"Source Sans Pro", "Georgia Pro", Georgia, serif', + # 'font_size': '15px', + # 'head_font_family': '"Merriweather Sans", "Arial", sans-serif', + # 'code_font_size': '1em', + # 'code_font_family': '"Source Code Pro", "Consolas", "Menlo", sans-serif' } + templates_path = ["_templates"] +html_static_path = ["_static"] + +# Custom sidebar templates, must be a dictionary that maps document names +# to template names. +# +# The default sidebars (for documents that don't match any pattern) are +# defined by theme itself. Builtin themes are using these templates by +# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', +# 'searchbox.html']``. +# +# html_sidebars = {} + + +# -- Options for HTMLHelp output --------------------------------------------- + +# Output file base name for HTML help builder. +htmlhelp_basename = "LensKitdoc" + +# -- Options for LaTeX output ------------------------------------------------ + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# -- Extension configuration ------------------------------------------------- + +# -- Options for intersphinx extension --------------------------------------- + +# Example configuration for intersphinx: refer to the Python standard library. 
intersphinx_mapping = { "python": ("https://docs.python.org/3/", None), + "pandas": ("http://pandas.pydata.org/pandas-docs/stable/", None), "numpy": ("https://docs.scipy.org/doc/numpy/", None), + "scipy": ("https://docs.scipy.org/doc/scipy/reference/", None), + "scikit": ("https://scikit-learn.org/stable/", None), "sklearn": ("https://scikit-learn.org/stable/", None), + "binpickle": ("https://binpickle.lenskit.org/en/stable/", None), + "csr": ("https://csr.lenskit.org/en/latest/", None), + "seedbank": ("https://seedbank.lenskit.org/en/latest/", None), } -autodoc_default_options = { - "members": True, - "member-order": "bysource" -} +autodoc_default_options = {"members": True, "member-order": "bysource", "show-inheritance": True} autodoc_typehints = "description" + +bibtex_bibfiles = ["lenskit.bib"] +jupyter_execute_notebooks = "off" + +# -- Module Canonicalization ------------------------------------------------ + +# cleanups = { +# 'lenskit': ['Algorithm', 'Recommender', 'Predictor', 'CandidateSelector'] +# } + +# for module, objects in cleanups.items(): +# mod = import_module(module) +# for name in objects: +# obj = getattr(mod, name) +# obj.__module__ = module diff --git a/pyproject.toml b/pyproject.toml index 4be03f645..f9354ddc9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -118,7 +118,6 @@ target-version = "py310" exclude = [ ".git", "__pycache__", - "docs/conf.py", "build", "dist", ] From 74c24cdd9160563140e48b19456ab4be5dbaee28 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Sat, 2 Mar 2024 19:17:38 -0500 Subject: [PATCH 4/8] clean overlong lines --- lenskit/algorithms/item_knn.py | 65 +++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/lenskit/algorithms/item_knn.py b/lenskit/algorithms/item_knn.py index c1ff6834e..35d9523c4 100644 --- a/lenskit/algorithms/item_knn.py +++ b/lenskit/algorithms/item_knn.py @@ -208,58 +208,67 @@ def _predict_sum(model, nitems, nrange, ratings, rated, targets): class ItemItem(Predictor): """ - Item-item nearest-neighbor collaborative filtering with ratings. This item-item implementation - is not terribly configurable; it hard-codes design decisions found to work well in the previous - Java-based LensKit code :cite:p:`Ekstrand2011-bp`. This implementation is based on the description - of item-based CF by :cite:t:`Deshpande2004-ht`, and produces results equivalent to Java LensKit. + Item-item nearest-neighbor collaborative filtering with ratings. This + item-item implementation is not terribly configurable; it hard-codes design + decisions found to work well in the previous Java-based LensKit code + :cite:p:`Ekstrand2011-bp`. This implementation is based on the description + of item-based CF by :cite:t:`Deshpande2004-ht`, and produces results + equivalent to Java LensKit. The k-NN predictor supports several aggregate functions: ``weighted-average`` - The weighted average of the user's rating values, using item-item similarities as - weights. + The weighted average of the user's rating values, using item-item + similarities as weights. ``sum`` - The sum of the similarities between the target item and the user's rated items, - regardless of the rating the user gave the items. + The sum of the similarities between the target item and the user's rated + items, regardless of the rating the user gave the items. 
Args: nnbrs(int): - the maximum number of neighbors for scoring each item (``None`` for unlimited) + the maximum number of neighbors for scoring each item (``None`` for + unlimited) min_nbrs(int): the minimum number of neighbors for scoring each item min_sim(float): minimum similarity threshold for considering a neighbor save_nbrs(float): the number of neighbors to save per item in the trained model (``None`` for unlimited) feedback(str): - Control how feedback should be interpreted. Specifies defaults for the other - settings, which can be overridden individually; can be one of the following values: + Control how feedback should be interpreted. Specifies defaults for + the other settings, which can be overridden individually; can be one + of the following values: ``explicit`` - Configure for explicit-feedback mode: use rating values, center ratings, and - use the ``weighted-average`` aggregate method for prediction. This is the - default setting. + Configure for explicit-feedback mode: use rating values, center + ratings, and use the ``weighted-average`` aggregate method for + prediction. This is the default setting. ``implicit`` - Configure for implicit-feedback mode: ignore rating values, do not center ratings, - and use the ``sum`` aggregate method for prediction. + Configure for implicit-feedback mode: ignore rating values, do + not center ratings, and use the ``sum`` aggregate method for + prediction. center(bool): - whether to normalize (mean-center) rating vectors prior to computing similarities - and aggregating user rating values. Defaults to ``True``; turn this off when working - with unary data and other data types that don't respond well to centering. + whether to normalize (mean-center) rating vectors prior to computing + similarities and aggregating user rating values. Defaults to + ``True``; turn this off when working with unary data and other data + types that don't respond well to centering. aggregate(str): - the type of aggregation to do. Can be ``weighted-average`` (the default) or ``sum``. + the type of aggregation to do. Can be ``weighted-average`` (the + default) or ``sum``. use_ratings(bool): - whether or not to use the rating values. If ``False``, it ignores rating values and - considers an implicit feedback signal of 1 for every (user,item) pair present. + whether or not to use the rating values. If ``False``, it ignores + rating values and considers an implicit feedback signal of 1 for + every (user,item) pair present. Attributes: item_index_(pandas.Index): the index of item IDs. item_means_(numpy.ndarray): the mean rating for each known item. - item_counts_(numpy.ndarray): the number of saved neighbors for each item. - sim_matrix_(matrix.CSR): the similarity matrix. - user_index_(pandas.Index): the index of known user IDs for the rating matrix. - rating_matrix_(matrix.CSR): the user-item rating matrix for looking up users' ratings. + item_counts_(numpy.ndarray): the number of saved neighbors for each + item. sim_matrix_(matrix.CSR): the similarity matrix. + user_index_(pandas.Index): the index of known user IDs for the rating + matrix. rating_matrix_(matrix.CSR): the user-item rating matrix for + looking up users' ratings. 
""" IGNORED_PARAMS = ["feedback"] @@ -312,12 +321,12 @@ def _check_setup(self): ) if self.aggregate == "weighted-average": _logger.warning( - "item-item configured to ignore ratings, but using weighted averages - likely bug" + "item-item ignoring ratings but using weighted averages - likely bug" ) warnings.warn( util.clean_str( """ - item-item configured to ignore ratings, but use weighted averages. This configuration + item-item ignoring ratings but using weighted averages. This configuration is unlikely to work well. """ ), From 262d9e1bbc1b71e888444de11b732aecd77840c2 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Sat, 2 Mar 2024 19:18:12 -0500 Subject: [PATCH 5/8] clean movielens warnings --- lenskit/datasets/movielens.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lenskit/datasets/movielens.py b/lenskit/datasets/movielens.py index 9c6afa5bf..4155cd5f2 100644 --- a/lenskit/datasets/movielens.py +++ b/lenskit/datasets/movielens.py @@ -92,7 +92,7 @@ def movies(self): 5 Father of the Bride Part II (1995) Comedy ... [9125 rows x 2 columns] - """ + """ # noqa: E501 fn = self.path / "movies.csv" movies = pd.read_csv( @@ -377,7 +377,7 @@ def movies(self): 5 Father of the Bride Part II (1995) Comedy ... [10681 rows x 2 columns] - """ + """ # noqa: E501 fn = self.path / "movies.dat" movies = pd.read_csv( fn, From df46c872a06bfed2a6b9120ae9f7ca7c80e9639d Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Sat, 2 Mar 2024 18:55:42 -0500 Subject: [PATCH 6/8] specify dependencies more clearly --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f9354ddc9..389ae5598 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,8 +41,8 @@ dev = [ "ruff >= 0.2", "copier ==9.*", "unbeheader ~= 1.3", # p2c: -p - "ipython", - "pyproject2conda", + "ipython >= 7", + "pyproject2conda ~=0.11", "sphinx-autobuild >= 2021", "lenskit-build-helpers >=0.1", # p2c: -p "invoke >=1", From 8508bd92d6bece819eed71780dce99c0942410dd Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Sat, 2 Mar 2024 18:58:56 -0500 Subject: [PATCH 7/8] use uv to install minimum dependencies --- .github/workflows/test.yml | 5 ++--- min-constraints.txt | 7 ------- 2 files changed, 2 insertions(+), 10 deletions(-) delete mode 100644 min-constraints.txt diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8d069df17..6ce949a05 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -195,9 +195,8 @@ jobs: - name: Set up Python deps run: | set -e - python -m pip install -U pip wheel - python -m pip install -c min-constraints.txt .[test] - python -m pip install pytest-cov + python -m pip install -U 'uv>=0.1.13' + uv pip install --system --resolution=lowest -e '.[test]' - name: Inspect environment run: | diff --git a/min-constraints.txt b/min-constraints.txt deleted file mode 100644 index 1a8c793f1..000000000 --- a/min-constraints.txt +++ /dev/null @@ -1,7 +0,0 @@ -pandas==1.4.0 -numpy==1.22.0 -scipy==1.8.0 -numba==0.56.0 -cffi==1.15.0 -binpickle==0.3.2 -scikit-learn==1.1.0 From 95d63779df95cbe186dc5c80dbe18d8966ecdf33 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Sat, 2 Mar 2024 19:34:46 -0500 Subject: [PATCH 8/8] change uv resolution to lowest-direct --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6ce949a05..93fe2c2c7 100644 --- a/.github/workflows/test.yml +++ 
b/.github/workflows/test.yml @@ -196,7 +196,7 @@ jobs: run: | set -e python -m pip install -U 'uv>=0.1.13' - uv pip install --system --resolution=lowest -e '.[test]' + uv pip install --system --resolution=lowest-direct -e '.[test]' - name: Inspect environment run: |
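Much of this patch series reformats the ImplicitMF and ItemItem docstrings, which describe those algorithms' training and neighborhood parameters. For context, the following is a minimal usage sketch of how such algorithms are typically driven; it is not part of the patches themselves. The data path mirrors the `data/ml-latest-small` path used in `lenskit/util/test.py`, and the hyperparameter values are illustrative assumptions rather than recommended settings.

    # Illustrative sketch only; hyperparameter values and the data path are
    # assumptions, not settings prescribed by these patches.
    from lenskit.algorithms import Recommender
    from lenskit.algorithms.als import ImplicitMF
    from lenskit.batch import recommend
    from lenskit.crossfold import simple_test_pair
    from lenskit.datasets import MovieLens

    # Load ratings and split a simple train/test pair.
    ratings = MovieLens("data/ml-latest-small").ratings
    train, test = simple_test_pair(ratings)

    # Implicit-feedback ALS as documented above: `weight` is the alpha scaling
    # factor, and `method` selects the conjugate-gradient ('cg') or
    # LU-decomposition ('lu') solver.
    algo = ImplicitMF(50, iterations=20, reg=0.1, weight=40, method="cg")

    # The item-item k-NN class documented above can be swapped in the same way,
    # e.g. ItemItem(20, feedback="implicit") from lenskit.algorithms.item_knn.

    # Wrap in a top-N recommender, train, and produce 10 recommendations per user.
    fittable = Recommender.adapt(algo)
    fittable.fit(train)
    recs = recommend(fittable, test["user"].unique(), n=10)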