Skip to content

Commit

Permalink
style: add type hint
Browse files — browse the repository at this point in the history
  • Loading branch information
shengchenyang committed May 17, 2024
1 parent 1618654 commit d119790
Show file tree
Hide file tree
Showing 6 changed files with 61 additions and 27 deletions.
7 changes: 5 additions & 2 deletions ayugespidertools/commands/version.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import argparse
from typing import List

from scrapy.commands.version import Command

from ayugespidertools import __version__


class AyuCommand(Command):
def short_desc(self):
def short_desc(self) -> str:
return "Print AyugeSpiderTools version"

def run(self, args, opts):
def run(self, args: List[str], opts: argparse.Namespace) -> None:
print(f"AyugeSpiderTools {__version__}")
5 changes: 2 additions & 3 deletions ayugespidertools/common/typevars.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,10 @@
from sqlalchemy import create_engine

if TYPE_CHECKING:
import logging

from loguru import Logger
from scrapy.utils.log import SpiderLoggerAdapter

slogT = Union[Logger, logging.LoggerAdapter]
slogT = Union[Logger, SpiderLoggerAdapter]

NoneType = type(None)
I_Str = TypeVar("I_Str", int, str)
Expand Down
60 changes: 44 additions & 16 deletions ayugespidertools/utils/cmdline.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,42 @@
from __future__ import annotations

import argparse
import cProfile
import inspect
import os
import sys
from importlib.metadata import entry_points
from typing import TYPE_CHECKING, Callable, Dict, Iterable, List, Optional, Tuple, Type

from scrapy.commands import BaseRunSpiderCommand, ScrapyCommand, ScrapyHelpFormatter
from scrapy.crawler import CrawlerProcess
from scrapy.exceptions import UsageError
from scrapy.settings import BaseSettings, Settings
from scrapy.utils.misc import walk_modules
from scrapy.utils.project import get_project_settings, inside_project
from scrapy.utils.python import garbage_collect

from ayugespidertools import __version__

if TYPE_CHECKING:
# typing.ParamSpec requires Python 3.10
from typing_extensions import ParamSpec

_P = ParamSpec("_P")


class ScrapyArgumentParser(argparse.ArgumentParser):
def _parse_optional(self, arg_string):
# if starts with -: it means that is a parameter not an argument
def _parse_optional(
self, arg_string: str
) -> Optional[Tuple[Optional[argparse.Action], str, Optional[str]]]:
# if starts with -: it means that is a parameter not an argument
if arg_string[:2] == "-:":
return None

return super()._parse_optional(arg_string)


def _iter_command_classes(module_name):
def _iter_command_classes(module_name: str) -> Iterable[Type[ScrapyCommand]]:
# TODO: add `name` attribute to commands and merge this function with
# scrapy.utils.spider.iter_spider_classes
for module in walk_modules(module_name):
Expand All @@ -38,17 +50,19 @@ def _iter_command_classes(module_name):
yield obj


def _get_commands_from_module(module, inproject):
d = {}
def _get_commands_from_module(module: str, inproject: bool) -> Dict[str, ScrapyCommand]:
d: Dict[str, ScrapyCommand] = {}
for cmd in _iter_command_classes(module):
if inproject or not cmd.requires_project:
cmdname = cmd.__module__.split(".")[-1]
d[cmdname] = cmd()
return d


def _get_commands_from_entry_points(inproject, group="ayugespidertools.commands"):
cmds = {}
def _get_commands_from_entry_points(
inproject: bool, group: str = "ayugespidertools.commands"
) -> Dict[str, ScrapyCommand]:
cmds: Dict[str, ScrapyCommand] = {}
if sys.version_info >= (3, 10):
eps = entry_points(group=group)
else:
Expand All @@ -62,7 +76,9 @@ def _get_commands_from_entry_points(inproject, group="ayugespidertools.commands"
return cmds


def _get_commands_dict(settings, inproject):
def _get_commands_dict(
settings: BaseSettings, inproject: bool
) -> Dict[str, ScrapyCommand]:
cmds = _get_commands_from_module("ayugespidertools.commands", inproject)
cmds.update(_get_commands_from_entry_points(inproject))
cmds_module = settings["COMMANDS_MODULE"]
Expand All @@ -71,16 +87,17 @@ def _get_commands_dict(settings, inproject):
return cmds


def _pop_command_name(argv):
def _pop_command_name(argv: List[str]) -> Optional[str]:
i = 0
for arg in argv[1:]:
if not arg.startswith("-"):
del argv[i]
return arg
i += 1
return None


def _print_header(settings, inproject):
def _print_header(settings: BaseSettings, inproject: bool) -> None:
if inproject:
print(
f"AyugeSpiderTools {__version__} - active project: {settings['BOT_NAME']}\n"
Expand All @@ -89,7 +106,7 @@ def _print_header(settings, inproject):
print(f"AyugeSpiderTools {__version__} - no active project\n")


def _print_commands(settings, inproject):
def _print_commands(settings: BaseSettings, inproject: bool) -> None:
_print_header(settings, inproject)
print("Usage:")
print(" ayuge <command> [options] [args]\n")
Expand All @@ -104,13 +121,20 @@ def _print_commands(settings, inproject):
print('Use "ayuge <command> -h" to see more info about a command')


def _print_unknown_command(settings, cmdname, inproject):
def _print_unknown_command(
settings: BaseSettings, cmdname: str, inproject: bool
) -> None:
_print_header(settings, inproject)
print(f"Unknown command: {cmdname}\n")
print('Use "ayuge" to see available commands')


def _run_print_help(parser, func, *a, **kw):
def _run_print_help(
parser: argparse.ArgumentParser,
func: Callable[_P, None],
*a: _P.args,
**kw: _P.kwargs,
) -> None:
try:
func(*a, **kw)
except UsageError as e:
Expand All @@ -121,7 +145,9 @@ def _run_print_help(parser, func, *a, **kw):
sys.exit(2)


def execute(argv=None, settings=None):
def execute(
argv: Optional[List[str]] = None, settings: Optional[Settings] = None
) -> None:
if argv is None:
argv = sys.argv

Expand Down Expand Up @@ -163,14 +189,16 @@ def execute(argv=None, settings=None):
sys.exit(cmd.exitcode)


def _run_command(cmd, args, opts):
def _run_command(cmd: ScrapyCommand, args: List[str], opts: argparse.Namespace) -> None:
if opts.profile:
_run_command_profiled(cmd, args, opts)
else:
cmd.run(args, opts)


def _run_command_profiled(cmd, args, opts):
def _run_command_profiled(
cmd: ScrapyCommand, args: List[str], opts: argparse.Namespace
) -> None:
if opts.profile:
sys.stderr.write(
f"ayugespidertools: writing cProfile stats to {opts.profile!r}\n"
Expand Down
5 changes: 2 additions & 3 deletions tests/test_commands/test_commands_crawl.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@
import re
import subprocess
import sys
import tempfile
from pathlib import Path
from shutil import rmtree
from tempfile import mkdtemp
from tempfile import TemporaryFile, mkdtemp
from threading import Timer
from typing import Optional, Union

Expand All @@ -28,7 +27,7 @@ def tearDown(self):
rmtree(self.temp_path)

def call(self, *new_args, **kwargs):
with tempfile.TemporaryFile() as out:
with TemporaryFile() as out:
args = (sys.executable, "-m", "ayugespidertools.utils.cmdline") + new_args
return subprocess.call(
args, stdout=out, stderr=out, cwd=self.cwd, env=self.env, **kwargs
Expand Down
9 changes: 7 additions & 2 deletions tests/test_commands/test_commands_version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import argparse
import sys
from io import StringIO
from pathlib import Path
Expand All @@ -15,8 +16,12 @@

def test_version():
cmd = AyuCommand()
output = StringIO()
cmd.run([], {"stdout": output})
namespace = argparse.Namespace()
options = {"stdout": StringIO()}
for key, value in options.items():
setattr(namespace, key, value)

cmd.run([], namespace)
assert cmd.short_desc() == "Print AyugeSpiderTools version"


Expand Down
2 changes: 1 addition & 1 deletion tests/test_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ class _CrawlSpider(self.spider_class):
rules = (Rule(LinkExtractor(), process_links="dummy_process_links"),)

def dummy_process_links(self, links):
return links
yield from links

spider = _CrawlSpider()
output = list(spider._requests_to_follow(response))
Expand Down

0 comments on commit d119790

Please sign in to comment.