chore: add test rules & type hint
shengchenyang committed Mar 7, 2024
1 parent 23e7135 commit 3e0ce94
Showing 5 changed files with 40 additions and 19 deletions.
19 changes: 11 additions & 8 deletions ayugespidertools/scraper/http/request/form.py
@@ -1,4 +1,4 @@
-from typing import TYPE_CHECKING, Optional, Union
+from typing import TYPE_CHECKING, Any, Callable, Iterable, List, Optional, Tuple, Union

from scrapy import FormRequest

@@ -11,20 +11,23 @@
if TYPE_CHECKING:
    from ayugespidertools.common.typevars import AiohttpRequestArgs

+FormdataKVType = Tuple[str, Union[str, Iterable[str]]]
+FormdataType = Optional[Union[dict, List[FormdataKVType]]]


class AiohttpFormRequest(AiohttpRequest, FormRequest):
"""使用 aiohttp 发送 FormRequest 请求"""

    def __init__(
        self,
-        url=None,
-        callback=None,
-        method=None,
-        formdata=None,
-        body=None,
+        url: str = None,
+        callback: Optional[Callable] = None,
+        method: Optional[str] = None,
+        formdata: FormdataType = None,
+        body: Optional[Union[bytes, str]] = None,
        args: Optional[Union["AiohttpRequestArgs", dict]] = None,
-        **kwargs
-    ):
+        **kwargs: Any
+    ) -> None:
        # First init FormRequest to get url, body and method
        if formdata:
            FormRequest.__init__(self, url=url, method=method, formdata=formdata)
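As an aside, a minimal usage sketch of the updated signature (hypothetical spider code, not part of this commit), showing the two formdata shapes the new FormdataType alias admits: a plain dict, or a list of (key, value) tuples for repeated keys. The import path follows this file's location.

from ayugespidertools.scraper.http.request.form import AiohttpFormRequest

def start_requests(self):  # hypothetical method on an AyuSpider subclass
    # dict-style formdata
    yield AiohttpFormRequest(
        url="https://httpbin.org/post",
        formdata={"user": "demo", "pwd": "secret"},
        callback=self.parse,
    )
    # list-of-tuples formdata, which FormdataKVType also covers (repeated keys)
    yield AiohttpFormRequest(
        url="https://httpbin.org/post",
        formdata=[("tag", "a"), ("tag", "b")],
        callback=self.parse,
    )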
2 changes: 1 addition & 1 deletion ayugespidertools/scraper/middlewares/headers/ua.py
@@ -37,7 +37,7 @@ def from_crawler(cls, crawler: "Crawler") -> "Self":
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

-    def spider_opened(self, spider: "AyuSpider"):
+    def spider_opened(self, spider: "AyuSpider") -> None:
        # Weighted UA list; the weights are based on the stats printed by the fake_useragent library.
        ua_arr = [
            {"explorer": "opera", "weight": 16},
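For context, the weighted pick over ua_arr can be approximated with the standard library. This is a hedged stand-in for ReuseOperation.random_weight, whose implementation is not shown in this diff; every entry except opera is a placeholder.

import random

ua_arr = [
    {"explorer": "opera", "weight": 16},
    {"explorer": "chrome", "weight": 80},  # placeholder; the real list and weights are elided above
]
# pick one entry with probability proportional to its weight
picked = random.choices(ua_arr, weights=[u["weight"] for u in ua_arr], k=1)[0]
print(picked["explorer"])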
23 changes: 16 additions & 7 deletions ayugespidertools/scraper/middlewares/proxy/dynamic.py
@@ -1,10 +1,19 @@
import base64
+from typing import TYPE_CHECKING

from scrapy import signals

from ayugespidertools.common.multiplexing import ReuseOperation
from ayugespidertools.common.params import Param

+if TYPE_CHECKING:
+    from scrapy import Request
+    from scrapy.crawler import Crawler
+    from scrapy.settings import Settings
+    from typing_extensions import Self
+
+    from ayugespidertools.spiders import AyuSpider


class DynamicProxyDownloaderMiddleware:
"""动态隧道代理中间件"""
@@ -15,12 +24,12 @@ def __init__(self):
        self.password = None

    @classmethod
-    def from_crawler(cls, crawler):
+    def from_crawler(cls, crawler: "Crawler") -> "Self":
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

-    def process_request(self, request, spider):
+    def process_request(self, request: "Request", spider: "AyuSpider") -> None:
        # TODO: randomly pick an account from DYNAMIC_PROXY_CONFIG by weight
        # account = ReuseOperation.random_weight(self.account_arr)
        if request.url.startswith("https://"):
@@ -41,7 +50,7 @@ def process_request(self, request, spider):
        # Use gzip compression to speed up access
        request.headers["Accept-Encoding"] = "gzip"

-    def spider_opened(self, spider):
+    def spider_opened(self, spider: "AyuSpider") -> None:
        spider.slog.info(
            f"Dynamic tunnel proxy middleware: DynamicProxyDownloaderMiddleware enabled; active spider: {spider.name}"
        )
@@ -54,7 +63,7 @@ def spider_opened(self, spider):
class AbuDynamicProxyDownloaderMiddleware:
"""阿布云动态代理 - 隧道验证方式(其实和快代理的写法一致)"""

-    def __init__(self, settings):
+    def __init__(self, settings: "Settings") -> None:
        dynamic_proxy_conf = settings.get("DYNAMIC_PROXY_CONFIG", None)
        # Check whether the dynamic tunnel proxy config meets the minimum requirements
        is_match = ReuseOperation.is_dict_meet_min_limit(
@@ -70,17 +79,17 @@ def __init__(self, settings):
        self.password = dynamic_proxy_conf["password"]

    @classmethod
-    def from_crawler(cls, crawler):
+    def from_crawler(cls, crawler: "Crawler") -> "Self":
        s = cls(crawler.settings)
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

-    def spider_opened(self, spider):
+    def spider_opened(self, spider: "AyuSpider") -> None:
        spider.slog.info(
            f"Abuyun dynamic tunnel proxy middleware: AbuDynamicProxyDownloaderMiddleware enabled; active spider: {spider.name}"
        )

-    def process_request(self, request, spider):
+    def process_request(self, request: "Request", spider: "AyuSpider") -> None:
        if request.url.startswith("https://"):
            request.meta["proxy"] = f"https://{self.proxy_url}"
        elif request.url.startswith("http://"):
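Both proxy middlewares follow the same tunnel-proxy pattern: point request.meta["proxy"] at the tunnel endpoint and attach base64-encoded credentials. A self-contained sketch of that pattern with placeholder names; whether your provider expects the "Basic " prefix varies, so check its docs.

import base64

proxy_url = "tunnel.example.com:15818"  # placeholder tunnel endpoint
username, password = "user", "secret"   # placeholder credentials

meta_proxy = f"https://{proxy_url}"  # what would go into request.meta["proxy"]
token = base64.b64encode(f"{username}:{password}".encode("utf8")).decode("utf8")
proxy_auth = f"Basic {token}"  # some vendors accept the raw token without "Basic "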
14 changes: 11 additions & 3 deletions ayugespidertools/scraper/middlewares/proxy/exclusive.py
@@ -1,4 +1,5 @@
import base64
+from typing import TYPE_CHECKING

import requests
from scrapy import signals
@@ -7,6 +8,13 @@
"ExclusiveProxyDownloaderMiddleware",
]

+if TYPE_CHECKING:
+    from scrapy import Request
+    from scrapy.crawler import Crawler
+    from typing_extensions import Self
+
+    from ayugespidertools.spiders import AyuSpider


class ExclusiveProxyDownloaderMiddleware:
"""独享代理中间件"""
@@ -20,7 +28,7 @@ def __init__(self):
        self.proxy = None

    @classmethod
-    def from_crawler(cls, crawler):
+    def from_crawler(cls, crawler: "Crawler") -> "Self":
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s
@@ -39,7 +47,7 @@ def get_proxy_ip(self, proxy_url: str, index: int) -> str:
        except Exception:
            raise Exception("Failed to fetch the exclusive proxy; check that the exclusive proxy config and the network are OK.")

-    def process_request(self, request, spider):
+    def process_request(self, request: "Request", spider: "AyuSpider") -> None:
        if request.url.startswith("https://"):
            request.meta["proxy"] = f"https://{self.proxy}"
        elif request.url.startswith("http://"):
@@ -53,7 +61,7 @@ def process_request(self, request, spider):
        ).decode("utf8")
        request.headers["Proxy-Authorization"] = encoded_user_pass

-    def spider_opened(self, spider):
+    def spider_opened(self, spider: "AyuSpider") -> None:
        spider.slog.info(
            f"Exclusive proxy middleware: ExclusiveProxyDownloaderMiddleware enabled; active spider: {spider.name}"
        )
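For reference, a hedged sketch of the DYNAMIC_PROXY_CONFIG shape these middlewares validate. The key names are inferred from the attributes read above (proxy_url, username, password) and may not match the library exactly.

# Hypothetical settings shape; key names are assumptions, not confirmed by this diff.
DYNAMIC_PROXY_CONFIG = {
    "proxy": "tunnel.example.com:15818",
    "username": "user",
    "password": "secret",
}

# is_dict_meet_min_limit presumably verifies the required keys are present:
required = {"proxy", "username", "password"}
is_match = required.issubset(DYNAMIC_PROXY_CONFIG)  # True when all keys exist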
1 change: 1 addition & 0 deletions pyproject.toml
@@ -125,6 +125,7 @@ exclude_lines = [
"pragma: no cover",
"raise AssertionError",
"raise NotImplementedError",
"if TYPE_CHECKING:",
"__repr__",
"__str__",
]
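The new exclude_lines entry is there because an if TYPE_CHECKING: block is evaluated only by static type checkers, never at runtime, so coverage could never mark it as hit. A minimal illustration:

from typing import TYPE_CHECKING

if TYPE_CHECKING:  # never True at runtime, so coverage would always flag this block
    from scrapy.crawler import Crawler

def show_concurrency(crawler: "Crawler") -> None:
    # the quoted annotation means Crawler is only needed for type checking
    print(crawler.settings.get("CONCURRENT_REQUESTS"))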
