chore: remove redundant comments
shengchenyang committed Apr 16, 2024
1 parent ddc4931 commit cfc1eda
Showing 17 changed files with 11 additions and 41 deletions.
1 change: 0 additions & 1 deletion ayugespidertools/common/encryption.py
@@ -72,5 +72,4 @@ def uni_to_chr(uni: str) -> str:
"""
_uni = re.sub(r"^(0x|U\+|uni)", "", uni)
unicode_value = int(_uni, 16)
# 使用 chr() 函数将整数值转换为字符
return chr(unicode_value)
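
For reference, a self-contained sketch of the uni_to_chr logic shown above, converting a hex code point string to its character; the sample inputs are illustrative, not taken from the project:

import re

def uni_to_chr(uni: str) -> str:
    # Strip an optional "0x", "U+", or "uni" prefix, then parse the remaining hex code point.
    _uni = re.sub(r"^(0x|U\+|uni)", "", uni)
    unicode_value = int(_uni, 16)
    return chr(unicode_value)

# All three spellings resolve to the same character (U+4E2D is "中").
assert uni_to_chr("U+4E2D") == uni_to_chr("0x4E2D") == uni_to_chr("uni4E2D") == "中"
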
3 changes: 0 additions & 3 deletions ayugespidertools/common/expend.py
@@ -67,14 +67,11 @@ def _connect(
                 logger.warning(
                     f"Target database {mysql_conf.database} does not exist, trying to create it..."
                 )
-                # If connecting fails because the target database does not exist, create it first
                 ReuseOperation.create_database(db_conf=mysql_conf)
             else:
                 logger.error(f"connect to mysql failed: {e}")
         else:
-            # The connection succeeded, so return the connection object directly
             return conn
-        # After the missing database has been created, return a fresh connection object
         return pymysql.connect(**pymysql_conn_args)

     def _get_sql_by_item(
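
The pattern above — connect, and if the target database is missing create it and reconnect — can be sketched roughly as follows; the error code 1049 (unknown database) and all names here are assumptions for illustration, not the project's exact implementation:

import pymysql

def connect_with_auto_create(conn_args: dict):
    """Return a connection, creating the target database first if it is missing (sketch)."""
    try:
        return pymysql.connect(**conn_args)
    except pymysql.err.OperationalError as e:
        # 1049 = "Unknown database": create it once, then retry the connection.
        if e.args[0] == 1049:
            tmp_args = {k: v for k, v in conn_args.items() if k != "database"}
            tmp_conn = pymysql.connect(**tmp_args)
            try:
                with tmp_conn.cursor() as cursor:
                    cursor.execute(
                        f"CREATE DATABASE IF NOT EXISTS `{conn_args['database']}` CHARACTER SET utf8mb4"
                    )
            finally:
                tmp_conn.close()
            return pymysql.connect(**conn_args)
        raise
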
2 changes: 0 additions & 2 deletions ayugespidertools/common/mongodbpipe.py
@@ -36,11 +36,9 @@ def _get_insert_data(self, item_dict: dict) -> Tuple[dict, str]:
         )
         table_name = item_dict["_table"]
         judge_item = next(iter(insert_data.values()))
-        # judge_item is a namedtuple instance
         if ReuseOperation.is_namedtuple_instance(judge_item):
             insert_data = {v: insert_data[v].key_value for v in insert_data.keys()}
             table_name = item_dict["_table"].key_value
-        # otherwise it is a plain dict and insert_data is used as-is
         return insert_data, table_name

     def process_item_template(
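
The branch above relies on ReuseOperation.is_namedtuple_instance; a helper like that is usually just the standard duck-typing check sketched below (an assumption about the helper, shown only to make the branch easier to read):

def is_namedtuple_instance(obj) -> bool:
    # A namedtuple is a tuple subclass that carries _fields and _asdict attributes.
    return isinstance(obj, tuple) and hasattr(obj, "_fields") and hasattr(obj, "_asdict")
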
6 changes: 1 addition & 5 deletions ayugespidertools/common/multiplexing.py
@@ -230,7 +230,6 @@ def reshape_item(cls, item_dict: Dict[str, Any]) -> AlterItem:
             dict_conf=item_dict, keys=["_mongo_update_rule", "_table"]
         )
         judge_item = next(iter(insert_data.values()))
-        # judge_item is a namedtuple instance
         if cls.is_namedtuple_instance(judge_item):
             is_namedtuple = True
             _table_name = item_dict["_table"].key_value
@@ -239,7 +238,7 @@ def reshape_item(cls, item_dict: Dict[str, Any]) -> AlterItem:
             for key, value in insert_data.items():
                 new_item[key] = value.key_value
                 notes_dic[key] = value.notes
-        # otherwise it is a plain dict
+
         else:
             _table_name = item_dict["_table"]
             table_info = AlterItemTable(_table_name, "")
@@ -317,7 +316,6 @@ def get_items_by_keys(
         Returns:
             1). dict of the requested values, or False when the requirement is not met
         """
-        # The params must first satisfy the minimum requirement before the restricted values are taken; otherwise an empty dict is returned
         return (
             {k: dict_conf[k] for k in keys}
             if cls.is_dict_meet_min_limit(dict_conf=dict_conf, key_list=keys)
@@ -468,8 +466,6 @@ def get_ck_dict_from_headers(headers_ck_str: str) -> dict:
         Returns:
             1). the ck converted to dict format
         """
-        # It could also be written like this, but that is not recommended:
-        # dict(line.split("=", 1) for line in headers_ck_str.split("; "))
         return {
             x.split("=", 1)[0].strip(): x.split("=", 1)[1].strip()
             for x in headers_ck_str.split(";")
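
For context, the cookie-string conversion above can be exercised as in the sketch below; the header value is made up for illustration:

def get_ck_dict_from_headers(headers_ck_str: str) -> dict:
    # Split on ";" and then on the first "=", trimming whitespace around keys and values.
    return {
        x.split("=", 1)[0].strip(): x.split("=", 1)[1].strip()
        for x in headers_ck_str.split(";")
    }

print(get_ck_dict_from_headers("token=abc123; uid=42"))
# -> {'token': 'abc123', 'uid': '42'}
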
4 changes: 2 additions & 2 deletions ayugespidertools/common/mysqlerrhandle.py
@@ -82,10 +82,10 @@ def _get_column_type(
             cursor.execute(sql)
             lines = cursor.fetchall()
             if isinstance(lines, list):
-                # Note: the structure returned here by AyuMysqlPipeline looks like: [{'COLUMN_TYPE': 'varchar(190)'}]
+                # The structure returned here by AyuMysqlPipeline looks like: [{'COLUMN_TYPE': 'varchar(190)'}]
                 column_type = lines[0]["COLUMN_TYPE"] if len(lines) == 1 else ""
             else:
-                # Note: the structure returned here by AyuTwistedMysqlPipeline looks like: (('varchar(10)',),)
+                # The structure returned here by AyuTwistedMysqlPipeline looks like: (('varchar(10)',),)
                 column_type = lines[0][0] if len(lines) == 1 else ""

         except Exception as e:
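
The two branches above only differ in the cursor row shape; a tiny standalone illustration of that normalization (sample rows invented):

def extract_column_type(lines) -> str:
    # Dict-style rows, e.g. [{'COLUMN_TYPE': 'varchar(190)'}]
    if isinstance(lines, list):
        return lines[0]["COLUMN_TYPE"] if len(lines) == 1 else ""
    # Tuple-style rows, e.g. (('varchar(10)',),)
    return lines[0][0] if len(lines) == 1 else ""

print(extract_column_type([{"COLUMN_TYPE": "varchar(190)"}]))  # varchar(190)
print(extract_column_type((("varchar(10)",),)))                # varchar(10)
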
2 changes: 0 additions & 2 deletions ayugespidertools/common/params.py
@@ -11,8 +11,6 @@ class Param:
     retry_num = 3
     retry_time_min = 200
     retry_time_max = 1000
-    # stop_max_delay limits the maximum total retry time
-    stop_max_delay = 5000

     aiohttp_retry_times_default = 3

2 changes: 1 addition & 1 deletion ayugespidertools/common/typevars.py
@@ -1,4 +1,4 @@
-# Define your TypeVar here
+# Define your Types here
import threading
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, List, Literal, NamedTuple, Optional, TypeVar, Union
7 changes: 3 additions & 4 deletions ayugespidertools/common/utils.py
@@ -204,7 +204,6 @@ def extract_with_json_rules(cls, json_data: dict, query_rules: List["Str_Lstr"])
         Returns:
             1). the extracted content
         """
-        # First check the nesting depth, which may be at most 2
        depth_num = ReuseOperation.get_array_depth(query_rules)
        assert depth_num <= 2, "query_rules parameter error, please pass a parameter whose depth is at most 2!"

@@ -312,7 +311,7 @@ def gen_tracks(distance):
         t_list = [random.randint(50, 160)]
         x_list = [random.randint(5, 11)]
         y_list = []
-        # 生成x坐标轨迹, 生成t坐标轨迹
+        # 生成 x 坐标轨迹, 生成 t 坐标轨迹
         for j in range(1, distance):
             x_list.append(x_list[j - 1] + random.randint(2, 4))
             if x_list[j] > distance:
@@ -326,7 +325,7 @@ def gen_tracks(distance):
                 break

         length = len(x_list)
-        # 生成y坐标轨迹
+        # 生成 y 坐标轨迹
         for i in range(1, length + 1):
             if i < int(length * 0.4):
                 y_list.append(0)
@@ -340,7 +339,7 @@ def gen_tracks(distance):
                 y_list.append(-4)
             t_list.append(t_list[i - 1] + random.randint(20, 80))

-        # 生成t的坐标
+        # 生成 t 的坐标
         xyt = list(zip(x_list, y_list, t_list))
         for j in range(length):
             xyt[j] = list(xyt[j])
1 change: 0 additions & 1 deletion ayugespidertools/config.py
@@ -11,7 +11,6 @@
 class NormalConfig:
     """Holds the common configuration for this project"""

-    # The project root directory and other required directories
     CONFIG_DIR = Path(__file__).parent
     ROOT_DIR = CONFIG_DIR.parent
     COMMON_DIR = CONFIG_DIR / "common"
6 changes: 1 addition & 5 deletions ayugespidertools/extras/cvnpil.py
@@ -114,15 +114,13 @@ def _template_match(
         # Coordinates of the top-left corner
         tl = max_loc

-        # Whether to write out the annotated image
         if out:
             # Draw the bounding rectangle
             th, tw = slider.shape[:2]
             # Coordinates of the bottom-right corner
             br = (tl[0] + tw, tl[1] + th)
             cv2.rectangle(bg, tl, br, (0, 0, 255), 2)
             cv2.imwrite(out, bg)
-        # Return the X coordinate of the gap
         return tl[0]

     @classmethod
@@ -150,7 +148,6 @@ def discern_gap(
         bg_img = cls.image_edge_detection(bg_cv)
         slider_img = cv2.cvtColor(slider_img, cv2.COLOR_GRAY2RGB)
         bg_img = cv2.cvtColor(bg_img, cv2.COLOR_GRAY2RGB)
-        # Output the x coordinate, i.e. the position of the slider on the image
         return cls._template_match(bg_img, slider_img, out)

     @classmethod
@@ -216,11 +213,10 @@ def match_gap(bg: Union[str, bytes], slider: Union[str, bytes]):
             if len(loc[1]) > 1:
                 rgt += (rgt - lft) / 2
             elif len(loc[1]) == 1:
-                # 找到目标区域起点x坐标为:loc[1][0]
+                # 找到目标区域起点 x 坐标为:loc[1][0]
                 break
             elif len(loc[1]) < 1:
                 rgt -= (rgt - lft) / 2
-        # Return the x coordinate
         return loc[1][0]

     @staticmethod
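
The gap-matching code above is built on OpenCV template matching; a minimal sketch of that technique, with illustrative file names:

import cv2

def find_gap_x(bg_path: str, slider_path: str) -> int:
    # Read both images as grayscale and slide the template over the background.
    bg = cv2.imread(bg_path, cv2.IMREAD_GRAYSCALE)
    slider = cv2.imread(slider_path, cv2.IMREAD_GRAYSCALE)
    res = cv2.matchTemplate(bg, slider, cv2.TM_CCOEFF_NORMED)
    # max_loc is the top-left corner of the best match; its first element is the gap's x offset.
    _, _, _, max_loc = cv2.minMaxLoc(res)
    return max_loc[0]

# Illustrative usage with made-up file names:
# print(find_gap_x("bg.png", "slider.png"))
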
1 change: 0 additions & 1 deletion ayugespidertools/formatdata.py
@@ -38,7 +38,6 @@ def click_point_deal(decimal: float, decimal_places: int = 2) -> float:
     Returns:
         1). the decimal rounded to the given number of places
     """
-    # First build the format string for the number of places to keep
     decimal_deal = f"%.{decimal_places}f"
     return float(decimal_deal % decimal)

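
click_point_deal rounds via printf-style formatting rather than round(); a small usage sketch with invented values:

def click_point_deal(decimal: float, decimal_places: int = 2) -> float:
    # Build a format string such as "%.2f", format the value, then convert back to float.
    decimal_deal = f"%.{decimal_places}f"
    return float(decimal_deal % decimal)

print(click_point_deal(3.14159))      # 3.14
print(click_point_deal(3.14159, 3))   # 3.142
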
2 changes: 0 additions & 2 deletions ayugespidertools/scraper/middlewares/headers/ua.py
@@ -25,7 +25,6 @@ def __init__(self):
         self.explorer_weights = None

     def get_random_ua_by_weight(self) -> str:
-        # First pick the required browser type according to its weight
         explorer_types = random.choices(
             self.explorer_types, weights=self.explorer_weights
         )
@@ -53,5 +52,4 @@ def spider_opened(self, spider: "AyuSpider") -> None:
         )

     def process_request(self, request: "Request", spider: "AyuSpider") -> None:
-        # Pick a random User-Agent according to the configured weights
         request.headers.setdefault(b"User-Agent", self.get_random_ua_by_weight())
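
Weight-based selection of a browser family, as used by get_random_ua_by_weight above, boils down to random.choices with a weights argument; a small sketch with invented weights:

import random

explorer_types = ["chrome", "firefox", "edge", "safari"]
explorer_weights = [70, 15, 10, 5]

# random.choices returns a list, so take the single sampled element.
picked = random.choices(explorer_types, weights=explorer_weights)[0]
print(picked)  # e.g. "chrome" roughly 70% of the time
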
2 changes: 0 additions & 2 deletions ayugespidertools/scraper/middlewares/proxy/dynamic.py
@@ -30,8 +30,6 @@ def from_crawler(cls, crawler: "Crawler") -> "Self":
         return s

     def process_request(self, request: "Request", spider: "AyuSpider") -> None:
-        # TODO: pick an account at random by weight from DYNAMIC_PROXY_CONFIG
-        # account = ReuseOperation.random_weight(self.account_arr)
         if request.url.startswith("https://"):
             request.meta["proxy"] = (
                 f"https://{self.username}:{self.password}@{self.proxy_url}/"
6 changes: 3 additions & 3 deletions ayugespidertools/scraper/pipelines/msgproducer/kafkapub.py
@@ -31,9 +31,9 @@ def sendmsg(
         Args:
             topic: kafka topic
-            value: message value. Must be type bytes, or be
-                serializable to bytes via configured value_serializer. If value
-                is None, key is required and message acts as a 'delete'.
+            value: message value. Must be type bytes, or be serializable to
+                bytes via configured value_serializer. If value is None, key is
+                required and message acts as a 'delete'.
             key: kafka key
         """
         # Asynchronous by default
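
The docstring above mirrors kafka-python's producer semantics; a hedged usage sketch, assuming kafka-python and a made-up broker address, topic, and payload:

from kafka import KafkaProducer

producer = KafkaProducer(
    bootstrap_servers="localhost:9092",
    value_serializer=lambda v: v.encode("utf-8"),  # str -> bytes, matching the "serializable to bytes" note
)
future = producer.send("demo_topic", value='{"title": "hello"}', key=b"demo_key")
record_metadata = future.get(timeout=10)  # block here only to surface send errors in this sketch
producer.flush()
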
5 changes: 0 additions & 5 deletions ayugespidertools/scraper/pipelines/mysql/turbo.py
@@ -43,16 +43,11 @@ def open_spider(self, spider: "AyuSpider") -> None:
             spider.slog.warning("POOL_DB_CONFIG is not configured; its default parameters will be used")
             self.pool_db_conf = {
                 "maxconnections": 5,
-                # Maximum number of idle connections kept in the pool. Default 0, i.e. no limit
                 "maxcached": 0,
-                # Maximum number of times a connection may be reused. Default 0, i.e. no limit
                 "maxusage": 0,
-                # Whether to block when the maximum number of connections is reached. Default False, i.e. taking another connection beyond the limit raises an error
                 "blocking": True,
             }
         self.mysql_conf = spider.mysql_conf

-        # Check whether the target database can be connected; if it is missing, create it. The connection object itself is not needed, so close it right away
         self._connect(spider.mysql_conf).close()

         # Add the PooledDB configuration
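
The dictionary above feeds a DBUtils connection pool; a hedged sketch of how such a pool is typically built on top of pymysql (connection parameters are placeholders, and the project's exact wiring may differ):

import pymysql
from dbutils.pooled_db import PooledDB

pool = PooledDB(
    creator=pymysql,        # the DB-API module used to create raw connections
    maxconnections=5,       # upper bound on simultaneously open connections
    maxcached=0,            # idle connections kept in the pool (0 = no limit)
    maxusage=0,             # reuse count per connection (0 = no limit)
    blocking=True,          # block instead of raising when the pool is exhausted
    host="localhost",
    user="user",
    password="password",
    database="demo_db",
    charset="utf8mb4",
)

conn = pool.connection()
cursor = conn.cursor()
cursor.execute("SELECT 1")
print(cursor.fetchone())
cursor.close()
conn.close()  # returns the connection to the pool rather than really closing it
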
1 change: 0 additions & 1 deletion ayugespidertools/scraper/pipelines/mysql/twisted.py
@@ -25,7 +25,6 @@ def open_spider(self, spider: "AyuSpider") -> None:
         assert hasattr(spider, "mysql_conf"), "Mysql connection info is not configured!"
         self.slog = spider.slog
         self.mysql_conf = spider.mysql_conf
-        # Check whether the target database can be connected; if it is missing, create it
         self._connect(self.mysql_conf).close()

         _mysql_conf = {
1 change: 0 additions & 1 deletion ayugespidertools/scraper/spiders/__init__.py
@@ -102,7 +102,6 @@ def update_settings(cls, settings: "BaseSettings") -> None:
         )
         _normal_settings["VIT_DIR"] = vit_dir

-        # Fetch the corresponding inner_settings from the local configuration
         inner_settings = ReuseOperation.fetch_local_conf(
             vit_dir=vit_dir, inner_settings=_normal_settings
         )
