Skip to content

Commit

Permalink
style: oracle & spider module type hint
Browse files (browse the repository at this point in the history)
  • Loading branch information
shengchenyang committed Jan 18, 2024
1 parent ed6609a commit d9def24
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 44 deletions.
17 changes: 5 additions & 12 deletions ayugespidertools/scraper/pipelines/oracle/__init__.py
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
from typing import TYPE_CHECKING, Optional
from typing import TYPE_CHECKING

from ayugespidertools.common.expend import OraclePipeEnhanceMixin
from ayugespidertools.common.multiplexing import ReuseOperation
Expand All @@ -9,23 +9,16 @@
from oracledb.connection import Connection
from oracledb.cursor import Cursor

from ayugespidertools.common.typevars import AlterItem, OracleConf
from ayugespidertools.common.typevars import AlterItem


class AyuOraclePipeline(OraclePipeEnhanceMixin):
"""Oracle 存储场景的 scrapy pipeline 扩展的功能示例"""

def __init__(self):
self.oracle_conf: Optional["OracleConf"] = None
self.slog = None
self.conn: Optional["Connection"] = None
self.cursor: Optional["Cursor"] = None
conn: "Connection"
cursor: "Cursor"

def open_spider(self, spider):
assert hasattr(spider, "oracle_conf"), "未配置 Oracle 连接信息!"
self.slog = spider.slog
self.oracle_conf = spider.oracle_conf
self.conn = self._connect(self.oracle_conf)
self.conn = self._connect(spider.oracle_conf)
self.cursor = self.conn.cursor()

def process_item(self, item, spider):
Expand Down
29 changes: 13 additions & 16 deletions ayugespidertools/scraper/pipelines/oracle/twisted.py
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
from typing import TYPE_CHECKING, Optional
from typing import TYPE_CHECKING

from twisted.enterprise import adbapi

Expand All @@ -12,31 +12,28 @@
if TYPE_CHECKING:
from oracledb.connection import Connection

from ayugespidertools.common.typevars import OracleConf
from ayugespidertools.common.typevars import OracleConf, slogT


class AyuTwistedOraclePipeline(OraclePipeEnhanceMixin):
"""Oracle 存储场景下的异步操作"""

def __init__(self):
self.oracle_conf: Optional["OracleConf"] = None
self.slog = None
self.conn: Optional["Connection"] = None
self.dbpool = None
oracle_conf: "OracleConf"
slog: "slogT"
conn: "Connection"
dbpool: "adbapi.ConnectionPool"

def open_spider(self, spider):
assert hasattr(spider, "oracle_conf"), "未配置 Oracle 连接信息!"
self.slog = spider.slog
self.oracle_conf = spider.oracle_conf

_oracle_conf = {
"user": spider.oracle_conf.user,
"password": spider.oracle_conf.password,
"host": spider.oracle_conf.host,
"port": spider.oracle_conf.port,
"service_name": spider.oracle_conf.service_name,
"encoding": spider.oracle_conf.encoding,
"config_dir": spider.oracle_conf.thick_lib_dir or None,
"user": self.oracle_conf.user,
"password": self.oracle_conf.password,
"host": self.oracle_conf.host,
"port": self.oracle_conf.port,
"service_name": self.oracle_conf.service_name,
"encoding": self.oracle_conf.encoding,
"config_dir": self.oracle_conf.thick_lib_dir or None,
}
self.dbpool = adbapi.ConnectionPool(
"oracledb", cp_reconnect=True, **_oracle_conf
Expand Down
29 changes: 13 additions & 16 deletions ayugespidertools/scraper/spiders/__init__.py
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
import time
from pathlib import Path
from typing import TYPE_CHECKING, Any, Optional, Union
from typing import TYPE_CHECKING, Any

from scrapy.spiders import Spider

Expand All @@ -25,35 +25,32 @@
]

if TYPE_CHECKING:
import logging

from elasticsearch import Elasticsearch
from loguru import Logger
from scrapy.crawler import Crawler
from scrapy.settings import BaseSettings
from sqlalchemy.engine.base import Connection as SqlalchemyConnectT
from sqlalchemy.engine.base import Engine as SqlalchemyEngineT
from typing_extensions import Self

from ayugespidertools.common.typevars import slogT


class AyuSpider(Spider):
"""用于初始配置 scrapy 的各种 setting 的值及 spider 全局变量等"""

SPIDER_TIME: str = time.strftime("%Y-%m-%d", time.localtime())
mysql_engine: "SqlalchemyEngineT"
mysql_engine_conn: "SqlalchemyConnectT"
postgres_engine: "SqlalchemyEngineT"
postgres_engine_conn: "SqlalchemyConnectT"
oracle_engine: "SqlalchemyEngineT"
oracle_engine_conn: "SqlalchemyConnectT"
es_engine: "Elasticsearch"
es_engine_conn: "Elasticsearch"

def __init__(self, *args: Any, **kwargs: Any):
super(AyuSpider, self).__init__(*args, **kwargs)
self.mysql_engine: Optional["SqlalchemyEngineT"] = None
self.mysql_engine_conn: Optional["SqlalchemyConnectT"] = None
self.postgres_engine: Optional["SqlalchemyEngineT"] = None
self.postgres_engine_conn: Optional["SqlalchemyConnectT"] = None
self.oracle_engine: Optional["SqlalchemyEngineT"] = None
self.oracle_engine_conn: Optional["SqlalchemyConnectT"] = None
self.es_engine: Optional["Elasticsearch"] = None
self.es_engine_conn: Optional["Elasticsearch"] = None
SPIDER_TIME: str = time.strftime("%Y-%m-%d", time.localtime())

@property
def slog(self) -> Union["Logger", "logging.LoggerAdapter"]:
def slog(self) -> "slogT":
"""本库的日志管理模块,使用 loguru 来管理日志
Note:
本配置可与 Scrapy 的 spider.log 同时管理,根据场景可以自行配置。
Expand Down

0 comments on commit d9def24

Please sign in to comment.