chore: update start_urls tmpl && clean up templates
shengchenyang committed May 17, 2024
1 parent d119790 commit b34ec73
Showing 4 changed files with 4 additions and 6 deletions.
2 changes: 1 addition & 1 deletion ayugespidertools/templates/spiders/async.tmpl
@@ -6,7 +6,7 @@ from ayugespidertools.spiders import AyuSpider
 class $classname(AyuSpider):
     name = "$name"
     allowed_domains = ["$domain"]
-    start_urls = ["http://$domain/"]
+    start_urls = ["$url"]
     custom_settings = {
         "TWISTED_REACTOR": "twisted.internet.asyncioreactor.AsyncioSelectorReactor",
     }
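Note: the .tmpl spider files use $-style placeholders ($classname, $name, $domain, $url) that are filled in when a spider is generated, and this change makes the start URL a value supplied by the caller instead of the hard-coded "http://$domain/". As a rough illustration only, the following is a minimal sketch of how such a template could be rendered, assuming the string.Template-style substitution that Scrapy's genspider templates use; the spider name, class name, and URL values are hypothetical, and the actual rendering code in ayugespidertools may differ.

# Minimal sketch of rendering a spider .tmpl file (assumption: string.Template-style
# substitution, as used by Scrapy's genspider templates).
from string import Template

async_tmpl = '''class $classname(AyuSpider):
    name = "$name"
    allowed_domains = ["$domain"]
    start_urls = ["$url"]
'''

rendered = Template(async_tmpl).substitute(
    classname="DemoSpider",      # hypothetical values for illustration
    name="demo",
    domain="example.com",
    url="https://example.com",   # now passed in directly rather than derived as "http://$domain/"
)
print(rendered)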
4 changes: 1 addition & 3 deletions ayugespidertools/templates/spiders/basic.tmpl
@@ -7,7 +7,7 @@ from sqlalchemy import text
 class $classname(AyuSpider):
     name = "$name"
     allowed_domains = ["$domain"]
-    start_urls = ["http://$domain/"]
+    start_urls = ["$url"]
     custom_settings = {
         "DATABASE_ENGINE_ENABLED": True,
         "ITEM_PIPELINES": {
@@ -29,12 +29,10 @@ class $classname(AyuSpider):
         for curr_li in li_list:
             octree_text = curr_li.xpath("a/text()").get()
             octree_href = curr_li.xpath("a/@href").get()
-            octree_full_link = response.urljoin(octree_href)

             octree_item = AyuItem(
                 octree_text=octree_text,
                 octree_href=octree_href,
-                octree_full_link=octree_full_link,
                 _table=_save_table,
                 # Optional parameter: in the MongoDB storage scenario, octree_text is the dedup rule; update if it exists, insert if it does not
                 _mongo_update_rule={"octree_text": octree_text},
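Besides switching to $url, the basic template also stops building octree_full_link via response.urljoin, so the example item now carries only the raw href. For context on the _mongo_update_rule comment kept in the template, the following is a rough pymongo-style sketch of the upsert behavior that comment describes; the connection details and collection names are hypothetical, and the pipeline's real implementation inside ayugespidertools may differ.

# Sketch of the upsert behavior described by _mongo_update_rule:
# a document matching the rule is updated, otherwise a new one is inserted.
from pymongo import MongoClient

client = MongoClient("mongodb://localhost:27017")  # hypothetical connection
coll = client["demo_db"]["demo_table"]             # corresponds to _table

item_data = {"octree_text": "some text", "octree_href": "/some/href"}
update_rule = {"octree_text": item_data["octree_text"]}  # _mongo_update_rule

# Update the matching document if it exists, insert it otherwise.
coll.update_one(update_rule, {"$set": item_data}, upsert=True)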
2 changes: 1 addition & 1 deletion ayugespidertools/templates/spiders/crawl.tmpl
@@ -7,7 +7,7 @@ from ayugespidertools.spiders import AyuCrawlSpider
 class $classname(AyuCrawlSpider):
     name = "$name"
     allowed_domains = ["$domain"]
-    start_urls = ["http://$domain/"]
+    start_urls = ["$url"]
     custom_settings = {
         "ITEM_PIPELINES": {
             "ayugespidertools.pipelines.AyuFtyMysqlPipeline": 300,
2 changes: 1 addition & 1 deletion ayugespidertools/templates/spiders/csvfeed.tmpl
@@ -4,7 +4,7 @@ from scrapy.spiders import CSVFeedSpider
 class $classname(CSVFeedSpider):
     name = "$name"
     allowed_domains = ["$domain"]
-    start_urls = ["http://$domain/feed.csv"]
+    start_urls = ["$url"]
     #headers = ["id", "name", "description", "image_link"]
     #delimiter = "\t"

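With this commit all four templates expect a ready-made "url" value rather than deriving it from the domain. The sketch below shows one way a caller that only has a bare domain could compute both values; this is an assumption for illustration, not necessarily how the project's spider-generation command does it.

# Hypothetical helper deriving (domain, url) from user input, adding a scheme
# only when the input is a bare domain. For illustration only.
from urllib.parse import urlparse

def domain_and_url(raw: str) -> tuple[str, str]:
    """Return (domain, url), prepending http:// when the input has no scheme."""
    url = raw if raw.startswith(("http://", "https://")) else f"http://{raw}"
    return urlparse(url).netloc, url

print(domain_and_url("example.com"))          # ('example.com', 'http://example.com')
print(domain_and_url("https://example.com"))  # ('example.com', 'https://example.com')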
