-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
…ability 本日(MM月DD日7時30分現在)総合運動場 個人開放のおしらせ を取得する
- Loading branch information
Showing
6 changed files
with
100 additions
and
43 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
import requests | ||
from bs4 import BeautifulSoup | ||
import logging | ||
|
||
# Configure the root logger at import time with the INFO level.
logging.basicConfig(level=logging.INFO) | ||
|
||
|
||
class Scraping:
    """Scrape a facility news listing and extract today's availability notice.

    Starting from a listing page URL, the scraper finds the news item whose
    link text contains ``TARGET_TEXT``, follows its link, and returns the
    detail page title together with the lines that mention any of
    ``KEYWORDS``.
    """

    # Substring of the news-link text that identifies the item to follow.
    TARGET_TEXT = "総合運動場 個人開放"
    # A detail-page line is kept when it contains any of these substrings.
    KEYWORDS = ["陸上競技", "・陸上"]
    # Seconds to wait for the server; without a timeout ``requests.get`` can
    # block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 10

    def __init__(self, url):
        """Store the listing page URL that ``execute`` starts from."""
        self.url = url

    def get_soup(self, url):
        """Fetch *url* and return it parsed as a BeautifulSoup object.

        Returns ``None`` (after logging the error) when the request fails,
        times out, or the server answers with an HTTP error status.
        """
        try:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as e:
            logging.error(f"ウェブページの取得中にエラーが発生しました: {e}")
            return None
        return BeautifulSoup(response.text, 'html.parser')

    def get_href(self, url):
        """Return the href of the first news item matching ``TARGET_TEXT``.

        Returns ``None`` when the page cannot be fetched, the ``news-list``
        section is missing, or no matching item carries a link.
        """
        soup = self.get_soup(url)
        if soup is None:
            return None

        news_list = soup.find('div', class_='news-list')
        if news_list is None:
            logging.error("指定されたセクション 'news-list' が見つかりません。")
            return None

        for item in news_list.find_all('div', class_='news-item'):
            news_link_div = item.find('div', class_='news-link')
            if news_link_div is None:
                continue
            if self.TARGET_TEXT not in news_link_div.get_text(strip=True):
                continue
            # The anchor is searched in the whole item, not only in the
            # 'news-link' div that carried the matching text.
            a_tag = item.find("a", href=True)
            if a_tag is not None:
                return a_tag['href']

        logging.info(f"'{self.TARGET_TEXT}' に一致するリンクが見つかりませんでした。")
        return None

    def get_availability_today(self, url):
        """Return ``(title, body)`` extracted from the detail page at *url*.

        ``title`` is the text of the first ``<h1>`` (or a placeholder message
        when absent). ``body`` holds every line inside the ``news-contents``
        paragraphs that contains one of ``KEYWORDS``, each terminated by a
        newline, or a placeholder message when nothing matches. Returns
        ``(None, None)`` when the page cannot be fetched.
        """
        soup = self.get_soup(url)
        if soup is None:
            return None, None

        title_tag = soup.find("h1")
        title = title_tag.get_text(strip=True) if title_tag else "タイトルが見つかりません。"

        content_div = soup.find('div', class_="news-contents")
        if content_div is None:
            logging.error("コンテンツのセクション 'news-contents' が見つかりません。")
            return title, "コンテンツが見つかりませんでした。"

        matched_lines = []
        for paragraph in content_div.find_all("p"):
            # Turn <br/> into newlines so each visual line is checked on its own.
            paragraph_content = paragraph.decode_contents().replace("<br/>", "\n")
            for line in paragraph_content.split("\n"):
                # Re-parse the fragment to drop any remaining inline markup.
                clean_line = BeautifulSoup(line, 'html.parser').get_text(strip=True)
                if any(keyword in clean_line for keyword in self.KEYWORDS):
                    matched_lines.append(clean_line)

        if not matched_lines:
            return title, "該当するコンテンツが見つかりませんでした。"

        return title, "".join(f"{line}\n" for line in matched_lines)

    @staticmethod
    def _resolve_url(base_url, href):
        """Resolve *href* against *base_url* the way a browser would."""
        from urllib.parse import urljoin

        return urljoin(base_url, href)

    def execute(self):
        """Run the full scrape and return ``(title, body)``.

        Returns ``(None, None)`` when the target link cannot be found or the
        detail page cannot be fetched.
        """
        href = self.get_href(self.url)
        if not href:
            logging.error(f"'{self.TARGET_TEXT}' のリンクが見つかりません。")
            return None, None

        # urljoin handles absolute, root-relative ("/news/1") and
        # page-relative hrefs; plain concatenation would misplace
        # root-relative links under the listing page's path.
        absolute_url = self._resolve_url(self.url, href)

        title, body = self.get_availability_today(absolute_url)
        logging.info(f"タイトル: {title}\n内容:\n{body}")
        return title, body
12 changes: 12 additions & 0 deletions
12
py-functions/src/handlers/tokyo/setagaya/setagaya_sougou_scrape.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
from firebase_functions import https_fn | ||
from src.handlers.tokyo.setagaya.scraping import Scraping | ||
@https_fn.on_request()
def setagaya_sougou_availability(req: https_fn.Request) -> https_fn.Response:
    """Return the Setagaya Sougou athletics-field availability as HTML.

    Scrapes the facility news page via ``Scraping.execute`` and renders the
    resulting title and body. Responds with status 502 when the scrape
    yields no data (``execute`` returned ``None`` values), instead of
    rendering the literal text "None" with status 200.
    """
    # Imported locally; the name ``html`` is avoided for locals so the
    # stdlib module is not shadowed.
    from html import escape

    url = "https://www.se-sports.or.jp/facility/sougou/"
    title, body = Scraping(url).execute()
    if title is None or body is None:
        return https_fn.Response(
            "<p>開放情報を取得できませんでした。</p>", status=502
        )

    # The text comes from a third-party page: escape it before embedding,
    # and turn line breaks into <br> so they survive HTML whitespace folding.
    body_html = "<br>\n".join(escape(line) for line in body.splitlines())
    page = f"""
<h1>{escape(title)}</h1>
<div>{body_html}</div>
"""
    return https_fn.Response(page, status=200)
This file was deleted.
Oops, something went wrong.