Commit

Merge pull request #77 from subaru-hello/#52/scrape-today-kojin-availability

Fetch today's「総合運動場 個人開放」notice (open-use sessions at the general sports ground, as of 7:30 a.m. on MM/DD)
subaru-hello authored Nov 23, 2024
2 parents 09d83a3 + 4283f2c commit b5cdae7
Showing 6 changed files with 100 additions and 43 deletions.
4 changes: 2 additions & 2 deletions py-functions/main.py
@@ -7,7 +7,7 @@
from src.handlers.addmessage import addmessage
from src.handlers.makeuppercase import makeuppercase
from src.handlers.test import introduction
-from src.handlers.setagaya_sougou_scrape import scraping
+from src.handlers.tokyo.setagaya.setagaya_sougou_scrape import setagaya_sougou_availability

initialize_app()

@@ -19,4 +19,4 @@ def on_request_example(req: https_fn.Request) -> https_fn.Response:
addmessage
makeuppercase
introduction
-scraping
+setagaya_sougou_availability
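
For context, a sketch of what the updated py-functions/main.py plausibly looks like after this hunk, reconstructed from the diff context above (on_request_example and any other handlers are elided). With firebase_functions for Python, importing the decorated function into main.py is evidently what exposes it for deployment:

from firebase_admin import initialize_app
from firebase_functions import https_fn

from src.handlers.addmessage import addmessage
from src.handlers.makeuppercase import makeuppercase
from src.handlers.test import introduction
from src.handlers.tokyo.setagaya.setagaya_sougou_scrape import setagaya_sougou_availability

initialize_app()

# Bare references, presumably so linters do not flag the imports as unused;
# the @https_fn.on_request() decorator has already registered each handler.
addmessage
makeuppercase
introduction
setagaya_sougou_availability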
File renamed without changes.
10 changes: 0 additions & 10 deletions py-functions/src/handlers/setagaya_sougou_scrape.py

This file was deleted.

86 changes: 86 additions & 0 deletions py-functions/src/handlers/tokyo/setagaya/scraping.py
@@ -0,0 +1,86 @@
import logging

import requests
from bs4 import BeautifulSoup

logging.basicConfig(level=logging.INFO)


class Scraping:
    # These literals are matched against the Japanese source page, so they must
    # stay in Japanese: TARGET_TEXT is the "general sports ground open-use"
    # notice; KEYWORDS pick out the lines about the athletics track.
    TARGET_TEXT = "総合運動場 個人開放"
    KEYWORDS = ["陸上競技", "・陸上"]

    def __init__(self, url):
        self.url = url

    def get_soup(self, url):
        """Fetch the page and return a BeautifulSoup object, or None on failure."""
        try:
            # The timeout keeps the function from hanging on a stalled request.
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            return BeautifulSoup(response.text, 'html.parser')
        except requests.RequestException as e:
            logging.error(f"Error while fetching the page: {e}")
            return None

    def get_href(self, url):
        """Find the link to the open-use notice in the news list."""
        soup = self.get_soup(url)
        if not soup:
            return None

        news_list = soup.find('div', class_='news-list')
        if not news_list:
            logging.error("Section 'news-list' not found.")
            return None

        for item in news_list.find_all('div', class_='news-item'):
            news_link_div = item.find('div', class_='news-link')
            if not news_link_div:
                continue
            news_link_text = news_link_div.get_text(strip=True)
            if self.TARGET_TEXT in news_link_text:
                a_tag = item.find("a", href=True)
                if a_tag:
                    return a_tag['href']
        logging.info(f"No link matching '{self.TARGET_TEXT}' was found.")
        return None

    def get_availability_today(self, url):
        """Fetch the title and body of the notice's detail page."""
        soup = self.get_soup(url)
        if not soup:
            return None, None

        title_tag = soup.find("h1")
        title = title_tag.get_text(strip=True) if title_tag else "Title not found."

        content_div = soup.find('div', class_="news-contents")
        if not content_div:
            logging.error("Content section 'news-contents' not found.")
            return title, "No content found."

        # Split each paragraph on <br/> and keep only the lines that mention
        # one of the athletics-track keywords.
        body_text = ""
        for paragraph in content_div.find_all("p"):
            paragraph_content = paragraph.decode_contents().replace("<br/>", "\n")
            for line in paragraph_content.split("\n"):
                clean_line = BeautifulSoup(line, 'html.parser').get_text(strip=True)
                if any(keyword in clean_line for keyword in self.KEYWORDS):
                    body_text += clean_line + "\n"

        if not body_text:
            body_text = "No matching content found."

        return title, body_text

    def execute(self):
        """Find the notice link on the top page, then scrape its detail page."""
        href = self.get_href(self.url)
        if not href:
            logging.error(f"Link for '{self.TARGET_TEXT}' not found.")
            return None, None

        # Naive join: assumes hrefs are either absolute or relative to self.url.
        absolute_url = href if href.startswith("http") else f"{self.url.rstrip('/')}/{href.lstrip('/')}"

        title, body = self.get_availability_today(absolute_url)
        logging.info(f"Title: {title}\nBody:\n{body}")
        return title, body
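
For reference, a minimal standalone usage sketch of the Scraping class (run outside Firebase; the URL is the one hard-coded in the handler below):

from src.handlers.tokyo.setagaya.scraping import Scraping

url = "https://www.se-sports.or.jp/facility/sougou/"
title, body = Scraping(url).execute()
if title is None:
    print("Scrape failed; see the log output for details.")
else:
    print(title)
    print(body)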
12 changes: 12 additions & 0 deletions py-functions/src/handlers/tokyo/setagaya/setagaya_sougou_scrape.py
@@ -0,0 +1,12 @@
from firebase_functions import https_fn

from src.handlers.tokyo.setagaya.scraping import Scraping


@https_fn.on_request()
def setagaya_sougou_availability(req: https_fn.Request) -> https_fn.Response:
    """Fetch the lending status of the athletics track at the Setagaya general sports ground."""
    url = "https://www.se-sports.or.jp/facility/sougou/"
    title, body = Scraping(url).execute()
    # Note: title and body come from the scraped page and are interpolated unescaped.
    html = f"""
    <h1>{title}</h1>
    <div>{body}</div>
    """
    return https_fn.Response(html, status=200)
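
To exercise the new endpoint locally, one option is the Firebase emulator; a sketch, assuming the emulator is running (firebase emulators:start --only functions), the default us-central1 region, and <project-id> as a placeholder for the actual project ID:

import requests

# Placeholder URL: substitute the real project ID (and region, if not us-central1).
resp = requests.get(
    "http://localhost:5001/<project-id>/us-central1/setagaya_sougou_availability"
)
print(resp.status_code)  # expect 200
print(resp.text)         # the <h1>/<div> HTML built by the handler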
31 changes: 0 additions & 31 deletions py-functions/src/models/scraping.py

This file was deleted.
