-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcrawl.py
114 lines (103 loc) · 7.55 KB
/
crawl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
from instagram_crawler import list_instagram_posts_by_username
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger
import logging
from datetime import datetime
import time
# Scheduler instance; the crawl job is registered on it in the __main__ section.
scheduler = BackgroundScheduler()

# Student-council account as (display name, Instagram username, profile URL).
student_council = (
    "총학생회",
    "plussu__63rd",
    "https://www.instagram.com/plussu__63rd",
)
# Accounts visited in rotation by crawl_profiles().
# Each entry is (display name, Instagram username, profile URL).
profiles = [
    # Colleges
    ("인문대학", "soongsil_humanities", "https://www.instagram.com/soongsil_humanities/"),
    ("자연과학대학", "natsci_ssu", "https://www.instagram.com/natsci_ssu/"),
    ("사회과학대학", "ssu_social", "https://www.instagram.com/ssu_social/"),
    ("법과대학", "ssu_college_of_law", "https://www.instagram.com/ssu_college_of_law/"),
    ("경제통상대학", "ssu_53eg", "https://www.instagram.com/ssu_53eg/"),
    ("경영대학", "ssu_attention", "https://www.instagram.com/ssu_attention/"),
    ("공과대학", "ssu_engineering", "https://www.instagram.com/ssu_engineering/"),
    ("IT대학", "it_soongsil", "https://www.instagram.com/it_soongsil/"),
    # Departments and schools
    ("기독교학과", "ssu_docs", "https://www.instagram.com/ssu_docs/"),
    ("국어국문학과", "ssu_kr_atti_42", "https://www.instagram.com/ssu_kr_atti_42/"),
    ("영어영문학과", "ssu_english", "https://www.instagram.com/ssu_english/"),
    ("독어독문학과", "ssu_welle", "https://www.instagram.com/ssu_welle/"),
    ("불어불문학과", "ssu_francais", "https://www.instagram.com/ssu_francais/"),
    ("중어중문학과", "ssu_moonlight", "https://www.instagram.com/ssu_moonlight/"),
    ("일어일문학과", "ssu_japanese", "https://www.instagram.com/ssu_japanese/"),
    ("철학과", "ssu_philosophy_", "https://www.instagram.com/ssu_philosophy_/"),
    ("사학과", "ssu_history", "https://www.instagram.com/ssu_history/"),
    ("예술창작학부 문예창작전공", "ssu_creativewriting", "https://www.instagram.com/ssu_creativewriting/"),
    ("예술창작학부 영화예술전공", "ssu_ssfilm", "https://www.instagram.com/ssu_ssfilm/"),
    ("수학과", "ssu_math_", "https://www.instagram.com/ssu_math_/"),
    ("물리학과", "ssu_physics", "https://www.instagram.com/ssu_physics/"),
    ("화학과", "ssu.chem_resonance", "https://www.instagram.com/ssu.chem_resonance/"),
    ("정보통계보험수리학과", "ssu_statistics", "https://www.instagram.com/ssu_statistics/"),
    ("의생명시스템학부", "ssubio2001", "https://www.instagram.com/ssubio2001/"),
    ("사회복지학부", "ssu_sowe", "https://www.instagram.com/ssu_sowe/"),
    ("행정학부", "ssupa___", "https://www.instagram.com/ssupa___/"),
    ("정치외교학과", "ssu_psir", "https://www.instagram.com/ssu_psir/"),
    ("정보사회학과", "ssu_inso", "https://www.instagram.com/ssu_inso/"),
    ("언론홍보학과", "ssu_unhong", "https://www.instagram.com/ssu_unhong/"),
    ("평생교육학과", "ssu_lle25", "https://www.instagram.com/ssu_lle25/"),
    ("법학과", "ssu_law_12th", "https://www.instagram.com/ssu_law_12th/"),
    ("국제법무학과", "ssu_globallaw", "https://www.instagram.com/ssu_globallaw/"),
    ("경제학과", "ssu_economics", "https://www.instagram.com/ssu_economics/"),
    ("글로벌통상학과", "ssu_globalcommerce", "https://instagram.com/ssu_globalcommerce?igshid=MzRlODBiNWFlZA=="),
    ("경영학부", "ssu_raon24", "https://instagram.com/ssu_raon24?igshid=MzRlODBiNWFlZA=="),
    ("회계학과", "13th_reborn", "https://instagram.com/13th_reborn?igshid=MzRlODBiNWFlZA=="),
    ("벤처중소기업학과", "ssu_vivid_26th", "https://instagram.com/ssu_vivid_26th?igshid=MzRlODBiNWFlZA=="),
    ("금융학부", "14_finance_ato", "https://instagram.com/14_finance_ato?igshid=MzRlODBiNWFlZA=="),
    ("화학공학과", "c.gnal_chemi", "https://instagram.com/c.gnal_chemi?igshid=MzRlODBiNWFlZA=="),
    ("신소재공학과(구, 유기신소재파이버공학과)", "ssu_mse_official", "https://www.instagram.com/ssu_mse_official/"),
    ("전기공학부", "with_on_35", "https://www.instagram.com/with_on_35/"),
    ("기계공학부", "ssu_mecha_moment", "https://www.instagram.com/ssu_mecha_moment/"),
    ("산업정보시스템공학과", "ssu_iise", "https://instagram.com/ssu_iise?igshid=MzRlODBiNWFlZA=="),
    ("건축학부", "soongsil_archi", "https://instagram.com/soongsil_archi?igshid=MzRlODBiNWFlZA=="),
    ("컴퓨터학부", "ssu_cse", "https://www.instagram.com/ssu_cse/"),
    ("전자정보공학부", "ssu_electronic_engineering", "https://www.instagram.com/ssu_electronic_engineering/"),
    ("글로벌미디어학부", "ssu_globalmedia", "https://www.instagram.com/ssu_globalmedia/"),
    ("소프트웨어학부", "ssu_soft", "https://www.instagram.com/ssu_soft/"),
    ("AI 융합학부(구, 스마트시스템SW학과)", "ssu_ai_conv", "https://www.instagram.com/ssu_ai_conv/"),
    ("융합특성화자유전공학부", "ssu_convergence", "https://www.instagram.com/ssu_convergence/"),
    # University-wide accounts and organisations
    ("숭실대학교 공식", "soongsil1897", "https://instagram.com/soongsil1897?igshid=MzRlODBiNWFlZA=="),
    ("숭실대학교 홍보대사 미소", "ssu__miso", "https://instagram.com/ssu__miso?igshid=MzRlODBiNWFlZA=="),
    ("숭실대학교 생협", "ssucoop", "https://instagram.com/ssucoop?igshid=MzRlODBiNWFlZA=="),
    ("숭실대학교 동아리연합회", "ssudy_2023", "https://instagram.com/ssudy_2023?igshid=MzRlODBiNWFlZA=="),
    ("숭실대학교 국제처", "soongsil_studyabroad", "https://instagram.com/soongsil_studyabroad?igshid=MzRlODBiNWFlZA=="),
    ("숭실대학교 중앙감사위원회", "ssu_audit", "https://instagram.com/ssu_audit?igshid=MzRlODBiNWFlZA=="),
    ("숭실대학교 방송국", "ssu_ssbs", "https://instagram.com/ssu_ssbs?igshid=MzRlODBiNWFlZA=="),
    ("숭실대학교 인터넷방송국", "ssizen.net2000", "https://instagram.com/ssizen.net2000?igshid=MzRlODBiNWFlZA=="),
    ("숭실대학교 인권위원회", "ssu_huri", "https://www.instagram.com/ssu_huri/"),
    ("유어슈", "yourssu_official", "https://www.instagram.com/yourssu_official/"),
]
# Send INFO-and-above records to crawl.log as "<time> - <level> - <message>".
logging.basicConfig(
    filename='crawl.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# Number of entries in the rotation list, captured once at import time.
num_profiles = len(profiles)

# State mutated by crawl_profiles():
#   current_profile_index - how many times crawl_profiles() has run so far
#   current_profile       - counter that wraps back to 0 after num_profiles - 1
current_profile_index = current_profile = 0
def crawl_profiles():
    """Crawl a single Instagram account; intended to be called once per tick.

    Runs whose counter (``current_profile_index``) is a multiple of 4 crawl
    the student-council account. Every other run crawls the ``profiles``
    entry at the ``current_profile`` cursor and then advances that cursor,
    wrapping back to 0 after the last entry.

    Side effects: updates the module-level ``current_profile_index`` and
    ``current_profile`` counters, writes progress through ``logging`` and
    prints the start message to stdout.
    """
    global current_profile_index
    global current_profile

    is_council_run = current_profile_index % 4 == 0
    if is_council_run:
        label, username, url = student_council
    else:
        # Select by the dedicated cursor, not by the run counter. Indexing
        # with ``current_profile_index % len(profiles)`` dropped whichever
        # entry lined up with a student-council run; when the list length
        # is divisible by 4 the same entries were skipped on every pass and
        # therefore never crawled.
        label, username, url = profiles[current_profile]

    logging.info(f"{label} 크롤링 시작...{datetime.now()}")
    print(f"{label} 크롤링 시작...{datetime.now()}")
    logging.info(f'크롤링 한 피드 개수: {list_instagram_posts_by_username(username, url)}')

    if not is_council_run:
        # Only profile runs consume a slot of the rotation.
        if current_profile >= len(profiles) - 1:
            logging.info('전체 수행 완료')
            current_profile = 0
        else:
            current_profile += 1

    current_profile_index += 1
    logging.info(f'스크랩 개수: {current_profile_index}')
if __name__ == "__main__":
    scheduler.start()
    # Fire at minute 0 and minute 30 of every hour.
    scheduler.add_job(crawl_profiles, CronTrigger(minute="0,30"))

    try:
        # The background scheduler does not block, so keep the main thread
        # alive for as long as the crawler should run.
        while True:
            time.sleep(60)
    except (KeyboardInterrupt, SystemExit):
        # Stop the scheduler cleanly on Ctrl-C / exit instead of dying with
        # a traceback while a job may still be in flight.
        scheduler.shutdown()