Patch summary (free text ahead of the first file header):
  * components/grabber.py stops using a remote Selenium WebDriver
    (uc.Remote(SELENIUMCLIENT, ...)) and starts a local undetected-chromedriver
    Chrome session instead; the SELENIUMCLIENT setting and several imports are
    dropped.
  * components/sendtosearch.py drops two imports.
  * components/.env is deleted and .env is added to .gitignore.
  * requirements.txt moves to resources/ and swaps selenium-stealth for
    undetected-chromedriver.
  * main.py: the removed and added lines are textually identical here
    (presumably a line-ending normalisation -- confirm against the blobs).
Reviewer note: the driver is constructed with options=chrome_options (keyword).
The requirement is unpinned and the position of `options` in uc.Chrome's
signature is believed to differ between releases (confirm against the installed
version), so a positional argument could bind to a different parameter.

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2eea525
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.env
\ No newline at end of file
diff --git a/components/.DS_Store b/components/.DS_Store
new file mode 100644
index 0000000..feff29a
Binary files /dev/null and b/components/.DS_Store differ
diff --git a/components/.env b/components/.env
deleted file mode 100644
index 850608a..0000000
--- a/components/.env
+++ /dev/null
@@ -1,4 +0,0 @@
-SELENIUMCLIENT=yourseleniumclient
-SEARCHCLIENT=yourmeilisearchclient
-SEARCHAPIKEY=yourmeilisearchclientapikey
-CRONMONITORING=yourhealthcheckspingurl
\ No newline at end of file
diff --git a/components/grabber.py b/components/grabber.py
index c015891..d659174 100644
--- a/components/grabber.py
+++ b/components/grabber.py
@@ -1,42 +1,32 @@
 #Libraries and importing
 from bs4 import BeautifulSoup
-from selenium import webdriver
-from selenium.webdriver.chrome.options import Options
+import undetected_chromedriver as uc
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.common.by import By
-from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
 from selenium.common.exceptions import TimeoutException
 import random
 import json
 import time
-import decouple
 from decouple import config
 import requests
 from os import getcwd
 #
-SELENIUMCLIENT = config('SELENIUMCLIENT')
 print('starting process')
 #getting updated input file
 url = "https://raw.githubusercontent.com/Wamy-Dev/ReziWebsite/main/Input%20Data.txt"
-directory = getcwd()
 r = requests.get(url)
 data = open("./components/Input Data.txt", "wb")
 data.write(r.content)
 data.close()
 #setting up chrome settings
-uc = webdriver
-chrome_options = webdriver.ChromeOptions()
-#chrome_options.add_argument('--headless') #remove hashtag at the start to run in headless mode, must also remove extension for this to work, not recommended
+chrome_options = uc.ChromeOptions()
 chrome_options.add_extension('./resources/ublockorigin.crx')
 chrome_options.add_extension('./resources/popupblockerpro.crx')
-chrome_options.add_argument("start-maximized")
 chrome_options.add_argument('--disable-dev-shm-usage')
-chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
-chrome_options.add_experimental_option('useAutomationExtension', False)
+chrome_options.arguments.extend(["--no-sandbox", "--disable-setuid-sandbox"])
 chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36")
-#wd = uc.Chrome(executable_path='./resources/chromedriver',options=chrome_options) #if local, make sure
-wd = uc.Remote(SELENIUMCLIENT, options=chrome_options) #if for remote
+wd = uc.Chrome(options=chrome_options) #if in docker container
 json_data={}
 #getting the links and setting up json
 def link_container(site_name,container_tag,class_tag,html,domain):
diff --git a/components/sendtosearch.py b/components/sendtosearch.py
index bcf3a15..aa1bad0 100644
--- a/components/sendtosearch.py
+++ b/components/sendtosearch.py
@@ -1,9 +1,7 @@
 import meilisearch
 import requests
 import json
-import os
 from datetime import datetime
-import decouple
 from decouple import config
 #
 SEARCHCLIENT = config('SEARCHCLIENT')
diff --git a/main.py b/main.py
index 65b23cf..64f07e3 100644
--- a/main.py
+++ b/main.py
@@ -1,18 +1,18 @@
-#Welcome to Rezi!
-# ____ _
-# / __ \___ ____ (_)
-# / /_/ / _ \/_ / / /
-# / _, _/ __/ / /_/ /
-#/_/ |_|\___/ /___/_/
-#Rezi was written in Python 3.9.6 on Sublime Text.
-#Please visit the github at https://github.com/Wamy-Dev/ReziWebsite
-import requests
-import sys
-from datetime import datetime
-sys.path.append('./components')
-#
-now=datetime.now()
-current_time = now.strftime("%H:%M:%S")
-print(f"Time started: {current_time}.")
-#
+#Welcome to Rezi!
+# ____ _
+# / __ \___ ____ (_)
+# / /_/ / _ \/_ / / /
+# / _, _/ __/ / /_/ /
+#/_/ |_|\___/ /___/_/
+#Rezi was written in Python 3.9.6 on Sublime Text.
+#Please visit the github at https://github.com/Wamy-Dev/ReziWebsite
+import requests
+import sys
+from datetime import datetime
+sys.path.append('./components')
+#
+now=datetime.now()
+current_time = now.strftime("%H:%M:%S")
+print(f"Time started: {current_time}.")
+#
 import grabber
\ No newline at end of file
diff --git a/requirements.txt b/resources/requirements.txt
similarity index 66%
rename from requirements.txt
rename to resources/requirements.txt
index 68e9e87..45585b5 100644
--- a/requirements.txt
+++ b/resources/requirements.txt
@@ -1,6 +1,6 @@
 beautifulsoup4
 selenium
-selenium-stealth
+undetected-chromedriver
 output
 meilisearch
 python-decouple