Skip to content

Commit

Permalink
Added actions
Browse files Browse the repository at this point in the history
  • Loading branch information
joeyism committed Jun 27, 2019
1 parent fdf4b36 commit 20abdcc
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 9 deletions.
36 changes: 32 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,19 @@ export CHROMEDRIVER=~/chromedriver
```

## Usage
To use it, just create the class
To use it, just create the class.

SAMPLE USAGE
```python
from linkedin_scraper import Person, actions
from selenium import webdriver
driver = webdriver.Chrome()

email = "[email protected]"
password = "password123"
actions.login(driver, email, password) # if email and password aren't given, it'll prompt in the terminal
person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5", driver=driver)
```

### User Scraping

Expand Down Expand Up @@ -59,14 +71,27 @@ person.scrape(close_on_complete=False)
```
so it doesn't close.

### Scraping sites and login automatically
From version **2.4.0** on, `actions` is a part of the library that allows signing into LinkedIn first. The email and password can be passed to the function as arguments. If not provided, both will be prompted for in the terminal.

```python
from linkedin_scraper import Person, actions
from selenium import webdriver
driver = webdriver.Chrome()
email = "[email protected]"
password = "password123"
actions.login(driver, email, password) # if email and password aren't given, it'll prompt in the terminal
person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5", driver=driver)
```


## API

### Person
Overall, a Person object can be created with the following inputs:

```python
Person( linkedin_url=None, experiences = [], educations = [], driver = None, scrape = True)
Person(linkedin_url=None, experiences=[], educations=[], driver=None, scrape=True)
```
#### `linkedin_url`
This is the LinkedIn URL of their profile
Expand Down Expand Up @@ -99,7 +124,7 @@ This is the meat of the code, where execution of this function scrapes the profi
### Company

```python
Company(linkedin_url = None, name = None, about_us =None, website = None, headquarters = None, founded = None, company_type = None, company_size = None, specialties = None, showcase_pages =[], affiliated_companies = [], driver = None, scrape = True, get_employees = True)
Company(linkedin_url=None, name=None, about_us=None, website=None, headquarters=None, founded=None, company_type=None, company_size=None, specialties=None, showcase_pages=[], affiliated_companies=[], driver=None, scrape=True, get_employees=True)
```

#### `linkedin_url`
Expand Down Expand Up @@ -144,7 +169,7 @@ Whether to get all the employees of company
For example
```python
driver = webdriver.Chrome()
company = Company("https://ca.linkedin.com/company/google", driver = driver)
company = Company("https://ca.linkedin.com/company/google", driver=driver)
```


Expand All @@ -153,6 +178,9 @@ This is the meat of the code, where execution of this function scrapes the compa


## Versions
**2.4.0**
* Added `actions` for login

**2.3.1**
* Fixed bugs

Expand Down
5 changes: 5 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,11 @@ This is the meat of the code, where execution of this function scrapes the compa
Versions
--------

**2.3.1**


* Fixed bugs

**2.2.x**


Expand Down
2 changes: 1 addition & 1 deletion linkedin_scraper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from .objects import Institution, Experience, Education
from .company import Company

__version__ = "2.3.2"
__version__ = "2.4.0"

import glob
modules = glob.glob(dirname(__file__)+"/*.py")
Expand Down
29 changes: 29 additions & 0 deletions linkedin_scraper/actions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import getpass
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

def __prompt_email_password():
    """Interactively collect LinkedIn credentials from the terminal.

    The password is read via :func:`getpass.getpass` so it is never
    echoed to the screen.

    Returns:
        tuple: ``(email, password)`` as entered by the user.
    """
    email = input("Email: ")
    password = getpass.getpass(prompt="Password: ")
    return (email, password)

def page_has_loaded(driver):
    """Return ``True`` once the browser reports the document fully loaded.

    Args:
        driver: a Selenium WebDriver whose current page is queried via
            JavaScript for ``document.readyState``.

    Returns:
        bool: ``True`` if the ready state is ``'complete'``.
    """
    return driver.execute_script('return document.readyState;') == 'complete'

def login(driver, email=None, password=None):
    """Sign into LinkedIn with the given Selenium *driver*.

    If either *email* or *password* is missing, both are prompted for in
    the terminal (the provided value, if any, is discarded so the pair
    always comes from one source).

    Args:
        driver: a Selenium WebDriver pointed at any page.
        email: LinkedIn account email, or ``None`` to prompt.
        password: LinkedIn account password, or ``None`` to prompt.

    Raises:
        selenium.common.exceptions.TimeoutException: if the login form or
            the post-login navigation bar does not appear within 10s —
            the latter effectively signals a failed login.
    """
    if not email or not password:
        email, password = __prompt_email_password()

    driver.get("https://www.linkedin.com/login")

    # The wait already returns the located username field — reuse it
    # instead of discarding it and looking the element up a second time.
    email_elem = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, "username")))
    email_elem.send_keys(email)

    driver.find_element_by_id("password").send_keys(password)
    driver.find_element_by_tag_name("button").click()

    # Block until the logged-in nav bar exists as evidence of a live session.
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, "profile-nav-item")))
7 changes: 3 additions & 4 deletions linkedin_scraper/person.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from selenium.webdriver.support import expected_conditions as EC
from .functions import time_divide
from .objects import Experience, Education, Scraper
from .selenium_utils import WebElement
import os

class Person(Scraper):
Expand All @@ -16,7 +15,7 @@ class Person(Scraper):
also_viewed_urls = []
linkedin_url = None

def __init__(self, linkedin_url = None, name = None, experiences = [], educations = [], driver = None, get = True, scrape = True):
def __init__(self, linkedin_url=None, name=None, experiences=[], educations=[], driver=None, get=True, scrape=True):
self.linkedin_url = linkedin_url
self.name = name
self.experiences = experiences or []
Expand Down Expand Up @@ -48,7 +47,7 @@ def add_experience(self, experience):
def add_education(self, education):
    """Append *education* to this person's list of scraped educations."""
    self.educations.append(education)

def scrape(self, close_on_complete = True):
def scrape(self, close_on_complete=True):
if self.is_signed_in():
self.scrape_logged_in(close_on_complete = close_on_complete)
else:
Expand Down Expand Up @@ -106,7 +105,7 @@ def scrape_logged_in(self, close_on_complete=True):
driver.close()


def scrape_not_logged_in(self, close_on_complete=True, retry_limit = 10):
def scrape_not_logged_in(self, close_on_complete=True, retry_limit=10):
driver = self.driver
retry_times = 0
while self.is_signed_in() and retry_times <= retry_limit:
Expand Down

0 comments on commit 20abdcc

Please sign in to comment.