Skip to content

Commit

Permalink
Added actions
Browse files Browse the repository at this point in the history
  • Loading branch information
joeyism committed Jun 27, 2019
1 parent fdf4b36 commit 20abdcc
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 9 deletions.
36 changes: 32 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,19 @@ export CHROMEDRIVER=~/chromedriver
```

## Usage
To use it, just create the class
To use it, just create the class.

SAMPLE USAGE
```python
from linkedin_scraper import Person, actions
from selenium import webdriver
driver = webdriver.Chrome()

email = "[email protected]"
password = "password123"
actions.login(driver, email, password) # if email and password aren't given, it'll prompt in the terminal
person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5", driver=driver)
```

### User Scraping

Expand Down Expand Up @@ -59,14 +71,27 @@ person.scrape(close_on_complete=False)
```
so it doesn't close.

### Scraping sites and login automatically
From version **2.4.0** on, `actions` is a part of the library that allows signing into LinkedIn first. The email and password can be passed to the function as arguments. If not provided, both will be prompted for in the terminal.

```python
from linkedin_scraper import Person, actions
from selenium import webdriver
driver = webdriver.Chrome()
email = "[email protected]"
password = "password123"
actions.login(driver, email, password) # if email and password aren't given, it'll prompt in the terminal
person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5", driver=driver)
```


## API

### Person
Overall, a Person object can be created with the following inputs:

```python
Person( linkedin_url=None, experiences = [], educations = [], driver = None, scrape = True)
Person(linkedin_url=None, experiences=[], educations=[], driver=None, scrape=True)
```
#### `linkedin_url`
This is the LinkedIn URL of their profile
Expand Down Expand Up @@ -99,7 +124,7 @@ This is the meat of the code, where execution of this function scrapes the profi
### Company

```python
Company(linkedin_url = None, name = None, about_us =None, website = None, headquarters = None, founded = None, company_type = None, company_size = None, specialties = None, showcase_pages =[], affiliated_companies = [], driver = None, scrape = True, get_employees = True)
Company(linkedin_url=None, name=None, about_us=None, website=None, headquarters=None, founded=None, company_type=None, company_size=None, specialties=None, showcase_pages=[], affiliated_companies=[], driver=None, scrape=True, get_employees=True)
```

#### `linkedin_url`
Expand Down Expand Up @@ -144,7 +169,7 @@ Whether to get all the employees of company
For example
```python
driver = webdriver.Chrome()
company = Company("https://ca.linkedin.com/company/google", driver = driver)
company = Company("https://ca.linkedin.com/company/google", driver=driver)
```


Expand All @@ -153,6 +178,9 @@ This is the meat of the code, where execution of this function scrapes the compa


## Versions
**2.4.0**
* Added `actions` for login

**2.3.1**
* Fixed bugs

Expand Down
5 changes: 5 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,11 @@ This is the meat of the code, where execution of this function scrapes the compa
Versions
--------

**2.3.1**


* Fixed bugs

**2.2.x**


Expand Down
2 changes: 1 addition & 1 deletion linkedin_scraper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from .objects import Institution, Experience, Education
from .company import Company

__version__ = "2.3.2"
__version__ = "2.4.0"

import glob
modules = glob.glob(dirname(__file__)+"/*.py")
Expand Down
29 changes: 29 additions & 0 deletions linkedin_scraper/actions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import getpass
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

def __prompt_email_password():
    """Interactively collect LinkedIn credentials from the terminal.

    The password is read via :func:`getpass.getpass` so it is never
    echoed to the screen.

    Returns:
        tuple: ``(email, password)`` as entered by the user.
    """
    email = input("Email: ")
    password = getpass.getpass(prompt="Password: ")
    return (email, password)

def page_has_loaded(driver):
    """Return ``True`` once the browser reports the document fully loaded.

    Args:
        driver: a Selenium WebDriver whose current page is queried via
            JavaScript for ``document.readyState``.

    Returns:
        bool: ``True`` if the ready state is ``'complete'``.
    """
    return driver.execute_script('return document.readyState;') == 'complete'

def login(driver, email=None, password=None):
    """Sign into LinkedIn with the given Selenium *driver*.

    If either *email* or *password* is missing, both are prompted for in
    the terminal (the provided value, if any, is discarded so the pair
    always comes from one source).

    Args:
        driver: a Selenium WebDriver pointed at any page.
        email: LinkedIn account email, or ``None`` to prompt.
        password: LinkedIn account password, or ``None`` to prompt.

    Raises:
        selenium.common.exceptions.TimeoutException: if the login form or
            the post-login navigation bar does not appear within 10s —
            the latter effectively signals a failed login.
    """
    if not email or not password:
        email, password = __prompt_email_password()

    driver.get("https://www.linkedin.com/login")

    # The wait already returns the located username field — reuse it
    # instead of discarding it and looking the element up a second time.
    email_elem = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, "username")))
    email_elem.send_keys(email)

    driver.find_element_by_id("password").send_keys(password)
    driver.find_element_by_tag_name("button").click()

    # Block until the logged-in nav bar exists as evidence of a live session.
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, "profile-nav-item")))
7 changes: 3 additions & 4 deletions linkedin_scraper/person.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from selenium.webdriver.support import expected_conditions as EC
from .functions import time_divide
from .objects import Experience, Education, Scraper
from .selenium_utils import WebElement
import os

class Person(Scraper):
Expand All @@ -16,7 +15,7 @@ class Person(Scraper):
also_viewed_urls = []
linkedin_url = None

def __init__(self, linkedin_url = None, name = None, experiences = [], educations = [], driver = None, get = True, scrape = True):
def __init__(self, linkedin_url=None, name=None, experiences=[], educations=[], driver=None, get=True, scrape=True):
self.linkedin_url = linkedin_url
self.name = name
self.experiences = experiences or []
Expand Down Expand Up @@ -48,7 +47,7 @@ def add_experience(self, experience):
def add_education(self, education):
    """Append *education* to this person's list of scraped educations."""
    self.educations.append(education)

def scrape(self, close_on_complete = True):
def scrape(self, close_on_complete=True):
if self.is_signed_in():
self.scrape_logged_in(close_on_complete = close_on_complete)
else:
Expand Down Expand Up @@ -106,7 +105,7 @@ def scrape_logged_in(self, close_on_complete=True):
driver.close()


def scrape_not_logged_in(self, close_on_complete=True, retry_limit = 10):
def scrape_not_logged_in(self, close_on_complete=True, retry_limit=10):
driver = self.driver
retry_times = 0
while self.is_signed_in() and retry_times <= retry_limit:
Expand Down

0 comments on commit 20abdcc

Please sign in to comment.