
Fix experience and education bug
joeyism committed Feb 19, 2023
1 parent f3c7f9c commit 3589dc8
Showing 4 changed files with 247 additions and 222 deletions.
97 changes: 92 additions & 5 deletions README.rst
@@ -7,6 +7,57 @@ Linkedin Scraper

Scrapes Linkedin User Data

`Linkedin Scraper <#linkedin-scraper>`_


* `Installation <#installation>`_
* `Setup <#setup>`_
* `Usage <#usage>`_

* `Sample Usage <#sample-usage>`_
* `User Scraping <#user-scraping>`_
* `Company Scraping <#company-scraping>`_
* `Job Scraping <#job-scraping>`_
* `Job Search Scraping <#job-search-scraping>`_
* `Scraping sites where login is required first <#scraping-sites-where-login-is-required-first>`_
* `Scraping sites and login automatically <#scraping-sites-and-login-automatically>`_

* `API <#api>`_

* `Person <#person>`_

* `\ ``linkedin_url`` <#linkedin_url>`_
* `\ ``name`` <#name>`_
* `\ ``about`` <#about>`_
* `\ ``experiences`` <#experiences>`_
* `\ ``educations`` <#educations>`_
* `\ ``interests`` <#interests>`_
* `\ ``accomplishment`` <#accomplishment>`_
* `\ ``company`` <#company>`_
* `\ ``job_title`` <#job_title>`_
* `\ ``driver`` <#driver>`_
* `\ ``scrape`` <#scrape>`_
* `\ ``scrape(close_on_complete=True)`` <#scrapeclose_on_completetrue>`_

* `Company <#company>`_

* `\ ``linkedin_url`` <#linkedin_url-1>`_
* `\ ``name`` <#name-1>`_
* `\ ``about_us`` <#about_us>`_
* `\ ``website`` <#website>`_
* `\ ``headquarters`` <#headquarters>`_
* `\ ``founded`` <#founded>`_
* `\ ``company_type`` <#company_type>`_
* `\ ``company_size`` <#company_size>`_
* `\ ``specialties`` <#specialties>`_
* `\ ``showcase_pages`` <#showcase_pages>`_
* `\ ``affiliated_companies`` <#affiliated_companies>`_
* `\ ``driver`` <#driver-1>`_
* `\ ``get_employees`` <#get_employees>`_
* `\ ``scrape(close_on_complete=True)`` <#scrapeclose_on_completetrue-1>`_

* `Contribution <#contribution>`_

Installation
------------

@@ -42,7 +93,7 @@ Sample Usage
email = "[email protected]"
password = "password123"
actions.login(driver, email, password) # if email and password aren't given, it'll prompt in the terminal
person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5", driver=driver)
person = Person("https://www.linkedin.com/in/joey-sham-aa2a50122", driver=driver)
**NOTE**\ : The account used to log in should have its language set to English to make sure everything works as expected.

@@ -62,6 +113,42 @@ Company Scraping
from linkedin_scraper import Company
company = Company("https://ca.linkedin.com/company/google")
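A company page can also be scraped with a logged-in driver, mirroring the ``driver=driver`` pattern used for ``Person`` (a sketch; the credentials are placeholders, and this assumes a browser environment is available):

```python
from linkedin_scraper import Company, actions
from selenium import webdriver

driver = webdriver.Chrome()
actions.login(driver, "user@email.com", "password123")  # prompts in the terminal if omitted

# Reuse the logged-in session for the company page
company = Company("https://ca.linkedin.com/company/google", driver=driver)
```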
Job Scraping
^^^^^^^^^^^^

.. code-block:: python

   from linkedin_scraper import Job, actions
   from selenium import webdriver

   driver = webdriver.Chrome()
   email = "[email protected]"
   password = "password123"
   actions.login(driver, email, password) # if email and password aren't given, it'll prompt in the terminal
   input("Press Enter")
   job = Job("https://www.linkedin.com/jobs/collections/recommended/?currentJobId=3456898261", driver=driver, close_on_complete=False)
Job Search Scraping
^^^^^^^^^^^^^^^^^^^

.. code-block:: python

   from linkedin_scraper import JobSearch, actions
   from selenium import webdriver

   driver = webdriver.Chrome()
   email = "[email protected]"
   password = "password123"
   actions.login(driver, email, password) # if email and password aren't given, it'll prompt in the terminal
   input("Press Enter")
   job_search = JobSearch(driver=driver, close_on_complete=False, scrape=False)
   # job_search contains jobs from your logged-in front page:
   # - job_search.recommended_jobs
   # - job_search.still_hiring
   # - job_search.more_jobs
   job_listings = job_search.search("Machine Learning Engineer") # returns the list of `Job` from the first page
Scraping sites where login is required first
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

@@ -156,12 +243,12 @@ These are the interests they have. A list of ``linkedin_scraper.scraper.Interest``
These are the accomplishments they have. A list of ``linkedin_scraper.scraper.Accomplishment``

``company``
^^^^^^^^^^^^^^^
~~~~~~~~~~~~~~~

This is the most recent company or institution they have worked at.

``job_title``
^^^^^^^^^^^^^^^^^
~~~~~~~~~~~~~~~~~

This is the most recent job title they have.

@@ -183,7 +270,7 @@ For example
When this is **True**\ , the scraping happens automatically. To scrape later instead, set it to **False** and run the ``scrape()`` function from the ``Person`` object.
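For example, deferred scraping might look like this (a sketch, assuming a browser environment and placeholder credentials):

```python
from linkedin_scraper import Person, actions
from selenium import webdriver

driver = webdriver.Chrome()
actions.login(driver, "user@email.com", "password123")

# scrape=False defers the work until scrape() is called explicitly
person = Person("https://www.linkedin.com/in/joey-sham-aa2a50122", driver=driver, scrape=False)
person.scrape(close_on_complete=False)  # keep the browser open for further scraping
```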

``scrape(close_on_complete=True)``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

This is the meat of the code; executing this function scrapes the profile. If *close_on_complete* is True (which it is by default), the browser will close upon completion. If scraping of other profiles is desired, you might want to set it to False so you can keep using the same driver.

@@ -267,7 +354,7 @@ For example
company = Company("https://ca.linkedin.com/company/google", driver=driver)
``scrape(close_on_complete=True)``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

This is the meat of the code; executing this function scrapes the company. If *close_on_complete* is True (which it is by default), the browser will close upon completion. If scraping of other companies is desired, you might want to set it to False so you can keep using the same driver.

2 changes: 1 addition & 1 deletion linkedin_scraper/__init__.py
@@ -5,7 +5,7 @@
from .jobs import Job
from .job_search import JobSearch

__version__ = "2.10.0"
__version__ = "2.10.1"

import glob
modules = glob.glob(dirname(__file__)+"/*.py")
6 changes: 6 additions & 0 deletions linkedin_scraper/objects.py
@@ -1,4 +1,5 @@
from dataclasses import dataclass
from time import sleep

from selenium.webdriver import Chrome

@@ -19,6 +20,7 @@ class Contact:
@dataclass
class Institution:
institution_name: str = None
linkedin_url: str = None
website: str = None
industry: str = None
type: str = None
@@ -62,6 +64,10 @@ class Scraper:
WAIT_FOR_ELEMENT_TIMEOUT = 5
TOP_CARD = "pv-top-card"

@staticmethod
def wait(duration):
sleep(int(duration))

def focus(self):
self.driver.execute_script('alert("Focus window")')
self.driver.switch_to.alert.accept()
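The ``wait`` helper added to ``Scraper`` above passes its argument through ``int()``, which truncates toward zero, so fractional durations lose their fractional part: ``wait(0.5)`` does not sleep at all. A quick standalone illustration of the truncation (the ``wait`` function here is a copy of the new static method, not an import from the package):

```python
from time import monotonic, sleep

def wait(duration):
    # Mirrors the new Scraper.wait: int() truncates toward zero,
    # so anything below 1 becomes a zero-second sleep
    sleep(int(duration))

start = monotonic()
wait(0.5)  # int(0.5) == 0, so this returns almost immediately
elapsed = monotonic() - start
print(elapsed < 0.25)  # True: nowhere near the requested 0.5s
```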
