Skip to content

Automated data update #361

Automated data update

Automated data update #361

Workflow file for this run

# This workflow installs the Python dependencies with pipenv, runs the data-processing scripts, and commits the refreshed data
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
name: Automated data update

on:
  # Runs on every push to the test-workflows branch.
  push:
    branches: ["test-workflows"]
  schedule:
    # 00:00 UTC on every second day of the month (1st, 3rd, 5th, ...).
    - cron: '0 0 */2 * *'

# The last step commits and pushes, so the token needs write access to
# repository contents.
permissions:
  contents: write

jobs:
  build:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v3

      # Pin the runner clock to US Eastern time for the steps that follow.
      # NOTE(review): the version tag was reconstructed from an obfuscated
      # action reference -- confirm it against the original workflow.
      - uses: szenius/set-timezone@v1.0
        with:
          timezoneLinux: "America/New_York"
          timezoneMacos: "America/New_York"
          timezoneWindows: "Eastern Standard Time"

      - name: Set up Python 3.11.1
        uses: actions/setup-python@v3
        with:
          python-version: "3.11.1"

      - name: Install pipenv
        run: python -m pip install --upgrade pipenv wheel

      # Cache the pipenv virtualenv directory, keyed on the lock file so a
      # dependency change produces a new cache entry.
      # actions/cache v1 is retired; v4 accepts the same `path`/`key` inputs
      # and still exposes the `cache-hit` output used below.
      - id: cache-pipenv
        uses: actions/cache@v4
        with:
          path: ~/.local/share/virtualenvs
          key: ${{ runner.os }}-pipenv-${{ hashFiles('**/Pipfile.lock') }}

      # Skipped when the cache step reports an exact key hit.
      - name: Install dependencies
        if: steps.cache-pipenv.outputs.cache-hit != 'true'
        run: pipenv install --deploy --dev

      # The four data steps below receive their credentials from repository
      # secrets; they are scoped per step rather than to the whole job.
      - name: Crawl users data
        env:
          CONNECTION_URL: ${{ secrets.CONNECTION_URL }}
          MONGO_DB: ${{ secrets.MONGO_DB }}
          TMDB_KEY: ${{ secrets.TMDB_KEY }}
        run: cd data_processing && pipenv run python get_users.py

      - name: Crawl ratings data
        env:
          CONNECTION_URL: ${{ secrets.CONNECTION_URL }}
          MONGO_DB: ${{ secrets.MONGO_DB }}
          TMDB_KEY: ${{ secrets.TMDB_KEY }}
        run: cd data_processing && pipenv run python get_ratings.py

      - name: Get movie data
        env:
          CONNECTION_URL: ${{ secrets.CONNECTION_URL }}
          MONGO_DB: ${{ secrets.MONGO_DB }}
          TMDB_KEY: ${{ secrets.TMDB_KEY }}
        run: cd data_processing && pipenv run python get_movies.py

      - name: Create new training data sample file
        env:
          CONNECTION_URL: ${{ secrets.CONNECTION_URL }}
          MONGO_DB: ${{ secrets.MONGO_DB }}
          TMDB_KEY: ${{ secrets.TMDB_KEY }}
        run: cd data_processing && pipenv run python create_training_data.py

      # Runs without the secrets above.
      - name: Update last updated date
        run: cd data_processing && pipenv run python update_last_updated.py

      # Commit whatever the previous steps changed in the working tree.
      - uses: stefanzweifel/git-auto-commit-action@v4
        with:
          # Optional. Commit message for the created commit.
          # Defaults to "Apply automatic changes"
          commit_message: Automated data update
          # Optional. Local and remote branch name where commit is going to
          # be pushed to. Defaults to the current branch.
          # You might need to set `create_branch: true` if the branch does
          # not exist.
          branch: main
          # Optional. Options used by `git-commit`.
          # See https://git-scm.com/docs/git-commit#_options
          commit_options: '--no-verify --signoff'
          # Optional. Local file path to the repository.
          # Defaults to the root of the repository.
          repository: .