diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 57921aec488..475485a355b 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -30,10 +30,4 @@ values = [bumpversion:file:helm/charts/determined/Chart.yaml] -[bumpversion:glob:model_hub/examples/huggingface/*/*.yaml] - -[bumpversion:glob:model_hub/examples/mmdetection/*.yaml] - -[bumpversion:glob:model_hub/examples/mmdetection/hydra/configs/config.yaml] - [bumpversion:file:docs/_static/version-switcher/versions.json] diff --git a/.circleci/real_config.yml b/.circleci/real_config.yml index bb502fa358a..96ee7ac785a 100644 --- a/.circleci/real_config.yml +++ b/.circleci/real_config.yml @@ -401,9 +401,6 @@ commands: determined: type: boolean default: false - model-hub: - type: boolean - default: false install-python: type: boolean default: true @@ -427,9 +424,6 @@ commands: if [ "<>" = "true" ]; then cat harness/setup.py >> /tmp/cachefile fi - if [ "<>" = "true" ]; then - cat model_hub/setup.py >> /tmp/cachefile - fi echo <> >> /tmp/cachefile if [ -n <> ]; then for i in <>; do @@ -497,34 +491,15 @@ commands: echo 'export PATH=/tmp/venv/bin:$PATH' >> $BASH_ENV /tmp/venv/bin/python -m pip install --upgrade pip wheel setuptools - # Either of make -C {harness,model_hub} build require pypa's build module. - when: - condition: - or: - - <> - - <> + condition: <> steps: - run: name: Install pypa builder command: python3 -m pip install build - - - when: - condition: <> - steps: - install-wheel: package-name: determined package-location: ./harness - - when: - condition: <> - steps: - - run: - name: Install mmdetection dependencies - command: | - sudo apt-get update - sudo apt-get install -y ffmpeg libsm6 libxext6 - - install-wheel: - package-name: model-hub - package-location: ./model_hub - run: name: Install <> command: | @@ -1524,7 +1499,6 @@ jobs: - setup-python-venv: install-python: false determined: true - model-hub: true extra-requirements-file: "docs/requirements.txt" executor: <> - run: make -C examples build @@ -1542,7 +1516,6 @@ jobs: paths: - examples/build - harness/dist - - model_hub/dist - docs/build - docs/site - run: tar czf docs.tgz docs/site/html @@ -1557,7 +1530,6 @@ jobs: - setup-python-venv: install-python: false determined: true - model-hub: true extra-requirements-file: "docs/requirements.txt" executor: <> - attach_workspace: @@ -1572,7 +1544,6 @@ jobs: - setup-python-venv: install-python: false determined: true - model-hub: true extra-requirements-file: "docs/requirements.txt" executor: <> - attach_workspace: @@ -1633,7 +1604,6 @@ jobs: - setup-python-venv: install-python: false determined: true - model-hub: true extra-requirements-file: "docs/requirements.txt" executor: <> - when: @@ -1740,20 +1710,6 @@ jobs: - make-package - run: tools/scripts/retry.sh make -C master publish-dev - run: tools/scripts/retry.sh make -C agent publish-dev - - run: - name: Build and publish model_hub docker images - command: | - if [ ${CIRCLE_BRANCH} = 'main' ] || [[ ${CIRCLE_BRANCH} == *"release-"* ]]; then - # For main and release branches, we will tag and publish both the environment - # with the git hash as well as the version. This will make that image available - # immediately for nightly tests. - make -C model_hub build-docker - tools/scripts/retry.sh make -C model_hub publish-docker - else - # Otherwise, only tag and publish the environment with the git hash. 
- make -C model_hub build-docker-dev - tools/scripts/retry.sh make -C model_hub publish-docker-dev - fi - run: mkdir /tmp/pkgs && cp -v */dist/*.{rpm,deb,tar.gz} /tmp/pkgs - store_artifacts: path: /tmp/pkgs @@ -1784,20 +1740,6 @@ jobs: - make-package-ee - run: tools/scripts/retry.sh make -C master publish-dev-ee - run: tools/scripts/retry.sh make -C agent publish-dev-ee - - run: - name: Build and publish model_hub docker images - command: | - if [ ${CIRCLE_BRANCH} = 'main' ] || [[ ${CIRCLE_BRANCH} == *"release-"* ]]; then - # For main and release branches, we will tag and publish both the environment - # with the git hash as well as the version. This will make that image available - # immediately for nightly tests. - make -C model_hub build-docker - tools/scripts/retry.sh make -C model_hub publish-docker - else - # Otherwise, only tag and publish the environment with the git hash. - make -C model_hub build-docker-dev - tools/scripts/retry.sh make -C model_hub publish-docker-dev - fi - run: mkdir /tmp/pkgs && cp -v */dist/*.{rpm,deb,tar.gz} /tmp/pkgs package-and-push-system-rc: @@ -1824,8 +1766,6 @@ jobs: - make-package - run: make -C master publish - run: make -C agent publish - - run: make -C model_hub build-docker - - run: tools/scripts/retry.sh make -C model_hub publish-docker - run: mkdir /tmp/pkgs && cp -v */dist/*.{rpm,deb,tar.gz} /tmp/pkgs - store_artifacts: path: /tmp/pkgs @@ -1880,8 +1820,6 @@ jobs: - run: no_output_timeout: 30m command: make -C agent release - - run: make -C model_hub build-docker - - run: make -C model_hub publish-docker - run: mkdir /tmp/pkgs && cp -v */dist/*.{rpm,deb,tar.gz} /tmp/pkgs - store_artifacts: path: /tmp/pkgs @@ -2400,11 +2338,9 @@ jobs: - setup-python-venv: install-python: false determined: true - model-hub: true - extra-requirements-file: "requirements.txt model_hub/tests/requirements.txt" + extra-requirements-file: "requirements.txt" executor: <> - run: make -C harness check - - run: make -C model_hub check - run: make -C e2e_tests check - run: make -C tools check - run: make -C schemas check @@ -2625,35 +2561,6 @@ jobs: - store_test_results: path: /tmp/test-results - test-unit-model-hub: - docker: - - image: <> - resource_class: medium+ - steps: - - checkout - - add-and-fetch-upstream - - skip-if-only-docs - - skip-if-only-github - - skip-if-only-webui - - install-codecov - - setup-python-venv: - install-python: false - determined: true - model-hub: true - extras-requires: "torch==1.9.0 torchvision==0.10.0" - extra-requirements-file: "model_hub/tests/requirements.txt" - executor: <> - - run: COVERAGE_FILE=$PWD/test-model-hub-pycov make -C model_hub test - - run: coverage xml -i --data-file=./test-model-hub-pycov - - run: codecov -v -t $CODECOV_TOKEN -F harness - - upload-junit-datadog: - service: test-unit-model-hub - env: ci-cpu - - persist_to_workspace: - root: . - paths: - - test-model-hub-pycov - python-coverage: docker: - image: <> @@ -2666,16 +2573,13 @@ jobs: - setup-python-venv: install-python: false determined: false - model-hub: false extras-requires: "coverage" executor: <> - attach_workspace: at: . 
- run: coverage combine *-pycov - run: coverage report --include 'harness/determined/*' --skip-covered - - run: coverage report --include 'model_hub/model_hub/*' --skip-covered - run: coverage html --include 'harness/determined/*' --skip-covered -d cov-html/harness - - run: coverage html --include 'model_hub/model_hub/*' --skip-covered -d cov-html/model_hub - store_artifacts: path: cov-html destination: cov-html @@ -4051,7 +3955,6 @@ workflows: name: f-test-unit-harness-gpu-parallel filters: *any-fork - - test-unit-model-hub - test-unit-storage: context: storage-unit-tests filters: *any-upstream @@ -4063,7 +3966,6 @@ workflows: - test-unit-harness-tf2 - test-unit-harness-pytorch2-cpu - test-unit-harness-pytorch2-gpu - - test-unit-model-hub - test-unit-storage send-alerts: @@ -5066,29 +4968,6 @@ workflows: enable-tls: [true] mark: ["e2e_gpu"] - # mmdetection tests - - request-mmdetection-tests: - type: approval - filters: *upstream-feature-branch - - - test-e2e-aws: - name: test-e2e-mmdetection - context: - - aws - - aws-ci-cluster-default-user-credentials - - determined-ee - filters: *upstream-feature-branch - requires: - - request-mmdetection-tests - - package-and-push-system-dev-ee - matrix: - parameters: - compute-agent-instance-type: ["g4dn.metal"] - aux-agent-instance-type: ["m6i.large"] - cluster-id-prefix: ["mmdetection"] - mark: ["model_hub_mmdetection"] - max-dynamic-agents: [2] - # packaging tests - request-packaging-tests: type: approval @@ -5421,19 +5300,6 @@ workflows: compute-agent-instance-type: ["g4dn.metal"] aux-agent-instance-type: ["m6i.large"] max-dynamic-agents: [2] - - test-e2e-aws: - name: test-e2e-gpu-mmdetection - context: - - aws - - aws-ci-cluster-default-user-credentials - - determined-ee - matrix: - parameters: - cluster-id-prefix: ["mmdet"] - mark: ["model_hub_mmdetection"] - compute-agent-instance-type: ["g4dn.metal"] - aux-agent-instance-type: ["m6i.large"] - max-dynamic-agents: [2] - test-e2e-aws: name: test-e2e-gpu-deepspeed context: @@ -5632,7 +5498,8 @@ workflows: name: publish-python-package-rc matrix: parameters: - path: ["harness", "model_hub"] + path: + - "harness" context: determined-production filters: *rc-filters requires: @@ -5649,7 +5516,8 @@ workflows: name: publish-python-package-release matrix: parameters: - path: ["harness", "model_hub"] + path: + - "harness" context: determined-production filters: *release-filters requires: diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 9123e93d4f6..840d1fecdd4 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -95,15 +95,6 @@ updates: # - determined-ai/someteam open-pull-requests-limit: 0 - # Maintain python dependencies for Model Hub - - package-ecosystem: pip - directory: /model-hub/tests - schedule: - interval: daily - # reviewers: - # - determined-ai/someteam - open-pull-requests-limit: 0 - # Maintain python dependencies for docs - package-ecosystem: pip directory: /docs diff --git a/.github/workflows/lint-python.yml b/.github/workflows/lint-python.yml index f2455814022..ab0389a698e 100644 --- a/.github/workflows/lint-python.yml +++ b/.github/workflows/lint-python.yml @@ -21,7 +21,6 @@ jobs: - e2e_tests - examples - harness - - model_hub - schemas - tools runs-on: ubuntu-latest @@ -37,7 +36,6 @@ jobs: cache: pip cache-dependency-path: | harness/setup.py - model_hub/setup.py requirements.txt - name: Install pip dependencies run: | @@ -51,12 +49,5 @@ jobs: make build pip install --find-links dist determined==${{ env.VERSION }} pip install --no-deps --force-reinstall 
--find-links dist determined==${{ env.VERSION }} - - name: Install model_hub - working-directory: model_hub - run: | - sudo apt-get update && sudo apt-get install -y ffmpeg libsm6 libxext6 - make build - pip install --find-links dist model-hub==${{ env.VERSION }} - pip install --no-deps --force-reinstall --find-links dist model-hub==${{ env.VERSION }} - name: Run checks run: make -C ${{ matrix.component }} check diff --git a/.gitignore b/.gitignore index 3d83f228d37..e91d30a5951 100644 --- a/.gitignore +++ b/.gitignore @@ -80,9 +80,6 @@ gobin *.DS_Store .dccache -# Hydra output -model_hub/examples/mmdetection/hydra/outputs - # junit test results *.junit.xml diff --git a/docs/.redirects/redirects.json b/docs/.redirects/redirects.json index ce8964b7d2b..c9e6a0025f6 100644 --- a/docs/.redirects/redirects.json +++ b/docs/.redirects/redirects.json @@ -1,4 +1,10 @@ { + "reference/model-hub/mmdetection-api": "../training/_index.html", + "reference/model-hub/transformers-api": "../training/_index.html", + "reference/model-hub/_index": "../training/_index.html", + "model-hub-library/transformers/_index": "../../model-dev-guide/api-guides/_index.html", + "model-hub-library/mmdetection/_index": "../../model-dev-guide/api-guides/_index.html", + "model-hub-library/_index": "../model-dev-guide/api-guides/_index.html", "reference/python-sdk": "python-sdk/python-sdk.html", "reference/training/experiment-config-reference": "../experiment-config-reference.html", "model-dev-guide/dtrain/optimize-training": "../profiling.html", @@ -14,8 +20,8 @@ "architecture/system-architecture": "../get-started/architecture/system-architecture.html", "architecture/introduction": "../get-started/architecture/introduction.html", "setup-cluster/deploy-cluster/slurm/install-on-slurm": "../../slurm/install-on-slurm.html", - "setup-cluster/deploy-cluster/slurm/hpc-with-agent": "../../slurm/_index.html", - "setup-cluster/slurm/hpc-with-agent": "../slurm/_index.html", + "setup-cluster/deploy-cluster/slurm/hpc-with-agent": "../../slurm/_index.html", + "setup-cluster/slurm/hpc-with-agent": "_index.html", "setup-cluster/deploy-cluster/slurm/hpc-launching-architecture": "../../slurm/hpc-launching-architecture.html", "setup-cluster/deploy-cluster/slurm/slurm-requirements": "../../slurm/slurm-requirements.html", "setup-cluster/deploy-cluster/slurm/upgrade-on-hpc": "../../slurm/upgrade-on-hpc.html", @@ -56,8 +62,8 @@ "setup-cluster/deploy-cluster/k8s/overview": "../../k8s/_index.html", "setup-cluster/deploy-cluster/gcp/overview": "../../gcp/_index.html", "setup-cluster/deploy-cluster/aws/overview": "../../aws/_index.html", - "reference/model-hub/modelhub/transformers-api": "../transformers-api.html", - "reference/model-hub/modelhub/mmdetection-api": "../mmdetection-api.html", + "reference/model-hub/modelhub/transformers-api": "../../training/_index.html", + "reference/model-hub/modelhub/mmdetection-api": "../../training/_index.html", "reference/interface/job-config-reference": "../job-config-reference.html", "reference/deploy/config/master-config-reference": "../master-config-reference.html", "reference/deploy/config/helm-config-reference": "../helm-config-reference.html", @@ -72,13 +78,13 @@ "setup-cluster/security/oauth": "../../manage/security/oauth.html", "setup-cluster/security/index": "../../manage/security/_index.html", "integrations/ecosystem/ecosystem-integration": "../_index.html", - "reference/model-hub/index": "_index.html", + "reference/model-hub/index": "../training/_index.html", "reference/training/index": "_index.html", 
"reference/deploy/index": "_index.html", - "model-hub-library/transformers/overview": "_index.html", - "model-hub-library/transformers/examples": "_index.html", - "model-hub-library/transformers/tutorial": "_index.html", - "model-hub-library/mmdetection/overview": "_index.html", + "model-hub-library/transformers/overview": "../../model-dev-guide/api-guides/_index.html", + "model-hub-library/transformers/examples": "../../model-dev-guide/api-guides/_index.html", + "model-hub-library/transformers/tutorial": "../../model-dev-guide/api-guides/_index.html", + "model-hub-library/mmdetection/overview": "../../model-dev-guide/api-guides/_index.html", "model-dev-guide/hyperparameter/search-methods/index": "_index.html", "model-dev-guide/api-guides/batch-processing/batch-process-api-ug": "../batch-process-api-ug.html", "model-dev-guide/best-practices/index": "../_index.html", @@ -105,7 +111,7 @@ "architecture/index": "../get-started/architecture/_index.html", "reference/index": "_index.html", "model-dev-guide/index": "_index.html", - "model-hub-library/index": "_index.html", + "model-hub-library/index": "../model-dev-guide/api-guides/_index.html", "tutorials/index": "_index.html", "articles/viewing-epoch-based-metrics": "../tutorials/viewing-epoch-based-metrics.html", "setup-cluster/deploy-cluster/on-prem/wsl": "../../on-prem/options/wsl.html", @@ -218,9 +224,9 @@ "cluster-setup-guide/workspaces": "../manage/workspaces.html", "quickstart-mdldev": "tutorials/quickstart-mdldev.html", "reference/reference-searcher/custom-searcher-reference": "../custom-searcher-reference.html", - "reference/reference-model-hub/modelhub/transformers-api": "../../model-hub/transformers-api.html", - "reference/reference-model-hub/modelhub/mmdetection-api": "../../model-hub/mmdetection-api.html", - "reference/reference-model-hub/index": "../model-hub/_index.html", + "reference/reference-model-hub/modelhub/transformers-api": "../../training/_index.html", + "reference/reference-model-hub/modelhub/mmdetection-api": "../../training/_index.html", + "reference/reference-model-hub/index": "../training/_index.html", "reference/reference-interface/job-config-reference": "../job-config-reference.html", "reference/reference-deploy/config/master-config-reference": "../../deploy/master-config-reference.html", "reference/reference-deploy/config/agent-config-reference": "../../deploy/agent-config-reference.html", @@ -304,13 +310,13 @@ "interact/rest-apis": "../reference/rest-api.html", "join-community": "index.html", "manage/elasticsearch-logging-backend": "_index.html", - "model-hub/index": "../model-hub-library/_index.html", - "model-hub/mmdetection/api": "../../reference/model-hub/mmdetection-api.html", - "model-hub/mmdetection/index": "../../model-hub-library/mmdetection/_index.html", - "model-hub/transformers/api": "../../reference/model-hub/transformers-api.html", - "model-hub/transformers/examples": "../../model-hub-library/_index.html", - "model-hub/transformers/index": "../../model-hub-library/transformers/_index.html", - "model-hub/transformers/tutorial": "../../model-hub-library/_index.html", + "model-hub/index": "../model-dev-guide/api-guides/_index.html", + "model-hub/mmdetection/api": "../../reference/training/_index.html", + "model-hub/mmdetection/index": "../../model-dev-guide/api-guides/_index.html", + "model-hub/transformers/api": "../../reference/training/_index.html", + "model-hub/transformers/examples": "../../model-dev-guide/api-guides/_index.html", + "model-hub/transformers/index": 
"../../model-dev-guide/api-guides/_index.html", + "model-hub/transformers/tutorial": "../../model-dev-guide/api-guides/_index.html", "post-training/index": "../model-dev-guide/model-management/_index.html", "post-training/model-registry": "../model-dev-guide/model-management/model-registry-org.html", "post-training/use-trained-models": "../model-dev-guide/model-management/_index.html", @@ -466,4 +472,4 @@ "tutorials/porting-tutorial": "pytorch-mnist-tutorial.html", "tutorials/quick-start": "quickstart-mdldev.html", "tutorials/pytorch-mnist-local-qs": "../get-started/webui-qs.html" -} \ No newline at end of file +} diff --git a/docs/index.rst b/docs/index.rst index c0ec5e46b25..748b3d68ef7 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -5,7 +5,6 @@ Set Up Manage Tutorials - Model Hub Library Model Dev Guide Reference Tools @@ -63,13 +62,6 @@ almost any deep learning model using Determined.

Learn the basic steps needed to set up a Determined environment and train models.

-
reference icon

diff --git a/docs/model-hub-library/_index.rst b/docs/model-hub-library/_index.rst
deleted file mode 100644
index 7250870656c..00000000000
--- a/docs/model-hub-library/_index.rst
+++ /dev/null
@@ -1,31 +0,0 @@
-###################
- Model Hub Library
-###################
-
-.. important::
-
-   **Deprecation Notice**: The Model Hub library is now deprecated. Users of ``MMDetTrial`` and
-   ``BaseTransformerTrial`` should switch to either the :ref:`Core API ` or the
-   :ref:`PyTorch Trainer ` for integrations with ``mmcv`` and ``huggingface``.
-
-.. meta::
-   :description: The Model Hub Library page contains info about Transformers and MMDetection where you can access the benefits of using Determined.
-
-+-------------------------------+------------------------------------------------------------------+
-| Title                         | Description                                                      |
-+===============================+==================================================================+
-| :ref:`model-hub-transformers` | The Determined library serves as an alternative to the Hugging  |
-|                               | Face Trainer Class and provides access to the benefits of using |
-|                               | Determined.                                                      |
-+-------------------------------+------------------------------------------------------------------+
-| :ref:`model-hub-mmdetection`  | The MMDetection library serves as an alternative to the trainer |
-|                               | used by MMDetection and provides access to all of the           |
-|                               | Determined benefits.                                             |
-+-------------------------------+------------------------------------------------------------------+
-
-.. toctree::
-   :maxdepth: 1
-   :hidden:
-
-   Hugging Face Transformers
-   MMDetection
diff --git a/docs/model-hub-library/mmdetection/_index.rst b/docs/model-hub-library/mmdetection/_index.rst
deleted file mode 100644
index e8618ca61fd..00000000000
--- a/docs/model-hub-library/mmdetection/_index.rst
+++ /dev/null
@@ -1,37 +0,0 @@
-.. _model-hub-mmdetection:
-
-#############
- MMDetection
-#############
-
-.. _readme: https://github.com/determined-ai/determined/tree/main/model_hub/examples/mmdetection/README.md
-
-`The MMDetection library `_ is a popular library for
-object detection. It provides implementations for many popular object detection approaches like
-Faster-RCNN and Mask-RCNN in addition to cutting-edge methods from the research community.
-
-**model-hub** makes it easy to use MMDetection with Determined while keeping the developer
-experience as close as possible to what it's like working directly with **MMDetection**. Our library
-serves as an alternative to the trainer used by MMDetection (see `mmcv's runner
-`_) and provides access to the
-benefits of using Determined, including the following:
-
-- Easy multi-node distributed training with no code modifications. Determined automatically sets up
-  the distributed backend for you.
-- Experiment monitoring and tracking, artifact tracking, and :ref:`state-of-the-art hyperparameter
-  search ` without requiring third-party integrations.
-- :ref:`Automated cluster management, fault tolerance, and job rescheduling ` so you
-  don't have to worry about provisioning resources or babysitting your experiments.
-
-.. include:: ../../_shared/note-dtrain-learn-more.txt
-
-Given the benefits above, we think this library will be particularly useful to you if any of the
-following apply:
-
-- You want to perform object detection using a powerful integrated platform that will scale easily
-  with your needs.
-- You are a Determined user that wants to get started quickly with **MMDetection**.
-- You are an **MMDetection** user that wants to easily run more advanced workflows like multi-node
-  distributed training and advanced hyperparameter search.
-- You are an **MMDetection** user looking for a single platform to manage experiments, handle
-  checkpoints with automated fault tolerance, and perform hyperparameter search/visualization.
diff --git a/docs/model-hub-library/transformers/_index.rst b/docs/model-hub-library/transformers/_index.rst
deleted file mode 100644
index 51834508c93..00000000000
--- a/docs/model-hub-library/transformers/_index.rst
+++ /dev/null
@@ -1,46 +0,0 @@
-.. _model-hub-transformers:
-
-###########################
- Hugging Face Transformers
-###########################
-
-`The Hugging Face Transformers Library `_ is the de
-facto library for natural language processing (NLP) models. It provides pretrained weights for
-leading NLP models and lets you easily use these pretrained models for the most common NLP tasks,
-such as language modeling, text classification, and question answering.
-
-**model-hub** makes it easy to train transformer models in Determined while keeping the developer
-experience as close as possible to working directly with **transformers**. The Determined library
-serves as an alternative to the Hugging Face `Trainer Class
-`_ and provides access to the
-benefits of using Determined, including:
-
-- Easy multi-node distributed training with no code modifications. Determined automatically sets up
-  the distributed backend for you.
-- Experiment monitoring and tracking, artifact tracking, and :ref:`state-of-the-art hyperparameter
-  search ` without requiring third-party integrations.
-- :ref:`Automated cluster management, fault tolerance, and job rescheduling ` to free you
-  from provisioning resources and closely monitoring experiments.
-
-.. include:: ../../_shared/note-dtrain-learn-more.txt
-
-Model Hub Transformers is similar to the ``no_trainer`` version of **transformers** examples in that
-you have more control over the training and evaluation routines if you want.
-
-Given the above benefits, this library can be particularly useful if any of the following apply:
-
-- You are a Determined user that wants to get started quickly with **transformers**.
-- You are a **transformers** user that wants to easily run more advanced workflows like multi-node
-  distributed training and advanced hyperparameter search.
-- You are a **transformers** user looking for a single platform to manage experiments, handle
-  checkpoints with automated fault tolerance, and perform hyperparameter search/visualization.
-
-*************
- Limitations
-*************
-
-The following Hugging Face **transformers** features are currently not supported:
-
-- TensorFlow version of transformers
-- Support for fairscale
-- Running on TPUs
diff --git a/docs/reference/_index.rst b/docs/reference/_index.rst
index 4a23186a2b9..3475640054d 100644
--- a/docs/reference/_index.rst
+++ b/docs/reference/_index.rst
@@ -27,12 +27,6 @@

Reference documentation for the Training APIs and Experiment Configuration File.

-

Deployment Reference

@@ -77,5 +71,4 @@
    Python SDK
    REST API
    Determined CLI Reference
-   Model Hub APIs
    Custom Searcher Reference
diff --git a/docs/reference/model-hub/_index.rst b/docs/reference/model-hub/_index.rst
deleted file mode 100644
index c66b40e464a..00000000000
--- a/docs/reference/model-hub/_index.rst
+++ /dev/null
@@ -1,26 +0,0 @@
-################
- Model Hub APIs
-################
-
-This section includes reference documentation for the model hub APIs:
-
-+-----------------------------------+--------------------------------------------------------------+
-| Title                             | Description                                                  |
-+===================================+==============================================================+
-| :ref:`model-hub-mmdetection-api`  | The MMDetection API reference, which makes it easy to use    |
-|                                   | the popular `MMDetection library                             |
-|                                   | `_ with                                                      |
-|                                   | Determined.                                                  |
-+-----------------------------------+--------------------------------------------------------------+
-| :ref:`model-hub-transformers-api` | The Transformers API reference for using the `Hugging Face   |
-|                                   | Transformers Library                                         |
-|                                   | `_ with                                                      |
-|                                   | Determined.                                                  |
-+-----------------------------------+--------------------------------------------------------------+
-
-.. toctree::
-   :maxdepth: 1
-   :hidden:
-
-   MMDetection API
-   Transformers API
diff --git a/docs/reference/model-hub/mmdetection-api.rst b/docs/reference/model-hub/mmdetection-api.rst
deleted file mode 100644
index fbb8131ff3a..00000000000
--- a/docs/reference/model-hub/mmdetection-api.rst
+++ /dev/null
@@ -1,25 +0,0 @@
-.. _model-hub-mmdetection-api:
-
-#################
- MMDetection API
-#################
-
-***************************
- ``model_hub.mmdetection``
-***************************
-
-.. _mmdettrial:
-
-.. _readme: https://github.com/determined-ai/determined-examples/blob/main/model_hub/mmdetection/README.md
-
-.. autoclass:: model_hub.mmdetection.MMDetTrial
-
-Similar to using the MMDetection library directly, the main way users customize an experiment is by
-modifying the MMDetection config. To find out how to configure MMDetection using the
-:ref:`experiment configuration ` file, visit the readme_.
-
-Helper Functions
-================
-
-.. automodule:: model_hub.mmdetection
-   :members: get_pretrained_ckpt_path, GCSBackend, S3Backend
diff --git a/docs/reference/model-hub/transformers-api.rst b/docs/reference/model-hub/transformers-api.rst
deleted file mode 100644
index 963d50a518c..00000000000
--- a/docs/reference/model-hub/transformers-api.rst
+++ /dev/null
@@ -1,56 +0,0 @@
-.. _model-hub-transformers-api:
-
-##################
- Transformers API
-##################
-
-***************************
- ``model_hub.huggingface``
-***************************
-
-.. _basetransformertrial:
-
-.. autoclass:: model_hub.huggingface.BaseTransformerTrial
-
-The ``__init__`` method replicated below makes heavy use of the :ref:`helper functions
-` in the next section.
-
-.. literalinclude:: ../../../model_hub/model_hub/huggingface/_trial.py
-   :language: python
-   :pyobject: BaseTransformerTrial.__init__
-
-The ``train_batch`` method replicated below should work for most models and tasks but can be
-overwritten for more custom behavior in a subclass.
-
-.. literalinclude:: ../../../model_hub/model_hub/huggingface/_trial.py
-   :language: python
-   :pyobject: BaseTransformerTrial.train_batch
-
-.. _transformers-functions:
-
-Helper Functions
-================
-
-The ``BaseTransformerTrial`` calls many helper functions below that are also useful when subclassing
-``BaseTransformerTrial`` or writing custom transformers trials for use with Determined.
-
-.. automodule:: model_hub.huggingface
-   :members: default_parse_config_tokenizer_model_kwargs, default_parse_optimizer_lr_scheduler_kwargs, build_using_auto, build_default_optimizer, build_default_lr_scheduler, default_load_dataset
-
-Structured Dataclasses
-======================
-
-Structured dataclasses are used to ensure that Determined parses the experiment config correctly.
-See the below classes for details on what fields can be used in the experiment config to configure
-the dataset; transformers config, model, and tokenizer; as well as optimizer and learning rate
-scheduler for use with the functions above.
-
-.. autoclass:: model_hub.huggingface.DatasetKwargs
-
-.. autoclass:: model_hub.huggingface.ConfigKwargs
-
-.. autoclass:: model_hub.huggingface.ModelKwargs
-
-.. autoclass:: model_hub.huggingface.OptimizerKwargs
-
-.. autoclass:: model_hub.huggingface.LRSchedulerKwargs
diff --git a/docs/release-notes/remove-model-hub.rst b/docs/release-notes/remove-model-hub.rst
new file mode 100644
index 00000000000..d76078ec04d
--- /dev/null
+++ b/docs/release-notes/remove-model-hub.rst
@@ -0,0 +1,11 @@
+:orphan:
+
+**Breaking Changes**
+
+- API: Remove the model_hub library from Determined.
+
+  - Starting with this release, MMDetTrial and BaseTransformerTrial are removed. Hugging Face
+    users should look at the provided `HuggingFace Trainer API
+    examples`_,
+    which use a custom callback in place of BaseTransformerTrial. Users of MMDetTrial can refer
+    to the :ref:`Core API `.
diff --git a/e2e_tests/pytest.ini b/e2e_tests/pytest.ini
index 44c0c56e259..14bef41bfd9 100644
--- a/e2e_tests/pytest.ini
+++ b/e2e_tests/pytest.ini
@@ -30,9 +30,6 @@ markers =
     gpu_required: tests with a hard CUDA requirement
     distributed: distributed training tests
     parallel: parallel, multi-gpu tests
-    model_hub_transformers: model_hub_transformers tests
-    model_hub_transformers_amp: model_hub_transformers_amp tests
-    model_hub_mmdetection: model_hub_mmdetection tests
     deepspeed: DeepSpeedTrial tests
     nightly: nightly tests
     det_deploy_local: test det deploy local
diff --git a/e2e_tests/tests/conftest.py b/e2e_tests/tests/conftest.py
index 0c8a2498ce4..b305d86e3c9 100644
--- a/e2e_tests/tests/conftest.py
+++ b/e2e_tests/tests/conftest.py
@@ -44,9 +44,6 @@
     "distributed",
     "parallel",
     "nightly",
-    "model_hub_transformers",
-    "model_hub_transformers_amp",
-    "model_hub_mmdetection",
     "deepspeed",
     "managed_devcluster",
     "port_registry",
diff --git a/e2e_tests/tests/fixtures/mmdetection/distributed_fake_data.yaml b/e2e_tests/tests/fixtures/mmdetection/distributed_fake_data.yaml
deleted file mode 100644
index ee9abb622a9..00000000000
--- a/e2e_tests/tests/fixtures/mmdetection/distributed_fake_data.yaml
+++ /dev/null
@@ -1,28 +0,0 @@
-name: mmdet_fake_distributed
-data:
-  file_client_args:
-    backend: fake
-hyperparameters:
-  global_batch_size: 8
-  config_file: /mmdetection/configs/mask_rcnn/mask_rcnn_r50_fpn_poly_1x_coco.py
-  merge_config: null
-  use_pretrained: false
-  override_mmdet_config:
-    data.train.ann_file: /tmp/instances_train2017.json
-    data.val.ann_file: /tmp/instances_val2017.json
-searcher:
-  name: single
-  metric: bbox_mAP
-  max_length:
-    batches: 200
-  smaller_is_better: false
-environment:
-  image:
-    gpu: determinedai/model-hub-mmdetection
-bind_mounts:
-  - host_path: /tmp
- container_path: /tmp -resources: - slots_per_trial: 8 # max number of GPUs a trial is allowed to individually use -max_restarts: 0 -entrypoint: python3 -m determined.launch.torch_distributed --trial model_hub.mmdetection:MMDetTrial diff --git a/e2e_tests/tests/fixtures/mmdetection/startup-hook.sh b/e2e_tests/tests/fixtures/mmdetection/startup-hook.sh deleted file mode 100644 index 346ad2a8dc5..00000000000 --- a/e2e_tests/tests/fixtures/mmdetection/startup-hook.sh +++ /dev/null @@ -1,4 +0,0 @@ -wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip -unzip annotations_trainval2017.zip -mv annotations/instances_train2017.json /tmp -mv annotations/instances_val2017.json /tmp diff --git a/e2e_tests/tests/model_hub/test_mmdetection.py b/e2e_tests/tests/model_hub/test_mmdetection.py deleted file mode 100644 index c05741b66cd..00000000000 --- a/e2e_tests/tests/model_hub/test_mmdetection.py +++ /dev/null @@ -1,101 +0,0 @@ -import os -import subprocess -from typing import Dict - -import pytest - -from tests import api_utils -from tests import config as conf -from tests import experiment as exp - - -def set_docker_image(config: Dict) -> Dict: - git_short_hash = ( - subprocess.check_output(["git", "rev-parse", "--short", "HEAD"]).strip().decode("utf-8") - ) - - config = conf.set_image( - config, conf.TF2_CPU_IMAGE, f"determinedai/model-hub-mmdetection:{git_short_hash}" - ) - return config - - -@pytest.mark.model_hub_mmdetection -@pytest.mark.skip(reason="mmdetection requires maintenance") -def test_maskrcnn_distributed_fake() -> None: - example_path = conf.fixtures_path("mmdetection") - config = conf.load_config(os.path.join(example_path, "distributed_fake_data.yaml")) - config = conf.set_max_length(config, {"batches": 200}) - config = set_docker_image(config) - - exp.run_basic_test_with_temp_config(api_utils.user_session(), config, example_path, 1) - - -@pytest.mark.model_hub_mmdetection -@pytest.mark.skip(reason="mmdetection requires maintenance") -def test_fasterrcnn_distributed_fake() -> None: - example_path = conf.fixtures_path("mmdetection") - config = conf.load_config(os.path.join(example_path, "distributed_fake_data.yaml")) - config = conf.set_max_length(config, {"batches": 200}) - config = set_docker_image(config) - config = conf.set_hparam( - config, "config_file", "/mmdetection/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py" - ) - - exp.run_basic_test_with_temp_config(api_utils.user_session(), config, example_path, 1) - - -@pytest.mark.model_hub_mmdetection -@pytest.mark.skip(reason="mmdetection requires maintenance") -def test_retinanet_distributed_fake() -> None: - example_path = conf.fixtures_path("mmdetection") - config = conf.load_config(os.path.join(example_path, "distributed_fake_data.yaml")) - config = conf.set_max_length(config, {"batches": 200}) - config = set_docker_image(config) - config = conf.set_hparam( - config, "config_file", "/mmdetection/configs/retinanet/retinanet_r50_fpn_1x_coco.py" - ) - - exp.run_basic_test_with_temp_config(api_utils.user_session(), config, example_path, 1) - - -@pytest.mark.model_hub_mmdetection -@pytest.mark.skip(reason="mmdetection requires maintenance") -def test_gfl_distributed_fake() -> None: - example_path = conf.fixtures_path("mmdetection") - config = conf.load_config(os.path.join(example_path, "distributed_fake_data.yaml")) - config = conf.set_max_length(config, {"batches": 200}) - config = set_docker_image(config) - config = conf.set_hparam( - config, "config_file", "/mmdetection/configs/gfl/gfl_r50_fpn_1x_coco.py" 
- ) - - exp.run_basic_test_with_temp_config(api_utils.user_session(), config, example_path, 1) - - -@pytest.mark.model_hub_mmdetection -@pytest.mark.skip(reason="mmdetection requires maintenance") -def test_yolo_distributed_fake() -> None: - example_path = conf.fixtures_path("mmdetection") - config = conf.load_config(os.path.join(example_path, "distributed_fake_data.yaml")) - config = conf.set_max_length(config, {"batches": 200}) - config = set_docker_image(config) - config = conf.set_hparam( - config, "config_file", "/mmdetection/configs/yolo/yolov3_d53_320_273e_coco.py" - ) - - exp.run_basic_test_with_temp_config(api_utils.user_session(), config, example_path, 1) - - -@pytest.mark.model_hub_mmdetection -@pytest.mark.skip(reason="mmdetection requires maintenance") -def test_detr_distributed_fake() -> None: - example_path = conf.fixtures_path("mmdetection") - config = conf.load_config(os.path.join(example_path, "distributed_fake_data.yaml")) - config = conf.set_max_length(config, {"batches": 200}) - config = set_docker_image(config) - config = conf.set_hparam( - config, "config_file", "/mmdetection/configs/detr/detr_r50_8x2_150e_coco.py" - ) - - exp.run_basic_test_with_temp_config(api_utils.user_session(), config, example_path, 1) diff --git a/harness/determined/common/api/analytics.py b/harness/determined/common/api/analytics.py index 24edb5dbdd7..471bc2e9d34 100644 --- a/harness/determined/common/api/analytics.py +++ b/harness/determined/common/api/analytics.py @@ -19,7 +19,6 @@ def get_library_version_analytics() -> Dict[str, Any]: modules = [ "determined", - "model_hub", "torch", "tensorflow", "transformers", diff --git a/model_hub/.flake8 b/model_hub/.flake8 deleted file mode 100644 index b81069ac479..00000000000 --- a/model_hub/.flake8 +++ /dev/null @@ -1,47 +0,0 @@ -[flake8] -max-line-length = 100 -exclude = - .git, - __pycache__, - build, - dist - tests/fixtures/merge_config.py - -# We ignore F401 in __init__.py because it is expected for there to be -# "unused imports" when defining a "regular" package. (This file is -# implicitly executed when the package is imported, and the imports would -# be used by the importer.) We ignore patch_saver_restore.py because it includes -# a near-verbatim TensorFlow function with a small patch. -per-file-ignores = __init__.py:F401 patch_saver_restore.py:E111,E114, - -# Explanations for ignored error codes: -# - A003 (class attribute shadowing python builtin): model_hub/mmdetection/_callbacks.py will fail but we need it to match the signature for a mmdetection class. 
-# - D1* (no missing docstrings): too much effort to start enforcing -# - D200 (short docstring must fit in one line with quotes): stylistic choice -# - D202 (no blank lines after function docstrings): stylistic choice -# - D203 (blank line before class docstring): stylistic choice -# - D205 (blank line between summary and description): not enforcing single-line summaries -# - D212 (docstring should start on first line): stylistic choice (prefer D213, docstrings start on second line) -# - D4* (docstring content warnings): too much effort to start enforcing -# - E203 (no space before colon): not PEP8-compliant; triggered by Black-formatted code -# - W503 (no line breaks before binary operator): not PEP8-compliant; triggered by Black-formatted code -# - C812-C816 (missing trailing comma): stylistic choice -ignore = A003,D1,D200,D202,D203,D205,D212,D4,E203,W503,C812,C813,C814,C815,C816 -# Disable flake8-import-restrictions -extend-ignore = I20 - -show_source = true - -# flake8-colors -format = ${cyan}%(path)s${reset}:${yellow_bold}%(row)d${reset}:${green_bold}%(col)d${reset}: ${red_bold}%(code)s${reset} %(text)s - -# flake8-docstrings -docstring-convention = google - -# flake8-import-order -application-import-names = determined -import-order-style = edited - -# flake8-quotes -inline-quotes = " -multiline-quotes = """ diff --git a/model_hub/MANIFEST.in b/model_hub/MANIFEST.in deleted file mode 100644 index d8263882c3f..00000000000 --- a/model_hub/MANIFEST.in +++ /dev/null @@ -1 +0,0 @@ -include model_hub/py.typed diff --git a/model_hub/Makefile b/model_hub/Makefile deleted file mode 100644 index 180d2677aca..00000000000 --- a/model_hub/Makefile +++ /dev/null @@ -1,140 +0,0 @@ -SHELL := /bin/bash -VERSION := $(shell cat ../VERSION) -SHORT_GIT_HASH := $(shell git rev-parse --short HEAD) - -ARTIFACTS_DIR := /tmp/artifacts - -# Model-hub library environments will be built on top of the default GPU and CPU images in master/pkg/model/defaults.go -DEFAULT_GPU_IMAGE := determinedai/pytorch-tensorflow-cuda-dev:0736b6d - -############REMINDER############ -# When bumping third-party library versions, remember to bump versions in -# tests/requirements.txt and docs/requirements.txt to match. -############REMINDER############ - -# Transformers Args -TRANSFORMERS_VERSION := 4.8.2 -DATASETS_VERSION := 1.9.0 -TRANSFORMERS_ENVIRONMENT_ROOT := determinedai/model-hub-transformers -# MMDetection Args -MMCV_CUDA_VERSION := cu113 # Needs to match DEFAULT_GPU_IMAGE -TORCH_VERSION := 1.12.0 # Needs to match DEFAULT_GPU_IMAGE -MMCV_VERSION := 1.7.1 # Needs to support the above Torch version -MMDETECTION_VERSION := 2.27.0 -MMDETECTION_ENVIRONMENT_ROOT := determinedai/model-hub-mmdetection - -.PHONY: clean -clean: - rm -rf .pytest_cache/ - rm -rf .mypy_cache/ - rm -rf *.egg-info/ - rm -rf pip-wheel-metadata/ - rm -rf dist/ - rm -rf build/ - rm -f .coverage - find . \( -name __pycache__ -o -name \*.pyc \) -delete - find . -type d -name .mypy_cache -prune -exec rm -rf {} \; - -.PHONY: build -build: - PYTHONWARNINGS=ignore:Normalizing:UserWarning:setuptools.dist \ - python -m build -nxw >/dev/null - -.PHONY: publish -publish: - twine upload --verbose --non-interactive dist/* - -.PHONY: fmt -fmt: - isort . - black . - -.PHONY: check -check: - isort . --check-only - black . --check - python -m flake8 - mypy . - -.PHONY: test -test: - coverage run -m pytest -v -s --durations=0 tests - -# We will only use HASH for dev environments. 
-.PHONY: build-transformers-dev -build-transformers-dev: - docker build -f docker/Dockerfile.transformers \ - --build-arg BASE_IMAGE=$(DEFAULT_GPU_IMAGE) \ - --build-arg TRANSFORMERS_VERSION=$(TRANSFORMERS_VERSION) \ - --build-arg DATASETS_VERSION=$(DATASETS_VERSION) \ - --build-arg MODEL_HUB_VERSION=$(VERSION) \ - -t $(TRANSFORMERS_ENVIRONMENT_ROOT):$(SHORT_GIT_HASH) \ - . - -.PHONY: publish-transformers-dev -publish-transformers-dev: - ./docker/publish-docker.sh transformers-gpu-hash $(TRANSFORMERS_ENVIRONMENT_ROOT):$(SHORT_GIT_HASH) $(ARTIFACTS_DIR) - -.PHONY: build-mmdetection-dev -build-mmdetection-dev: - docker build -f docker/Dockerfile.mmdetection \ - --build-arg BASE_IMAGE=$(DEFAULT_GPU_IMAGE) \ - --build-arg MMCV_CUDA_VERSION=$(MMCV_CUDA_VERSION) \ - --build-arg TORCH_VERSION=$(TORCH_VERSION) \ - --build-arg MMCV_VERSION=$(MMCV_VERSION) \ - --build-arg MMDETECTION_VERSION=$(MMDETECTION_VERSION) \ - --build-arg MODEL_HUB_VERSION=$(VERSION) \ - -t $(MMDETECTION_ENVIRONMENT_ROOT):$(SHORT_GIT_HASH) \ - . - -.PHONY: publish-mmdetection-dev -publish-mmdetection-dev: - ./docker/publish-docker.sh mmdetection-gpu-hash $(MMDETECTION_ENVIRONMENT_ROOT):$(SHORT_GIT_HASH) $(ARTIFACTS_DIR) - -.PHONY: build-docker-dev -build-docker-dev: build-transformers-dev build-mmdetection-dev - -.PHONY: publish-docker-dev -publish-docker-dev: publish-transformers-dev publish-mmdetection-dev - -# We will use a tag of the format determinedai/model-hub-transformers:VERSION for -# master and releases. -.PHONY: build-transformers -build-transformers: - docker build -f docker/Dockerfile.transformers \ - --build-arg BASE_IMAGE=$(DEFAULT_GPU_IMAGE) \ - --build-arg TRANSFORMERS_VERSION=$(TRANSFORMERS_VERSION) \ - --build-arg DATASETS_VERSION=$(DATASETS_VERSION) \ - --build-arg MODEL_HUB_VERSION=$(VERSION) \ - -t $(TRANSFORMERS_ENVIRONMENT_ROOT):$(SHORT_GIT_HASH) \ - -t $(TRANSFORMERS_ENVIRONMENT_ROOT):$(VERSION) \ - . - -.PHONY: publish-transformers -publish-transformers: - ./docker/publish-docker.sh transformers-gpu-hash $(TRANSFORMERS_ENVIRONMENT_ROOT):$(SHORT_GIT_HASH) $(ARTIFACTS_DIR) - ./docker/publish-docker.sh transformers-gpu-version $(TRANSFORMERS_ENVIRONMENT_ROOT):$(VERSION) $(ARTIFACTS_DIR) - -.PHONY: build-mmdetection -build-mmdetection: - docker build -f docker/Dockerfile.mmdetection \ - --build-arg BASE_IMAGE=$(DEFAULT_GPU_IMAGE) \ - --build-arg MMCV_CUDA_VERSION=$(MMCV_CUDA_VERSION) \ - --build-arg TORCH_VERSION=$(TORCH_VERSION) \ - --build-arg MMCV_VERSION=$(MMCV_VERSION) \ - --build-arg MMDETECTION_VERSION=$(MMDETECTION_VERSION) \ - --build-arg MODEL_HUB_VERSION=$(VERSION) \ - -t $(MMDETECTION_ENVIRONMENT_ROOT):$(SHORT_GIT_HASH) \ - -t $(MMDETECTION_ENVIRONMENT_ROOT):$(VERSION) \ - . - -.PHONY: publish-mmdetection -publish-mmdetection: - ./docker/publish-docker.sh mmdetection-gpu-hash $(MMDETECTION_ENVIRONMENT_ROOT):$(SHORT_GIT_HASH) $(ARTIFACTS_DIR) - ./docker/publish-docker.sh mmdetection-gpu-version $(MMDETECTION_ENVIRONMENT_ROOT):$(VERSION) $(ARTIFACTS_DIR) - -.PHONY: build-docker -build-docker: build-transformers build-mmdetection - -.PHONY: publish-docker -publish-docker: publish-transformers publish-mmdetection diff --git a/model_hub/README.md b/model_hub/README.md deleted file mode 100644 index d70ff1d0448..00000000000 --- a/model_hub/README.md +++ /dev/null @@ -1,25 +0,0 @@ -# Determined's Model-Hub Library - -Determined's Model-Hub library makes it super easy to use popular external -libraries with Determined's training platform. 
Support for each external
-library will provide a Trial interface, helper functions, and official examples
-to allow users to easily use these libraries with all of Determined's features
-(e.g., distributed training, experiment tracking, hyperparameter search).
-
-## Supported Libraries
-
-Model-Hub currently supports
-
-* [Huggingface transformers](https://github.com/huggingface/transformers)
-* [MMDetection](https://github.com/open-mmlab/mmdetection)
-
-Please refer to [our documentation](
-https://docs.determined.ai/latest/model-hub/index.html) for more information on
-how to use these libraries with Determined.
-
-Libraries we are working to support in the future include:
-
-* [detectron2](https://github.com/facebookresearch/detectron2)
-
-If there is a library you want us to support for use with Determined, please
-open an issue to submit a request.
diff --git a/model_hub/docker/Dockerfile.mmdetection b/model_hub/docker/Dockerfile.mmdetection
deleted file mode 100644
index 5276be89fc8..00000000000
--- a/model_hub/docker/Dockerfile.mmdetection
+++ /dev/null
@@ -1,34 +0,0 @@
-ARG BASE_IMAGE
-# This will be an image from determinedai/environments
-FROM ${BASE_IMAGE}
-
-ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../"
-
-RUN apt-get update && apt-get install -y git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libgl1-mesa-glx unzip \
-    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/*
-
-# Install MMCV
-ARG MMCV_VERSION
-ARG MMCV_CUDA_VERSION
-ARG TORCH_VERSION
-RUN pip install mmcv-full==${MMCV_VERSION} -f https://download.openmmlab.com/mmcv/dist/${MMCV_CUDA_VERSION}/torch${TORCH_VERSION}/index.html
-RUN pip install numpy==1.24.4
-
-# Install MMDetection
-RUN conda clean --all
-RUN git clone https://github.com/open-mmlab/mmdetection.git /mmdetection
-WORKDIR /mmdetection
-ARG MMDETECTION_VERSION
-RUN git checkout tags/v${MMDETECTION_VERSION}
-ENV FORCE_CUDA="1"
-RUN pip install -r requirements/build.txt
-RUN pip install git+https://github.com/cocodataset/panopticapi.git
-RUN pip install --no-cache-dir -e .
-ENV MMDETECTION_CONFIG_DIR=/mmdetection/configs - -# Wheel must be built before building the docker image -RUN mkdir -p /tmp/model-hub-wheel -ADD dist /tmp/model-hub-wheel -ARG MODEL_HUB_VERSION -RUN python -m pip install --find-links=/tmp/model-hub-wheel model-hub==${MODEL_HUB_VERSION} diff --git a/model_hub/docker/Dockerfile.transformers b/model_hub/docker/Dockerfile.transformers deleted file mode 100644 index 292081e1eec..00000000000 --- a/model_hub/docker/Dockerfile.transformers +++ /dev/null @@ -1,15 +0,0 @@ -ARG BASE_IMAGE -# This will be an image from determinedai/environments -FROM ${BASE_IMAGE} - -ARG TRANSFORMERS_VERSION -ARG DATASETS_VERSION -RUN pip install transformers==${TRANSFORMERS_VERSION} datasets==${DATASETS_VERSION} -RUN pip install sentencepiece!=0.1.92 protobuf scikit-learn conllu seqeval - - -# Wheel must be built before building the docker image -RUN mkdir -p /tmp/model-hub-wheel -ADD dist /tmp/model-hub-wheel -ARG MODEL_HUB_VERSION -RUN python -m pip install --find-links=/tmp/model-hub-wheel model-hub==${MODEL_HUB_VERSION} diff --git a/model_hub/docker/publish-docker.sh b/model_hub/docker/publish-docker.sh deleted file mode 100755 index 7badc51bd32..00000000000 --- a/model_hub/docker/publish-docker.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/sh - -set -e -set -x - -docker info - -if [ "$#" -ne 3 ]; then - echo "usage: $0 LOG_NAME TAG ARTIFACTS_DIR" >&2 - exit 1 -fi - -log_name="$1" -tag="$2" -artifacts="$3" - -underscore_name="$(echo -n "$log_name" | tr - _)" - -docker push "$tag" - -mkdir -p "$artifacts" - -log_file="$artifacts/publish-$log_name" -echo "${underscore_name}" >"$log_file" diff --git a/model_hub/model_hub/__init__ 2.py b/model_hub/model_hub/__init__ 2.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/model_hub/model_hub/__init__.py b/model_hub/model_hub/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/model_hub/model_hub/__version__ 2.py b/model_hub/model_hub/__version__ 2.py deleted file mode 100644 index 739c35a75cd..00000000000 --- a/model_hub/model_hub/__version__ 2.py +++ /dev/null @@ -1 +0,0 @@ -__version__ = "0.34.1-dev0" diff --git a/model_hub/model_hub/__version__.py b/model_hub/model_hub/__version__.py deleted file mode 100644 index b6c740e3f43..00000000000 --- a/model_hub/model_hub/__version__.py +++ /dev/null @@ -1 +0,0 @@ -__version__ = "0.36.1-dev0" diff --git a/model_hub/model_hub/huggingface/__init__.py b/model_hub/model_hub/huggingface/__init__.py deleted file mode 100644 index 02c4438985e..00000000000 --- a/model_hub/model_hub/huggingface/__init__.py +++ /dev/null @@ -1,23 +0,0 @@ -from model_hub.huggingface._config_parser import ( - DatasetKwargs, - ConfigKwargs, - TokenizerKwargs, - ModelKwargs, - OptimizerKwargs, - LRSchedulerKwargs, - parse_dict_to_dataclasses, - default_parse_config_tokenizer_model_kwargs, - default_parse_optimizer_lr_scheduler_kwargs, -) - -from model_hub.huggingface._trial import ( - build_using_auto, - build_default_optimizer, - build_default_lr_scheduler, - default_load_dataset, - BaseTransformerTrial, -) - -from model_hub.huggingface._utils import ( - remove_unused_columns, -) diff --git a/model_hub/model_hub/huggingface/_config_parser.py b/model_hub/model_hub/huggingface/_config_parser.py deleted file mode 100644 index 5d8e94e6eb1..00000000000 --- a/model_hub/model_hub/huggingface/_config_parser.py +++ /dev/null @@ -1,359 +0,0 @@ -import dataclasses -from typing import Any, Dict, Optional, Tuple, Union - -from model_hub import utils - - -class 
FlexibleDataclass: - """ - A variant of dataclass that allows fields without defaults to be unpopulated for - class instances. - - Fields with defaults will always be set as instance attributes. - Fields without defaults will be set as attributes only if a value is provided in the init. - """ - - def __init__(self, **kwargs: Dict[str, Any]) -> None: - field_names = [f.name for f in dataclasses.fields(self)] - - # If a dictionary key corresponds to a field, set it as an attribute. - for k, v in kwargs.items(): - if k in field_names: - setattr(self, k, v) - - # Otherwise, for fields with defaults, set those attributes. - for f in dataclasses.fields(self): - if not hasattr(self, f.name) and f.default is not dataclasses.MISSING: - setattr(self, f.name, f.default) - - def as_dict(self) -> Dict[str, Any]: - output = {} - for f in dataclasses.fields(self): - if hasattr(self, f.name): - output[f.name] = getattr(self, f.name) - return output - - def __repr__(self) -> str: - fields_str = ", ".join( - [ - "{}={}".format(f.name, getattr(self, f.name)) - for f in dataclasses.fields(self) - if hasattr(self, f.name) - ] - ) - return self.__class__.__qualname__ + f"({fields_str})" - - -@dataclasses.dataclass(init=False, repr=False) -class DatasetKwargs(FlexibleDataclass): - """ - Config parser for dataset fields. - - Either ``dataset_name`` needs to be provided or ``train_file`` and ``validation_file`` need - to be provided. - - Args: - dataset_name (optional, defaults to ``None``): Path argument to pass to HuggingFace - ``datasets.load_dataset``. Can be a dataset identifier in HuggingFace Datasets Hub or - a local path to processing script. - dataset_config_name (optional, defaults to ``None``): The name of the dataset configuration - to pass to HuggingFace ``datasets.load_dataset``. - validation_split_percentage (optional, defaults to ``None``): This is used to create a - validation split from the training data when a dataset does not have a predefined - validation split. - train_file (optional, defaults to ``None``): Path to training data. This will be used if - a dataset_name is not provided. - validation_file (optional, defaults to ``None``): Path to validation data. This will be - used if a dataset_name is not provided. - - Returns: - dataclass with the above fields populated according to provided config. - """ - - dataset_name: Optional[str] = dataclasses.field( - default=None, - ) - dataset_config_name: Optional[str] = dataclasses.field( - default=None, - ) - validation_split_percentage: Optional[float] = dataclasses.field( - default=None, - ) - train_file: Optional[str] = dataclasses.field( - default=None, - ) - validation_file: Optional[str] = dataclasses.field( - default=None, - ) - - -@dataclasses.dataclass(init=False, repr=False) -class ConfigKwargs(FlexibleDataclass): - """ - Config parser for transformers config fields. - - Args: - pretrained_model_name_or_path: Path to pretrained model or model identifier from - huggingface.co/models. - cache_dir (optional, defaults to ``None``): Where do you want to store the pretrained models - downloaded from huggingface.co. - revision (optional, defaults to ``None``): The specific model version to use (can be a - branch name, tag name or commit id). - use_auth_token (optional, defaults to ``None``): Will use the token generated when running - ``transformers-cli login`` (necessary to use this script with private models). 
- num_labels (optional, excluded if not provided): Number of labels to use in the last layer - added to the model, typically for a classification task. - finetuning_task (optional, excluded if not provided): Name of the task used to fine-tune - the model. This can be used when converting from an original PyTorch checkpoint. - - Returns: - dataclass with the above fields populated according to provided config. - """ - - # Fields without defaults will be set as attributes only if a value is provided in the init. - num_labels: Optional[int] = dataclasses.field() - finetuning_task: Optional[str] = dataclasses.field() - - # Fields with defaults should always be set. - pretrained_model_name_or_path: Optional[str] = dataclasses.field( - default=None, - ) - cache_dir: Optional[str] = dataclasses.field( - default=None, - ) - revision: Optional[str] = dataclasses.field( - default="main", - ) - use_auth_token: Optional[bool] = dataclasses.field( - default=False, - ) - - -@dataclasses.dataclass(init=False, repr=False) -class TokenizerKwargs(FlexibleDataclass): - """ - Config parser for transformers tokenizer fields. - - Args: - pretrained_model_name_or_path: Path to pretrained model or model identifier from - huggingface.co/models. - cache_dir (optional, defaults to ``None``): Where do you want to store the pretrained models - downloaded from huggingface.co. - revision (optional, defaults to ``None``): The specific model version to use (can be a - branch name, tag name or commit id). - use_auth_token (optional, defaults to ``None``): Will use the token generated when running - ``transformers-cli login`` (necessary to use this script with private models). - use_fast (optional, defaults to ``True``): Whether to use one of the fast tokenizer - (backed by the tokenizers library) or not. - do_lower_case (optional, excluded if not provided): Indicate if tokenizer should do lower - case - - Returns: - dataclass with the above fields populated according to provided config. - - """ - - # Fields without defaults will be set as attributes only if a value is provided in the init. - do_lower_case: Optional[bool] = dataclasses.field() - - # Fields with defaults should always be set. - pretrained_model_name_or_path: Optional[str] = dataclasses.field( - default=None, - ) - cache_dir: Optional[str] = dataclasses.field( - default=None, - ) - revision: Optional[str] = dataclasses.field( - default="main", - ) - use_auth_token: Optional[bool] = dataclasses.field( - default=False, - ) - use_fast: Optional[bool] = dataclasses.field( - default=True, - ) - - -@dataclasses.dataclass -class ModelKwargs(FlexibleDataclass): - """ - Config parser for transformers model fields. - - Args: - pretrained_model_name_or_path: Path to pretrained model or model identifier from - huggingface.co/models. - cache_dir (optional, defaults to ``None``): Where do you want to store the pretrained models - downloaded from huggingface.co. - revision (optional, defaults to ``None``): The specific model version to use (can be a - branch name, tag name or commit id). - use_auth_token (optional, defaults to ``None``): Will use the token generated when running - ``transformers-cli login`` (necessary to use this script with private models). - - Returns: - dataclass with the above fields populated according to provided config. 
- - """ - - pretrained_model_name_or_path: str = dataclasses.field() - cache_dir: Optional[str] = dataclasses.field( - default=None, - ) - revision: Optional[str] = dataclasses.field( - default="main", - ) - use_auth_token: Optional[bool] = dataclasses.field( - default=False, - ) - - -@dataclasses.dataclass -class OptimizerKwargs: - """ - Config parser for transformers optimizer fields. - - """ - - weight_decay: Optional[float] = dataclasses.field( - default=0, - ) - adafactor: Optional[bool] = dataclasses.field( - default=False, - metadata={"help": "Whether to use adafactor optimizer. Will use AdamW by default."}, - ) - learning_rate: Optional[float] = dataclasses.field( - default=5e-5, - ) - max_grad_norm: Optional[float] = dataclasses.field( - default=1.0, - ) - adam_beta1: Optional[float] = dataclasses.field( - default=0.9, - ) - adam_beta2: Optional[float] = dataclasses.field( - default=0.999, - ) - adam_epsilon: Optional[float] = dataclasses.field( - default=1e-8, - ) - scale_parameter: Optional[bool] = dataclasses.field( - default=False, - metadata={ - "help": "For adafactor optimizer, if True, learning rate is scaled by " - "root mean square." - }, - ) - relative_step: Optional[bool] = dataclasses.field( - default=False, - metadata={ - "help": "For adafactor optimizer, if True, time-dependent learning rate is computed " - "instead of external learning rate." - }, - ) - - -@dataclasses.dataclass -class LRSchedulerKwargs: - """ - Config parser for transformers lr scheduler fields. - """ - - num_training_steps: int = dataclasses.field() - lr_scheduler_type: Optional[str] = dataclasses.field( - default="linear", - metadata={ - "help": "One of linear, cosine, cosine_with_restarts, polynomial, constant, or " - "constant_with_warmup." - }, - ) - num_warmup_steps: Optional[int] = dataclasses.field( - default=0, - ) - - -def parse_dict_to_dataclasses( - dataclass_types: Tuple[Any, ...], - args: Union[Dict[str, Any], utils.AttrDict], - as_dict: bool = False, -) -> Tuple[Any, ...]: - """ - This function will fill in values for a dataclass if the target key is found - in the provided args dictionary. We can have one argument key value be filled in - to multiple dataclasses if the key is found in them. - - Args: - dataclass_types: dataclasses with expected attributes. - args: arguments that will be parsed to each of the dataclass_types. - as_dict: if true will return dictionary instead of AttrDict - - Returns: - One dictionary for each dataclass with keys filled in from args if found. - """ - outputs = [] - for dtype in dataclass_types: - keys = {f.name for f in dataclasses.fields(dtype) if f.init} - inputs = {k: v for k, v in args.items() if k in keys} - obj = dtype(**inputs) - if as_dict: - try: - obj = utils.AttrDict(obj.as_dict()) - except AttributeError: - obj = utils.AttrDict(dataclasses.asdict(obj)) - outputs.append(obj) - return (*outputs,) - - -def default_parse_config_tokenizer_model_kwargs( - hparams: Union[Dict, utils.AttrDict], -) -> Tuple[utils.AttrDict, utils.AttrDict, utils.AttrDict]: - """ - This function converts hparams into fields for the transformers config, tokenizer, - and model. See the defined dataclasses ConfigKwargs, TokenizerKwargs, and ModelKwargs for - expected fields and defaults. - - Args: - hparams: hyperparameters to parse. - - Returns: - One AttrDict each for the config, tokenizer, and model. 
- """ - if not isinstance(hparams, utils.AttrDict): - hparams = utils.AttrDict(hparams) - config_args, tokenizer_args, model_args = parse_dict_to_dataclasses( - (ConfigKwargs, TokenizerKwargs, ModelKwargs), hparams, as_dict=True - ) - - # If a pretrained_model_name_or_path is provided it will be parsed to the - # arguments for config, tokenizer, and model. Then, if specific names are - # provided for config, tokenizer, or model we will override it. - if "config_name" in hparams: - config_args.pretrained_model_name_or_path = hparams.config_name - if "tokenizer_name" in hparams: - tokenizer_args.pretrained_model_name_or_path = hparams.tokenizer_name - if "model_name" in hparams: - model_args.pretrained_model_name_or_path = hparams.model_name - assert ( - config_args.pretrained_model_name_or_path is not None - and tokenizer_args.pretrained_model_name_or_path is not None - and model_args.pretrained_model_name_or_path is not None - ) - return config_args, tokenizer_args, model_args - - -def default_parse_optimizer_lr_scheduler_kwargs( - hparams: Union[Dict, utils.AttrDict] -) -> Tuple[OptimizerKwargs, LRSchedulerKwargs]: - """ - Parse hparams relevant for the optimizer and lr_scheduler and fills in with - the same defaults as those used by the transformers Trainer. See the defined dataclasses - OptimizerKwargs and LRSchedulerKwargs for expected fields and defaults. - - Args: - hparams: hparams to parse. - - Returns: - Configuration for the optimizer and lr scheduler. - """ - optimizer_args, scheduler_args = parse_dict_to_dataclasses( - (OptimizerKwargs, LRSchedulerKwargs), hparams - ) - return optimizer_args, scheduler_args diff --git a/model_hub/model_hub/huggingface/_trial.py b/model_hub/model_hub/huggingface/_trial.py deleted file mode 100644 index 319da312e12..00000000000 --- a/model_hub/model_hub/huggingface/_trial.py +++ /dev/null @@ -1,309 +0,0 @@ -import dataclasses -import logging -import warnings -from typing import Any, Dict, List, Optional, Tuple, Union, cast - -import datasets as hf_datasets -import torch -import transformers -import transformers.optimization as hf_opt - -import determined.pytorch as det_torch -from model_hub import utils -from model_hub.huggingface import _config_parser as hf_parse - -MODEL_MODES = { - "base": transformers.AutoModel, - "pretraining": transformers.AutoModelForPreTraining, - "causal-lm": transformers.AutoModelForCausalLM, - "masked-lm": transformers.AutoModelForMaskedLM, - "seq2seq-lm": transformers.AutoModelForSeq2SeqLM, - "sequence-classification": transformers.AutoModelForSequenceClassification, - "multiple-choice": transformers.AutoModelForMultipleChoice, - "next-sentence": transformers.AutoModelForNextSentencePrediction, - "token-classification": transformers.AutoModelForTokenClassification, - "question-answering": transformers.AutoModelForQuestionAnswering, -} - - -def build_using_auto( - config_kwargs: Union[Dict, utils.AttrDict], - tokenizer_kwargs: Union[Dict, utils.AttrDict], - model_mode: str, - model_kwargs: Union[Dict, utils.AttrDict], - use_pretrained_weights: bool = True, -) -> Tuple[ - transformers.PretrainedConfig, # This is how it's named in transformers - transformers.PreTrainedTokenizer, - transformers.PreTrainedModel, -]: - """ - Build the config, tokenizer, and model using tranformer's - Auto classes. 
-
-    Args:
-        config_kwargs: arguments for transformers configuration classes
-        tokenizer_kwargs: arguments for transformers tokenizer classes
-        model_mode: one of (pretraining, causal-lm, masked-lm, seq2seq-lm, sequence-classification,
-            multiple-choice, next-sentence, token-classification, question-answering)
-        model_kwargs: arguments for transformers model classes
-
-    Returns:
-        transformers config, tokenizer, and model
-    """
-    config = transformers.AutoConfig.from_pretrained(**config_kwargs)
-    tokenizer = transformers.AutoTokenizer.from_pretrained(**tokenizer_kwargs)
-    model_builder = MODEL_MODES[model_mode]
-    if isinstance(model_kwargs, hf_parse.ModelKwargs):
-        model_kwargs = dataclasses.asdict(model_kwargs)
-    if use_pretrained_weights:
-        model_kwargs["config"] = config
-        model = model_builder.from_pretrained(**model_kwargs)
-    else:
-        model = model_builder.from_config(config)
-    return config, tokenizer, model
-
-
-def group_parameters_for_optimizer(
-    model: torch.nn.Module,
-    weight_decay: Optional[float] = 0,
-    no_decay: Tuple[str, ...] = ("bias", "LayerNorm.weight"),
-) -> List[Dict[str, Any]]:
-    """
-    Group parameters by whether weight_decay is applied or not.
-
-    Args:
-        model: model supplying the learnable parameters
-        weight_decay: value for weight_decay
-        no_decay: variable names that should not have weight_decay applied
-    Returns:
-        grouped parameters according to whether weight_decay should be applied
-    """
-    return [
-        {
-            "params": [
-                p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)
-            ],
-            "weight_decay": weight_decay,
-        },
-        {
-            "params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
-            "weight_decay": 0.0,
-        },
-    ]
-
-
-def build_default_optimizer(
-    model: torch.nn.Module, optimizer_kwargs: hf_parse.OptimizerKwargs
-) -> Union[hf_opt.Adafactor, hf_opt.AdamW]:
-    """
-    This follows the function in transformers' Trainer to construct the optimizer.
-
-    Args:
-        model: model whose parameters will be updated by the optimizer
-        optimizer_kwargs: see OptimizerKwargs in _config_parser.py for expected fields
-    Returns:
-        optimizer configured accordingly
-    """
-    optimizer_grouped_parameters = group_parameters_for_optimizer(
-        model, optimizer_kwargs.weight_decay
-    )
-    if optimizer_kwargs.adafactor:
-        return hf_opt.Adafactor(
-            optimizer_grouped_parameters,
-            lr=optimizer_kwargs.learning_rate,
-            scale_parameter=optimizer_kwargs.scale_parameter,
-            relative_step=optimizer_kwargs.relative_step,
-        )
-    return hf_opt.AdamW(
-        optimizer_grouped_parameters,
-        lr=optimizer_kwargs.learning_rate,
-        betas=(optimizer_kwargs.adam_beta1, optimizer_kwargs.adam_beta2),
-        eps=optimizer_kwargs.adam_epsilon,
-    )
-
-
-def build_default_lr_scheduler(
-    optimizer: torch.optim.Optimizer,
-    scheduler_kwargs: hf_parse.LRSchedulerKwargs,
-) -> Any:
-    """
-    This follows the function in transformers' Trainer to construct the lr_scheduler.
-
-    Args:
-        optimizer: optimizer to apply lr_scheduler to
-        scheduler_kwargs: see LRSchedulerKwargs in _config_parser.py for expected fields.
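A hedged usage sketch of the two optimizer helpers above, within this module's namespace (the toy model stands in for a real transformer):

.. code-block:: python

    import torch.nn as nn

    model = nn.Sequential(nn.Linear(8, 8), nn.LayerNorm(8), nn.Linear(8, 2))
    opt_kwargs = hf_parse.OptimizerKwargs(weight_decay=0.01, learning_rate=3e-5)
    optimizer = build_default_optimizer(model, opt_kwargs)
    # Two parameter groups: biases land in the no-decay group (HF models also
    # match "LayerNorm.weight" by name); everything else gets weight_decay=0.01.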
-    Returns:
-        lr_scheduler configured accordingly
-    """
-    return hf_opt.get_scheduler(
-        scheduler_kwargs.lr_scheduler_type,
-        optimizer,
-        num_warmup_steps=scheduler_kwargs.num_warmup_steps,
-        num_training_steps=scheduler_kwargs.num_training_steps,
-    )
-
-
-def default_load_dataset(
-    data_config_input: Union[Dict, utils.AttrDict],
-) -> Union[
-    hf_datasets.Dataset,
-    hf_datasets.IterableDataset,
-    hf_datasets.DatasetDict,
-    hf_datasets.IterableDatasetDict,
-]:
-    """
-    Creates the dataset using HuggingFace datasets' load_dataset method.
-    If a dataset_name is provided, we will use that along with the dataset_config_name.
-    Otherwise, we will create the dataset using the provided train_file and validation_file.
-
-    Args:
-        data_config_input: arguments for load_dataset. See DatasetKwargs for expected fields.
-    Returns:
-        Dataset returned from hf_datasets.load_dataset.
-    """
-    (data_config,) = hf_parse.parse_dict_to_dataclasses(
-        (hf_parse.DatasetKwargs,), data_config_input
-    )
-    # This method is common in nearly all main HF examples.
-    if data_config.dataset_name is not None:
-        # Downloading and loading a dataset from the hub.
-        datasets = hf_datasets.load_dataset(
-            data_config.dataset_name, data_config.dataset_config_name
-        )
-        assert hasattr(datasets, "keys"), "Expected a dictionary of datasets."
-        datasets = cast(Union[hf_datasets.DatasetDict, hf_datasets.IterableDatasetDict], datasets)
-
-        if "validation" not in datasets.keys():
-            assert "validation_split_percentage" in data_config, (
-                "Validation split not provided by this huggingface dataset. Please specify "
-                "validation_split_percentage in data_config so a validation set can be "
-                "created from the training data."
-            )
-            datasets["validation"] = hf_datasets.load_dataset(
-                data_config.dataset_name,
-                data_config.dataset_config_name,
-                split=f"train[:{data_config.validation_split_percentage}%]",
-            )
-            datasets["train"] = hf_datasets.load_dataset(
-                data_config.dataset_name,
-                data_config.dataset_config_name,
-                split=f"train[{data_config.validation_split_percentage}%:]",
-            )
-    else:
-        data_files = {}
-        if data_config.train_file is not None:
-            data_files["train"] = data_config.train_file
-        if data_config.validation_file is not None:
-            data_files["validation"] = data_config.validation_file
-        extension = data_config.train_file.split(".")[-1]
-        if extension == "txt":
-            extension = "text"
-        datasets = hf_datasets.load_dataset(extension, data_files=data_files)
-    return datasets
-
-
-class BaseTransformerTrial(det_torch.PyTorchTrial):
-    """
-    This is the base PyTorchTrial for transformers that implements the ``__init__`` and
-    ``train_batch`` methods.
-
-    You can subclass ``BaseTransformerTrial`` to customize a trial for your own usage by filling
-    in the expected methods for data loading and evaluation.
-    """
-
-    def __init__(self, context: det_torch.PyTorchTrialContext) -> None:
-        warnings.warn(
-            "BaseTransformerTrial is deprecated and will be removed in the next release. "
-            "Users of huggingface can refer to the TrainerAPI examples we have provided.",
-            stacklevel=2,
-        )
-        self.context = context
-        # A subclass of BaseTransformerTrial may have already set hparams and data_config
-        # attributes so we only reset them if they do not exist.
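A minimal sketch of the two ``default_load_dataset`` code paths described above (dataset and file names are illustrative):

.. code-block:: python

    # Hub dataset without a predefined validation split: carve one out of train.
    datasets = default_load_dataset(
        {"dataset_name": "imdb", "validation_split_percentage": 5}
    )

    # Local files instead of a hub dataset; the loader is inferred from the extension.
    datasets = default_load_dataset(
        {"train_file": "train.csv", "validation_file": "valid.csv"}
    )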
- if not hasattr(self, "hparams"): - self.hparams = utils.AttrDict(context.get_hparams()) - if not hasattr(self, "data_config"): - self.data_config = utils.AttrDict(context.get_data_config()) - if not hasattr(self, "exp_config"): - self.exp_config = utils.AttrDict(context.get_experiment_config()) - # Check to make sure all expected hyperparameters are set. - self.check_hparams() - - # Parse hparams and data_config. - ( - self.config_kwargs, - self.tokenizer_kwargs, - self.model_kwargs, - ) = hf_parse.default_parse_config_tokenizer_model_kwargs(self.hparams) - optimizer_kwargs, scheduler_kwargs = hf_parse.default_parse_optimizer_lr_scheduler_kwargs( - self.hparams - ) - - self.config, self.tokenizer, self.model = build_using_auto( - self.config_kwargs, - self.tokenizer_kwargs, - self.hparams.model_mode, - self.model_kwargs, - use_pretrained_weights=self.hparams.use_pretrained_weights, - ) - self.model = self.context.wrap_model(self.model) - - self.optimizer = self.context.wrap_optimizer( - build_default_optimizer(self.model, optimizer_kwargs) - ) - - if self.hparams.use_apex_amp: - self.model, self.optimizer = self.context.configure_apex_amp( - models=self.model, - optimizers=self.optimizer, - ) - - self.lr_scheduler = self.context.wrap_lr_scheduler( - build_default_lr_scheduler(self.optimizer, scheduler_kwargs), - det_torch.LRScheduler.StepMode.STEP_EVERY_BATCH, - ) - - self.grad_clip_fn = None - - if optimizer_kwargs.max_grad_norm > 0: # type: ignore - self.grad_clip_fn = lambda x: torch.nn.utils.clip_grad_norm_( - x, optimizer_kwargs.max_grad_norm - ) - - def check_hparams(self) -> None: - # We require hparams to be an AttrDict. - if not isinstance(self.hparams, utils.AttrDict): - self.hparams = utils.AttrDict(self.hparams) - - if "num_training_steps" not in self.hparams: - # Compute the total number of training iterations used to configure the - # learning rate scheduler. - self.hparams.num_training_steps = utils.compute_num_training_steps( - self.context.get_experiment_config(), self.context.get_global_batch_size() - ) - if "use_pretrained_weights" not in self.hparams: - logging.warning( - "We will be using pretrained weights for the model by default." - "If you want to train the model from scratch, you can set a hyperparameter " - "named use_pretrained_weights to false in the experiment config." - ) - self.hparams.use_pretrained_weights = True - - required_hps = ("use_apex_amp", "model_mode", "num_training_steps") - for hp in required_hps: - assert ( - hp in self.hparams - ), "{} is a required hyperparameter for BaseTransformerTrial".format(hp) - - def train_batch(self, batch: Any, epoch_idx: int, batch_idx: int) -> Any: - # By default, all HF models return the loss in the first element. - # We do not automatically apply a label smoother for the user. 
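A hypothetical experiment-config snippet covering the hyperparameters that ``check_hparams`` requires or defaults (values illustrative):

.. code-block:: yaml

    hyperparameters:
      global_batch_size: 16
      pretrained_model_name_or_path: bert-base-uncased
      model_mode: sequence-classification
      use_apex_amp: false
      use_pretrained_weights: true
      # num_training_steps is computed from searcher.max_length when omitted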
-        # If this is something you want to use, please see how it's
-        # applied by transformers.Trainer:
-        # https://github.com/huggingface/transformers/blob/v4.3.3/src/transformers/trainer.py#L1324
-        outputs = self.model(**batch)
-        loss = outputs["loss"] if isinstance(outputs, dict) else outputs[0]
-        self.context.backward(loss)
-        self.context.step_optimizer(self.optimizer, self.grad_clip_fn)
-        return loss
diff --git a/model_hub/model_hub/huggingface/_utils.py b/model_hub/model_hub/huggingface/_utils.py
deleted file mode 100644
index c290b7f71f1..00000000000
--- a/model_hub/model_hub/huggingface/_utils.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import inspect
-
-import datasets as hf_datasets
-from torch import nn
-
-"""
-The remove_unused_columns function below is largely derived from
-the transformers library's Trainer._remove_unused_columns method.
-
-The license for the transformers library is reproduced below.
-
-============================================================================
-
-Copyright 2020 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-
-def remove_unused_columns(model: nn.Module, dataset: hf_datasets.Dataset) -> None:
-    # This method is implemented in transformers' Trainer.
-    # Inspect model forward signature to keep only the arguments it accepts.
-    signature = inspect.signature(model.forward)
-    signature_columns = list(signature.parameters.keys())
-    # Labels may be named label or label_ids, the default data collator handles that.
-    signature_columns += ["label", "label_ids"]
-    columns = [k for k in signature_columns if k in dataset.column_names]
-    dataset.set_format(type=dataset.format["type"], columns=columns)
diff --git a/model_hub/model_hub/mmdetection/__init__.py b/model_hub/model_hub/mmdetection/__init__.py
deleted file mode 100644
index 8f21146e191..00000000000
--- a/model_hub/model_hub/mmdetection/__init__.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from model_hub.mmdetection._data import GroupSampler, build_dataloader
-from model_hub.mmdetection._callbacks import LrUpdaterCallback
-from model_hub.mmdetection._trial import MMDetTrial
-from model_hub.mmdetection.utils import (
-    get_pretrained_ckpt_path,
-    build_fp16_loss_scaler,
-)
-from model_hub.mmdetection._data_backends import GCSBackend, S3Backend, FakeBackend, sub_backend
diff --git a/model_hub/model_hub/mmdetection/_callbacks.py b/model_hub/model_hub/mmdetection/_callbacks.py
deleted file mode 100644
index e4c7af74aad..00000000000
--- a/model_hub/model_hub/mmdetection/_callbacks.py
+++ /dev/null
@@ -1,125 +0,0 @@
-"""
-Convert the LrUpdaterHook in mmcv to a PyTorchCallback.
-See: https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py.
-""" -from typing import Any, Dict, Optional, cast - -import mmcv -import mmcv.runner.hooks as mmcv_hooks - -import determined.pytorch as det_torch - - -class DummyDataloader: - def __init__(self, epoch_len: Optional[int]): - self.epoch_len = epoch_len - - def __len__(self) -> Optional[int]: - return self.epoch_len - - -class FakeRunner: - """ - Mocks a mmcv runner and implements the same properties accessed by `LrUpdaterHook`. - Instead, we get them from the PyTorchTrialContext. - """ - - def __init__(self, context: det_torch.PyTorchTrialContext): - self.context = context - self._data_loader = None # type: Optional[DummyDataloader] - experiment_config = context.get_experiment_config() - self.max_length = experiment_config["searcher"]["max_length"] - - @property - def optimizer(self) -> Dict[int, Any]: - return dict(enumerate(self.context.optimizers)) - - @property - def data_loader(self) -> Optional[DummyDataloader]: - # The MMCV lr_updater uses runner.data_loader to get epoch length. - # We will use a fake data_loader here to return the epoch length. - if self._data_loader is None: - self._data_loader = DummyDataloader(self.context._epoch_len) - return self._data_loader - - @property - def iter(self) -> Optional[int]: - return self.context._current_batch_idx - - @property - def epoch(self) -> Optional[int]: - return self.context.current_train_epoch() - - @property - def max_epoch(self) -> int: - if "epochs" in self.max_length: - return int(self.max_length["epochs"]) - raise KeyError("max_length is not specified in terms of epochs") - - @property - def max_iters(self) -> int: - if "batches" in self.max_length: - return int(self.max_length["batches"]) - raise KeyError("max_length is not specified in terms of iterations") - - -def build_lr_hook(lr_config: Dict[Any, Any]) -> mmcv_hooks.LrUpdaterHook: - assert "policy" in lr_config, "policy must be specified in lr_config" - policy_type = lr_config.pop("policy") - if policy_type == policy_type.lower(): - policy_type = policy_type.title() - hook_type = policy_type + "LrUpdaterHook" - lr_config["type"] = hook_type - hook = mmcv.build_from_cfg(lr_config, mmcv_hooks.HOOKS) - return hook - - -class LrUpdaterCallback(det_torch.PyTorchCallback): - """ - Updates the learning rate for optimizers according to the configured LrUpdaterHook. - - mmcv's LrUpdaterHook replaces lr schedulers to perform lr warmup and annealing. - See: https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py - for supported lr updaters. - - We mock the behavior of the mmcv hook with our PyTorchCallback. We do not have a - `on_batch_start` callback so that is called manually in the `train_batch` method - of `MMDetTrial`. - - The LrUpaterHook is configured from the mmdet configuration file passed to the - experiment config. - """ - - def __init__( - self, - context: det_torch.PyTorchTrialContext, - hook: Optional[mmcv_hooks.LrUpdaterHook] = None, - lr_config: Optional[Dict[Any, Any]] = None, - ): - """ - Creates the callback from either the provided `hook` or `lr_config`. - One of `hook` or `lr_config` must be defined. If both are provided, - `hook` takes precedence. - - Arguments: - context: PyTorchTrialContext used to get iterations and epoch information - hook (Optional): already created mmcv LrUpdaterHook - lr_config (Optional): configuration for LrUpdaterHook - """ - self.runner = FakeRunner(context) - assert ( - hook is not None or lr_config is not None - ), "One of hook or lr_config must be provided." 
- if hook is None: - lr_config = cast(Dict[Any, Any], lr_config) - hook = build_lr_hook(lr_config) - self.hook = hook - - def on_training_start(self) -> None: - self.hook.before_run(self.runner) - - def on_training_epoch_start(self, epoch_idx: int) -> None: - self.hook.before_train_epoch(self.runner) - - def on_batch_start(self) -> None: - self.hook.before_train_iter(self.runner) diff --git a/model_hub/model_hub/mmdetection/_data.py b/model_hub/model_hub/mmdetection/_data.py deleted file mode 100644 index ddd509874d7..00000000000 --- a/model_hub/model_hub/mmdetection/_data.py +++ /dev/null @@ -1,200 +0,0 @@ -""" -Data utility functions for creating the dataset and dataloader for use with MMDetTrial. -""" -import functools -import logging -import math -import os -from typing import Any, Iterator, List, Tuple - -import filelock -import mmcv -import mmcv.parallel -import mmdet.datasets -import numpy as np -import torch -import torch.utils.data as torch_data - -import determined.pytorch as det_torch - - -class GroupSampler(torch.utils.data.Sampler): - """ - Modifies DistributedGroupSampler from - https://github.com/open-mmlab/mmdetection/blob/master/mmdet/datasets/samplers/group_sampler.py - to work with our Dataloader which automatically handles sharding for distributed training. - """ - - def __init__( - self, - dataset: torch_data.Dataset, - samples_per_gpu: int, - num_replicas: int, - ): - """ - This sampler will generate indices such that each batch will belong to the same group. - For example, if batch size is `b`, samples 1 to b will be one group, samples b+1 to 2b - another group, etc. Hence, we effectively have len(dataset)/batch_size group indices which - get shuffled every epoch. - - Arguments: - dataset: dataset that has a flag attribute to indicate group member for each sample. - samples_per_gpu: number of samples per slot. - num_replicas: number of processes participating in distributed training. 
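A short sketch of wiring up the ``LrUpdaterCallback`` above from an mmcv-style ``lr_config`` (schedule values illustrative; ``context`` is the trial's PyTorchTrialContext):

.. code-block:: python

    lr_config = {"policy": "step", "step": [8, 11], "warmup": "linear", "warmup_iters": 500}
    lr_updater = LrUpdaterCallback(context, lr_config=lr_config)
    # build_lr_hook title-cases the policy, yielding mmcv's StepLrUpdaterHook.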
- """ - self.dataset = dataset - self.samples_per_gpu = samples_per_gpu - self.num_replicas = num_replicas - - assert hasattr(self.dataset, "flag") - self.flag = self.dataset.flag # type: ignore - self.group_sizes = np.bincount(self.flag) - - self.num_samples = 0 - for size in self.group_sizes: - self.num_samples += ( - int(math.ceil(size * 1.0 / self.samples_per_gpu / self.num_replicas)) - * self.samples_per_gpu - ) - self.total_size = self.num_samples * self.num_replicas - - def __iter__(self) -> Iterator[Any]: - shuffled_indices: List[int] = [] - for i, size in enumerate(self.group_sizes): - if size > 0: - group_indices = np.where(self.flag == i)[0] - assert len(group_indices) == size - shuffled_group_indices: List[int] = group_indices[ - list(torch.randperm(int(size))) - ].tolist() - extra = int( - math.ceil(size * 1.0 / self.samples_per_gpu / self.num_replicas) - ) * self.samples_per_gpu * self.num_replicas - len(shuffled_group_indices) - # pad index - tmp = shuffled_group_indices.copy() - for _ in range(extra // size): - shuffled_group_indices.extend(tmp) - shuffled_group_indices.extend(tmp[: extra % size]) - shuffled_indices.extend(shuffled_group_indices) - - assert len(shuffled_indices) == self.total_size - - shuffled_indices = [ - shuffled_indices[j] - for i in list(torch.randperm(len(shuffled_indices) // self.samples_per_gpu)) - for j in range(i * self.samples_per_gpu, (i + 1) * self.samples_per_gpu) - ] - - return iter(shuffled_indices) - - def __len__(self) -> int: - return self.total_size - - -def maybe_download_ann_file(cfg: mmcv.Config) -> None: - """ - mmdetection expects the annotation files to be available in the disk at a specific directory - to initialize a dataset. However, the annotation file is usually not available locally when a - cloud backend is used. We will try to download the annotation file if it exists from the cloud - if the backend is gcp or s3. - - Arguments: - cfg: mmcv.Config with dataset specifications. - """ - if "dataset" in cfg: - dataset = cfg.dataset - else: - dataset = cfg - ann_dir = "/".join(dataset.ann_file.split("/")[0:-1]) - os.makedirs(ann_dir, exist_ok=True) - lock = filelock.FileLock(dataset.ann_file + ".lock") - - with lock: - if not os.path.isfile(dataset.ann_file): - try: - assert ( - dataset.pipeline[0].type == "LoadImageFromFile" - ), "First step of dataset.pipeline is not LoadImageFromFile." - file_client_args = dataset.pipeline[0].file_client_args - file_client = mmcv.FileClient(**file_client_args) - ann_bytes = file_client.get(dataset.ann_file) - logging.info( - f'Downloading annotation file using {file_client_args["backend"]} backend.' - ) - with open(dataset.ann_file, "wb") as f: - f.write(ann_bytes) - except Exception as e: - logging.error( - f"Could not download missing annotation file. Encountered {e}." - f"Please make sure it is available at the following path {dataset.ann_file}." - ) - - -class DatasetWithIndex(torch.utils.data.Dataset): - """ - The way Determined shards a dataset for distributed training and then gathers predictions in - custom reducers does not maintain dataset ordering. Here, we include the index in the dataset - so that predictions can be sorted correctly at evaluation time. 
- """ - - def __init__(self, dataset: torch.utils.data.Dataset): - self.dataset = dataset - - def __getattr__(self, item: Any) -> Any: - return getattr(self.dataset, item) - - def __getitem__(self, idx: int) -> Any: - sample = self.dataset[idx] - if "idx" not in sample["img_metas"][0].data: - sample["img_metas"][0].data["idx"] = idx - return sample - - def __len__(self) -> int: - return self.dataset.__len__() # type: ignore - - -def build_dataloader( - cfg: mmcv.Config, - split: "str", - context: det_torch.PyTorchTrialContext, - shuffle: bool, -) -> Tuple[torch_data.Dataset, det_torch.DataLoader]: - """ - Build the dataset and dataloader according to cfg and sampler parameters. - - Arguments: - cfg: mmcv.Config with dataset specifications. - split: one of train, val, or test. If val or test, annotations are not loaded. - context: PyTorchTrialContext with seed info used to seed the dataloader workers. - shuffle: whether to shuffle indices for data loading. - Returns: - dataset and dataloader - """ - assert split in ["train", "val", "test"], "argument split must be one of train, val, or test." - num_samples_per_gpu = context.get_per_slot_batch_size() - num_replicas = context.distributed.get_size() - num_workers = cfg.workers_per_gpu - test_mode = False if split == "train" else True - - cfg = eval(f"cfg.{split}") - maybe_download_ann_file(cfg) - - dataset = mmdet.datasets.build_dataset(cfg, {"test_mode": test_mode}) - if test_mode: - dataset = DatasetWithIndex(dataset) - sampler = GroupSampler(dataset, num_samples_per_gpu, num_replicas) if shuffle else None - - return dataset, det_torch.DataLoader( - dataset, - batch_size=num_samples_per_gpu, - num_workers=num_workers, - sampler=sampler, - collate_fn=functools.partial(mmcv.parallel.collate, samples_per_gpu=num_samples_per_gpu), - pin_memory=False, - worker_init_fn=functools.partial( - mmdet.datasets.builder.worker_init_fn, - seed=context.get_trial_seed(), - rank=context.distributed.get_rank(), - num_workers=num_workers, - ), - ) diff --git a/model_hub/model_hub/mmdetection/_data_backends.py b/model_hub/model_hub/mmdetection/_data_backends.py deleted file mode 100644 index f95db7170e2..00000000000 --- a/model_hub/model_hub/mmdetection/_data_backends.py +++ /dev/null @@ -1,179 +0,0 @@ -""" -Add backends to support loading data from other sources including -S3 buckets, GCS storage buckets, and fake data. -""" -import contextlib -import logging -import os -import tempfile -import time -from typing import Any, Dict, Iterator, List, Optional, Union, cast - -import boto3 -import mmcv -from google.cloud import storage - -import determined -from model_hub import utils - - -class S3Backend(mmcv.fileio.BaseStorageBackend): # type: ignore - """ - To use a S3 bucket as the storage backend, set ``data.file_client_args`` field of - the experiment config as follows: - - .. code-block:: yaml - - data: - file_client_args: - backend: s3 - bucket_name: - """ - - def __init__(self, bucket_name: str): - self._storage_client = boto3.client("s3") - self._bucket = bucket_name - - def get(self, filepath: str) -> Any: - obj = self._storage_client.get_object(Bucket=self._bucket, Key=filepath) - data = obj["Body"].read() - return data - - def get_text(self, filepath: str) -> Any: - raise NotImplementedError - - @contextlib.contextmanager - def get_local_path(self, filepath: str) -> Iterator[str]: - """Download a file from ``filepath``. - ``get_local_path`` is decorated by :meth:`contxtlib.contextmanager`. 
It
-        can be called with a ``with`` statement, and when exiting the ``with``
-        statement, the temporary path will be released.
-        Args:
-            filepath (str): Download a file from ``filepath``.
-        """
-        try:
-            f = tempfile.NamedTemporaryFile(delete=False)
-            f.write(self.get(filepath))
-            f.close()
-            yield f.name
-        finally:
-            os.remove(f.name)
-
-
-mmcv.fileio.FileClient.register_backend("s3", S3Backend)
-
-
-class GCSBackend(mmcv.fileio.BaseStorageBackend):  # type: ignore
-    """
-    To use a Google Storage bucket as the storage backend, set ``data.file_client_args`` field of
-    the experiment config as follows:
-
-    .. code-block:: yaml
-
-        data:
-          file_client_args:
-            backend: gcs
-            bucket_name: <bucket_name>
-    """
-
-    def __init__(self, bucket_name: str):
-        self._storage_client = storage.Client()
-        self._bucket = self._storage_client.bucket(bucket_name)
-
-    def get(self, filepath: str) -> Any:
-        blob = self._bucket.blob(filepath)
-        try:
-            data = determined.util.download_gcs_blob_with_backoff(blob)
-        except Exception as e:
-            raise Exception(f"Encountered {e}, failed to download {filepath} from gcs bucket.")
-        return data
-
-    def get_text(self, filepath: str) -> Any:
-        raise NotImplementedError
-
-    @contextlib.contextmanager
-    def get_local_path(self, filepath: str) -> Iterator[str]:
-        """Download a file from ``filepath``.
-        ``get_local_path`` is decorated by :meth:`contextlib.contextmanager`. It
-        can be called with a ``with`` statement, and when exiting the ``with``
-        statement, the temporary path will be released.
-        Args:
-            filepath (str): Download a file from ``filepath``.
-        """
-        try:
-            f = tempfile.NamedTemporaryFile(delete=False)
-            f.write(self.get(filepath))
-            f.close()
-            yield f.name
-        finally:
-            os.remove(f.name)
-
-
-mmcv.fileio.FileClient.register_backend("gcs", GCSBackend)
-
-
-class FakeBackend(mmcv.fileio.BaseStorageBackend):  # type: ignore
-    def __init__(self, fake_img_path: Optional[str] = None):
-        if fake_img_path is None:
-            download_dir = os.path.join("/tmp", str(hash(time.time())))
-            os.makedirs(download_dir, exist_ok=True)
-            fake_img_path = utils.download_url(
-                download_dir,
-                "https://images.freeimages.com/images/large-previews/5c6/sunset-jungle-1383333.jpg",
-            )
-            logging.info("Downloaded fake image to use.")
-
-        with open(fake_img_path, "rb") as f:
-            img_str = f.read()
-        self.data = img_str
-
-    def get(self, filepath: str) -> Any:
-        return self.data
-
-    def get_text(self, filepath: str) -> Any:
-        raise NotImplementedError
-
-
-mmcv.fileio.FileClient.register_backend("fake", FakeBackend)
-
-
-def sub_backend(
-    file_client_args: Dict[str, Any],
-    cfg: Union[mmcv.utils.config.Config, mmcv.utils.config.ConfigDict, List],
-) -> None:
-    """
-    Recurses through the mmcv.Config to replace the ``file_client_args`` field of calls to
-    ``LoadImageFromFile`` with the provided argument. ``file_client_args`` should be a dictionary
-    with a ``backend`` specified along with keyword arguments to instantiate the backend.
-
-    .. code-block:: python
-
-        # Using gcs backend
-        file_client_args = {'backend': 'gcs', 'bucket_name': 'mydatabucket'}
-        # Using s3 backend
-        file_client_args = {'backend': 's3', 'bucket_name': 'mydatabucket'}
-        # Using fake backend
-        file_client_args = {'backend': 'fake', 'fake_img_path': None}
-
-    In addition to the backends registered in this file, mmcv supports
-    disk, ceph, memcache, lmdb, petrel, and http backends. The default backend is disk.
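Following the same registration pattern, a hypothetical extra backend could be plugged in like this (the name and behavior are illustrative, not part of model_hub):

.. code-block:: python

    import mmcv
    import requests

    class MirrorBackend(mmcv.fileio.BaseStorageBackend):
        """Toy backend that fetches files from an HTTP mirror prefix."""

        def __init__(self, prefix: str):
            self.prefix = prefix

        def get(self, filepath: str) -> bytes:
            # Fetch raw bytes from the mirror; mmcv pipelines decode them downstream.
            return requests.get(self.prefix + filepath).content

        def get_text(self, filepath: str) -> str:
            return self.get(filepath).decode("utf-8")

    mmcv.fileio.FileClient.register_backend("mirror", MirrorBackend)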
- - It is better to override the backend using this function than to use other mechanisms - in `MMDetTrial.build_mmdet_config` because recursively going through the config will - cover all occurrences of `LoadImageFromFile`. - - Arguments: - file_client_args: dictionary with a backend field and keyword arguments for that backend. - cfg: base config for which to replace backends. - """ - if type(cfg) in [mmcv.utils.config.Config, mmcv.utils.config.ConfigDict]: - cfg = cast(Union[mmcv.utils.config.Config, mmcv.utils.config.ConfigDict], cfg) - if "type" in cfg and cfg["type"] in ["LoadImageFromFile", "LoadPanopticAnnotations"]: - cfg["file_client_args"] = file_client_args - else: - for k in cfg: - sub_backend(file_client_args, cfg[k]) - else: - if isinstance(cfg, list): - for i in cfg: - sub_backend(file_client_args, i) diff --git a/model_hub/model_hub/mmdetection/_trial.py b/model_hub/model_hub/mmdetection/_trial.py deleted file mode 100644 index af923ea18f6..00000000000 --- a/model_hub/model_hub/mmdetection/_trial.py +++ /dev/null @@ -1,287 +0,0 @@ -""" -Determined training loop for mmdetection -mmdetection: https://github.com/open-mmlab/mmdetection. - -This Determined trial definition makes use of mmcv and mmdet libraries. -The license for mmcv and mmdet is reproduced below. - -Copyright (c) OpenMMLab. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import logging -import os -import warnings -from typing import Any, Dict, List - -import mmcv -import mmcv.parallel -import mmcv.runner -import mmdet.core -import mmdet.datasets -import mmdet.models -import numpy as np -import torch - -import determined.pytorch as det_torch -from determined.common import set_logger -from model_hub import utils -from model_hub.mmdetection import _callbacks as callbacks -from model_hub.mmdetection import _data as data -from model_hub.mmdetection import _data_backends as data_backends -from model_hub.mmdetection import utils as mmdetutils - - -class MMDetTrial(det_torch.PyTorchTrial): - """ - This trial serves as the trainer for MMDetection models. It replaces the - `mmcv runner used by MMDetection - `_. - - For nearly all use cases, you can just use this trial definition and control behavior - by changing the MMDetection config. If you want to customize the trial further, you - can use this trial as the starting point. - """ - - def __init__(self, context: det_torch.PyTorchTrialContext) -> None: - warnings.warn( - "MMDetTrial is deprecated and will be removed in the next release." - "Users of mmcv can implement training workloads with CoreAPI.", - stacklevel=2, - ) - self.context = context - self.hparams = utils.AttrDict(context.get_hparams()) - self.data_config = utils.AttrDict(context.get_data_config()) - self.cfg = self.build_mmdet_config() - # We will control how data is moved to GPU. - self.context.experimental.disable_auto_to_device() - - # Build model and make sure it's compatible with horovod. 
-        self.model = mmdet.models.build_detector(self.cfg.model)
-
-        # Initialize model
-        self.model.init_weights()
-
-        # If use_pretrained, try loading pretrained weights for the mmcv config if available.
-        if self.hparams.use_pretrained:
-            ckpt_path, ckpt = mmdetutils.get_pretrained_ckpt_path("/tmp", self.hparams.config_file)
-            if ckpt_path is not None:
-                logging.info("Loading from pretrained weights.")
-                if "state_dict" in ckpt:
-                    self.model.load_state_dict(ckpt["state_dict"])
-                else:
-                    self.model.load_state_dict(ckpt)
-
-        # If fp16 is specified in the mmdet config, we will use torch native amp.
-        fp16_cfg = self.cfg.get("fp16", None)
-        if fp16_cfg is not None:
-            self.setup_torch_amp(fp16_cfg)
-
-        self.model = self.context.wrap_model(self.model)
-
-        self.optimizer = self.context.wrap_optimizer(
-            mmcv.runner.build_optimizer(self.model, self.cfg.optimizer)
-        )
-        self.model.zero_grad()
-
-        self.clip_grads_fn = None
-        if self.cfg.optimizer_config.grad_clip is not None:
-            self.clip_grads_fn = lambda x: torch.nn.utils.clip_grad_norm_(
-                x,
-                self.cfg.optimizer_config.grad_clip.max_norm,
-                self.cfg.optimizer_config.grad_clip.norm_type,
-            )
-
-        # mmdet sets loggers in the package that interfere with Determined logging.
-        # We reset the root logger after mmdet models are initialized.
-        set_logger(bool(self.context.get_experiment_config().get("debug", False)))
-
-    def build_mmdet_config(self) -> mmcv.Config:
-        """
-        Apply overrides to the mmdet config according to the following experiment config fields:
-        - data.file_client_args
-        - hyperparameters.merge_config
-        - hyperparameters.override_mmdet_config.
-
-        Returns:
-            overridden mmdet config
-        """
-        config_file = self.hparams.config_file
-        if not os.path.exists(config_file):
-            config_dir = os.getenv("MMDETECTION_CONFIG_DIR")
-            if config_dir is not None:
-                config_file = os.path.join(config_dir, config_file)
-            if config_dir is None or not os.path.exists(config_file):
-                raise OSError(f"Config file {config_file} not found.")
-        cfg = mmcv.Config.fromfile(config_file)
-        cfg.data.val.test_mode = True
-
-        # If a backend is specified, we will replace the backend used in all occurrences of
-        # LoadImageFromFile in the mmdet config.
-        if self.data_config.file_client_args is not None:
-            data_backends.sub_backend(self.data_config.file_client_args, cfg)
-        if self.hparams.merge_config is not None:
-            override_config = mmcv.Config.fromfile(self.hparams.merge_config)
-            new_config = mmcv.Config._merge_a_into_b(override_config, cfg._cfg_dict)
-            cfg = mmcv.Config(new_config, cfg._text, cfg._filename)
-
-        if "override_mmdet_config" in self.hparams:
-            cfg.merge_from_dict(self.hparams.override_mmdet_config)
-
-        cfg.data.val.pipeline = mmdet.datasets.replace_ImageToTensor(cfg.data.val.pipeline)
-        cfg.data.test.pipeline = mmdet.datasets.replace_ImageToTensor(cfg.data.test.pipeline)
-
-        # Save and log the resulting config.
-        if "save_cfg" in self.hparams and self.hparams.save_cfg:
-            save_dir = self.hparams.save_dir if "save_dir" in self.hparams else "/tmp"
-            extension = cfg._filename.split(".")[-1]
-            cfg.dump(os.path.join(save_dir, f"final_config.{extension}"))
-        logging.info(cfg)
-        return cfg
-
-    def setup_torch_amp(self, fp16_cfg: mmcv.Config) -> None:
-        """
-        Build the torch amp gradient scaler according to the fp16_cfg.
-        Please refer to the :meth:`model_hub.mmdetection.build_fp16_loss_scaler` function
-        to see how to configure fp16 training.
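For reference, a hypothetical experiment-config snippet exercising the three override fields handled in ``build_mmdet_config`` above, mirroring the test fixture later in this diff (values illustrative):

.. code-block:: yaml

    data:
      file_client_args:
        backend: gcs
        bucket_name: mydatabucket
    hyperparameters:
      config_file: mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py
      merge_config: null
      override_mmdet_config:
        optimizer.lr: 0.02
        data.workers_per_gpu: 2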
- """ - mmcv.runner.wrap_fp16_model(self.model) - loss_scaler = mmdetutils.build_fp16_loss_scaler(fp16_cfg.loss_scale) - self.loss_scaler = self.context.wrap_scaler(loss_scaler) - self.context.experimental._auto_amp = True - - def build_callbacks(self) -> Dict[str, det_torch.PyTorchCallback]: - self.lr_updater = None - hooks = {} # type: Dict[str, det_torch.PyTorchCallback] - if "lr_config" in self.cfg: - logging.info("Adding lr updater callback.") - self.lr_updater = callbacks.LrUpdaterCallback( - self.context, lr_config=self.cfg.lr_config - ) - hooks["lr_updater"] = self.lr_updater - return hooks - - def train_batch(self, batch: Any, epoch_idx: int, batch_idx: int) -> Dict[str, torch.Tensor]: - batch = self.to_device(batch) - if self.lr_updater is not None: - self.lr_updater.on_batch_start() - batch = {key: batch[key].data[0] for key in batch} - - losses = self.model(**batch) - loss, log_vars = self.model._parse_losses(losses) - self.model.zero_grad() - self.context.backward(loss) - self.context.step_optimizer( - self.optimizer, clip_grads=self.clip_grads_fn, auto_zero_grads=False - ) - - lr = self.optimizer.param_groups[0]["lr"] - metrics = {"loss": loss, "lr": lr} - metrics.update(log_vars) - return metrics - - def evaluate_batch(self, batch: Any, batch_idx: int) -> Dict[str, Any]: - batch = self.to_device(batch) - batch = {key: batch[key][0].data for key in batch} - with torch.no_grad(): - result = self.model(return_loss=False, rescale=True, **batch) - if isinstance(result[0], tuple): - result = [ - (bbox_results, mmdet.core.encode_mask_results(mask_results)) - for bbox_results, mask_results in result - ] - self.reducer.update(([b["idx"] for b in batch["img_metas"][0]], result)) - return {} - - def build_training_data_loader(self) -> det_torch.DataLoader: - dataset, dataloader = data.build_dataloader( - self.cfg.data, - "train", - self.context, - True, - ) - self.model.CLASSES = dataset.CLASSES # type: ignore - return dataloader - - def build_validation_data_loader(self) -> det_torch.DataLoader: - dataset, dataloader = data.build_dataloader( - self.cfg.data, - "val", - self.context, - False, - ) - - def evaluate_fn(results: List[Any]) -> Any: - # Determined's distributed batch sampler interleaves shards on each GPU slot so - # sample i goes to worker with rank i % world_size. Therefore, we need to re-sort - # all the samples once we gather the predictions before computing the validation metric. - inds, results = zip(*results) - inds = [ind for sub_ind in inds for ind in sub_ind] - results = [res for result in results for res in result] - sorted_inds = np.argsort(inds) - results = [results[i] for i in sorted_inds] - - eval_kwargs = self.cfg.evaluation - - for key in ["interval", "tmpdir", "start", "gpu_collect"]: - eval_kwargs.pop(key, None) - - metrics = dataset.evaluate(results, **eval_kwargs) # type: ignore - if not len(metrics): - return {"bbox_mAP": 0} - return metrics - - self.reducer = self.context.wrap_reducer( - evaluate_fn, for_training=False, for_validation=True - ) - return dataloader - - def get_batch_length(self, batch: Any) -> int: - if isinstance(batch["img"], mmcv.parallel.data_container.DataContainer): - length = len(batch["img"].data[0]) - else: - # The validation data has a different format so we have separate handling below. 
-            length = len(batch["img"][0].data[0])
-        return length
-
-    def to_device(self, batch: Any) -> Dict[str, Any]:
-        new_data = {}
-        for k, item in batch.items():
-            if isinstance(item, mmcv.parallel.data_container.DataContainer) and not item.cpu_only:
-                new_data[k] = mmcv.parallel.data_container.DataContainer(
-                    self.context.to_device(item.data),
-                    item.stack,
-                    item.padding_value,
-                    item.cpu_only,
-                    item.pad_dims,
-                )
-            # The validation data has a different format so we have separate handling below.
-            elif (
-                isinstance(item, list)
-                and len(item) == 1
-                and isinstance(item[0], mmcv.parallel.data_container.DataContainer)
-                and not item[0].cpu_only
-            ):
-                new_data[k] = [
-                    mmcv.parallel.data_container.DataContainer(
-                        self.context.to_device(item[0].data),
-                        item[0].stack,
-                        item[0].padding_value,
-                        item[0].cpu_only,
-                        item[0].pad_dims,
-                    )
-                ]
-            else:
-                new_data[k] = item
-        return new_data
diff --git a/model_hub/model_hub/mmdetection/utils.py b/model_hub/model_hub/mmdetection/utils.py
deleted file mode 100644
index 258b12e6a40..00000000000
--- a/model_hub/model_hub/mmdetection/utils.py
+++ /dev/null
@@ -1,105 +0,0 @@
-"""
-Various utility functions for using mmdetection in Determined that may be useful
-even if not using the provided MMDetTrial.
-
-build_fp16_loss_scaler is largely derived from the original mmcv code at
-https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/optimizer.py
-mmcv is covered by the Apache 2.0 License. Copyright (c) OpenMMLab. All rights reserved.
-"""
-import os
-from typing import Any, Dict, Tuple
-
-import mmcv
-import torch
-
-import model_hub.utils
-
-
-def get_config_pretrained_url_mapping() -> Dict[str, str]:
-    """
-    Walks the MMDETECTION_CONFIG_DIR and creates a mapping of configs
-    to URLs for pretrained checkpoints. The URLs for pretrained checkpoints
-    are parsed from the README files in each of the mmdetection config folders.
-
-    MMDETECTION_CONFIG_DIR is set to /mmdetection/configs in the default
-    determinedai/model-hub-mmdetection docker image.
-    """
-    models = {}
-    config_dir = os.getenv("MMDETECTION_CONFIG_DIR")
-    if config_dir:
-        for root, _, files in os.walk(config_dir):
-            for f in files:
-                if "README" in f:
-                    with open(os.path.join(root, f), "r") as readme:
-                        lines = readme.readlines()
-                        for line in lines:
-                            if "[config]" in line:
-                                start = line.find("[config]")
-                                end = line.find(".py", start)
-                                start = line.rfind("/", start, end)
-                                config_name = line[start + 1 : end + 3]
-                                start = line.find("[model]")
-                                end = line.find(".pth", start)
-                                ckpt_name = line[start + 8 : end + 4]
-                                models[config_name] = ckpt_name
-    return models
-
-
-CONFIG_TO_PRETRAINED = get_config_pretrained_url_mapping()
-
-
-def get_pretrained_ckpt_path(download_directory: str, config_file: str) -> Tuple[Any, Any]:
-    """
-    If the config_file has an associated pretrained checkpoint, return the path to the
-    downloaded checkpoint and the loaded checkpoint.
-
-    Arguments:
-        download_directory: path to download checkpoints to
-        config_file: mmdet config file path for which to find and load pretrained weights
-    Returns:
-        checkpoint path, loaded checkpoint
-    """
-    config_file = config_file.split("/")[-1]
-    if config_file in CONFIG_TO_PRETRAINED:
-        ckpt_path = model_hub.utils.download_url(
-            download_directory, CONFIG_TO_PRETRAINED[config_file]
-        )
-        return ckpt_path, torch.load(ckpt_path)
-    return None, None
-
-
-def build_fp16_loss_scaler(loss_scale: mmcv.Config) -> Any:
-    """
-    This function is derived from mmcv, which is covered by the Apache 2.0 License.
-    Copyright (c) OpenMMLab.
All rights reserved. - - Arguments: - loss_scale (float | str | dict): Scale factor configuration. - If loss_scale is a float, static loss scaling will be used with - the specified scale. If loss_scale is a string, it must be - 'dynamic', then dynamic loss scaling will be used. - It can also be a dict containing arguments of GradScalar. - Defaults to 512. For PyTorch >= 1.6, mmcv uses official - implementation of GradScaler. If you use a dict version of - loss_scale to create GradScaler, please refer to: - https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler - for the parameters. - Examples: - >>> loss_scale = dict( - ... init_scale=65536.0, - ... growth_factor=2.0, - ... backoff_factor=0.5, - ... growth_interval=2000 - ... ) - """ - if loss_scale == "dynamic": - loss_scaler = torch.cuda.amp.GradScaler() - elif isinstance(loss_scale, float): - loss_scaler = torch.cuda.amp.GradScaler(init_scale=loss_scale) - elif isinstance(loss_scale, dict): - loss_scaler = torch.cuda.amp.GradScaler(**loss_scale) - else: - raise Exception( - "Cannot parse fp16 configuration. Expected cfg to be str(dynamic), float or dict." - ) - return loss_scaler diff --git a/model_hub/model_hub/py 2.typed b/model_hub/model_hub/py 2.typed deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/model_hub/model_hub/py.typed b/model_hub/model_hub/py.typed deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/model_hub/model_hub/utils 2.py b/model_hub/model_hub/utils 2.py deleted file mode 100644 index 4fb1d2ddf6d..00000000000 --- a/model_hub/model_hub/utils 2.py +++ /dev/null @@ -1,106 +0,0 @@ -import logging -import os -import typing -import urllib.parse -from typing import Any, Dict, List, Union - -import filelock -import numpy as np -import requests -import torch - - -def expand_like(arrays: List[np.ndarray], fill: float = -100) -> np.ndarray: - """ - Stacks a list of arrays along the first dimension; the arrays are allowed to differ in - the second dimension but should match for dim > 2. - - The output will have dimension - (sum([l.shape[0] for l in arrays]), max([l.shape[1] for l in in arrays]), ...) - For arrays that have fewer entries in the second dimension than the max, we will - pad with the fill value. - - Args: - arrays: List of np.ndarray to stack along the first dimension - fill: Value to fill in when padding to max size in the second dimension - - Returns: - stacked array - """ - full_shape = list(arrays[0].shape) - if len(full_shape) == 1: - return np.concatenate(arrays) - full_shape[0] = sum(a.shape[0] for a in arrays) - full_shape[1] = max(a.shape[1] for a in arrays) - result = np.full(full_shape, fill) - row_offset = 0 - for a in arrays: - result[row_offset : row_offset + a.shape[0], : a.shape[1]] = a - row_offset += a.shape[0] - return result - - -def numpify(x: Union[List, np.ndarray, torch.Tensor]) -> np.ndarray: - """ - Converts List or torch.Tensor to numpy.ndarray. 
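A worked example of the padding behavior of ``expand_like`` described above (shapes chosen for illustration):

.. code-block:: python

    import numpy as np
    from model_hub import utils

    a = np.array([[1, 2, 3]])
    b = np.array([[4, 5]])
    out = utils.expand_like([a, b], fill=-100)
    # out has shape (2, 3); the shorter row is padded with the fill value:
    # [[1, 2, 3], [4, 5, -100]]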
- """ - if isinstance(x, np.ndarray): - return x - if isinstance(x, List): - return np.array(x) - if isinstance(x, torch.Tensor): - return x.cpu().numpy() # type: ignore - raise TypeError("Expected input of type List, np.ndarray, or torch.Tensor.") - - -def download_url(download_directory: str, url: str) -> str: - url_path = urllib.parse.urlparse(url).path - basename = url_path.rsplit("/", 1)[1] - - os.makedirs(download_directory, exist_ok=True) - filepath = os.path.join(download_directory, basename) - lock = filelock.FileLock(filepath + ".lock") - - with lock: - if not os.path.exists(filepath): - logging.info("Downloading {} to {}".format(url, filepath)) - - r = requests.get(url, stream=True) - with open(filepath, "wb") as f: - for chunk in r.iter_content(chunk_size=8192): - if chunk: - f.write(chunk) - return filepath - - -def compute_num_training_steps(experiment_config: Dict, global_batch_size: int) -> int: - max_length_unit = list(experiment_config["searcher"]["max_length"].keys())[0] - max_length: int = experiment_config["searcher"]["max_length"][max_length_unit] - if max_length_unit == "batches": - return max_length - if max_length_unit == "epochs": - if "records_per_epoch" in experiment_config: - return max_length * int(experiment_config["records_per_epoch"] / global_batch_size) - raise Exception( - "Missing num_training_steps hyperparameter in the experiment " - "configuration, which is needed to configure the learning rate scheduler." - ) - # Otherwise, max_length_unit=='records' - return int(max_length / global_batch_size) - - -class AttrDict(dict): - def __init__(self, *args: Any, **kwargs: Any) -> None: - super().__init__(*args, **kwargs) - self.__dict__ = self - for key in self.keys(): - if isinstance(self[key], dict): - self[key] = AttrDict(self[key]) - - if typing.TYPE_CHECKING: - - def __getattr__(self, item: Any) -> Any: - return True - - def __setattr__(self, item: Any, value: Any) -> None: - return None diff --git a/model_hub/model_hub/utils.py b/model_hub/model_hub/utils.py deleted file mode 100644 index 4fb1d2ddf6d..00000000000 --- a/model_hub/model_hub/utils.py +++ /dev/null @@ -1,106 +0,0 @@ -import logging -import os -import typing -import urllib.parse -from typing import Any, Dict, List, Union - -import filelock -import numpy as np -import requests -import torch - - -def expand_like(arrays: List[np.ndarray], fill: float = -100) -> np.ndarray: - """ - Stacks a list of arrays along the first dimension; the arrays are allowed to differ in - the second dimension but should match for dim > 2. - - The output will have dimension - (sum([l.shape[0] for l in arrays]), max([l.shape[1] for l in in arrays]), ...) - For arrays that have fewer entries in the second dimension than the max, we will - pad with the fill value. - - Args: - arrays: List of np.ndarray to stack along the first dimension - fill: Value to fill in when padding to max size in the second dimension - - Returns: - stacked array - """ - full_shape = list(arrays[0].shape) - if len(full_shape) == 1: - return np.concatenate(arrays) - full_shape[0] = sum(a.shape[0] for a in arrays) - full_shape[1] = max(a.shape[1] for a in arrays) - result = np.full(full_shape, fill) - row_offset = 0 - for a in arrays: - result[row_offset : row_offset + a.shape[0], : a.shape[1]] = a - row_offset += a.shape[0] - return result - - -def numpify(x: Union[List, np.ndarray, torch.Tensor]) -> np.ndarray: - """ - Converts List or torch.Tensor to numpy.ndarray. 
- """ - if isinstance(x, np.ndarray): - return x - if isinstance(x, List): - return np.array(x) - if isinstance(x, torch.Tensor): - return x.cpu().numpy() # type: ignore - raise TypeError("Expected input of type List, np.ndarray, or torch.Tensor.") - - -def download_url(download_directory: str, url: str) -> str: - url_path = urllib.parse.urlparse(url).path - basename = url_path.rsplit("/", 1)[1] - - os.makedirs(download_directory, exist_ok=True) - filepath = os.path.join(download_directory, basename) - lock = filelock.FileLock(filepath + ".lock") - - with lock: - if not os.path.exists(filepath): - logging.info("Downloading {} to {}".format(url, filepath)) - - r = requests.get(url, stream=True) - with open(filepath, "wb") as f: - for chunk in r.iter_content(chunk_size=8192): - if chunk: - f.write(chunk) - return filepath - - -def compute_num_training_steps(experiment_config: Dict, global_batch_size: int) -> int: - max_length_unit = list(experiment_config["searcher"]["max_length"].keys())[0] - max_length: int = experiment_config["searcher"]["max_length"][max_length_unit] - if max_length_unit == "batches": - return max_length - if max_length_unit == "epochs": - if "records_per_epoch" in experiment_config: - return max_length * int(experiment_config["records_per_epoch"] / global_batch_size) - raise Exception( - "Missing num_training_steps hyperparameter in the experiment " - "configuration, which is needed to configure the learning rate scheduler." - ) - # Otherwise, max_length_unit=='records' - return int(max_length / global_batch_size) - - -class AttrDict(dict): - def __init__(self, *args: Any, **kwargs: Any) -> None: - super().__init__(*args, **kwargs) - self.__dict__ = self - for key in self.keys(): - if isinstance(self[key], dict): - self[key] = AttrDict(self[key]) - - if typing.TYPE_CHECKING: - - def __getattr__(self, item: Any) -> Any: - return True - - def __setattr__(self, item: Any, value: Any) -> None: - return None diff --git a/model_hub/mypy.ini b/model_hub/mypy.ini deleted file mode 100644 index 977540c9eb5..00000000000 --- a/model_hub/mypy.ini +++ /dev/null @@ -1,29 +0,0 @@ -[mypy] -mypy_path = ../harness -python_version = 3.8 -follow_imports = silent -ignore_missing_imports = True - -# All strict checks. 
-
-
-class AttrDict(dict):
-    def __init__(self, *args: Any, **kwargs: Any) -> None:
-        super().__init__(*args, **kwargs)
-        self.__dict__ = self
-        for key in self.keys():
-            if isinstance(self[key], dict):
-                self[key] = AttrDict(self[key])
-
-    if typing.TYPE_CHECKING:
-
-        def __getattr__(self, item: Any) -> Any:
-            return True
-
-        def __setattr__(self, item: Any, value: Any) -> None:
-            return None
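
A small sketch of AttrDict in use: nested dicts are converted recursively, and
because __dict__ aliases the dict itself, attribute writes update the mapping:

    hparams = utils.AttrDict({"optimizer": {"lr": 0.01}, "global_batch_size": 2})
    assert hparams.optimizer.lr == 0.01
    hparams.optimizer.lr = 0.1
    assert hparams["optimizer"]["lr"] == 0.1
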
diff --git a/model_hub/mypy.ini b/model_hub/mypy.ini
deleted file mode 100644
index 977540c9eb5..00000000000
--- a/model_hub/mypy.ini
+++ /dev/null
@@ -1,29 +0,0 @@
-[mypy]
-mypy_path = ../harness
-python_version = 3.8
-follow_imports = silent
-ignore_missing_imports = True
-
-# All strict checks.
-check_untyped_defs = True
-disallow_incomplete_defs = True
-disallow_subclassing_any = True
-disallow_untyped_calls = True
-disallow_untyped_decorators = True
-disallow_untyped_defs = True
-no_implicit_optional = True
-strict_equality = True
-warn_redundant_casts = True
-warn_return_any = True
-warn_unused_configs = True
-warn_unused_ignores = True
-exclude = (build|examples)
-
-[mypy-torch.nn.*]
-follow_imports = skip
-
-[mypy-azure.storage.blob.*]
-; starting in azure 12.9.0:
-; site-packages/azure/storage/blob/_serialize.py:129: error: Type signature has too many arguments
-; site-packages/azure/storage/blob/_blob_client.py:1406: error: Type signature has too few arguments
-follow_imports = skip
diff --git a/model_hub/pyproject.toml b/model_hub/pyproject.toml
deleted file mode 100644
index aa4949aa1cc..00000000000
--- a/model_hub/pyproject.toml
+++ /dev/null
@@ -1,2 +0,0 @@
-[tool.black]
-line-length = 100
diff --git a/model_hub/setup.py b/model_hub/setup.py
deleted file mode 100644
index e977f3d64a2..00000000000
--- a/model_hub/setup.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import setuptools
-
-setuptools.setup(
-    name="model-hub",
-    version="0.36.1-dev0",
-    author="Determined AI",
-    author_email="ai-open-source@hpe.com",
-    url="https://determined.ai/",
-    description="Model Hub for Determined Deep Learning Training Platform",
-    long_description="See https://docs.determined.ai/ for more information.",
-    license="Apache License 2.0",
-    classifiers=["License :: OSI Approved :: Apache Software License"],
-    packages=setuptools.find_packages(include=["model_hub*"]),
-    python_requires=">=3.6",
-    include_package_data=True,
-    # Versions of model-hub will correspond to specific versions of third party
-    # libraries that are guaranteed to work with our code. Other versions
-    # may work with model-hub as well but are not officially supported.
-    install_requires=[
-        "determined>=0.13.11",  # We require custom reducers for PyTorchTrial.
-    ],
-)
diff --git a/model_hub/tests/__init__.py b/model_hub/tests/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/model_hub/tests/fixtures/maskrcnn.yaml b/model_hub/tests/fixtures/maskrcnn.yaml
deleted file mode 100644
index e4e95ecde36..00000000000
--- a/model_hub/tests/fixtures/maskrcnn.yaml
+++ /dev/null
@@ -1,18 +0,0 @@
-name: mmdet_maskrcnn
-data:
-  file_client_args:
-    backend: fake
-hyperparameters:
-  global_batch_size: 2
-  config_file: mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py
-  merge_config: null
-  use_pretrained: false
-  override_mmdet_config:
-    data.train.ann_file: /tmp/annotations/instances_train2017.json
-    data.val.ann_file: /tmp/annotations/instances_val2017.json
-searcher:
-  name: single
-  metric: bbox_mAP
-  max_length:
-    batches: 200
-  smaller_is_better: false
diff --git a/model_hub/tests/fixtures/merge_config.py b/model_hub/tests/fixtures/merge_config.py
deleted file mode 100644
index 18e280ab068..00000000000
--- a/model_hub/tests/fixtures/merge_config.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# optimizer
-optimizer = dict(
-    type="AdamW",
-    lr=0.0001,
-    weight_decay=0.0001,
-    paramwise_cfg=dict(custom_keys={"backbone": dict(lr_mult=0.1, decay_mult=1.0)}),
-)
-optimizer_config = dict(_delete_=True, grad_clip=dict(max_norm=0.1, norm_type=2))
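
Both fixtures above feed mmcv's Config merging: merge_config names a partial config
file merged into the base config, while override_mmdet_config supplies dotted keys.
A rough sketch of the underlying mmcv calls, with a placeholder config path:

    from mmcv import Config

    cfg = Config.fromfile("mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py")  # placeholder
    # Dotted keys address nested fields; _delete_=True replaces the section
    # outright instead of merging it with the base value.
    cfg.merge_from_dict(
        {
            "optimizer_config._delete_": True,
            "optimizer_config.grad_clip.max_norm": 35,
            "optimizer_config.grad_clip.norm_type": 2,
        }
    )
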
diff --git a/model_hub/tests/requirements.txt b/model_hub/tests/requirements.txt
deleted file mode 100644
index 0cd4e9c6178..00000000000
--- a/model_hub/tests/requirements.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-# pytest 6.0 has linter-breaking changes
-pytest>=6.0.1
-mypy==0.910
-coverage
-
-# install transformers and datasets
-transformers==4.8.2
-datasets==1.9.0
-
-# install mmcv and mmdet
-mmdet==2.21.0
--f https://download.openmmlab.com/mmcv/dist/cpu/torch1.9.0/index.html
-mmcv-full==1.4.5
-
-setuptools<70  # pinned due to dependency removed in setuptools (MD-418)
diff --git a/model_hub/tests/test_hf.py b/model_hub/tests/test_hf.py
deleted file mode 100644
index 5a23e82f8a4..00000000000
--- a/model_hub/tests/test_hf.py
+++ /dev/null
@@ -1,33 +0,0 @@
-import model_hub.huggingface as hf
-from model_hub import utils
-
-
-def test_config_parser() -> None:
-    args = {"pretrained_model_name_or_path": "xnli", "num_labels": 4}
-    config = hf.parse_dict_to_dataclasses((hf.ConfigKwargs,), args, as_dict=True)[0]
-    target = utils.AttrDict(
-        {
-            "pretrained_model_name_or_path": "xnli",
-            "revision": "main",
-            "use_auth_token": False,
-            "cache_dir": None,
-            "num_labels": 4,
-        }
-    )
-    assert config == target
-
-
-def test_nodefault_config_parser() -> None:
-    args = {
-        "pretrained_model_name_or_path": "xnli",
-    }
-    config = hf.parse_dict_to_dataclasses((hf.ConfigKwargs,), args, as_dict=True)[0]
-    target = utils.AttrDict(
-        {
-            "pretrained_model_name_or_path": "xnli",
-            "revision": "main",
-            "use_auth_token": False,
-            "cache_dir": None,
-        }
-    )
-    assert config == target
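
The two tests above exercise hf.parse_dict_to_dataclasses, which routes a flat
argument dict into dataclass fields and fills in defaults. A hypothetical,
self-contained illustration of the filtering idea, not model_hub's actual
implementation:

    import dataclasses
    from typing import Optional

    @dataclasses.dataclass
    class Kwargs:  # hypothetical stand-in for hf.ConfigKwargs
        pretrained_model_name_or_path: str
        revision: str = "main"
        use_auth_token: bool = False
        cache_dir: Optional[str] = None

    def parse(args: dict) -> dict:
        # Keep only keys that are declared fields, then let defaults fill the rest.
        names = {f.name for f in dataclasses.fields(Kwargs)}
        return dataclasses.asdict(Kwargs(**{k: v for k, v in args.items() if k in names}))

    assert parse({"pretrained_model_name_or_path": "xnli"})["revision"] == "main"
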
diff --git a/model_hub/tests/test_mmdetection.py b/model_hub/tests/test_mmdetection.py
deleted file mode 100644
index ff348849b1d..00000000000
--- a/model_hub/tests/test_mmdetection.py
+++ /dev/null
@@ -1,147 +0,0 @@
-import os
-import shutil
-from typing import Generator
-
-import git
-import pytest
-import torch
-
-import determined as det
-import determined.pytorch as det_torch
-import model_hub.mmdetection as mh_mmdet
-import model_hub.mmdetection._callbacks as callbacks
-import model_hub.utils as mh_utils
-from determined.common import util
-
-
-def cleanup_dir(directory: str) -> None:
-    for root, dirs, files in os.walk(directory, topdown=False):
-        for name in files:
-            os.remove(os.path.join(root, name))
-        for name in dirs:
-            os.rmdir(os.path.join(root, name))
-    os.rmdir(directory)
-
-
-@pytest.fixture(scope="module")
-def mmdet_config_dir() -> Generator[str, None, None]:
-    git.Repo.clone_from("https://github.com/open-mmlab/mmdetection", "/tmp/mmdetection")
-    repo = git.Repo("/tmp/mmdetection")
-    repo.git.checkout("tags/v2.28.2")
-    mmdet_config_dir = "/tmp/mmdetection/configs"
-    os.environ["MMDETECTION_CONFIG_DIR"] = mmdet_config_dir
-    yield mmdet_config_dir
-
-    # cleanup
-    cleanup_dir("/tmp/mmdetection")
-
-
-@pytest.fixture(scope="module")
-def context(mmdet_config_dir: str) -> det_torch.PyTorchTrialContext:
-    config_file = "./tests/fixtures/maskrcnn.yaml"
-    with open(config_file, "rb") as f:
-        config = util.safe_load_yaml_with_exceptions(f)
-
-    core_context, env = det._make_local_execution_env(
-        managed_training=False,
-        test_mode=False,
-        config=config,
-        checkpoint_dir="/tmp",
-        limit_gpus=1,
-    )
-
-    context = det_torch.PyTorchTrialContext(
-        core_context=core_context,
-        trial_seed=env.trial_seed,
-        hparams=config["hyperparameters"],
-        slots_per_trial=1,
-        num_gpus=1,
-        exp_conf=config,
-        aggregation_frequency=1,
-        steps_completed=0,
-        managed_training=False,
-        debug_enabled=False,
-    )
-    context._set_default_gradient_compression(False)
-    context._set_default_average_aggregated_gradients(True)
-
-    return context
-
-
-@pytest.fixture(scope="module")
-def trial(context: det_torch.PyTorchTrialContext) -> mh_mmdet.MMDetTrial:
-    trial = mh_mmdet.MMDetTrial(context)
-    return trial
-
-
-@pytest.fixture(scope="module")
-def dataloader(trial: mh_mmdet.MMDetTrial) -> Generator[torch.utils.data.DataLoader, None, None]:
-    mh_utils.download_url(
-        "/tmp", "http://images.cocodataset.org/annotations/annotations_trainval2017.zip"
-    )
-    shutil.unpack_archive("/tmp/annotations_trainval2017.zip", "/tmp")
-    det_data_loader = trial.build_training_data_loader()
-    data_loader = det_data_loader.get_data_loader()
-    trial.context._current_batch_idx = 0
-    trial.context._epoch_len = len(data_loader)
-    yield data_loader
-
-    # cleanup
-    os.remove("/tmp/annotations_trainval2017.zip")
-    cleanup_dir("/tmp/annotations")
-
-
-# _callbacks.py
-def test_fake_runner(trial: mh_mmdet.MMDetTrial, dataloader: torch.utils.data.DataLoader) -> None:
-    runner = callbacks.FakeRunner(trial.context)
-    assert len(runner.optimizer) == 1
-    assert len(runner.data_loader) == len(dataloader)  # type: ignore
-    assert runner.iter == 0
-    assert runner.epoch == 0
-    assert runner.max_iters == 200
-
-
-# _data.py
-def test_group_sampler(dataloader: torch.utils.data.DataLoader) -> None:
-    dataset = dataloader.dataset
-    sampler = mh_mmdet.GroupSampler(dataset, 2, 1)
-    flags = [dataset.flag[i] for i in sampler]  # type: ignore
-    test = [flags[i] == flags[i + 1] for i in range(0, len(flags), 2)]
-    assert all(test)
-
-
-# utils.py
-def test_get_pretrained_weights(
-    mmdet_config_dir: str, context: det_torch.PyTorchTrialContext
-) -> None:
-    mh_mmdet.utils.CONFIG_TO_PRETRAINED = mh_mmdet.utils.get_config_pretrained_url_mapping()
-    path, ckpt = mh_mmdet.get_pretrained_ckpt_path("/tmp", context.get_hparam("config_file"))
-    assert path is not None
-    assert ckpt is not None
-
-
-# _trial.py
-class TestMMDetTrial:
-    def test_merge_config(
-        self, context: det_torch.PyTorchTrialContext, trial: mh_mmdet.MMDetTrial
-    ) -> None:
-        hparams = context.get_hparams()
-        hparams["merge_config"] = "./tests/fixtures/merge_config.py"
-        trial.hparams = mh_utils.AttrDict(hparams)
-        new_cfg = trial.build_mmdet_config()
-        assert new_cfg.optimizer.type == "AdamW"
-        assert new_cfg.optimizer_config.grad_clip.max_norm == 0.1
-
-    def test_override_mmdet_config(
-        self, context: det_torch.PyTorchTrialContext, trial: mh_mmdet.MMDetTrial
-    ) -> None:
-        hparams = context.get_hparams()
-        hparams["override_mmdet_config"] = {
-            "optimizer_config._delete_": True,
-            "optimizer_config.grad_clip.max_norm": 35,
-            "optimizer_config.grad_clip.norm_type": 2,
-        }
-        trial.hparams = mh_utils.AttrDict(hparams)
-        new_cfg = trial.build_mmdet_config()
-        assert new_cfg.optimizer_config.grad_clip.max_norm == 35
-        assert new_cfg.optimizer_config.grad_clip.norm_type == 2
diff --git a/model_hub/tests/test_utils.py b/model_hub/tests/test_utils.py
deleted file mode 100644
index e3f1883d2ed..00000000000
--- a/model_hub/tests/test_utils.py
+++ /dev/null
@@ -1,35 +0,0 @@
-import os
-
-import numpy as np
-
-from model_hub import utils
-
-
-def test_compute_num_training_steps() -> None:
-    experiment_config = {"searcher": {"max_length": {"epochs": 3}}, "records_per_epoch": 124}
-    num_training_steps = utils.compute_num_training_steps(experiment_config, 16)
-    assert num_training_steps == 21
-
-    experiment_config = {
-        "searcher": {"max_length": {"batches": 300}},
-    }
-    num_training_steps = utils.compute_num_training_steps(experiment_config, 16)
-    assert num_training_steps == 300
-
-    experiment_config = {
-        "searcher": {"max_length": {"records": 3000}},
-    }
-    num_training_steps = utils.compute_num_training_steps(experiment_config, 16)
-    assert num_training_steps == 187
-
-
-def test_expand_like() -> None:
-    array_list = [np.array([[1, 2], [3, 4]]), np.array([[2, 3, 4], [3, 4, 5]])]
-    result = utils.expand_like(array_list)
-    assert np.array_equal(result, np.array([[1, 2, -100], [3, 4, -100], [2, 3, 4], [3, 4, 5]]))
-
-
-def test_download_url() -> None:
-    url = "https://images.freeimages.com/images/large-previews/5c6/sunset-jungle-1383333.jpg"
-    file_path = utils.download_url("/tmp", url)
-    assert os.path.exists(file_path)
diff --git a/pre-commit/check.py b/pre-commit/check.py
index 1724165c6a9..80d6cb65d68 100755
--- a/pre-commit/check.py
+++ b/pre-commit/check.py
@@ -65,7 +65,6 @@ def is_child(path: Path, parent: Path) -> bool:
     root / "docs": "make fmt check build",
     root / ".circleci": "type circleci || exit 0 && circleci config validate config.yml",
     root / "e2e_tests": "make fmt check",
-    root / "model_hub": "make fmt check",
 }
diff --git a/requirements.txt b/requirements.txt
index 9188929ab1f..29bc496bca3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,4 @@
 -e harness
--e model_hub
 -r docs/requirements.txt
 -r harness/tests/requirements/requirements-harness.txt
 -r e2e_tests/tests/requirements.txt