From fe7a7373c9efd20f6d8fb4721c82b08f0d4ffdf6 Mon Sep 17 00:00:00 2001 From: Benedikt Fuchs Date: Fri, 6 Dec 2024 21:58:02 +0100 Subject: [PATCH 1/4] bump version to 0.15.0 --- README.md | 2 +- docs/conf.py | 4 ++-- flair/__init__.py | 2 +- setup.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index fdf4130124..a8717e3ff3 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ document embeddings, including our proposed [Flair embeddings](https://www.aclwe * **A PyTorch NLP framework.** Our framework builds directly on [PyTorch](https://pytorch.org/), making it easy to train your own models and experiment with new approaches using Flair embeddings and classes. -Now at [version 0.14.0](https://github.com/flairNLP/flair/releases)! +Now at [version 0.15.0](https://github.com/flairNLP/flair/releases)! ## State-of-the-Art Models diff --git a/docs/conf.py b/docs/conf.py index 22ffbd0194..9b908d4734 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -6,8 +6,8 @@ # -- Project information ----------------------------------------------------- from sphinx_github_style import get_linkcode_resolve -version = "0.14.0" -release = "0.14.0" +version = "0.15.0" +release = "0.15.0" project = "flair" author = importlib_metadata.metadata(project)["Author"] copyright = f"2023 {author}" diff --git a/flair/__init__.py b/flair/__init__.py index 341f630e43..4f27b4c2ad 100644 --- a/flair/__init__.py +++ b/flair/__init__.py @@ -34,7 +34,7 @@ device = torch.device("cpu") # global variable: version -__version__ = "0.14.0" +__version__ = "0.15.0" """The current version of the flair library installed.""" # global variable: arrow symbol diff --git a/setup.py b/setup.py index 0573896c19..faf1540a29 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name="flair", - version="0.14.0", + version="0.15.0", description="A very simple framework for state-of-the-art NLP", long_description=Path("README.md").read_text(encoding="utf-8"), 
long_description_content_type="text/markdown", From 7fc1ff1b47fc9c37767ef95f4fa30b31b35777ca Mon Sep 17 00:00:00 2001 From: Alan Akbik Date: Thu, 19 Dec 2024 21:31:01 +0100 Subject: [PATCH 2/4] Test change package link --- .../how-to-load-prepared-dataset.md | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/docs/tutorial/tutorial-training/how-to-load-prepared-dataset.md b/docs/tutorial/tutorial-training/how-to-load-prepared-dataset.md index b53aeef917..3cc3000226 100644 --- a/docs/tutorial/tutorial-training/how-to-load-prepared-dataset.md +++ b/docs/tutorial/tutorial-training/how-to-load-prepared-dataset.md @@ -115,7 +115,8 @@ This will print out the created dictionary: Dictionary with 17 tags: PROPN, PUNCT, ADJ, NOUN, VERB, DET, ADP, AUX, PRON, PART, SCONJ, NUM, ADV, CCONJ, X, INTJ, SYM ``` -#### Dictionaries for other label types + +### Printing label statistics If you don't know the label types in a corpus, just call [`Corpus.make_label_dictionary`](#flair.data.Corpus.make_label_dictionary) with any random label name (e.g. `corpus.make_label_dictionary(label_type='abcd')`). This will print @@ -139,17 +140,6 @@ tense_dictionary = corpus.make_label_dictionary(label_type='number') If you print these dictionaries, you will find that the POS dictionary contains 50 tags and the number dictionary only 2 for this corpus (singular and plural). -#### Dictionaries for other corpora types - -The method [`Corpus.make_label_dictionary`](#flair.data.Corpus.make_label_dictionary) can be used for any corpus, including text classification corpora: - -```python -# create label dictionary for a text classification task -from flair.datasets import TREC_6 -corpus = TREC_6() -corpus.make_label_dictionary('question_class') -``` - ### The MultiCorpus Object If you want to train multiple tasks at once, you can use the [`MultiCorpus`](#flair.data.MultiCorpus) object. 
@@ -181,7 +171,7 @@ The following datasets are supported: | Task | Module | |-------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------| -| Named Entity Recognition | [flair.datasets.sequence_labeling](#flair.datasets.sequence_labeling) | +| Named Entity Recognition | [flair.datasets.sequence_labeling](../../api/datasets/sequence_labeling.html) | | Text Classification | [flair.datasets.document_classification](#flair.datasets.document_classification) | | Text Regression | [flair.datasets.document_classification](#flair.datasets.document_classification) | | Biomedical Named Entity Recognition | [flair.datasets.biomedical](#flair.datasets.biomedical) | From 5ac358fc9f22171ede58647c02319241b2bd3138 Mon Sep 17 00:00:00 2001 From: Alan Akbik Date: Thu, 19 Dec 2024 21:57:27 +0100 Subject: [PATCH 3/4] more documentation tests --- docs/tutorial/intro.md | 6 +++++- docs/tutorial/tutorial-basics/basic-types.md | 3 ++- .../tutorial-training/how-to-load-prepared-dataset.md | 11 ++++++++++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/docs/tutorial/intro.md b/docs/tutorial/intro.md index b8af9b5667..beb2c3422d 100644 --- a/docs/tutorial/intro.md +++ b/docs/tutorial/intro.md @@ -89,4 +89,8 @@ The output shows that the sentence "_I love Berlin and New York._" was tagged as ## Summary -Congrats, you now know how to use Flair to find entities and detect sentiment! \ No newline at end of file +Congrats, you now know how to use Flair to find entities and detect sentiment! + +## Next steps + +If you want to know more about Flair, next check out [Tutorial 1](tutorial-basics/) that gives an intro into the basics of Flair! 
\ No newline at end of file diff --git a/docs/tutorial/tutorial-basics/basic-types.md b/docs/tutorial/tutorial-basics/basic-types.md index 5ddf247166..2ff20c2af9 100644 --- a/docs/tutorial/tutorial-basics/basic-types.md +++ b/docs/tutorial/tutorial-basics/basic-types.md @@ -87,7 +87,8 @@ This print-out includes the token index (3) and the lexical value of the token ( When you create a [`Sentence`](#flair.data.Sentence) as above, the text is automatically tokenized (segmented into words) using the [segtok](https://pypi.org/project/segtok/) library. ```{note} -You can also use a different tokenizer if you like. To learn more about this, check out our tokenization tutorial. +You can also use a different tokenizer by passing a different [`Tokenizer`](#flair.tokenization.Tokenizer) to the Sentence +when you initialize it. ``` diff --git a/docs/tutorial/tutorial-training/how-to-load-prepared-dataset.md b/docs/tutorial/tutorial-training/how-to-load-prepared-dataset.md index 3cc3000226..0d18fe8c58 100644 --- a/docs/tutorial/tutorial-training/how-to-load-prepared-dataset.md +++ b/docs/tutorial/tutorial-training/how-to-load-prepared-dataset.md @@ -165,13 +165,22 @@ The [`MultiCorpus`](#flair.data.MultiCorpus) inherits from [`Corpus`](#flair.dat Flair supports many datasets out of the box. It usually automatically downloads and sets up the data the first time you call the corresponding constructor ID. The datasets are split into multiple modules, however they all can be imported from `flair.datasets` too. + +They are imported from :mod:`flair.datasets`. + +They are imported from :mod:`flair.datasets` + +[datasets](module:flair.datasets) + +To learn more about the class, refer to the :class:`flair.data.Corpus` class. + You can look up the respective modules to find the possible datasets. 
The following datasets are supported: | Task | Module | |-------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------| -| Named Entity Recognition | [flair.datasets.sequence_labeling](../../api/datasets/sequence_labeling.html) | +| Named Entity Recognition | [flair.datasets.sequence_labeling](#flair.datasets.sequence_labeling) | | Text Classification | [flair.datasets.document_classification](#flair.datasets.document_classification) | | Text Regression | [flair.datasets.document_classification](#flair.datasets.document_classification) | | Biomedical Named Entity Recognition | [flair.datasets.biomedical](#flair.datasets.biomedical) | From 8a829fb787c0c01045700ef050dc6e5c60f57997 Mon Sep 17 00:00:00 2001 From: Alan Akbik Date: Thu, 19 Dec 2024 22:33:01 +0100 Subject: [PATCH 4/4] Undo changes --- .../tutorial-training/how-to-load-prepared-dataset.md | 8 -------- 1 file changed, 8 deletions(-) diff --git a/docs/tutorial/tutorial-training/how-to-load-prepared-dataset.md b/docs/tutorial/tutorial-training/how-to-load-prepared-dataset.md index 0d18fe8c58..ac9b38c524 100644 --- a/docs/tutorial/tutorial-training/how-to-load-prepared-dataset.md +++ b/docs/tutorial/tutorial-training/how-to-load-prepared-dataset.md @@ -166,14 +166,6 @@ Flair supports many datasets out of the box. It usually automatically downloads call the corresponding constructor ID. The datasets are split into multiple modules, however they all can be imported from `flair.datasets` too. -They are imported from :mod:`flair.datasets`. - -They are imported from :mod:`flair.datasets` - -[datasets](module:flair.datasets) - -To learn more about the class, refer to the :class:`flair.data.Corpus` class. - You can look up the respective modules to find the possible datasets. The following datasets are supported: