Skip to content

Commit

Permalink
minor change . readme
Browse files Browse the repository at this point in the history
  • Loading branch information
ofilangi committed Oct 21, 2024
1 parent 953f779 commit 768a06f
Show file tree
Hide file tree
Showing 7 changed files with 64 additions and 37 deletions.
20 changes: 2 additions & 18 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@ This approach aims to significantly enrich the metadata of scientific articles,
## Installation

```bash
python -m venv env
source env/bin/activate
pip install git+https://github.com/p2m2/encoder-ontology-match-abstract
./exec.sh <json_config_file>
```

## Configuration
Expand Down Expand Up @@ -103,22 +101,8 @@ check example on [config](./config) directory
</table>


### Execution
### Tests Execution

```bash
python -m llm_semantic_annotator config/test.json populate_owl_tag_embeddings
```

```bash
python -m llm_semantic_annotator config/test.json populate_ncbi_abstract_embeddings
```
```bash
python -m llm_semantic_annotator config/igepp.json populate_ncbi_abstract_embeddings
```

```bash
python -m llm_semantic_annotator config/test.json compute_tag_chunk_similarities
```

```bash
python -m unittest discover
Expand Down
4 changes: 0 additions & 4 deletions config/1-article.json
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,6 @@
"json_files" : [
"data/abstracts/abstracts_1.json",
"data/abstracts/abstracts_2.json"
],
"text_files" : [
"data/abstracts/abstracts_3.txt",
"data/abstracts/abstracts_4.txt"
]
}

Expand Down
58 changes: 58 additions & 0 deletions config/planteom-example.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
{
"encodeur" : "sentence-transformers/all-MiniLM-L6-v2",
"threshold_similarity_tag_chunk" : 0.60,
"threshold_similarity_tag" : 0.80,
"batch_size" : 32,

"populate_owl_tag_embeddings" : {
"ontologies": {
"planteome_link" : {
"peco": {
"url": "http://purl.obolibrary.org/obo/peco.owl",
"prefix": "http://purl.obolibrary.org/obo/PECO_",
"format": "xml",
"label" : "<http://www.w3.org/2000/01/rdf-schema#label>",
"properties": ["<http://purl.obolibrary.org/obo/IAO_0000115>"]
},
"po": {
"url": "http://purl.obolibrary.org/obo/po.owl",
"prefix": "http://purl.obolibrary.org/obo/PO_",
"format": "xml",
"label" : "<http://www.w3.org/2000/01/rdf-schema#label>",
"properties": ["<http://purl.obolibrary.org/obo/IAO_0000115>"]
},
"pso": {
"url": "http://purl.obolibrary.org/obo/pso.owl",
"prefix": "http://purl.obolibrary.org/obo/PSO_",
"format": "xml",
"label" : "<http://www.w3.org/2000/01/rdf-schema#label>",
"properties": ["<http://purl.obolibrary.org/obo/IAO_0000115>"]
},
"to": {
"url": "http://purl.obolibrary.org/obo/to.owl",
"prefix": "http://purl.obolibrary.org/obo/TO_",
"format": "xml",
"label" : "<http://www.w3.org/2000/01/rdf-schema#label>",
"properties": ["<http://purl.obolibrary.org/obo/IAO_0000115>"]
}
}
},
"debug_nb_terms_by_ontology" : -1
},
"populate_ncbi_taxon_tag_embeddings" : {
"regex" : "(assic.*)|(arab.*)" ,
"tags_per_file" : 2000
},
"populate_abstract_embeddings" : {
"abstracts_per_file" : 500,
"from_ncbi_api" : {
"ncbi_api_chunk_size" : 200,
"debug_nb_ncbi_request" : -1,
"retmax" : 2000,
"selected_term" : [
"Crops%2C+Agricultural%2Fmetabolism%5BMeSH%5D"
]
}

}
}
10 changes: 0 additions & 10 deletions exec-1article.sh

This file was deleted.

5 changes: 3 additions & 2 deletions exec.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,14 @@ execute_command() {
create_venv_if_not_exists

echo "What would you like to execute?"
echo "1. Full workflow"
echo "1. Pseudo workflow [2,4,5,6,7]"
echo "2. populate_owl_tag_embeddings"
echo "3. populate_ncbi_taxon_tag_embeddings"
echo "4. populate_abstract_embeddings"
echo "5. compute similarities between tags and chunks abstracts"
echo "6. display similarities information"
echo "7. build turtle knowledge graph"
echo "8. evaluate encoder with mesh descriptors"
echo "8. evaluate encoder with mesh descriptors (experimental)"
read -p "Enter your choice (1-8): " choice

case $choice in
Expand All @@ -62,6 +62,7 @@ case $choice in
#run_command python3 -m llm_semantic_annotator "$conffile" populate_ncbi_taxon_tag_embeddings
run_command python3 -m llm_semantic_annotator "$conffile" populate_abstract_embeddings
run_command python3 -m llm_semantic_annotator "$conffile" compute_tag_chunk_similarities
run_command python3 -m llm_semantic_annotator "$conffile" build_graph
run_command python3 -m llm_semantic_annotator "$conffile" display_summary
;;
2|3|4|5|6|7|8)
Expand Down
2 changes: 1 addition & 1 deletion llm_semantic_annotator/abstract/abstract_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def get_ncbi_abstracts_from_api(self):
chunk = id_list[i:i+self.ncbi_api_chunk_size]
ids = ",".join(chunk)
fetch_url = f"{base_url}efetch.fcgi?db=pubmed&id={ids}&rettype=abstract&retmode=xml"
print(fetch_url)

fetch_response = requests.post(fetch_url)

root = ET.fromstring(fetch_response.content)
Expand Down
2 changes: 0 additions & 2 deletions llm_semantic_annotator/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,6 @@ def main_compute_tag_chunk_similarities(config_all):
if len(abstracts_pth_files) == 0:
raise FileNotFoundError("No abstracts embeddings found")



### Loading tags embeddings
### -----------------------
tag_embeddings_all = {}
Expand Down

0 comments on commit 768a06f

Please sign in to comment.