fix score file name according to model; colorize messages
ofilangi committed Oct 23, 2024
1 parent 932427b commit 42d789b
Showing 6 changed files with 106 additions and 90 deletions.
2 changes: 1 addition & 1 deletion config/simple.json
@@ -1,6 +1,6 @@
 {
     "encodeur" : "sentence-transformers/all-MiniLM-L6-v2",
-    "threshold_similarity_tag_chunk" : 0.65,
+    "threshold_similarity_tag_chunk" : 0.62,
     "threshold_similarity_tag" : 0.80,
     "batch_size" : 32,
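For context, a minimal sketch of how a tag/chunk similarity threshold like this is typically applied with the configured encoder. This illustrates the mechanism only, not the repository's implementation; the example strings are invented:

from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
threshold = 0.62  # "threshold_similarity_tag_chunk" from config/simple.json

# Encode one ontology tag and one abstract chunk (illustrative strings)
tag_emb = model.encode("drought stress", convert_to_tensor=True)
chunk_emb = model.encode("Plants were grown under water deficit.", convert_to_tensor=True)

score = util.cos_sim(tag_emb, chunk_emb).item()
if score >= threshold:
    print(f"tag retained (score={score:.2f})")

Lowering the threshold from 0.65 to 0.62 therefore retains slightly weaker tag/chunk matches.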
24 changes: 23 additions & 1 deletion llm_semantic_annotator/__init__.py
@@ -24,4 +24,26 @@
 from .core import main_build_dataset_abstracts_annotation
 from .core import get_scores_files
 
-from .similarity_evaluator import similarity_evaluator_main
+from .similarity_evaluator import similarity_evaluator_main
+
+
+from colorama import init, Fore, Back, Style
+# Initialize colorama
+init(autoreset=True)
+
+def custom_exception_handler(exc_type, exc_value, exc_traceback):
+    # Format the exception message
+    error_msg = f"{exc_type.__name__}: {exc_value}"
+
+    # Print the message in red
+    print(f"{Fore.RED}{Back.WHITE}{Style.BRIGHT}{error_msg}{Style.RESET_ALL}")
+
+    # Print the traceback in yellow
+    import traceback
+    for line in traceback.format_tb(exc_traceback):
+        print(f"{Fore.YELLOW}{line}{Style.RESET_ALL}")
+
+# Replace the default exception handler
+import sys
+sys.excepthook = custom_exception_handler
+
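A minimal sketch of the new hook in action, assuming colorama is installed and that importing the package installs the hook as above; the failing function is invented:

import llm_semantic_annotator  # importing the package sets sys.excepthook

def boom():
    raise ValueError("example failure")

boom()  # "ValueError: example failure" prints bright red on white; traceback lines print in yellow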

120 changes: 57 additions & 63 deletions llm_semantic_annotator/__main__.py
@@ -1,95 +1,89 @@
-import json, sys
-
-from llm_semantic_annotator import get_retention_dir
-from llm_semantic_annotator import main_populate_owl_tag_embeddings
-from llm_semantic_annotator import main_populate_abstract_embeddings
-from llm_semantic_annotator import main_populate_gbif_taxon_tag_embeddings
-from llm_semantic_annotator import main_populate_ncbi_taxon_tag_embeddings
-from llm_semantic_annotator import main_compute_tag_chunk_similarities
-from llm_semantic_annotator import similarity_evaluator_main
-from llm_semantic_annotator import main_display_summary
-from llm_semantic_annotator import main_build_graph
-from llm_semantic_annotator import main_build_dataset_abstracts_annotation
+import json
+import sys
+import os
+from rich import print
+import argparse
+
+from llm_semantic_annotator import (
+    get_retention_dir,
+    main_populate_owl_tag_embeddings,
+    main_populate_abstract_embeddings,
+    main_populate_gbif_taxon_tag_embeddings,
+    main_populate_ncbi_taxon_tag_embeddings,
+    main_compute_tag_chunk_similarities,
+    similarity_evaluator_main,
+    main_display_summary,
+    main_build_graph,
+    main_build_dataset_abstracts_annotation
+)
 
 def load_config(config_file):
-    """Charge la configuration à partir d'un fichier JSON."""
+    """Load configuration from a JSON file."""
     try:
         with open(config_file, 'r') as f:
             return json.load(f)
     except FileNotFoundError:
-        print(f"Le fichier de configuration {config_file} est introuvable.")
+        print(f"[bold red]Error:[/bold red] Configuration file {config_file} not found.")
         sys.exit(1)
     except json.JSONDecodeError:
-        print(f"Erreur de décodage JSON dans le fichier {config_file}.")
+        print(f"[bold red]Error:[/bold red] JSON decoding error in file {config_file}.")
         sys.exit(1)
 
 def parse_arguments():
-    """Analyse les arguments de la ligne de commande."""
-    parser = argparse.ArgumentParser(description="Programme avec plusieurs types d'exécution.")
+    """Parse command line arguments."""
+    parser = argparse.ArgumentParser(description="Program with multiple execution types.")
     parser.add_argument(
         "config_file",
-        help="Chemin vers le fichier de configuration JSON."
+        help="Path to the JSON configuration file."
     )
     parser.add_argument(
         "execution_type",
-        choices=["populate_owl_tag_embeddings",
-                 "populate_gbif_taxon_tag_embeddings",
-                 "populate_ncbi_taxon_tag_embeddings",
-                 "populate_abstract_embeddings",
-                 "compute_tag_chunk_similarities",
-                 "display_summary",
-                 "build_rdf_graph",
-                 "build_dataset_abstracts_annotations",
-                 "evaluate_encoder"],
-        help="Type d'exécution à effectuer."
+        choices=[
+            "populate_owl_tag_embeddings",
+            "populate_gbif_taxon_tag_embeddings",
+            "populate_ncbi_taxon_tag_embeddings",
+            "populate_abstract_embeddings",
+            "compute_tag_chunk_similarities",
+            "display_summary",
+            "build_rdf_graph",
+            "build_dataset_abstracts_annotations",
+            "evaluate_encoder"
+        ],
+        help="Type of execution to perform."
     )
 
     parser.add_argument('--force', action='store_true',
-                        help="Forcer l'exécution sans demander de confirmation")
-
+                        help="Force execution without asking for confirmation")
     return parser.parse_args()
 
 def main():
-    import os
     args = parse_arguments()
     config = load_config(args.config_file)
 
     config['retention_dir'] = get_retention_dir(args.config_file)
+    config['force'] = args.force
 
-    if args.force:
-        config['force'] = True
-    else:
-        config['force'] = False
+    execution_functions = {
+        "populate_owl_tag_embeddings": main_populate_owl_tag_embeddings,
+        "populate_gbif_taxon_tag_embeddings": main_populate_gbif_taxon_tag_embeddings,
+        "populate_ncbi_taxon_tag_embeddings": main_populate_ncbi_taxon_tag_embeddings,
+        "populate_abstract_embeddings": main_populate_abstract_embeddings,
+        "compute_tag_chunk_similarities": main_compute_tag_chunk_similarities,
+        "display_summary": main_display_summary,
+        "build_rdf_graph": main_build_graph,
+        "build_dataset_abstracts_annotations": main_build_dataset_abstracts_annotation,
+        "evaluate_encoder": similarity_evaluator_main
+    }
 
-    if args.execution_type == "populate_owl_tag_embeddings":
-        main_populate_owl_tag_embeddings(config)
-    elif args.execution_type == "populate_gbif_taxon_tag_embeddings":
-        main_populate_gbif_taxon_tag_embeddings(config)
-    elif args.execution_type == "populate_ncbi_taxon_tag_embeddings":
-        main_populate_ncbi_taxon_tag_embeddings(config)
-    elif args.execution_type == "populate_abstract_embeddings":
-        main_populate_abstract_embeddings(config)
-    elif args.execution_type == "compute_tag_chunk_similarities":
-        main_compute_tag_chunk_similarities(config)
-    elif args.execution_type == "display_summary":
-        main_display_summary(config)
-    elif args.execution_type == "build_rdf_graph":
-        main_build_graph(config)
-    elif args.execution_type == "build_dataset_abstracts_annotations":
-        main_build_dataset_abstracts_annotation(config)
-    elif args.execution_type == "evaluate_encoder":
-        similarity_evaluator_main(config)
-    else:
-        raise ValueError("Type d'exécution non reconnu.")
+    try:
+        execution_function = execution_functions[args.execution_type]
+        print(f"[bold green]Executing:[/bold green] {args.execution_type}")
+        execution_function(config)
+    except KeyError:
+        print(f"[bold red]Error:[/bold red] Unrecognized execution type: {args.execution_type}")
+        sys.exit(1)
+    except Exception as e:
+        print(f"[bold red]Error during execution:[/bold red] {str(e)}")
+        sys.exit(1)
 
 if __name__ == "__main__":
     main()
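With the dispatch table, every execution type now runs through the same code path. A typical invocation of the entry point, using the repository's example config:

python -m llm_semantic_annotator config/simple.json compute_tag_chunk_similarities --force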
11 changes: 6 additions & 5 deletions llm_semantic_annotator/abstract/abstract_manager.py
@@ -260,8 +260,10 @@ def build_dataset_abstracts_annotations(self):
             for filename in files:
                 if pattern.search(filename):
                     abstracts_json = os.path.join(root, filename)
-                    abstracts_gen = filename.split('.json')[0]
-                    abstracts_scores = self.mem.get_filename_pth(abstracts_gen).split('.pth')[0]+"_scores.json"
+                    abstracts_origin_gen = filename.split('.json')[0]
+                    abstracts_gen = self.mem.get_filename_pth(abstracts_origin_gen).split('.pth')[0]
+                    abstracts_scores = abstracts_gen+"_scores.json"
+                    abstracts_annotations_results_file = abstracts_gen+"_queryresults.json"
                     print(abstracts_json)
                     abstracts_data = self._get_data_abstracts_file(abstracts_json)
                     abstracts_annot = load_results(abstracts_scores)
@@ -305,7 +307,6 @@ def build_dataset_abstracts_annotations(self):
                     'reference_id' : reference_id_list
                 })
             if not df.empty:
-                outf = self.config['retention_dir']+f"/QueryResultEntry_{abstracts_gen}.csv"
-                print(outf)
-                df.to_csv(outf, index=False)
+                print(abstracts_annotations_results_file)
+                df.to_csv(abstracts_annotations_results_file, index=False)
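This renaming is the "fix score file name according to model" half of the commit: score and query-result files now derive from the model-specific .pth name instead of the raw JSON name, so outputs from different encoders no longer collide. A hypothetical walk-through, assuming get_filename_pth qualifies the name with the encoder (all paths invented):

abstracts_origin_gen = "abstracts_batch1.json".split('.json')[0]   # "abstracts_batch1"
pth = "simple_workdir/abstracts_batch1_all-MiniLM-L6-v2.pth"       # assumed get_filename_pth() output
abstracts_gen = pth.split('.pth')[0]
print(abstracts_gen + "_scores.json")         # simple_workdir/abstracts_batch1_all-MiniLM-L6-v2_scores.json
print(abstracts_gen + "_queryresults.json")   # simple_workdir/abstracts_batch1_all-MiniLM-L6-v2_queryresults.json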

38 changes: 18 additions & 20 deletions llm_semantic_annotator/misc/utils.py
@@ -1,61 +1,59 @@
-import os,csv,json
+import os, csv, json
 from pathlib import Path
 
-def save_results(data,filename):
+def save_results(data, filename):
     """
-    Sauvegarde les résultats dans un fichier JSON.
+    Saves the results to a JSON file.
     """
     with open(filename, 'w') as f:
         json.dump(data, f)
-    print(f"Résultats sauvegardés dans {filename}")
+    print(f"Results saved in {filename}")
 
 def load_results(filename):
     """
-    Charge les résultats depuis un fichier JSON s'il existe.
+    Loads the results from a JSON file if it exists.
     """
     if os.path.exists(filename):
         with open(filename, 'r') as f:
             return json.load(f)
-    return None
+    raise FileNotFoundError(f"The file {filename} does not exist.")
 
 def list_of_dicts_to_csv(data, filename):
-    # Vérifier si la liste n'est pas vide
+    # Check if the list is not empty
    if not data:
-        print("La liste est vide.")
+        print("The list is empty.")
         return
 
-    # Obtenir les en-têtes (toutes les clés uniques de tous les dictionnaires)
+    # Get headers (all unique keys from all dictionaries)
     headers = set().union(*(d.keys() for d in data))
 
-    # Ouvrir le fichier en mode écriture
+    # Open the file in write mode
     with open(filename, 'w', newline='') as csvfile:
         writer = csv.DictWriter(csvfile, fieldnames=headers)
 
-        # Écrire les en-têtes
+        # Write the headers
         writer.writeheader()
 
-        # Écrire les données
+        # Write the data
         for row in data:
             writer.writerow(row)
 
-
-
 def dict_to_csv(dictionary, filename):
-    # Déterminer les en-têtes (clés du dictionnaire)
+    # Determine the headers (keys of the dictionary)
     headers = list(dictionary.keys())
 
-    # Ouvrir le fichier en mode écriture
+    # Open the file in write mode
     with open(filename, 'w', newline='') as csvfile:
-        # Créer un objet writer CSV
+        # Create a CSV writer object
         writer = csv.DictWriter(csvfile, fieldnames=headers)
 
-        # Écrire les en-têtes
+        # Write the headers
         writer.writeheader()
 
-        # Écrire les données
+        # Write the data
         writer.writerow(dictionary)
 
-def get_retention_dir(config_file) :
+def get_retention_dir(config_file):
     config_base_name = os.path.basename(config_file)
     config_name_without_ext = os.path.splitext(config_base_name)[0]
     retention_dir = os.path.join(os.getcwd(), f"{config_name_without_ext}_workdir")
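A short round-trip with these helpers; note the behavior change in load_results, which now raises FileNotFoundError instead of returning None for a missing file. The import path is assumed from the file's location in the tree:

from llm_semantic_annotator.misc.utils import save_results, load_results  # assumed module path

save_results({"tag": "leaf", "score": 0.71}, "demo_scores.json")   # prints: Results saved in demo_scores.json
data = load_results("demo_scores.json")                            # {'tag': 'leaf', 'score': 0.71}

try:
    load_results("missing.json")
except FileNotFoundError as err:
    print(err)                                                     # The file missing.json does not exist.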
1 change: 1 addition & 0 deletions requirements.txt
@@ -11,3 +11,4 @@ pandas
 tabulate
 np
 pytest
+colorama
