Skip to content

Commit

Permalink
Add code for v1.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
MikeXydas committed Sep 1, 2022
1 parent 50a19a8 commit 587a096
Show file tree
Hide file tree
Showing 34 changed files with 803 additions and 202 deletions.
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ FROM python:3.9

WORKDIR /app

COPY api/requirements.txt /app/requirements.txt
COPY requirements.txt /app/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt

COPY . /app

CMD ["python3.9", "recommendation_system_app.py", "--config_file", "api/config/backend-prod.yaml"]
CMD ["python3.9", "recommendation_system_app.py", "--config_file", "api/config/backend-prod.yaml"]
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,15 @@ INTERNAL_MONGO_URI="mongodb://admin:admin@mongo:27017"
RS_MONGO_URI="mongodb://admin:admin@rs_mongo:27017"
RS_MONGO_DB=recommender # The name of the database used in the RS mongo

INTERNAL_REDIS_HOST=redis # The hostname of the internal redis deployed by compose
INTERNAL_REDIS_PORT=6379 # The port of the internal redis deployed by compose
INTERNAL_REDIS_PASSWORD=redis_pswd # The password of the internal redis deployed by compose

# The private DSN key for Sentry, which we use for error logging
SENTRY_SDN=https://12345...

# Cronitor is used to monitor the offline updates of our RS data structures
# stored in Redis
CRONITOR_API_KEY=123aababdas...

```
22 changes: 15 additions & 7 deletions api/config/backend-dev.yaml
Original file line number Diff line number Diff line change
@@ -1,22 +1,30 @@
VERSION_NAME: "v1"

FASTAPI:
WORKERS: 4
WORKERS: 1
DEBUG: True
RELOAD: True
HOST: '0.0.0.0'
PORT: 4559

# In dev no scheduled updates will occur
SCHEDULING:
EVERY_N_HOURS: 1

CREDENTIALS: "credentials.yml"

SIMILAR_SERVICES:
METADATA: {"categories", "scientific_domains", "target_users"}
METADATA: ["categories", "scientific_domains", "target_users"]
TEXT_ATTRIBUTES: ["name", "description"]

METADATA_WEIGHT: 0.5
VIEWED_WEIGHT: 0.5

SBERT:
MODEL: 'paraphrase-MiniLM-L6-v2'
DEVICE: "cpu"

BINARIZERS_STORAGE_PATH: "api/recommender/similar_services/storage/binarizers"
EMBEDDINGS_STORAGE_PATH: "api/recommender/similar_services/storage/embeddings/"
SIMILARITIES_STORAGE_PATH: "api/recommender/similar_services/storage/similarities/"

PROJECT_COMPLETION:
ASSOCIATION_RULES_PATH: "api/recommender/project_completion/storage/association_rules.parquet"

MIN_SUPPORT: 0.05
MIN_CONFIDENCE: 0.5
19 changes: 13 additions & 6 deletions api/config/backend-prod.yaml
Original file line number Diff line number Diff line change
@@ -1,22 +1,29 @@
VERSION_NAME: "v1"

FASTAPI:
WORKERS: 4
DEBUG: False
RELOAD: False
HOST: '0.0.0.0'
PORT: 4559

SCHEDULING:
EVERY_N_HOURS: 1

CREDENTIALS: "credentials.yml"

SIMILAR_SERVICES:
METADATA: {"categories", "scientific_domains", "target_users"}
METADATA: ["categories", "scientific_domains", "target_users"]
TEXT_ATTRIBUTES: ["name", "description"]

METADATA_WEIGHT: 0.5
VIEWED_WEIGHT: 0.5

SBERT:
MODEL: 'paraphrase-MiniLM-L6-v2'
DEVICE: "cpu"

BINARIZERS_STORAGE_PATH: "api/recommender/similar_services/storage/binarizers"
EMBEDDINGS_STORAGE_PATH: "api/recommender/similar_services/storage/embeddings/"
SIMILARITIES_STORAGE_PATH: "api/recommender/similar_services/storage/similarities/"

PROJECT_COMPLETION:
ASSOCIATION_RULES_PATH: "api/recommender/project_completion/storage/association_rules.parquet"

MIN_SUPPORT: 0.05
MIN_CONFIDENCE: 0.5
67 changes: 51 additions & 16 deletions api/databases/mongo.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,15 @@ def get_services(self, attributes=None, conditions=None):
if attributes is None:
attributes = []

services = pd.DataFrame(list(self.mongo_connector.get_db()["service"].find(conditions)))
services.rename(columns={'_id': 'service_id'}, inplace=True)
services = list(self.mongo_connector.get_db()["service"].find(conditions))
if len(services):
servicesDf = pd.DataFrame(services)
servicesDf.rename(columns={'_id': 'service_id'}, inplace=True)
servicesDf = servicesDf[["service_id"] + attributes]
else: # If there are no services
servicesDf = pd.DataFrame(columns=["service_id"] + attributes)

return services[["service_id"] + attributes]
return servicesDf

def get_scientific_domains(self):
    """Return the `_id` of every document in the "scientific_domain" collection."""
    collection = self.mongo_connector.get_db()["scientific_domain"]
    # Project only `_id`; it is the single field read below.
    id_only_docs = collection.find({}, {"_id": 1})
    return [doc["_id"] for doc in id_only_docs]
Expand All @@ -51,9 +56,21 @@ def get_categories(self):
def get_target_users(self):
    """Return the `_id` of every document in the "target_user" collection."""
    collection = self.mongo_connector.get_db()["target_user"]
    # Project only `_id`; it is the single field read below.
    id_only_docs = collection.find({}, {"_id": 1})
    return [doc["_id"] for doc in id_only_docs]

# TODO change it when i can get the info from recommender db
# TODO check with dump
# Return the distinct values found in the "services" field of every "project"
# document whose "user_id" equals `user_id`. The values pass through a set, so
# duplicates are dropped and the order of the returned list is not defined.
# NOTE(review): this listing looks like a rendered diff without +/- markers, so
# the bare `return []` below is presumably the removed pre-commit body — confirm.
def get_user_services(self, user_id):
return []
# Fetch only the "services" field of each matching project.
user_projects_services = self.mongo_connector.get_db()["project"].find({"user_id": user_id}, {"services": 1})
user_services = set()
for project_services in user_projects_services:
user_services.update(project_services["services"])
return list(user_services)

# TODO check with dump
def get_project_services(self, project_id):
    """Return the "services" field of the "project" document whose `_id` is `project_id`."""
    projects = self.mongo_connector.get_db()["project"]
    project = projects.find_one({"_id": project_id})
    # There is no guard for a missing project: the lookup below is applied to
    # whatever `find_one` returned.
    return project["services"]

# TODO check with dump
def get_projects(self):
    """Return the `_id` of every document in the "project" collection."""
    collection = self.mongo_connector.get_db()["project"]
    # Project only `_id`; it is the single field read below.
    id_only_docs = collection.find({}, {"_id": 1})
    return [doc["_id"] for doc in id_only_docs]

def get_users(self, attributes=None):
if attributes is None:
Expand All @@ -62,12 +79,12 @@ def get_users(self, attributes=None):
return [user["_id"] for user in self.mongo_connector.get_db()["user"].find({}, attributes)]

# Report whether the "service" collection holds a document whose `_id` equals
# int(service_id). The id is cast with int() before the lookup.
# NOTE(review): this listing looks like a rendered diff without +/- markers, so
# the first result/return pair is presumably the removed pre-commit body and the
# second pair the added one — confirm against the repository.
def is_valid_service(self, service_id):
result = self.mongo_connector.get_db()["service"].find({'_id': int(service_id)}, {"_id": 1}).limit(1)
return len(list(result)) == 1
result = self.mongo_connector.get_db()["service"].find_one({'_id': int(service_id)})
return result is not None

# Report whether the "user" collection holds a document whose `_id` equals
# int(user_id). The id is cast with int() before the lookup.
# NOTE(review): this listing looks like a rendered diff without +/- markers, so
# the first result/return pair is presumably the removed pre-commit body and the
# second pair the added one — confirm against the repository.
def is_valid_user(self, user_id):
result = self.mongo_connector.get_db()["user"].find({"_id": int(user_id)}, {"_id": 1}).limit(1)
return len(list(result)) == 1
result = self.mongo_connector.get_db()["user"].find_one({"_id": int(user_id)})
return result is not None


class InternalMongoDB:
Expand All @@ -76,20 +93,38 @@ def __init__(self):
APP_SETTINGS["CREDENTIALS"]['INTERNAL_MONGO_DATABASE'])
self.mongo_connector.connect()

# TODO: This is where functions such as storing logs will be implemented
# Build a document describing one served recommendation and insert it into the
# "recommendation" collection, then log a debug message.
# The document carries the current UTC time, a version value, the service id,
# the recommendation itself, the user id (cast with int()) and the history of
# service ids.
# NOTE(review): this listing looks like a rendered diff without +/- markers, so
# the repeated "version" / "service_id" entries and the two insert_one lines are
# presumably the removed and added variants shown side by side — confirm.
def save_recommendation(self, recommendation, user_id, service_id, history_service_ids):
document = {
"date": datetime.datetime.utcnow(),
# TODO: change it when versioning is available
"version": "1.0",
"service_id": service_id,
"version": "1.0",
"version": APP_SETTINGS["BACKEND"]["VERSION_NAME"],
"service_id": int(service_id),
"recommendation": recommendation,
"user_id": int(user_id),
"history_service_ids": history_service_ids
}

document_id = self.mongo_connector.get_db()['recommendation'].insert_one(document)
self.mongo_connector.get_db()['recommendation'].insert_one(document)

logger.debug("Recommendation was successfully saved!")

# Store the recommender configuration currently held in APP_SETTINGS["BACKEND"]
# as a document in the "version" collection, keyed by the version name.
# The document records the similar-services settings (metadata, text attributes,
# the two weights, SBERT model) and the project-completion thresholds.
# NOTE(review): this listing looks like a rendered diff without +/- markers, so
# the trailing `logger.info(...)` line is presumably a removed line from the
# previous version of the file, not part of this method — confirm.
def update_version(self):
version = {
"name": APP_SETTINGS["BACKEND"]["VERSION_NAME"],
"similar_services": {
"metadata": APP_SETTINGS["BACKEND"]["SIMILAR_SERVICES"]["METADATA"],
"text_attributes": APP_SETTINGS["BACKEND"]["SIMILAR_SERVICES"]["TEXT_ATTRIBUTES"],
"metadata_weight": APP_SETTINGS["BACKEND"]["SIMILAR_SERVICES"]["METADATA_WEIGHT"],
"viewed_weight": APP_SETTINGS["BACKEND"]["SIMILAR_SERVICES"]["VIEWED_WEIGHT"],
"sbert_model": APP_SETTINGS["BACKEND"]["SIMILAR_SERVICES"]["SBERT"]["MODEL"]
},
"project_completion": {
"min_support": APP_SETTINGS["BACKEND"]["PROJECT_COMPLETION"]["MIN_SUPPORT"],
"min_confidence": APP_SETTINGS["BACKEND"]["PROJECT_COMPLETION"]["MIN_CONFIDENCE"]
}

}

# Update the version if it exists or create a new version document
self.mongo_connector.get_db()["version"].update_one({"name": version["name"]}, {"$set": version}, upsert=True)

logger.info("Recommendation was successfully saved!")
logger.debug("Recommender version was successfully saved!")
Loading

0 comments on commit 587a096

Please sign in to comment.