Skip to content

Commit

Permalink
Merge pull request #4 from TonicAI/f/add-telemetry
Browse files Browse the repository at this point in the history
Adding telemetry
  • Loading branch information
akamor authored Apr 5, 2024
2 parents 99fd072 + 3adf69f commit ab03043
Show file tree
Hide file tree
Showing 7 changed files with 466 additions and 309 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ docs/
.dmypy.json
dmypy.json

.env
.env
.idea
22 changes: 21 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,24 @@ After you execute `upload_results`, your results should be visible in the UI.
<img src="https://raw.githubusercontent.com/TonicAI/tonic_validate/main/readme_images/TonicValidate-Graph.png" width="800">
</picture>

Congratulations, now you have uploaded your Ragas results to Tonic Validate!
Congratulations, now you have uploaded your Ragas results to Tonic Validate!




### Telemetry
Tonic Ragas Logger collects minimal telemetry to help us figure out what users want and how they're using the product. We do not use any existing telemetry framework and instead created our own privacy-focused setup. Only the following information is tracked:

* What metrics were used for a run
* Number of questions in a run
* SDK Version
* Whether the SDK is being run on a CI machine

We do **NOT** track things such as the contents of the questions / answers, your scores, or any other sensitive information. For detecting CI/CD, we only check for common environment variables in different CI/CD environments. We do not log the values of these environment variables.

We also generate a random UUID to help us figure out how many users are using the product. This UUID is linked to your Tonic Validate account only to help us track who is using both the SDK and the UI, and to get user counts. If you want to see how we implemented telemetry, you can do so in the `tonic_ragas_logger/utils/telemetry.py` file.

If you wish to opt out of telemetry, set the `TONIC_RAGAS_DO_NOT_TRACK` environment variable to `True` (the values `true`, `1`, and `t` are also accepted, case-insensitively).


<p align="right">(<a href="#readme-top">back to top</a>)</p>
631 changes: 325 additions & 306 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "tonic-ragas-logger"
version = "1.2.1"
version = "1.2.2"
description = "Uploads results from ragas to Tonic Validate."
authors = []
readme = "README.md"
Expand Down
6 changes: 6 additions & 0 deletions tonic_ragas_logger/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,9 @@ def __init__(self) -> None:
self.TONIC_VALIDATE_BASE_URL = os.getenv(
"TONIC_VALIDATE_BASE_URL", "https://validate.tonic.ai/api/v1"
)
self.TONIC_VALIDATE_TELEMETRY_URL = os.getenv(
"TONIC_VALIDATE_TELEMETRY_URL", "https://telemetry.tonic.ai/validate"
)
self.TONIC_RAGAS_DO_NOT_TRACK = os.getenv(
"TONIC_RAGAS_DO_NOT_TRACK", "false"
).lower() in ("true", "1", "t")
10 changes: 10 additions & 0 deletions tonic_ragas_logger/ragas_validate_api.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Any, List, Optional, Dict

from tonic_ragas_logger.utils.telemetry import Telemetry
from tonic_validate import Run, RunData
from tonic_ragas_logger.config import Config

Expand Down Expand Up @@ -31,6 +32,7 @@ def __init__(
)
raise Exception(exception_message)
self.client = HttpClient(self.config.TONIC_VALIDATE_BASE_URL, api_key)
self.telemetry = Telemetry()

def upload_results(
self,
Expand Down Expand Up @@ -60,6 +62,14 @@ def upload_results(
"data": [run_data.to_dict() for run_data in run.run_data],
},
)

try:
self.telemetry.log_run(
len(run.run_data), list(run.overall_scores.keys())
)
except Exception as _:
pass

return run_response["id"]

def __convert_to_run(self, results: Result) -> Run:
Expand Down
101 changes: 101 additions & 0 deletions tonic_ragas_logger/utils/telemetry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import json
import os
from typing import List, Optional
import uuid
from tonic_validate.classes.user_info import UserInfo
from tonic_validate.utils.http_client import HttpClient
from appdirs import user_data_dir
from tonic_ragas_logger.config import Config
# Application name passed to appdirs' user_data_dir() to locate the telemetry user file
APP_DIR_NAME = "tonic-ragas-logger"

# Environment variables that common CI/CD providers set. Only their presence is
# checked when detecting a CI/CD environment.
env_vars = [
    "GITHUB_ACTIONS",  # GitHub Actions
    "GITLAB_CI",  # GitLab CI/CD
    "TF_BUILD",  # Azure DevOps
    "CI",  # CircleCI, Travis CI, Bitbucket
    "JENKINS_URL",  # Jenkins
]


class Telemetry:
    """Sends usage telemetry about ragas runs to the Tonic Validate telemetry endpoint."""

    def __init__(self, api_key: Optional[str] = None):
        """
        Used to log telemetry data to the Tonic Validate server

        Parameters
        ----------
        api_key: Optional[str]
            The API key to use for authentication
        """
        self.config = Config()
        self.http_client = HttpClient(self.config.TONIC_VALIDATE_TELEMETRY_URL, api_key)

    def get_user(self) -> UserInfo:
        """
        Retrieves the user information from the ``user.json`` file in the user data
        directory. If the file does not exist, cannot be parsed as JSON, or does not
        contain a ``user_id``, a new user is created and written to the file

        Returns
        -------
        UserInfo
            Information about the user

        Raises
        ------
        OSError
            If the user file cannot be read (other than not existing) or written
        """
        app_dir_path = user_data_dir(appname=APP_DIR_NAME)
        user_id_path = os.path.join(app_dir_path, "user.json")

        try:
            with open(user_id_path, "r", encoding="utf-8") as f:
                stored_info = json.load(f)
        except (FileNotFoundError, ValueError):
            # Missing file, or truncated/corrupt content (json.JSONDecodeError and
            # UnicodeDecodeError are both ValueError subclasses): fall through and
            # regenerate the file instead of failing on every subsequent run
            stored_info = None

        # Only trust the stored data if it has the field log_run actually needs
        if isinstance(stored_info, dict) and stored_info.get("user_id"):
            return stored_info

        user_info: UserInfo = {"user_id": str(uuid.uuid4()), "linked": False}
        # create the directory if it does not exist
        os.makedirs(app_dir_path, exist_ok=True)
        with open(user_id_path, "w", encoding="utf-8") as f:
            json.dump(user_info, f)
        return user_info

    def __is_ci(self) -> bool:
        """
        Checks whether the current environment is a CI/CD environment

        Returns
        -------
        bool
            True if any variable listed in ``env_vars`` is set to a non-empty value
        """
        return any(os.environ.get(var) for var in env_vars)

    def log_run(self, num_of_questions: int, metrics: List[str]) -> None:
        """
        Logs a run to the Tonic Validate server. Does nothing when the
        ``TONIC_RAGAS_DO_NOT_TRACK`` config flag is set

        Parameters
        ----------
        num_of_questions: int
            The number of questions asked
        metrics: List[str]
            The metrics that were used to evaluate the run
        """
        if self.config.TONIC_RAGAS_DO_NOT_TRACK:
            return

        # The version lookup is best-effort: a missing package or unavailable
        # importlib.metadata must not prevent the run from being logged
        try:
            from importlib.metadata import version

            sdk_version = version("tonic-ragas-logger")
        except Exception:
            sdk_version = "unknown"

        user_id = self.get_user()["user_id"]
        self.http_client.http_post(
            "/runs",
            data={
                "user_id": user_id,
                "num_of_questions": num_of_questions,
                "metrics": metrics,
                "is_ci": self.__is_ci(),
                "backend": "ragas",
                "run_time": -1,
                "sdk_version": sdk_version,
            },
            timeout=5,
        )

0 comments on commit ab03043

Please sign in to comment.