Skip to content

Commit

Permalink
Merge branch 'main' into Nolan/LMStudio
Browse files Browse the repository at this point in the history
  • Loading branch information
NolanTrem authored Jan 11, 2025
2 parents 66c0763 + dff76c5 commit 053311e
Show file tree
Hide file tree
Showing 34 changed files with 627 additions and 1,223 deletions.
20 changes: 20 additions & 0 deletions js/sdk/src/v3/clients/users.ts
Original file line number Diff line number Diff line change
Expand Up @@ -539,4 +539,24 @@ export class UsersClient {
/**
 * Request the GitHub OAuth authorization endpoint.
 *
 * Issues a GET to `users/oauth/github/authorize` through the shared client
 * and hands back whatever that request resolves to.
 *
 * @returns A promise typed as an object carrying a `redirect_url` string.
 */
async oauthGithubAuthorize(): Promise<{ redirect_url: string }> {
return this.client.makeRequest("GET", "users/oauth/github/authorize");
}

/**
 * Forward the Google OAuth callback values to the API.
 *
 * Sends a GET to `users/oauth/google/callback` with `code` and `state`
 * as request params and returns the client's response unchanged.
 *
 * @param options.code - Value sent as the `code` param (presumably the
 *   authorization code issued by the provider; confirm against the API).
 * @param options.state - Value sent as the `state` param.
 * @returns A promise resolving to the untyped response of the request.
 */
@feature("users.oauthGoogleCallback")
async oauthGoogleCallback(options: { code: string; state: string }): Promise<any> {
  const { code, state } = options;
  const requestOptions = { params: { code, state } };
  return this.client.makeRequest("GET", "users/oauth/google/callback", requestOptions);
}

/**
 * Forward the GitHub OAuth callback values to the API.
 *
 * Sends a GET to `users/oauth/github/callback` with `code` and `state`
 * as request params and returns the client's response unchanged.
 *
 * @param options.code - Value sent as the `code` param (presumably the
 *   authorization code issued by the provider; confirm against the API).
 * @param options.state - Value sent as the `state` param.
 * @returns A promise resolving to the untyped response of the request.
 */
@feature("users.oauthGithubCallback")
async oauthGithubCallback(options: { code: string; state: string }): Promise<any> {
  const { code, state } = options;
  const requestOptions = { params: { code, state } };
  return this.client.makeRequest("GET", "users/oauth/github/callback", requestOptions);
}
}
6 changes: 0 additions & 6 deletions py/cli/commands/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,12 +149,6 @@ async def serve(

click.echo(f"Running on {host}:{port}, with docker={docker}")

# TODO: Remove after the next couple of releases
click.secho(
"Warning: if you are migrating from R2R version 3.3.18 or earlier, you must run `r2r db upgrade` before starting the server.",
fg="red",
)

if full:
click.echo(
"Running the full R2R setup which includes `Hatchet` and `Unstructured.io`."
Expand Down
1 change: 0 additions & 1 deletion py/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,6 @@
## PIPES
"SearchPipe",
"EmbeddingPipe",
"GraphExtractionPipe",
"ParsingPipe",
"QueryTransformPipe",
"RAGPipe",
Expand Down
32 changes: 24 additions & 8 deletions py/core/base/providers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,35 @@

from pydantic import BaseModel

from ..abstractions import R2RSerializable

class InnerConfig(BaseModel, ABC):
"""A base provider configuration class"""

class Config:
populate_by_name = True
arbitrary_types_allowed = True
ignore_extra = True

class AppConfig(R2RSerializable):
@classmethod
def create(cls: Type["InnerConfig"], **kwargs: Any) -> "InnerConfig":
    """Build an instance of ``cls`` from keyword arguments.

    Keys that match a declared model field are passed to the constructor;
    for those keys the literal string ``"None"`` is replaced by ``None``.
    Every other key is stored, unchanged, in ``instance.extra_fields``.

    Args:
        **kwargs: Field values plus any additional, undeclared settings.

    Returns:
        The newly constructed instance.
    """
    declared = cls.model_fields.keys()

    # Split the input once: constructor arguments vs. pass-through extras.
    field_values: dict[str, Any] = {}
    extras: dict[str, Any] = {}
    for key, value in kwargs.items():
        if key in declared:
            field_values[key] = value if value != "None" else None
        else:
            extras[key] = value

    instance = cls(**field_values)  # type: ignore

    # NOTE(review): ``extra_fields`` is not declared in the visible part of
    # this class; confirm the model defines it, otherwise the first
    # undeclared key will fail here.
    for key, value in extras.items():
        instance.extra_fields[key] = value
    return instance


class AppConfig(InnerConfig):
project_name: Optional[str] = None
default_max_documents_per_user: Optional[int] = 100
default_max_chunks_per_user: Optional[int] = 10_000
default_max_collections_per_user: Optional[int] = 10
default_max_collections_per_user: Optional[int] = 5
default_max_upload_size: int = 2_000_000 # e.g. ~2 MB

# File extension to max-size mapping
Expand Down Expand Up @@ -51,11 +72,6 @@ class AppConfig(R2RSerializable):
"org": 5_000_000,
}

@classmethod
def create(cls, *args, **kwargs):
    """Return a new ``AppConfig`` carrying only ``project_name``.

    Positional arguments and every keyword other than ``project_name`` are
    ignored; a missing ``project_name`` is passed through as ``None``.
    The result is always an ``AppConfig``, regardless of ``cls``.
    """
    return AppConfig(project_name=kwargs.get("project_name"))


class ProviderConfig(BaseModel, ABC):
"""A base provider configuration class"""
Expand Down
10 changes: 10 additions & 0 deletions py/core/base/providers/email.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class EmailConfig(ProviderConfig):
sendgrid_api_key: Optional[str] = None
verify_email_template_id: Optional[str] = None
reset_password_template_id: Optional[str] = None
password_changed_template_id: Optional[str] = None
frontend_url: Optional[str] = None
sender_name: Optional[str] = None

Expand Down Expand Up @@ -74,3 +75,12 @@ async def send_password_reset_email(
self, to_email: str, reset_token: str, *args, **kwargs
) -> None:
pass

@abstractmethod
async def send_password_changed_email(
    self, to_email: str, *args, **kwargs
) -> None:
    """Abstract hook for the "password changed" email addressed to ``to_email``.

    This declaration performs no work and returns ``None``; extra positional
    and keyword arguments are accepted and left for implementations to use.
    """
34 changes: 0 additions & 34 deletions py/core/database/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import datetime
import json
import logging
import math
import os
import tempfile
import time
Expand Down Expand Up @@ -2327,17 +2326,12 @@ async def perform_graph_clustering(
if offset >= count:
break

relationship_ids_cache = await self._get_relationship_ids_cache(
all_relationships
)

logger.info(
f"Clustering over {len(all_relationships)} relationships for {collection_id} with settings: {leiden_params}"
)

return await self._cluster_and_add_community_info(
relationships=all_relationships,
relationship_ids_cache=relationship_ids_cache,
leiden_params=leiden_params,
collection_id=collection_id,
clustering_mode=clustering_mode,
Expand Down Expand Up @@ -2415,7 +2409,6 @@ async def _create_graph_and_cluster(
async def _cluster_and_add_community_info(
self,
relationships: list[Relationship],
relationship_ids_cache: dict[str, list[int]],
leiden_params: dict[str, Any],
collection_id: Optional[UUID] = None,
clustering_mode: str = "local",
Expand All @@ -2441,13 +2434,6 @@ async def _cluster_and_add_community_info(
f"Computing Leiden communities completed, time {time.time() - start_time:.2f} seconds."
)

def relationship_ids(node: str) -> list[int]:
return relationship_ids_cache.get(node, [])

logger.info(
f"Cached {len(relationship_ids_cache)} relationship ids, time {time.time() - start_time:.2f} seconds."
)

# If remote: hierarchical_communities is a list of dicts like:
# [{"node": str, "cluster": int, "level": int}, ...]
# If local: hierarchical_communities is the returned structure from hierarchical_leiden (list of named tuples)
Expand Down Expand Up @@ -2476,26 +2462,6 @@ def relationship_ids(node: str) -> list[int]:

return num_communities, hierarchical_communities

async def _get_relationship_ids_cache(
self, relationships: list[Relationship]
) -> dict[str, list[int]]:
relationship_ids_cache: dict[str, list[int]] = {}
for relationship in relationships:
if relationship.subject is not None:
relationship_ids_cache.setdefault(relationship.subject, [])
if relationship.id is not None:
relationship_ids_cache[relationship.subject].append(
int(relationship.id)
)
if relationship.object is not None:
relationship_ids_cache.setdefault(relationship.object, [])
if relationship.id is not None:
relationship_ids_cache[relationship.object].append(
int(relationship.id)
)

return relationship_ids_cache

async def get_entity_map(
self, offset: int, limit: int, document_id: UUID
) -> dict[str, dict[str, list[dict[str, Any]]]]:
Expand Down
107 changes: 36 additions & 71 deletions py/core/database/prompts/graphrag_communities.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,57 +2,30 @@ graphrag_communities:
template: |
You are an AI assistant that helps a human analyst perform general information discovery. Information discovery is the process of identifying and assessing relevant information associated with certain entities (e.g., organizations and individuals) within a network.
# Context
Collection Overview:
Context Overview:
{collection_description}
# Goal
Write a comprehensive report of a community within this collection, given a list of entities that belong to the community as well as their relationships and optional associated claims. The report will inform decision-makers about information associated with the community and their potential impact within the broader context of the collection. The content includes an overview of the community's key entities and noteworthy claims.
# Report Structure
The report should include:
- NAME: A specific, concise community name representing its key entities
- SUMMARY: An executive summary that contextualizes the community within the broader collection, explaining its structure, relationships, and significant information
- IMPACT SEVERITY RATING: A float score (0-10) representing the community's IMPACT severity relative to the overall collection
- RATING EXPLANATION: A single sentence explaining the IMPACT severity rating in context of the broader collection
- DETAILED FINDINGS: 5-10 key insights about the community, incorporating relevant collection-level context where appropriate
Output Format:
```json
{{
"name": <report_name>,
"summary": <executive_summary>,
"rating": <impact_severity_rating>,
"rating_explanation": <rating_explanation>,
"findings": [
"<finding1>",
"<finding2>",
"<finding3>",
"<finding4>",
"<finding5>"
// Additional findings...
]
}}
```
# Grounding Rules
Points supported by data should list their data references as follows:
"This is an example sentence supported by multiple data references [Data: <dataset name> (record ids); <dataset name> (record ids)]."
Do not list more than 5 record ids in a single reference. Instead, list the top 5 most relevant record ids and add "+more" to indicate that there are more.
For example:
"Person X is the owner of Company Y and subject to many allegations of wrongdoing [Data: Reports (1), Entities (5, 7); Relationships (23)."
where 1, 5, 7, 23, 2, 34, 46, and 64 represent the id (not the index) of the relevant data record.
Do not include information where the supporting evidence for it is not provided.
# Example Input
Your Task:
Write a comprehensive report of a community as a single XML document. The report must follow this exact structure:
<community>
<name>A specific, concise community name representing its key entities</name>
<summary>An executive summary that contextualizes the community</summary>
<rating>A float score (0-10) representing impact severity</rating>
<rating_explanation>A single sentence explaining the rating</rating_explanation>
<findings>
<finding>First key insight about the community</finding>
<finding>Second key insight about the community</finding>
<!-- Include 5-10 findings total -->
</findings>
</community>
Data Reference Format:
Include data references in findings like this:
"Example sentence [Data: <dataset name> (record ids); <dataset name> (record ids)]"
Use no more than 5 record IDs per reference. Add "+more" to indicate additional records.
Example Input:
-----------
Text:
Expand All @@ -78,32 +51,24 @@ graphrag_communities:
203,Airbnb,OpenAI,Airbnb utilizes OpenAI's AI tools for customer service.
205,Airbnb,Stripe,Airbnb and Stripe collaborate on expanding global payment options.
Output:
{{
"name": "OpenAI, Stripe, and Airbnb",
"summary": "The comprises key startups like OpenAI, Stripe, and Airbnb, which are interconnected through strategic partnerships and investments. These relationships highlight a robust network focused on advancing AI technologies, payment infrastructure, and online marketplaces.",
"rating": 7.5,
"rating_explanation": "The impact severity rating is high due to the significant influence these startups have on technology, finance, and the global economy.",
"findings": [
"OpenAI stands out as a leader in artificial intelligence research and deployment within YCombinator. Its partnerships with companies like Stripe and Airbnb demonstrate its integral role in integrating AI solutions across various industries. OpenAI's influence is further amplified by its involvement in key projects that drive innovation and efficiency. [Data: Entities (101), Relationships (201, 203, 204, +more)]",
"Stripe serves as a critical financial infrastructure provider, facilitating payment processing for startups like Airbnb and partnering with OpenAI to enhance payment solutions. Its strategic investments and collaborations underscore its importance in the Y Combinator ecosystem, enabling seamless financial transactions and supporting startup growth. [Data: Entities (102), Relationships (201, 202, 204, 205, +more)]",
"Airbnb leverages OpenAI's artificial intelligence tools to enhance its customer service capabilities, showcasing the practical application of AI in improving user experience. This integration highlights Airbnb's commitment to innovation and efficiency, positioning it as a forward-thinking leader within the community. [Data: Entities (103), Relationships (203, 205, +more)]",
"Stripe's investment in OpenAI's latest funding round illustrates the strategic financial moves that drive growth and innovation. Such investments not only strengthen partnerships but also foster an environment of collaboration and shared success among startups. [Data: Relationships (204)]",
"The collaboration between Airbnb and Stripe to expand global payment options demonstrates a commitment to scalability and accessibility in the Y Combinator ecosystem. This initiative is pivotal in enabling startups to reach a broader international market, thereby increasing their impact and revenue potential. [Data: Relationships (205)]"
]
}}
# Real Data
Use the following text for your answer. Do not make anything up in your answer.
Collection Context:
{collection_description}
Example Output:
<community>
<name>OpenAI-Stripe-Airbnb Community</name>
<summary>The OpenAI-Stripe-Airbnb Community is a network of companies that collaborate on AI research, payment solutions, and customer service.</summary>
<rating>8.5</rating>
<rating_explanation>The OpenAI-Stripe-Airbnb Community has a high impact on the collection due to its significant contributions to AI research, payment solutions, and customer service.</rating_explanation>
<findings>
<finding>OpenAI and Stripe have a partnership to integrate payment solutions [Data: Relationships (201)].</finding>
<finding>OpenAI and Airbnb collaborate on AI tools for customer service [Data: Relationships (203)].</finding>
<finding>Stripe provides payment processing services to Airbnb [Data: Relationships (202)].</finding>
<finding>Stripe invested in OpenAI's latest funding round [Data: Relationships (204)].</finding>
<finding>Airbnb and Stripe collaborate on expanding global payment options [Data: Relationships (205)].</finding>
</findings>
</community>
Entity Data:
{input_text}
Output:
input_types:
collection_description: str
input_text: str
Loading

0 comments on commit 053311e

Please sign in to comment.