Merge branch 'main' into Nolan/LMStudio

SciPhi-AI · Jan 11, 2025 · 053311e
2 parents 66c0763 + dff76c5
commit 053311e
Show file tree

Hide file tree

Showing 34 changed files with 627 additions and 1,223 deletions.
diff --git a/js/sdk/src/v3/clients/users.ts b/js/sdk/src/v3/clients/users.ts
@@ -539,4 +539,24 @@ export class UsersClient {
   async oauthGithubAuthorize(): Promise<{ redirect_url: string }> {
     return this.client.makeRequest("GET", "users/oauth/github/authorize");
   }
+
+  @feature("users.oauthGoogleCallback")
+  async oauthGoogleCallback(options: { code: string; state: string }): Promise<any> {
+    return this.client.makeRequest("GET", "users/oauth/google/callback", {
+      params: {
+        code: options.code,
+        state: options.state,
+      },
+    });
+  }
+
+  @feature("users.oauthGithubCallback")
+  async oauthGithubCallback(options: { code: string; state: string }): Promise<any> {
+    return this.client.makeRequest("GET", "users/oauth/github/callback", {
+      params: {
+        code: options.code,
+        state: options.state,
+      },
+    });
+  }
 }
diff --git a/py/cli/commands/system.py b/py/cli/commands/system.py
@@ -149,12 +149,6 @@ async def serve(
 
     click.echo(f"Running on {host}:{port}, with docker={docker}")
 
-    # TODO: Remove after the next couple of releases
-    click.secho(
-        "Warning: if you are migrating from R2R version 3.3.18 or earlier, you must run `r2r db upgrade` before starting the server.",
-        fg="red",
-    )
-
     if full:
         click.echo(
             "Running the full R2R setup which includes `Hatchet` and `Unstructured.io`."

diff --git a/py/core/__init__.py b/py/core/__init__.py
@@ -213,7 +213,6 @@
     ## PIPES
     "SearchPipe",
     "EmbeddingPipe",
-    "GraphExtractionPipe",
     "ParsingPipe",
     "QueryTransformPipe",
     "RAGPipe",

diff --git a/py/core/base/providers/base.py b/py/core/base/providers/base.py
@@ -3,14 +3,35 @@
 
 from pydantic import BaseModel
 
-from ..abstractions import R2RSerializable
 
+class InnerConfig(BaseModel, ABC):
+    """A base provider configuration class"""
+
+    class Config:
+        populate_by_name = True
+        arbitrary_types_allowed = True
+        ignore_extra = True
 
-class AppConfig(R2RSerializable):
+    @classmethod
+    def create(cls: Type["ProviderConfig"], **kwargs: Any) -> "ProviderConfig":
+        base_args = cls.model_fields.keys()
+        filtered_kwargs = {
+            k: v if v != "None" else None
+            for k, v in kwargs.items()
+            if k in base_args
+        }
+        instance = cls(**filtered_kwargs)  # type: ignore
+        for k, v in kwargs.items():
+            if k not in base_args:
+                instance.extra_fields[k] = v
+        return instance
+
+
+class AppConfig(InnerConfig):
     project_name: Optional[str] = None
     default_max_documents_per_user: Optional[int] = 100
     default_max_chunks_per_user: Optional[int] = 10_000
-    default_max_collections_per_user: Optional[int] = 10
+    default_max_collections_per_user: Optional[int] = 5
     default_max_upload_size: int = 2_000_000  # e.g. ~2 MB
 
     # File extension to max-size mapping
@@ -51,11 +72,6 @@ class AppConfig(R2RSerializable):
         "org": 5_000_000,
     }
 
-    @classmethod
-    def create(cls, *args, **kwargs):
-        project_name = kwargs.get("project_name")
-        return AppConfig(project_name=project_name)
-
 
 class ProviderConfig(BaseModel, ABC):
     """A base provider configuration class"""

diff --git a/py/core/base/providers/email.py b/py/core/base/providers/email.py
@@ -17,6 +17,7 @@ class EmailConfig(ProviderConfig):
     sendgrid_api_key: Optional[str] = None
     verify_email_template_id: Optional[str] = None
     reset_password_template_id: Optional[str] = None
+    password_changed_template_id: Optional[str] = None
     frontend_url: Optional[str] = None
     sender_name: Optional[str] = None
 
@@ -74,3 +75,12 @@ async def send_password_reset_email(
         self, to_email: str, reset_token: str, *args, **kwargs
     ) -> None:
         pass
+
+    @abstractmethod
+    async def send_password_changed_email(
+        self,
+        to_email: str,
+        *args,
+        **kwargs,
+    ) -> None:
+        pass
diff --git a/py/core/database/graphs.py b/py/core/database/graphs.py
@@ -4,7 +4,6 @@
 import datetime
 import json
 import logging
-import math
 import os
 import tempfile
 import time
@@ -2327,17 +2326,12 @@ async def perform_graph_clustering(
             if offset >= count:
                 break
 
-        relationship_ids_cache = await self._get_relationship_ids_cache(
-            all_relationships
-        )
-
         logger.info(
             f"Clustering over {len(all_relationships)} relationships for {collection_id} with settings: {leiden_params}"
         )
 
         return await self._cluster_and_add_community_info(
             relationships=all_relationships,
-            relationship_ids_cache=relationship_ids_cache,
             leiden_params=leiden_params,
             collection_id=collection_id,
             clustering_mode=clustering_mode,
@@ -2415,7 +2409,6 @@ async def _create_graph_and_cluster(
     async def _cluster_and_add_community_info(
         self,
         relationships: list[Relationship],
-        relationship_ids_cache: dict[str, list[int]],
         leiden_params: dict[str, Any],
         collection_id: Optional[UUID] = None,
         clustering_mode: str = "local",
@@ -2441,13 +2434,6 @@ async def _cluster_and_add_community_info(
             f"Computing Leiden communities completed, time {time.time() - start_time:.2f} seconds."
         )
 
-        def relationship_ids(node: str) -> list[int]:
-            return relationship_ids_cache.get(node, [])
-
-        logger.info(
-            f"Cached {len(relationship_ids_cache)} relationship ids, time {time.time() - start_time:.2f} seconds."
-        )
-
         # If remote: hierarchical_communities is a list of dicts like:
         # [{"node": str, "cluster": int, "level": int}, ...]
         # If local: hierarchical_communities is the returned structure from hierarchical_leiden (list of named tuples)
@@ -2476,26 +2462,6 @@ def relationship_ids(node: str) -> list[int]:
 
         return num_communities, hierarchical_communities
 
-    async def _get_relationship_ids_cache(
-        self, relationships: list[Relationship]
-    ) -> dict[str, list[int]]:
-        relationship_ids_cache: dict[str, list[int]] = {}
-        for relationship in relationships:
-            if relationship.subject is not None:
-                relationship_ids_cache.setdefault(relationship.subject, [])
-                if relationship.id is not None:
-                    relationship_ids_cache[relationship.subject].append(
-                        int(relationship.id)
-                    )
-            if relationship.object is not None:
-                relationship_ids_cache.setdefault(relationship.object, [])
-                if relationship.id is not None:
-                    relationship_ids_cache[relationship.object].append(
-                        int(relationship.id)
-                    )
-
-        return relationship_ids_cache
-
     async def get_entity_map(
         self, offset: int, limit: int, document_id: UUID
     ) -> dict[str, dict[str, list[dict[str, Any]]]]:

diff --git a/py/core/database/prompts/graphrag_communities.yaml b/py/core/database/prompts/graphrag_communities.yaml
@@ -2,57 +2,30 @@ graphrag_communities:
   template: |
       You are an AI assistant that helps a human analyst perform general information discovery. Information discovery is the process of identifying and assessing relevant information associated with certain entities (e.g., organizations and individuals) within a network.
 
-      # Context
-      Collection Overview:
+      Context Overview:
       {collection_description}
 
-      # Goal
-      Write a comprehensive report of a community within this collection, given a list of entities that belong to the community as well as their relationships and optional associated claims. The report will inform decision-makers about information associated with the community and their potential impact within the broader context of the collection. The content includes an overview of the community's key entities and noteworthy claims.
-
-      # Report Structure
-      The report should include:
-
-      - NAME: A specific, concise community name representing its key entities
-      - SUMMARY: An executive summary that contextualizes the community within the broader collection, explaining its structure, relationships, and significant information
-      - IMPACT SEVERITY RATING: A float score (0-10) representing the community's IMPACT severity relative to the overall collection
-      - RATING EXPLANATION: A single sentence explaining the IMPACT severity rating in context of the broader collection
-      - DETAILED FINDINGS: 5-10 key insights about the community, incorporating relevant collection-level context where appropriate
-
-
-      Output Format:
-      ```json
-      {{
-          "name": <report_name>,
-          "summary": <executive_summary>,
-          "rating": <impact_severity_rating>,
-          "rating_explanation": <rating_explanation>,
-          "findings": [
-              "<finding1>",
-              "<finding2>",
-              "<finding3>",
-              "<finding4>",
-              "<finding5>"
-              // Additional findings...
-          ]
-      }}
-      ```
-
-      # Grounding Rules
-
-      Points supported by data should list their data references as follows:
-
-      "This is an example sentence supported by multiple data references [Data: <dataset name> (record ids); <dataset name> (record ids)]."
-
-      Do not list more than 5 record ids in a single reference. Instead, list the top 5 most relevant record ids and add "+more" to indicate that there are more.
-
-      For example:
-      "Person X is the owner of Company Y and subject to many allegations of wrongdoing [Data: Reports (1), Entities (5, 7); Relationships (23)."
-
-      where 1, 5, 7, 23, 2, 34, 46, and 64 represent the id (not the index) of the relevant data record.
-
-      Do not include information where the supporting evidence for it is not provided.
-
-      # Example Input
+      Your Task:
+      Write a comprehensive report of a community as a single XML document. The report must follow this exact structure:
+
+      <community>
+          <name>A specific, concise community name representing its key entities</name>
+          <summary>An executive summary that contextualizes the community</summary>
+          <rating>A float score (0-10) representing impact severity</rating>
+          <rating_explanation>A single sentence explaining the rating</rating_explanation>
+          <findings>
+              <finding>First key insight about the community</finding>
+              <finding>Second key insight about the community</finding>
+              <!-- Include 5-10 findings total -->
+          </findings>
+      </community>
+
+      Data Reference Format:
+      Include data references in findings like this:
+      "Example sentence [Data: <dataset name> (record ids); <dataset name> (record ids)]"
+      Use no more than 5 record IDs per reference. Add "+more" to indicate additional records.
+
+      Example Input:
       -----------
       Text:
 
@@ -78,32 +51,24 @@ graphrag_communities:
         203,Airbnb,OpenAI,Airbnb utilizes OpenAI's AI tools for customer service.
         205,Airbnb,Stripe,Airbnb and Stripe collaborate on expanding global payment options.
 
-      Output:
-      {{
-          "name": "OpenAI, Stripe, and Airbnb",
-          "summary": "The comprises key startups like OpenAI, Stripe, and Airbnb, which are interconnected through strategic partnerships and investments. These relationships highlight a robust network focused on advancing AI technologies, payment infrastructure, and online marketplaces.",
-          "rating": 7.5,
-          "rating_explanation": "The impact severity rating is high due to the significant influence these startups have on technology, finance, and the global economy.",
-          "findings": [
-              "OpenAI stands out as a leader in artificial intelligence research and deployment within YCombinator. Its partnerships with companies like Stripe and Airbnb demonstrate its integral role in integrating AI solutions across various industries. OpenAI's influence is further amplified by its involvement in key projects that drive innovation and efficiency. [Data: Entities (101), Relationships (201, 203, 204, +more)]",
-              "Stripe serves as a critical financial infrastructure provider, facilitating payment processing for startups like Airbnb and partnering with OpenAI to enhance payment solutions. Its strategic investments and collaborations underscore its importance in the Y Combinator ecosystem, enabling seamless financial transactions and supporting startup growth. [Data: Entities (102), Relationships (201, 202, 204, 205, +more)]",
-              "Airbnb leverages OpenAI's artificial intelligence tools to enhance its customer service capabilities, showcasing the practical application of AI in improving user experience. This integration highlights Airbnb's commitment to innovation and efficiency, positioning it as a forward-thinking leader within the community. [Data: Entities (103), Relationships (203, 205, +more)]",
-              "Stripe's investment in OpenAI's latest funding round illustrates the strategic financial moves that drive growth and innovation. Such investments not only strengthen partnerships but also foster an environment of collaboration and shared success among startups. [Data: Relationships (204)]",
-              "The collaboration between Airbnb and Stripe to expand global payment options demonstrates a commitment to scalability and accessibility in the Y Combinator ecosystem. This initiative is pivotal in enabling startups to reach a broader international market, thereby increasing their impact and revenue potential. [Data: Relationships (205)]"
-          ]
-      }}
-
-      # Real Data
-
-      Use the following text for your answer. Do not make anything up in your answer.
-
-      Collection Context:
-      {collection_description}
+      Example Output:
+      <community>
+          <name>OpenAI-Stripe-Airbnb Community</name>
+          <summary>The OpenAI-Stripe-Airbnb Community is a network of companies that collaborate on AI research, payment solutions, and customer service.</summary>
+          <rating>8.5</rating>
+          <rating_explanation>The OpenAI-Stripe-Airbnb Community has a high impact on the collection due to its significant contributions to AI research, payment solutions, and customer service.</rating_explanation>
+          <findings>
+              <finding>OpenAI and Stripe have a partnership to integrate payment solutions [Data: Relationships (201)].</finding>
+              <finding>OpenAI and Airbnb collaborate on AI tools for customer service [Data: Relationships (203)].</finding>
+              <finding>Stripe provides payment processing services to Airbnb [Data: Relationships (202)].</finding>
+              <finding>Stripe invested in OpenAI's latest funding round [Data: Relationships (204)].</finding>
+              <finding>Airbnb and Stripe collaborate on expanding global payment options [Data: Relationships (205)].</finding>
+          </findings>
+      </community>
 
       Entity Data:
       {input_text}
 
-      Output:
   input_types:
     collection_description: str
     input_text: str