diff --git a/docs/asciidoc/modules/ROOT/pages/database-integration/vectordb/pinecone.adoc b/docs/asciidoc/modules/ROOT/pages/database-integration/vectordb/pinecone.adoc index f4a4b3c494..2f8578a5ce 100644 --- a/docs/asciidoc/modules/ROOT/pages/database-integration/vectordb/pinecone.adoc +++ b/docs/asciidoc/modules/ROOT/pages/database-integration/vectordb/pinecone.adoc @@ -1,24 +1,33 @@ = Pinecone +[NOTE] +==== +In Pinecone a collection is a static and non-queryable copy of an index, +therefore, unlike other vector dbs, the Pinecone procedures work on indexes instead of collections. + +However, the vectordb procedures to handle CRUD operations on collections are usually named `apoc.vectordb.<type>.createCollection` and `apoc.vectordb.<type>.deleteCollection`, +so to be consistent, the Pinecone index procedures are named `apoc.vectordb.pinecone.createCollection` and `apoc.vectordb.pinecone.deleteCollection`. +==== + Here is a list of all available Pinecone procedures: [opts=header, cols="1, 3"] |=== | name | description -| apoc.vectordb.pinecone.info(hostOrKey, collection, $config) | Get information about the specified existing collection or throws a 404 error if it does not exist +| apoc.vectordb.pinecone.info(hostOrKey, index, $config) | Get information about the specified existing index or throws a 404 error if it does not exist | apoc.vectordb.pinecone.createCollection(hostOrKey, index, similarity, size, $config) | Creates an index, with the name specified in the 2nd parameter, and with the specified `similarity` and `size`. The default endpoint is `/indexes`. | apoc.vectordb.pinecone.deleteCollection(hostOrKey, index, $config) | Deletes an index with the name specified in the 2nd parameter. - The default endpoint is `/indexes/`. + The default endpoint is `/indexes/<index_name>`. | apoc.vectordb.pinecone.upsert(hostOrKey, index, vectors, $config) | Upserts, in the index with the name specified in the 2nd parameter, the vectors [{id: 'id', vector: '', medatada: ''}]. The default endpoint is `/vectors/upsert`. 
| apoc.vectordb.pinecone.delete(hostOrKey, index, ids, $config) | Delete the vectors with the specified `ids`. - The default endpoint is `/indexes/`. + The default endpoint is `/vectors/delete`. | apoc.vectordb.pinecone.get(hostOrKey, index, ids, $config) | Get the vectors with the specified `ids`. The default endpoint is `/vectors/fetch`. @@ -35,15 +44,6 @@ Here is a list of all available Pinecone procedures: where the 1st parameter can be a key defined by the apoc config `apoc.pinecone..host=myHost`. -[NOTE] -==== -The procedures create/drop/handle an index, instead of a collection like the other vectordb procedures, -since in Pinecone a collection is a static and non-queryable copy of an index. - -Anyway, the create / delete index procedures are named `.createCollection` and `.deleteCollection` to be consistent with the other. -==== - - The default `hostOrKey` is `"https://api.pinecone.io"`, therefore in general can be null with the `createCollection` and `deleteCollection` procedures, and equal to the host name, with the other ones, that is, the one indicated in the Pinecone dashboard: @@ -55,10 +55,10 @@ image::pinecone-index.png[width=800] The following example assume we want to create and manage an index called `test-index`. 
-.Get collection info (it leverages https://docs.pinecone.io/reference/api/control-plane/describe_collection[this API]) +.Get index info (it leverages https://docs.pinecone.io/guides/indexes/view-index-information[this API]) [source,cypher] ---- -CALL apoc.vectordb.pinecone.info(hostOrKey, 'test-collection', {}) +CALL apoc.vectordb.pinecone.info(hostOrKey, 'test-index', {}) ---- .Example results @@ -67,7 +67,7 @@ CALL apoc.vectordb.pinecone.info(hostOrKey, 'test-collection', {, ], $conf) YIELD node, metadata, id, vector +CALL apoc.vectordb.pinecone.getAndUpdate($host, $index, [, ], $conf) YIELD node, metadata, id, vector WITH collect(node) as paths CALL apoc.ml.rag(paths, $attributes, $question, $confPrompt) YIELD value RETURN value diff --git a/extended/src/main/java/apoc/vectordb/Pinecone.java b/extended/src/main/java/apoc/vectordb/Pinecone.java index 4801452d07..bf7ef95d5a 100644 --- a/extended/src/main/java/apoc/vectordb/Pinecone.java +++ b/extended/src/main/java/apoc/vectordb/Pinecone.java @@ -43,12 +43,12 @@ public class Pinecone { public URLAccessChecker urlAccessChecker; @Procedure("apoc.vectordb.pinecone.info") - @Description("apoc.vectordb.pinecone.info(hostOrKey, collection, $configuration) - Get information about the specified existing collection or throws an error if it does not exist") + @Description("apoc.vectordb.pinecone.info(hostOrKey, index, $configuration) - Get information about the specified existing index or throws an error if it does not exist") public Stream getInfo(@Name("hostOrKey") String hostOrKey, - @Name("collection") String collection, + @Name("index") String index, @Name(value = "configuration", defaultValue = "{}") Map configuration) throws Exception { - String url = "%s/collections/%s"; - Map config = getVectorDbInfo(hostOrKey, collection, configuration, url); + String url = "%s/indexes/%s"; + Map config = getVectorDbInfo(hostOrKey, index, configuration, url); methodAndPayloadNull(config); @@ -59,18 +59,18 @@ public Stream 
getInfo(@Name("hostOrKey") String hostOrKey, } @Procedure("apoc.vectordb.pinecone.createCollection") - @Description("apoc.vectordb.pinecone.createCollection(hostOrKey, collection, similarity, size, $configuration) - Creates a collection, with the name specified in the 2nd parameter, and with the specified `similarity` and `size`") + @Description("apoc.vectordb.pinecone.createCollection(hostOrKey, index, similarity, size, $configuration) - Creates a index, with the name specified in the 2nd parameter, and with the specified `similarity` and `size`") public Stream createCollection(@Name("hostOrKey") String hostOrKey, - @Name("collection") String collection, + @Name("index") String index, @Name("similarity") String similarity, @Name("size") Long size, @Name(value = "configuration", defaultValue = "{}") Map configuration) throws Exception { String url = "%s/indexes"; - Map config = getVectorDbInfo(hostOrKey, collection, configuration, url); + Map config = getVectorDbInfo(hostOrKey, index, configuration, url); config.putIfAbsent(METHOD_KEY, "POST"); Map additionalBodies = Map.of( - "name", collection, + "name", index, "dimension", size, "metric", similarity ); @@ -81,14 +81,14 @@ public Stream createCollection(@Name("hostOrKey") String hostOrKey, } @Procedure("apoc.vectordb.pinecone.deleteCollection") - @Description("apoc.vectordb.pinecone.deleteCollection(hostOrKey, collection, $configuration) - Deletes a collection with the name specified in the 2nd parameter") + @Description("apoc.vectordb.pinecone.deleteCollection(hostOrKey, index, $configuration) - Deletes a index with the name specified in the 2nd parameter") public Stream deleteCollection( @Name("hostOrKey") String hostOrKey, - @Name("collection") String collection, + @Name("index") String index, @Name(value = "configuration", defaultValue = "{}") Map configuration) throws Exception { String url = "%s/indexes/%s"; - Map config = getVectorDbInfo(hostOrKey, collection, configuration, url); + Map config = 
getVectorDbInfo(hostOrKey, index, configuration, url); config.putIfAbsent(METHOD_KEY, "DELETE"); RestAPIConfig restAPIConfig = new RestAPIConfig(config); @@ -98,16 +98,16 @@ public Stream deleteCollection( } @Procedure("apoc.vectordb.pinecone.upsert") - @Description("apoc.vectordb.pinecone.upsert(hostOrKey, collection, vectors, $configuration) - Upserts, in the collection with the name specified in the 2nd parameter, the vectors [{id: 'id', vector: '', medatada: ''}]") + @Description("apoc.vectordb.pinecone.upsert(hostOrKey, index, vectors, $configuration) - Upserts, in the index with the name specified in the 2nd parameter, the vectors [{id: 'id', vector: '', medatada: ''}]") public Stream upsert( @Name("hostOrKey") String hostOrKey, - @Name("collection") String collection, + @Name("index") String index, @Name("vectors") List> vectors, @Name(value = "configuration", defaultValue = "{}") Map configuration) throws Exception { String url = "%s/vectors/upsert"; - Map config = getVectorDbInfo(hostOrKey, collection, configuration, url); + Map config = getVectorDbInfo(hostOrKey, index, configuration, url); config.putIfAbsent(METHOD_KEY, "POST"); vectors = vectors.stream() @@ -126,15 +126,15 @@ public Stream upsert( } @Procedure("apoc.vectordb.pinecone.delete") - @Description("apoc.vectordb.pinecone.delete(hostOrKey, collection, ids, $configuration) - Delete the vectors with the specified `ids`") + @Description("apoc.vectordb.pinecone.delete(hostOrKey, index, ids, $configuration) - Delete the vectors with the specified `ids`") public Stream delete( @Name("hostOrKey") String hostOrKey, - @Name("collection") String collection, + @Name("index") String index, @Name("vectors") List ids, @Name(value = "configuration", defaultValue = "{}") Map configuration) throws Exception { String url = "%s/vectors/delete"; - Map config = getVectorDbInfo(hostOrKey, collection, configuration, url); + Map config = getVectorDbInfo(hostOrKey, index, configuration, url); 
config.putIfAbsent(METHOD_KEY, "POST"); Map additionalBodies = Map.of("ids", ids); @@ -145,29 +145,29 @@ public Stream delete( } @Procedure(value = "apoc.vectordb.pinecone.get") - @Description("apoc.vectordb.pinecone.get(hostOrKey, collection, ids, $configuration) - Get the vectors with the specified `ids`") + @Description("apoc.vectordb.pinecone.get(hostOrKey, index, ids, $configuration) - Get the vectors with the specified `ids`") public Stream get(@Name("hostOrKey") String hostOrKey, - @Name("collection") String collection, + @Name("index") String index, @Name("ids") List ids, @Name(value = "configuration", defaultValue = "{}") Map configuration) throws Exception { setReadOnlyMappingMode(configuration); - return getCommon(hostOrKey, collection, ids, configuration); + return getCommon(hostOrKey, index, ids, configuration); } @Procedure(value = "apoc.vectordb.pinecone.getAndUpdate", mode = Mode.WRITE) - @Description("apoc.vectordb.pinecone.getAndUpdate(hostOrKey, collection, ids, $configuration) - Get the vectors with the specified `ids`") + @Description("apoc.vectordb.pinecone.getAndUpdate(hostOrKey, index, ids, $configuration) - Get the vectors with the specified `ids`") public Stream getAndUpdate(@Name("hostOrKey") String hostOrKey, - @Name("collection") String collection, + @Name("index") String index, @Name("ids") List ids, @Name(value = "configuration", defaultValue = "{}") Map configuration) throws Exception { - return getCommon(hostOrKey, collection, ids, configuration); + return getCommon(hostOrKey, index, ids, configuration); } - private Stream getCommon(String hostOrKey, String collection, List ids, Map configuration) throws Exception { + private Stream getCommon(String hostOrKey, String index, List ids, Map configuration) throws Exception { String url = "%s/vectors/fetch"; - Map config = getVectorDbInfo(hostOrKey, collection, configuration, url); + Map config = getVectorDbInfo(hostOrKey, index, configuration, url); - VectorEmbeddingConfig conf = 
DB_HANDLER.getEmbedding().fromGet(config, procedureCallContext, ids, collection); + VectorEmbeddingConfig conf = DB_HANDLER.getEmbedding().fromGet(config, procedureCallContext, ids, index); return getEmbeddingResultStream(conf, procedureCallContext, urlAccessChecker, tx, v -> { @@ -178,33 +178,33 @@ private Stream getCommon(String hostOrKey, String } @Procedure(value = "apoc.vectordb.pinecone.query") - @Description("apoc.vectordb.pinecone.query(hostOrKey, collection, vector, filter, limit, $configuration) - Retrieve closest vectors the the defined `vector`, `limit` of results, in the collection with the name specified in the 2nd parameter") + @Description("apoc.vectordb.pinecone.query(hostOrKey, index, vector, filter, limit, $configuration) - Retrieve closest vectors the the defined `vector`, `limit` of results, in the index with the name specified in the 2nd parameter") public Stream query(@Name("hostOrKey") String hostOrKey, - @Name("collection") String collection, + @Name("index") String index, @Name(value = "vector", defaultValue = "[]") List vector, @Name(value = "filter", defaultValue = "{}") Map filter, @Name(value = "limit", defaultValue = "10") long limit, @Name(value = "configuration", defaultValue = "{}") Map configuration) throws Exception { setReadOnlyMappingMode(configuration); - return queryCommon(hostOrKey, collection, vector, filter, limit, configuration); + return queryCommon(hostOrKey, index, vector, filter, limit, configuration); } @Procedure(value = "apoc.vectordb.pinecone.queryAndUpdate", mode = Mode.WRITE) - @Description("apoc.vectordb.pinecone.queryAndUpdate(hostOrKey, collection, vector, filter, limit, $configuration) - Retrieve closest vectors the the defined `vector`, `limit` of results, in the collection with the name specified in the 2nd parameter") + @Description("apoc.vectordb.pinecone.queryAndUpdate(hostOrKey, index, vector, filter, limit, $configuration) - Retrieve closest vectors the the defined `vector`, `limit` of results, in the 
index with the name specified in the 2nd parameter") public Stream queryAndUpdate(@Name("hostOrKey") String hostOrKey, - @Name("collection") String collection, + @Name("index") String index, @Name(value = "vector", defaultValue = "[]") List vector, @Name(value = "filter", defaultValue = "{}") Map filter, @Name(value = "limit", defaultValue = "10") long limit, @Name(value = "configuration", defaultValue = "{}") Map configuration) throws Exception { - return queryCommon(hostOrKey, collection, vector, filter, limit, configuration); + return queryCommon(hostOrKey, index, vector, filter, limit, configuration); } - private Stream queryCommon(String hostOrKey, String collection, List vector, Map filter, long limit, Map configuration) throws Exception { + private Stream queryCommon(String hostOrKey, String index, List vector, Map filter, long limit, Map configuration) throws Exception { String url = "%s/query"; - Map config = getVectorDbInfo(hostOrKey, collection, configuration, url); + Map config = getVectorDbInfo(hostOrKey, index, configuration, url); - VectorEmbeddingConfig conf = DB_HANDLER.getEmbedding().fromQuery(config, procedureCallContext, vector, filter, limit, collection); + VectorEmbeddingConfig conf = DB_HANDLER.getEmbedding().fromQuery(config, procedureCallContext, vector, filter, limit, index); return getEmbeddingResultStream(conf, procedureCallContext, urlAccessChecker, tx, v -> { @@ -215,7 +215,7 @@ private Stream queryCommon(String hostOrKey, Strin } private Map getVectorDbInfo( - String hostOrKey, String collection, Map configuration, String templateUrl) { - return getCommonVectorDbInfo(hostOrKey, collection, configuration, templateUrl, DB_HANDLER); + String hostOrKey, String index, Map configuration, String templateUrl) { + return getCommonVectorDbInfo(hostOrKey, index, configuration, templateUrl, DB_HANDLER); } } diff --git a/extended/src/main/java/apoc/vectordb/PineconeHandler.java b/extended/src/main/java/apoc/vectordb/PineconeHandler.java index 
a996f8fe55..e2de74284d 100644 --- a/extended/src/main/java/apoc/vectordb/PineconeHandler.java +++ b/extended/src/main/java/apoc/vectordb/PineconeHandler.java @@ -53,7 +53,7 @@ static class PineconeEmbeddingHandler implements VectorEmbeddingHandler { * that makes the request to respond 200 OK, but returns an empty result */ @Override - public VectorEmbeddingConfig fromGet(Map config, ProcedureCallContext procedureCallContext, List ids, String collection) { + public VectorEmbeddingConfig fromGet(Map config, ProcedureCallContext procedureCallContext, List ids, String index) { List fields = procedureCallContext.outputFields().toList(); config.put(BODY_KEY, null); @@ -74,7 +74,7 @@ public VectorEmbeddingConfig fromGet(Map config, ProcedureCa } @Override - public VectorEmbeddingConfig fromQuery(Map config, ProcedureCallContext procedureCallContext, List vector, Object filter, long limit, String collection) { + public VectorEmbeddingConfig fromQuery(Map config, ProcedureCallContext procedureCallContext, List vector, Object filter, long limit, String index) { List fields = procedureCallContext.outputFields().toList(); Map additionalBodies = map("vector", vector, diff --git a/extended/src/main/java/apoc/vectordb/VectorDbUtil.java b/extended/src/main/java/apoc/vectordb/VectorDbUtil.java index d7a059a18a..18cd3f8db7 100644 --- a/extended/src/main/java/apoc/vectordb/VectorDbUtil.java +++ b/extended/src/main/java/apoc/vectordb/VectorDbUtil.java @@ -71,12 +71,17 @@ public static Map getCommonVectorDbInfo( * Retrieve, if exists, the properties stored via `apoc.vectordb.configure` procedure */ private static Map getSystemDbProps(String hostOrKey, VectorDbHandler handler) { - Map props = withSystemDb(transaction -> { - Label label = Label.label(handler.getLabel()); - Node node = transaction.findNode(label, SystemPropertyKeys.name.name(), hostOrKey); - return node == null ? 
Map.of() : node.getAllProperties(); - }); - return props; + try { + Map props = withSystemDb(transaction -> { + Label label = Label.label(handler.getLabel()); + Node node = transaction.findNode(label, SystemPropertyKeys.name.name(), hostOrKey); + return node == null ? Map.of() : node.getAllProperties(); + }); + return props; + } catch (Exception e) { + // Fallback in case of null keys/values + return Map.of(); + } } /** diff --git a/extended/src/main/java/apoc/vectordb/VectorEmbeddingHandler.java b/extended/src/main/java/apoc/vectordb/VectorEmbeddingHandler.java index 26284d4991..e6cb2c8148 100644 --- a/extended/src/main/java/apoc/vectordb/VectorEmbeddingHandler.java +++ b/extended/src/main/java/apoc/vectordb/VectorEmbeddingHandler.java @@ -15,12 +15,7 @@ VectorEmbeddingConfig fromGet(Map config, List ids, String collection); - VectorEmbeddingConfig fromQuery(Map config, - ProcedureCallContext procedureCallContext, - List vector, - Object filter, - long limit, - String collection); + VectorEmbeddingConfig fromQuery(Map config, ProcedureCallContext procedureCallContext, List vector, Object filter, long limit, String index); default VectorEmbeddingConfig populateApiBodyRequest(VectorEmbeddingConfig config, Map additionalBodies) { diff --git a/extended/src/test/java/apoc/vectordb/PineconeTest.java b/extended/src/test/java/apoc/vectordb/PineconeTest.java index ea0d49fa00..275945aa4c 100644 --- a/extended/src/test/java/apoc/vectordb/PineconeTest.java +++ b/extended/src/test/java/apoc/vectordb/PineconeTest.java @@ -16,10 +16,12 @@ import java.util.List; import java.util.Map; +import java.util.UUID; import static apoc.ml.Prompt.API_KEY_CONF; import static apoc.ml.RestAPIConfig.HEADERS_KEY; import static apoc.util.ExtendedTestUtil.assertFails; +import static apoc.util.ExtendedTestUtil.testRetryCallEventually; import static apoc.util.MapUtil.map; import static apoc.util.TestUtil.testCall; import static apoc.util.TestUtil.testCallEmpty; @@ -50,7 +52,7 @@ public class 
PineconeTest { private static String API_KEY; private static String HOST; - private static final String collName = "test-collection"; + private static final String collName = UUID.randomUUID().toString(); @ClassRule public static TemporaryFolder storeDir = new TemporaryFolder(); @@ -76,9 +78,9 @@ public static void setUp() { ADMIN_AUTHORIZATION = map("Api-Key", API_KEY); ADMIN_HEADER_CONF = map(HEADERS_KEY, ADMIN_AUTHORIZATION); - - testCall(db, "CALL apoc.vectordb.pinecone.createCollection($host, $coll, 'cosine', 4, $conf)", - map("host", null, "coll", collName, + + testRetryCallEventually(db, "CALL apoc.vectordb.pinecone.createCollection($host, $coll, 'cosine', 4, $conf)", + map("host", HOST, "coll", collName, "conf", map(HEADERS_KEY, ADMIN_AUTHORIZATION, "body", map("spec", map("serverless", map("cloud", "aws", "region", "us-east-1")) ) ) @@ -86,9 +88,13 @@ public static void setUp() { r -> { Map value = (Map) r.get("value"); assertEquals(map("ready", false, "state", "Initializing"), value.get("status")); - }); + HOST = "https://" + value.get("host"); + }, 5L); - testCall(db, """ + // the upsert takes a while + Util.sleep(5000); + + testResult(db, """ CALL apoc.vectordb.pinecone.upsert($host, $coll, [ {id: '1', vector: [0.05, 0.61, 0.76, 0.74], metadata: {city: "Berlin", foo: "one"}}, @@ -96,16 +102,17 @@ public static void setUp() { ], $conf) """, - map("host", "https://test-collection-ilx67g5.svc.aped-4627-b74a.pinecone.io", + map("host", HOST, "coll", collName, "conf", ADMIN_HEADER_CONF), r -> { - Map value = (Map) r.get("value"); + Map row = r.next(); + Map value = (Map) row.get("value"); assertEquals(2L, value.get("upsertedCount")); }); // the upsert takes a while - Util.sleep(5000); + Util.sleep(20000); } @AfterClass @@ -130,7 +137,7 @@ public void before() { @Test public void getInfo() { testResult(db, "CALL apoc.vectordb.pinecone.info($host, $coll, $conf) ", - map("host", HOST, "coll", collName, + map("host", null, "coll", collName, "conf", 
map(ALL_RESULTS_KEY, true, HEADERS_KEY, ADMIN_AUTHORIZATION) ), r -> { @@ -142,12 +149,12 @@ public void getInfo() { @Test public void getInfoNotExistentCollection() { - assertFails(db, "CALL apoc.vectordb.pinecone.info($host, 'wrong_collection', $conf) ", - map("host", HOST, "coll", collName, + String wrongCollection = "wrong_collection"; + assertFails(db, "CALL apoc.vectordb.pinecone.info($host, $coll, $conf)", + map("host", null, "coll", wrongCollection, "conf", map(ALL_RESULTS_KEY, true, HEADERS_KEY, ADMIN_AUTHORIZATION) ), - "Server returned HTTP response code: 500" - ); + "java.io.FileNotFoundException: https://api.pinecone.io/indexes/" + wrongCollection); } @Test @@ -201,7 +208,7 @@ public void deleteVector() { }); // the upsert takes a while - Util.sleep(5000); + Util.sleep(10000); testCall(db, "CALL apoc.vectordb.pinecone.delete($host, $coll, ['3', '4'], $conf) ", map("host", HOST, "coll", collName, "conf", ADMIN_HEADER_CONF),