From 0b4982987a3d6cba65dbd532dbd47e1b8c2057ad Mon Sep 17 00:00:00 2001 From: DenovVasil Date: Mon, 6 Jan 2025 18:16:47 +0200 Subject: [PATCH] feat(textract): add documentation handling for textract --- .../aws-textract-outbound-connector.json | 59 +++++++++++++++- ...ws-textract-outbound-connector-hybrid.json | 59 +++++++++++++++- .../textract/TextractConnectorFunction.java | 2 +- .../textract/caller/SyncTextractCaller.java | 17 ++++- .../textract/model/DocumentLocationType.java | 12 ++++ .../textract/model/TextractRequestData.java | 57 +++++++++++++--- .../caller/AsyncTextractCallerTest.java | 9 ++- .../caller/SyncTextractCallerTest.java | 67 +++++++++++++++++-- .../textract/caller/TextractCallerTest.java | 13 +++- .../textract/util/TextractTestUtils.java | 5 +- 10 files changed, 276 insertions(+), 24 deletions(-) create mode 100644 connectors/aws/aws-textract/src/main/java/io/camunda/connector/textract/model/DocumentLocationType.java diff --git a/connectors/aws/aws-textract/element-templates/aws-textract-outbound-connector.json b/connectors/aws/aws-textract/element-templates/aws-textract-outbound-connector.json index cd88e3d3bd..dffc61a051 100644 --- a/connectors/aws/aws-textract/element-templates/aws-textract-outbound-connector.json +++ b/connectors/aws/aws-textract/element-templates/aws-textract-outbound-connector.json @@ -7,7 +7,7 @@ "keywords" : [ "extract text", "extract data", "extract text from image", "extract data from image", "ocr" ] }, "documentationRef" : "https://docs.camunda.io/docs/next/components/connectors/out-of-the-box-connectors/amazon-textract/", - "version" : 1, + "version" : 2, "category" : { "id" : "connectors", "name" : "Connectors" @@ -151,6 +151,29 @@ "name" : "Polling", "value" : "POLLING" } ] + }, { + "id" : "input.documentLocationType", + "label" : "Document location type", + "description" : "Document location", + "optional" : false, + "group" : "input", + "binding" : { + "name" : "input.documentLocationType", + "type" : "zeebe:input" + }, + "condition" : { + "property" : "input.executionType", + "equals" : "SYNC", + "type" : "simple" + }, + "type" : "Dropdown", + "choices" : [ { + "name" : "S3", + "value" : "S3" + }, { + "name" : "Uploaded document", + "value" : "UPLOADED" + } ] }, { "id" : "input.documentS3Bucket", "label" : "Document bucket", @@ -165,6 +188,11 @@ "name" : "input.documentS3Bucket", "type" : "zeebe:input" }, + "condition" : { + "property" : "input.documentLocationType", + "equals" : "S3", + "type" : "simple" + }, "type" : "String" }, { "id" : "input.documentName", @@ -180,6 +208,11 @@ "name" : "input.documentName", "type" : "zeebe:input" }, + "condition" : { + "property" : "input.documentLocationType", + "equals" : "S3", + "type" : "simple" + }, "type" : "String" }, { "id" : "input.documentVersion", @@ -192,6 +225,11 @@ "name" : "input.documentVersion", "type" : "zeebe:input" }, + "condition" : { + "property" : "input.documentLocationType", + "equals" : "S3", + "type" : "simple" + }, "type" : "String" }, { "id" : "input.analyzeTables", @@ -372,6 +410,25 @@ "type" : "simple" }, "type" : "String" + }, { + "id" : "input.document", + "label" : "Document", + "optional" : false, + "constraints" : { + "notEmpty" : true + }, + "feel" : "required", + "group" : "input", + "binding" : { + "name" : "input.document", + "type" : "zeebe:input" + }, + "condition" : { + "property" : "input.documentLocationType", + "equals" : "UPLOADED", + "type" : "simple" + }, + "type" : "String" }, { "id" : "resultVariable", "label" : "Result variable", diff --git a/connectors/aws/aws-textract/element-templates/hybrid/aws-textract-outbound-connector-hybrid.json b/connectors/aws/aws-textract/element-templates/hybrid/aws-textract-outbound-connector-hybrid.json index 2fdaddb833..9fbf96d3e5 100644 --- a/connectors/aws/aws-textract/element-templates/hybrid/aws-textract-outbound-connector-hybrid.json +++ b/connectors/aws/aws-textract/element-templates/hybrid/aws-textract-outbound-connector-hybrid.json @@ -7,7 +7,7 @@ "keywords" : [ "extract text", "extract data", "extract text from image", "extract data from image", "ocr" ] }, "documentationRef" : "https://docs.camunda.io/docs/next/components/connectors/out-of-the-box-connectors/amazon-textract/", - "version" : 1, + "version" : 2, "category" : { "id" : "connectors", "name" : "Connectors" @@ -156,6 +156,29 @@ "name" : "Polling", "value" : "POLLING" } ] + }, { + "id" : "input.documentLocationType", + "label" : "Document location type", + "description" : "Document location", + "optional" : false, + "group" : "input", + "binding" : { + "name" : "input.documentLocationType", + "type" : "zeebe:input" + }, + "condition" : { + "property" : "input.executionType", + "equals" : "SYNC", + "type" : "simple" + }, + "type" : "Dropdown", + "choices" : [ { + "name" : "S3", + "value" : "S3" + }, { + "name" : "Uploaded document", + "value" : "UPLOADED" + } ] }, { "id" : "input.documentS3Bucket", "label" : "Document bucket", @@ -170,6 +193,11 @@ "name" : "input.documentS3Bucket", "type" : "zeebe:input" }, + "condition" : { + "property" : "input.documentLocationType", + "equals" : "S3", + "type" : "simple" + }, "type" : "String" }, { "id" : "input.documentName", @@ -185,6 +213,11 @@ "name" : "input.documentName", "type" : "zeebe:input" }, + "condition" : { + "property" : "input.documentLocationType", + "equals" : "S3", + "type" : "simple" + }, "type" : "String" }, { "id" : "input.documentVersion", @@ -197,6 +230,11 @@ "name" : "input.documentVersion", "type" : "zeebe:input" }, + "condition" : { + "property" : "input.documentLocationType", + "equals" : "S3", + "type" : "simple" + }, "type" : "String" }, { "id" : "input.analyzeTables", @@ -377,6 +415,25 @@ "type" : "simple" }, "type" : "String" + }, { + "id" : "input.document", + "label" : "Document", + "optional" : false, + "constraints" : { + "notEmpty" : true + }, + "feel" : "required", + "group" : "input", + "binding" : { + "name" : "input.document", + "type" : "zeebe:input" + }, + "condition" : { + "property" : "input.documentLocationType", + "equals" : "UPLOADED", + "type" : "simple" + }, + "type" : "String" }, { "id" : "resultVariable", "label" : "Result variable", diff --git a/connectors/aws/aws-textract/src/main/java/io/camunda/connector/textract/TextractConnectorFunction.java b/connectors/aws/aws-textract/src/main/java/io/camunda/connector/textract/TextractConnectorFunction.java index d6d59addd8..a770bc2727 100644 --- a/connectors/aws/aws-textract/src/main/java/io/camunda/connector/textract/TextractConnectorFunction.java +++ b/connectors/aws/aws-textract/src/main/java/io/camunda/connector/textract/TextractConnectorFunction.java @@ -34,7 +34,7 @@ "ocr" }), inputDataClass = TextractRequest.class, - version = 1, + version = 2, propertyGroups = { @ElementTemplate.PropertyGroup(id = "authentication", label = "Authentication"), @ElementTemplate.PropertyGroup(id = "configuration", label = "Configuration"), diff --git a/connectors/aws/aws-textract/src/main/java/io/camunda/connector/textract/caller/SyncTextractCaller.java b/connectors/aws/aws-textract/src/main/java/io/camunda/connector/textract/caller/SyncTextractCaller.java index dfcd667f67..474b88ec52 100644 --- a/connectors/aws/aws-textract/src/main/java/io/camunda/connector/textract/caller/SyncTextractCaller.java +++ b/connectors/aws/aws-textract/src/main/java/io/camunda/connector/textract/caller/SyncTextractCaller.java @@ -11,6 +11,8 @@ import com.amazonaws.services.textract.model.AnalyzeDocumentResult; import com.amazonaws.services.textract.model.Document; import io.camunda.connector.textract.model.TextractRequestData; +import java.nio.ByteBuffer; +import java.util.Objects; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -22,7 +24,8 @@ public class SyncTextractCaller implements TextractCaller public AnalyzeDocumentResult call( TextractRequestData requestData, AmazonTextract textractClient) { LOGGER.debug("Starting sync task for document analysis with request data: {}", requestData); - final Document document = new Document().withS3Object(prepareS3Obj(requestData)); + + final Document document = createDocument(requestData); final AnalyzeDocumentRequest analyzeDocumentRequest = new AnalyzeDocumentRequest() @@ -31,4 +34,16 @@ public AnalyzeDocumentResult call( return textractClient.analyzeDocument(analyzeDocumentRequest); } + + private Document createDocument(TextractRequestData requestData) { + final Document document = new Document(); + + if (Objects.isNull(requestData.document())) { + return document.withS3Object(prepareS3Obj(requestData)); + } + + byte[] docBytes = requestData.document().asByteArray(); + document.withBytes(ByteBuffer.wrap(docBytes)); + return document; + } } diff --git a/connectors/aws/aws-textract/src/main/java/io/camunda/connector/textract/model/DocumentLocationType.java b/connectors/aws/aws-textract/src/main/java/io/camunda/connector/textract/model/DocumentLocationType.java new file mode 100644 index 0000000000..aab98d6e0c --- /dev/null +++ b/connectors/aws/aws-textract/src/main/java/io/camunda/connector/textract/model/DocumentLocationType.java @@ -0,0 +1,12 @@ +/* + * Copyright Camunda Services GmbH and/or licensed to Camunda Services GmbH + * under one or more contributor license agreements. Licensed under a proprietary license. + * See the License.txt file for more information. You may not use this file + * except in compliance with the proprietary license. + */ +package io.camunda.connector.textract.model; + +public enum DocumentLocationType { + S3, + UPLOADED; +} diff --git a/connectors/aws/aws-textract/src/main/java/io/camunda/connector/textract/model/TextractRequestData.java b/connectors/aws/aws-textract/src/main/java/io/camunda/connector/textract/model/TextractRequestData.java index e1d9b41943..3e713826b3 100644 --- a/connectors/aws/aws-textract/src/main/java/io/camunda/connector/textract/model/TextractRequestData.java +++ b/connectors/aws/aws-textract/src/main/java/io/camunda/connector/textract/model/TextractRequestData.java @@ -6,10 +6,11 @@ */ package io.camunda.connector.textract.model; +import io.camunda.connector.generator.dsl.Property; import io.camunda.connector.generator.dsl.Property.FeelMode; import io.camunda.connector.generator.java.annotation.TemplateProperty; +import io.camunda.document.Document; import jakarta.validation.constraints.AssertTrue; -import jakarta.validation.constraints.NotBlank; import jakarta.validation.constraints.NotNull; import org.apache.commons.lang3.StringUtils; @@ -28,23 +29,52 @@ public record TextractRequestData( description = "Endpoint inference type") @NotNull TextractExecutionType executionType, + @TemplateProperty( + group = "input", + label = "Document location type", + description = "Document location", + feel = FeelMode.disabled, + type = TemplateProperty.PropertyType.Dropdown, + choices = { + @TemplateProperty.DropdownPropertyChoice(value = "S3", label = "S3"), + @TemplateProperty.DropdownPropertyChoice( + value = "UPLOADED", + label = "Uploaded document") + }, + condition = + @TemplateProperty.PropertyCondition( + property = "input.executionType", + equals = "SYNC")) + DocumentLocationType documentLocationType, @TemplateProperty( group = "input", label = "Document bucket", - description = "S3 bucket that contains document that needs to be processed") - @NotBlank + description = "S3 bucket that contains document that needs to be processed", + condition = + @TemplateProperty.PropertyCondition( + property = "input.documentLocationType", + equals = "S3"), + constraints = @TemplateProperty.PropertyConstraints(notEmpty = true)) String documentS3Bucket, @TemplateProperty( group = "input", label = "Document path", - description = "S3 document path to be processed") - @NotBlank + description = "S3 document path to be processed", + condition = + @TemplateProperty.PropertyCondition( + property = "input.documentLocationType", + equals = "S3"), + constraints = @TemplateProperty.PropertyConstraints(notEmpty = true)) String documentName, @TemplateProperty( group = "input", label = "Document version", description = "S3 document version to be processed", - optional = true) + optional = true, + condition = + @TemplateProperty.PropertyCondition( + property = "input.documentLocationType", + equals = "S3")) String documentVersion, @TemplateProperty( label = "Analyze tables", @@ -150,14 +180,25 @@ public record TextractRequestData( @TemplateProperty.PropertyCondition( property = "input.executionType", equals = "ASYNC")) - String outputConfigS3Prefix) { + String outputConfigS3Prefix, + @TemplateProperty( + group = "input", + label = "Document", + feel = Property.FeelMode.required, + type = TemplateProperty.PropertyType.String, + condition = + @TemplateProperty.PropertyCondition( + property = "input.documentLocationType", + equals = "UPLOADED"), + constraints = @TemplateProperty.PropertyConstraints(notEmpty = true)) + Document document) { @TemplateProperty(ignore = true) public static final String WRONG_OUTPUT_VALUES_MSG = "Output S3 bucket must be filled in if output S3 prefix is filled in"; @TemplateProperty(ignore = true) public static final String WRONG_NOTIFICATION_VALUES_MSG = - "either both notification values role ARN and topic ARN must be filled in or none of them"; + "Either both notification values role ARN and topic ARN must be filled in or none of them"; @AssertTrue(message = WRONG_NOTIFICATION_VALUES_MSG) public boolean isValidNotificationProperties() { diff --git a/connectors/aws/aws-textract/src/test/java/io/camunda/connector/textract/caller/AsyncTextractCallerTest.java b/connectors/aws/aws-textract/src/test/java/io/camunda/connector/textract/caller/AsyncTextractCallerTest.java index 2181647975..a542c72070 100644 --- a/connectors/aws/aws-textract/src/test/java/io/camunda/connector/textract/caller/AsyncTextractCallerTest.java +++ b/connectors/aws/aws-textract/src/test/java/io/camunda/connector/textract/caller/AsyncTextractCallerTest.java @@ -14,6 +14,7 @@ import com.amazonaws.services.textract.AmazonTextractAsyncClient; import com.amazonaws.services.textract.model.StartDocumentAnalysisRequest; import com.amazonaws.services.textract.model.StartDocumentAnalysisResult; +import io.camunda.connector.textract.model.DocumentLocationType; import io.camunda.connector.textract.model.TextractExecutionType; import io.camunda.connector.textract.model.TextractRequestData; import org.junit.jupiter.api.Test; @@ -100,6 +101,7 @@ void callWithoutOutputS3BucketShouldNotCreateOutputObj() { private TextractRequestData prepareReqData(String roleArn, String topicArn) { return new TextractRequestData( TextractExecutionType.ASYNC, + DocumentLocationType.S3, "test-bucket", "test-object", "1", @@ -113,12 +115,14 @@ private TextractRequestData prepareReqData(String roleArn, String topicArn) { roleArn, topicArn, "outputBucket", - "prefix"); + "prefix", + null); } private TextractRequestData prepareReqDataWithoutOutputS3Bucket() { return new TextractRequestData( TextractExecutionType.ASYNC, + DocumentLocationType.S3, "test-bucket", "test-object", "1", @@ -132,6 +136,7 @@ private TextractRequestData prepareReqDataWithoutOutputS3Bucket() { "roleArn", "topicArn", "", - "prefix"); + "prefix", + null); } } diff --git a/connectors/aws/aws-textract/src/test/java/io/camunda/connector/textract/caller/SyncTextractCallerTest.java b/connectors/aws/aws-textract/src/test/java/io/camunda/connector/textract/caller/SyncTextractCallerTest.java index 9e18ac88f2..1781ae999d 100644 --- a/connectors/aws/aws-textract/src/test/java/io/camunda/connector/textract/caller/SyncTextractCallerTest.java +++ b/connectors/aws/aws-textract/src/test/java/io/camunda/connector/textract/caller/SyncTextractCallerTest.java @@ -6,24 +6,30 @@ */ package io.camunda.connector.textract.caller; +import static com.amazonaws.services.textract.model.FeatureType.*; +import static org.assertj.core.api.Assertions.assertThat; import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; +import static org.mockito.Mockito.*; import com.amazonaws.services.textract.AmazonTextractClient; import com.amazonaws.services.textract.model.AnalyzeDocumentRequest; import com.amazonaws.services.textract.model.AnalyzeDocumentResult; +import io.camunda.connector.textract.model.DocumentLocationType; import io.camunda.connector.textract.model.TextractExecutionType; import io.camunda.connector.textract.model.TextractRequestData; +import io.camunda.document.Document; +import java.nio.ByteBuffer; +import java.util.HexFormat; import org.junit.jupiter.api.Test; -import org.mockito.Mockito; +import org.mockito.ArgumentCaptor; class SyncTextractCallerTest { @Test - void call() { + void callWithS3DocumentLocation() { TextractRequestData requestData = new TextractRequestData( TextractExecutionType.SYNC, + DocumentLocationType.S3, "test-bucket", "test-object", "1", @@ -37,9 +43,10 @@ void call() { "notification-channel", "role-arn", "outputBucket", - "prefix"); + "prefix", + null); - AmazonTextractClient textractClient = Mockito.mock(AmazonTextractClient.class); + AmazonTextractClient textractClient = mock(AmazonTextractClient.class); when(textractClient.analyzeDocument(any(AnalyzeDocumentRequest.class))) .thenReturn(new AnalyzeDocumentResult()); @@ -48,4 +55,52 @@ void call() { verify(textractClient).analyzeDocument(any(AnalyzeDocumentRequest.class)); } + + @Test + void callWithUploadDocumentLocation() { + final Document document = mock(Document.class); + byte[] bytes = HexFormat.of().parseHex("e04fd020ea3a6910a2d808002b30309d"); + + when(document.asByteArray()).thenReturn(bytes); + + TextractRequestData requestData = + new TextractRequestData( + TextractExecutionType.SYNC, + DocumentLocationType.UPLOADED, + null, + null, + null, + true, + false, + false, + false, + "token", + "client-request-token", + "job-tag", + "notification-channel", + "role-arn", + "outputBucket", + "prefix", + document); + + AmazonTextractClient textractClient = mock(AmazonTextractClient.class); + + when(textractClient.analyzeDocument(any(AnalyzeDocumentRequest.class))) + .thenReturn(new AnalyzeDocumentResult()); + + new SyncTextractCaller().call(requestData, textractClient); + + ArgumentCaptor argumentCaptor = + ArgumentCaptor.forClass(AnalyzeDocumentRequest.class); + + verify(textractClient).analyzeDocument(argumentCaptor.capture()); + AnalyzeDocumentRequest analyzeDocumentRequest = argumentCaptor.getValue(); + assertThat(analyzeDocumentRequest) + .isEqualTo( + new AnalyzeDocumentRequest() + .withFeatureTypes(TABLES.name()) + .withDocument( + new com.amazonaws.services.textract.model.Document() + .withBytes(ByteBuffer.wrap(bytes)))); + } } diff --git a/connectors/aws/aws-textract/src/test/java/io/camunda/connector/textract/caller/TextractCallerTest.java b/connectors/aws/aws-textract/src/test/java/io/camunda/connector/textract/caller/TextractCallerTest.java index 80584c3cd8..aff80d60ff 100644 --- a/connectors/aws/aws-textract/src/test/java/io/camunda/connector/textract/caller/TextractCallerTest.java +++ b/connectors/aws/aws-textract/src/test/java/io/camunda/connector/textract/caller/TextractCallerTest.java @@ -14,6 +14,7 @@ import com.amazonaws.services.textract.model.AnalyzeDocumentResult; import com.amazonaws.services.textract.model.DocumentLocation; import com.amazonaws.services.textract.model.S3Object; +import io.camunda.connector.textract.model.DocumentLocationType; import io.camunda.connector.textract.model.TextractExecutionType; import io.camunda.connector.textract.model.TextractRequestData; import java.util.Set; @@ -37,6 +38,7 @@ void prepareFeatureTypesAllEnabled() { TextractRequestData requestData1 = new TextractRequestData( TextractExecutionType.SYNC, + DocumentLocationType.S3, "test-bucket", "test-object", "1", @@ -50,7 +52,8 @@ void prepareFeatureTypesAllEnabled() { "notification-channel", "role-arn", "outputBucket", - "prefix"); + "prefix", + null); Set featureTypes = textractCaller.prepareFeatureTypes(requestData1); assertThat(featureTypes).containsExactlyInAnyOrder("FORMS", "LAYOUT", "SIGNATURES", "TABLES"); } @@ -60,6 +63,7 @@ void prepareFeatureTypesNoFeaturesEnabled() { TextractRequestData requestData = new TextractRequestData( TextractExecutionType.SYNC, + DocumentLocationType.S3, "test-bucket", "test-object", "1", @@ -73,7 +77,8 @@ void prepareFeatureTypesNoFeaturesEnabled() { "notification-channel", "role-arn", "outputBucket", - "prefix"); + "prefix", + null); Exception exception = assertThrows( @@ -87,6 +92,7 @@ void prepareFeatureTypesOnlyTablesAndLayout() { TextractRequestData requestData = new TextractRequestData( TextractExecutionType.SYNC, + DocumentLocationType.S3, "test-bucket", "test-object", "1", @@ -100,7 +106,8 @@ void prepareFeatureTypesOnlyTablesAndLayout() { "notification-channel", "role-arn", "outputBucket", - "prefix"); + "prefix", + null); Set featureTypes = textractCaller.prepareFeatureTypes(requestData); assertThat(featureTypes).containsExactlyInAnyOrder("TABLES", "LAYOUT"); } diff --git a/connectors/aws/aws-textract/src/test/java/io/camunda/connector/textract/util/TextractTestUtils.java b/connectors/aws/aws-textract/src/test/java/io/camunda/connector/textract/util/TextractTestUtils.java index 26acf894ea..3561ed55d4 100644 --- a/connectors/aws/aws-textract/src/test/java/io/camunda/connector/textract/util/TextractTestUtils.java +++ b/connectors/aws/aws-textract/src/test/java/io/camunda/connector/textract/util/TextractTestUtils.java @@ -6,6 +6,7 @@ */ package io.camunda.connector.textract.util; +import io.camunda.connector.textract.model.DocumentLocationType; import io.camunda.connector.textract.model.TextractExecutionType; import io.camunda.connector.textract.model.TextractRequestData; @@ -194,6 +195,7 @@ public class TextractTestUtils { public static final TextractRequestData FULL_FILLED_ASYNC_TEXTRACT_DATA = new TextractRequestData( TextractExecutionType.ASYNC, + DocumentLocationType.S3, "test-bucket", "test-object", "1", @@ -207,5 +209,6 @@ public class TextractTestUtils { "notification-channel", "sns-arn", "outputBucket", - "prefix"); + "prefix", + null); }