Skip to content

Commit

Permalink
feat(textract): add documentation handling for textract
Browse files Browse the repository at this point in the history
  • Loading branch information
DenovVasil committed Jan 7, 2025
1 parent 3d20ed2 commit 0b49829
Show file tree
Hide file tree
Showing 10 changed files with 276 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"keywords" : [ "extract text", "extract data", "extract text from image", "extract data from image", "ocr" ]
},
"documentationRef" : "https://docs.camunda.io/docs/next/components/connectors/out-of-the-box-connectors/amazon-textract/",
"version" : 1,
"version" : 2,
"category" : {
"id" : "connectors",
"name" : "Connectors"
Expand Down Expand Up @@ -151,6 +151,29 @@
"name" : "Polling",
"value" : "POLLING"
} ]
}, {
"id" : "input.documentLocationType",
"label" : "Document location type",
"description" : "Document location",
"optional" : false,
"group" : "input",
"binding" : {
"name" : "input.documentLocationType",
"type" : "zeebe:input"
},
"condition" : {
"property" : "input.executionType",
"equals" : "SYNC",
"type" : "simple"
},
"type" : "Dropdown",
"choices" : [ {
"name" : "S3",
"value" : "S3"
}, {
"name" : "Uploaded document",
"value" : "UPLOADED"
} ]
}, {
"id" : "input.documentS3Bucket",
"label" : "Document bucket",
Expand All @@ -165,6 +188,11 @@
"name" : "input.documentS3Bucket",
"type" : "zeebe:input"
},
"condition" : {
"property" : "input.documentLocationType",
"equals" : "S3",
"type" : "simple"
},
"type" : "String"
}, {
"id" : "input.documentName",
Expand All @@ -180,6 +208,11 @@
"name" : "input.documentName",
"type" : "zeebe:input"
},
"condition" : {
"property" : "input.documentLocationType",
"equals" : "S3",
"type" : "simple"
},
"type" : "String"
}, {
"id" : "input.documentVersion",
Expand All @@ -192,6 +225,11 @@
"name" : "input.documentVersion",
"type" : "zeebe:input"
},
"condition" : {
"property" : "input.documentLocationType",
"equals" : "S3",
"type" : "simple"
},
"type" : "String"
}, {
"id" : "input.analyzeTables",
Expand Down Expand Up @@ -372,6 +410,25 @@
"type" : "simple"
},
"type" : "String"
}, {
"id" : "input.document",
"label" : "Document",
"optional" : false,
"constraints" : {
"notEmpty" : true
},
"feel" : "required",
"group" : "input",
"binding" : {
"name" : "input.document",
"type" : "zeebe:input"
},
"condition" : {
"property" : "input.documentLocationType",
"equals" : "UPLOADED",
"type" : "simple"
},
"type" : "String"
}, {
"id" : "resultVariable",
"label" : "Result variable",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"keywords" : [ "extract text", "extract data", "extract text from image", "extract data from image", "ocr" ]
},
"documentationRef" : "https://docs.camunda.io/docs/next/components/connectors/out-of-the-box-connectors/amazon-textract/",
"version" : 1,
"version" : 2,
"category" : {
"id" : "connectors",
"name" : "Connectors"
Expand Down Expand Up @@ -156,6 +156,29 @@
"name" : "Polling",
"value" : "POLLING"
} ]
}, {
"id" : "input.documentLocationType",
"label" : "Document location type",
"description" : "Document location",
"optional" : false,
"group" : "input",
"binding" : {
"name" : "input.documentLocationType",
"type" : "zeebe:input"
},
"condition" : {
"property" : "input.executionType",
"equals" : "SYNC",
"type" : "simple"
},
"type" : "Dropdown",
"choices" : [ {
"name" : "S3",
"value" : "S3"
}, {
"name" : "Uploaded document",
"value" : "UPLOADED"
} ]
}, {
"id" : "input.documentS3Bucket",
"label" : "Document bucket",
Expand All @@ -170,6 +193,11 @@
"name" : "input.documentS3Bucket",
"type" : "zeebe:input"
},
"condition" : {
"property" : "input.documentLocationType",
"equals" : "S3",
"type" : "simple"
},
"type" : "String"
}, {
"id" : "input.documentName",
Expand All @@ -185,6 +213,11 @@
"name" : "input.documentName",
"type" : "zeebe:input"
},
"condition" : {
"property" : "input.documentLocationType",
"equals" : "S3",
"type" : "simple"
},
"type" : "String"
}, {
"id" : "input.documentVersion",
Expand All @@ -197,6 +230,11 @@
"name" : "input.documentVersion",
"type" : "zeebe:input"
},
"condition" : {
"property" : "input.documentLocationType",
"equals" : "S3",
"type" : "simple"
},
"type" : "String"
}, {
"id" : "input.analyzeTables",
Expand Down Expand Up @@ -377,6 +415,25 @@
"type" : "simple"
},
"type" : "String"
}, {
"id" : "input.document",
"label" : "Document",
"optional" : false,
"constraints" : {
"notEmpty" : true
},
"feel" : "required",
"group" : "input",
"binding" : {
"name" : "input.document",
"type" : "zeebe:input"
},
"condition" : {
"property" : "input.documentLocationType",
"equals" : "UPLOADED",
"type" : "simple"
},
"type" : "String"
}, {
"id" : "resultVariable",
"label" : "Result variable",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
"ocr"
}),
inputDataClass = TextractRequest.class,
version = 1,
version = 2,
propertyGroups = {
@ElementTemplate.PropertyGroup(id = "authentication", label = "Authentication"),
@ElementTemplate.PropertyGroup(id = "configuration", label = "Configuration"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import com.amazonaws.services.textract.model.AnalyzeDocumentResult;
import com.amazonaws.services.textract.model.Document;
import io.camunda.connector.textract.model.TextractRequestData;
import java.nio.ByteBuffer;
import java.util.Objects;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand All @@ -22,7 +24,8 @@ public class SyncTextractCaller implements TextractCaller<AnalyzeDocumentResult>
public AnalyzeDocumentResult call(
TextractRequestData requestData, AmazonTextract textractClient) {
LOGGER.debug("Starting sync task for document analysis with request data: {}", requestData);
final Document document = new Document().withS3Object(prepareS3Obj(requestData));

final Document document = createDocument(requestData);

final AnalyzeDocumentRequest analyzeDocumentRequest =
new AnalyzeDocumentRequest()
Expand All @@ -31,4 +34,16 @@ public AnalyzeDocumentResult call(

return textractClient.analyzeDocument(analyzeDocumentRequest);
}

/**
 * Builds the Textract {@link Document} that is sent with the analysis request.
 *
 * <p>When the request carries a document, its bytes are embedded directly in the Textract
 * document. When no document is present, the Textract document points at the S3 object
 * produced by {@code prepareS3Obj(requestData)} instead.
 *
 * @param requestData connector input; {@code requestData.document()} may be {@code null}
 * @return a Textract document backed either by raw bytes or by an S3 object reference
 */
private Document createDocument(TextractRequestData requestData) {
  // A present document takes precedence over the S3 coordinates.
  if (requestData.document() != null) {
    final byte[] content = requestData.document().asByteArray();
    return new Document().withBytes(ByteBuffer.wrap(content));
  }

  return new Document().withS3Object(prepareS3Obj(requestData));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/*
* Copyright Camunda Services GmbH and/or licensed to Camunda Services GmbH
* under one or more contributor license agreements. Licensed under a proprietary license.
* See the License.txt file for more information. You may not use this file
* except in compliance with the proprietary license.
*/
package io.camunda.connector.textract.model;

/**
 * Selects where the document to be analyzed comes from.
 *
 * <p>The constant names are used verbatim as the dropdown values of the
 * {@code input.documentLocationType} template property.
 */
public enum DocumentLocationType {
  /** The document is referenced by its S3 bucket, path and optional version. */
  S3,

  /** The document is supplied directly with the request ("Uploaded document"). */
  UPLOADED
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@
*/
package io.camunda.connector.textract.model;

import io.camunda.connector.generator.dsl.Property;
import io.camunda.connector.generator.dsl.Property.FeelMode;
import io.camunda.connector.generator.java.annotation.TemplateProperty;
import io.camunda.document.Document;
import jakarta.validation.constraints.AssertTrue;
import jakarta.validation.constraints.NotBlank;
import jakarta.validation.constraints.NotNull;
import org.apache.commons.lang3.StringUtils;

Expand All @@ -28,23 +29,52 @@ public record TextractRequestData(
description = "Endpoint inference type")
@NotNull
TextractExecutionType executionType,
@TemplateProperty(
group = "input",
label = "Document location type",
description = "Document location",
feel = FeelMode.disabled,
type = TemplateProperty.PropertyType.Dropdown,
choices = {
@TemplateProperty.DropdownPropertyChoice(value = "S3", label = "S3"),
@TemplateProperty.DropdownPropertyChoice(
value = "UPLOADED",
label = "Uploaded document")
},
condition =
@TemplateProperty.PropertyCondition(
property = "input.executionType",
equals = "SYNC"))
DocumentLocationType documentLocationType,
@TemplateProperty(
group = "input",
label = "Document bucket",
description = "S3 bucket that contains document that needs to be processed")
@NotBlank
description = "S3 bucket that contains document that needs to be processed",
condition =
@TemplateProperty.PropertyCondition(
property = "input.documentLocationType",
equals = "S3"),
constraints = @TemplateProperty.PropertyConstraints(notEmpty = true))
String documentS3Bucket,
@TemplateProperty(
group = "input",
label = "Document path",
description = "S3 document path to be processed")
@NotBlank
description = "S3 document path to be processed",
condition =
@TemplateProperty.PropertyCondition(
property = "input.documentLocationType",
equals = "S3"),
constraints = @TemplateProperty.PropertyConstraints(notEmpty = true))
String documentName,
@TemplateProperty(
group = "input",
label = "Document version",
description = "S3 document version to be processed",
optional = true)
optional = true,
condition =
@TemplateProperty.PropertyCondition(
property = "input.documentLocationType",
equals = "S3"))
String documentVersion,
@TemplateProperty(
label = "Analyze tables",
Expand Down Expand Up @@ -150,14 +180,25 @@ public record TextractRequestData(
@TemplateProperty.PropertyCondition(
property = "input.executionType",
equals = "ASYNC"))
String outputConfigS3Prefix) {
String outputConfigS3Prefix,
@TemplateProperty(
group = "input",
label = "Document",
feel = Property.FeelMode.required,
type = TemplateProperty.PropertyType.String,
condition =
@TemplateProperty.PropertyCondition(
property = "input.documentLocationType",
equals = "UPLOADED"),
constraints = @TemplateProperty.PropertyConstraints(notEmpty = true))
Document document) {
@TemplateProperty(ignore = true)
public static final String WRONG_OUTPUT_VALUES_MSG =
"Output S3 bucket must be filled in if output S3 prefix is filled in";

@TemplateProperty(ignore = true)
public static final String WRONG_NOTIFICATION_VALUES_MSG =
"either both notification values role ARN and topic ARN must be filled in or none of them";
"Either both notification values role ARN and topic ARN must be filled in or none of them";

@AssertTrue(message = WRONG_NOTIFICATION_VALUES_MSG)
public boolean isValidNotificationProperties() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import com.amazonaws.services.textract.AmazonTextractAsyncClient;
import com.amazonaws.services.textract.model.StartDocumentAnalysisRequest;
import com.amazonaws.services.textract.model.StartDocumentAnalysisResult;
import io.camunda.connector.textract.model.DocumentLocationType;
import io.camunda.connector.textract.model.TextractExecutionType;
import io.camunda.connector.textract.model.TextractRequestData;
import org.junit.jupiter.api.Test;
Expand Down Expand Up @@ -100,6 +101,7 @@ void callWithoutOutputS3BucketShouldNotCreateOutputObj() {
private TextractRequestData prepareReqData(String roleArn, String topicArn) {
return new TextractRequestData(
TextractExecutionType.ASYNC,
DocumentLocationType.S3,
"test-bucket",
"test-object",
"1",
Expand All @@ -113,12 +115,14 @@ private TextractRequestData prepareReqData(String roleArn, String topicArn) {
roleArn,
topicArn,
"outputBucket",
"prefix");
"prefix",
null);
}

private TextractRequestData prepareReqDataWithoutOutputS3Bucket() {
return new TextractRequestData(
TextractExecutionType.ASYNC,
DocumentLocationType.S3,
"test-bucket",
"test-object",
"1",
Expand All @@ -132,6 +136,7 @@ private TextractRequestData prepareReqDataWithoutOutputS3Bucket() {
"roleArn",
"topicArn",
"",
"prefix");
"prefix",
null);
}
}
Loading

0 comments on commit 0b49829

Please sign in to comment.