diff --git a/api/core/helper/ssrf_proxy.py b/api/core/helper/ssrf_proxy.py
index c7a834a6b9632f..2e422cf444c08c 100644
--- a/api/core/helper/ssrf_proxy.py
+++ b/api/core/helper/ssrf_proxy.py
@@ -60,20 +60,17 @@ def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
             if response.status_code not in STATUS_FORCELIST:
                 return response
             else:
-                logging.warning(
-                    f"Received status code {response.status_code} for URL {url} which is in the force list")
+                logging.warning(f"Received status code {response.status_code} for URL {url} which is in the force list")

         except httpx.RequestError as e:
-            logging.warning(f"Request to URL {url} failed on attempt {
-                retries + 1}: {e}")
+            logging.warning(f"Request to URL {url} failed on attempt {retries + 1}: {e}")
             if max_retries == 0:
                 raise

         retries += 1
         if retries <= max_retries:
             time.sleep(BACKOFF_FACTOR * (2 ** (retries - 1)))
-    raise MaxRetriesExceededError(
-        f"Reached maximum retries ({max_retries}) for URL {url}")
+    raise MaxRetriesExceededError(f"Reached maximum retries ({max_retries}) for URL {url}")


 def get(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
diff --git a/api/core/rag/datasource/vdb/lindorm/lindorm_vector.py b/api/core/rag/datasource/vdb/lindorm/lindorm_vector.py
index 3b3e47578a438d..3fa0ce16d36064 100644
--- a/api/core/rag/datasource/vdb/lindorm/lindorm_vector.py
+++ b/api/core/rag/datasource/vdb/lindorm/lindorm_vector.py
@@ -17,8 +17,7 @@
 from models.dataset import Dataset

 logger = logging.getLogger(__name__)
-logging.basicConfig(level=logging.INFO,
-                    format="%(asctime)s - %(levelname)s - %(message)s")
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 logging.getLogger("lindorm").setLevel(logging.WARN)

 ROUTING_FIELD = "routing_field"
@@ -135,8 +134,7 @@ def delete_by_ids(self, ids: list[str]) -> None:
                 self._client.delete(index=self._collection_name, id=id, params=params)
                 self.refresh()
             else:
-                logger.warning(
-                    f"DELETE BY ID: ID {id} does not exist in the index.")
+                logger.warning(f"DELETE BY ID: ID {id} does not exist in the index.")

     def delete(self) -> None:
         if self._using_ugc:
@@ -147,8 +145,7 @@ def delete(self) -> None:
             self.refresh()
         else:
             if self._client.indices.exists(index=self._collection_name):
-                self._client.indices.delete(
-                    index=self._collection_name, params={"timeout": 60})
+                self._client.indices.delete(index=self._collection_name, params={"timeout": 60})
                 logger.info("Delete index success")
             else:
                 logger.warning(f"Index '{self._collection_name}' does not exist. No deletion performed.")
@@ -171,14 +168,13 @@ def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Doc
             raise ValueError("All elements in query_vector should be floats")

         top_k = kwargs.get("top_k", 10)
-        query = default_vector_search_query(
-            query_vector=query_vector, k=top_k, **kwargs)
+        query = default_vector_search_query(query_vector=query_vector, k=top_k, **kwargs)
         try:
             params = {}
             if self._using_ugc:
                 params["routing"] = self._routing
             response = self._client.search(index=self._collection_name, body=query, params=params)
-        except Exception as e:
+        except Exception:
             logger.exception(f"Error executing vector search, query: {query}")
             raise

@@ -224,8 +220,7 @@ def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
             routing=routing,
             routing_field=self._routing_field,
         )
-        response = self._client.search(
-            index=self._collection_name, body=full_text_query)
+        response = self._client.search(index=self._collection_name, body=full_text_query)
         docs = []
         for hit in response["hits"]["hits"]:
             docs.append(
@@ -243,8 +238,7 @@ def create_collection(self, dimension: int, **kwargs):
         with redis_client.lock(lock_name, timeout=20):
             collection_exist_cache_key = f"vector_indexing_{self._collection_name}"
             if redis_client.get(collection_exist_cache_key):
-                logger.info(
-                    f"Collection {self._collection_name} already exists.")
+                logger.info(f"Collection {self._collection_name} already exists.")
                 return
             if self._client.indices.exists(index=self._collection_name):
                 logger.info(f"{self._collection_name.lower()} already exists.")
@@ -264,13 +258,10 @@ def create_collection(self, dimension: int, **kwargs):
             hnsw_ef_construction = kwargs.pop("hnsw_ef_construction", 500)
             ivfpq_m = kwargs.pop("ivfpq_m", dimension)
             nlist = kwargs.pop("nlist", 1000)
-            centroids_use_hnsw = kwargs.pop(
-                "centroids_use_hnsw", True if nlist >= 5000 else False)
+            centroids_use_hnsw = kwargs.pop("centroids_use_hnsw", True if nlist >= 5000 else False)
             centroids_hnsw_m = kwargs.pop("centroids_hnsw_m", 24)
-            centroids_hnsw_ef_construct = kwargs.pop(
-                "centroids_hnsw_ef_construct", 500)
-            centroids_hnsw_ef_search = kwargs.pop(
-                "centroids_hnsw_ef_search", 100)
+            centroids_hnsw_ef_construct = kwargs.pop("centroids_hnsw_ef_construct", 500)
+            centroids_hnsw_ef_search = kwargs.pop("centroids_hnsw_ef_search", 100)
             mapping = default_text_mapping(
                 dimension,
                 method_name,
@@ -290,8 +281,7 @@ def create_collection(self, dimension: int, **kwargs):
                 using_ugc=self._using_ugc,
                 **kwargs,
             )
-            self._client.indices.create(
-                index=self._collection_name.lower(), body=mapping)
+            self._client.indices.create(index=self._collection_name.lower(), body=mapping)
             redis_client.set(collection_exist_cache_key, 1, ex=3600)
             # logger.info(f"create index success: {self._collection_name}")

@@ -396,8 +386,7 @@ def default_text_search_query(
     # build complex search_query when either of must/must_not/should/filter is specified
     if must:
         if not isinstance(must, list):
-            raise RuntimeError(
-                f"unexpected [must] clause with {type(filters)}")
+            raise RuntimeError(f"unexpected [must] clause with {type(filters)}")
         if query_clause not in must:
             must.append(query_clause)
     else:
@@ -407,22 +396,19 @@

     if must_not:
         if not isinstance(must_not, list):
-            raise RuntimeError(
-                f"unexpected [must_not] clause with {type(filters)}")
+            raise RuntimeError(f"unexpected [must_not] clause with {type(filters)}")
         boolean_query["must_not"] = must_not

     if should:
         if not isinstance(should, list):
-            raise RuntimeError(
-                f"unexpected [should] clause with {type(filters)}")
+            raise RuntimeError(f"unexpected [should] clause with {type(filters)}")
         boolean_query["should"] = should
         if minimum_should_match != 0:
             boolean_query["minimum_should_match"] = minimum_should_match

     if filters:
         if not isinstance(filters, list):
-            raise RuntimeError(
-                f"unexpected [filter] clause with {type(filters)}")
+            raise RuntimeError(f"unexpected [filter] clause with {type(filters)}")
         boolean_query["filter"] = filters

     search_query = {"size": k, "query": {"bool": boolean_query}}
diff --git a/api/core/workflow/nodes/question_classifier/question_classifier_node.py b/api/core/workflow/nodes/question_classifier/question_classifier_node.py
index 9aa43f58d556ba..0ec44eefacf52f 100644
--- a/api/core/workflow/nodes/question_classifier/question_classifier_node.py
+++ b/api/core/workflow/nodes/question_classifier/question_classifier_node.py
@@ -44,13 +44,11 @@ def _run(self):
         variable_pool = self.graph_runtime_state.variable_pool

         # extract variables
-        variable = variable_pool.get(
-            node_data.query_variable_selector) if node_data.query_variable_selector else None
+        variable = variable_pool.get(node_data.query_variable_selector) if node_data.query_variable_selector else None
         query = variable.value if variable else None
         variables = {"query": query}
         # fetch model config
-        model_instance, model_config = self._fetch_model_config(
-            node_data.model)
+        model_instance, model_config = self._fetch_model_config(node_data.model)
         # fetch memory
         memory = self._fetch_memory(
             node_data_memory=node_data.memory,
@@ -58,8 +56,7 @@ def _run(self):
         )
         # fetch instruction
         node_data.instruction = node_data.instruction or ""
-        node_data.instruction = variable_pool.convert_template(
-            node_data.instruction).text
+        node_data.instruction = variable_pool.convert_template(node_data.instruction).text

         files = (
             self._fetch_files(
@@ -181,15 +178,12 @@ def _extract_variable_selector_to_variable_mapping(
         variable_mapping = {"query": node_data.query_variable_selector}
         variable_selectors = []
         if node_data.instruction:
-            variable_template_parser = VariableTemplateParser(
-                template=node_data.instruction)
-            variable_selectors.extend(
-                variable_template_parser.extract_variable_selectors())
+            variable_template_parser = VariableTemplateParser(template=node_data.instruction)
+            variable_selectors.extend(variable_template_parser.extract_variable_selectors())

         for variable_selector in variable_selectors:
             variable_mapping[variable_selector.variable] = variable_selector.value_selector
-        variable_mapping = {node_id + "." + key: value for key,
-                            value in variable_mapping.items()}
+        variable_mapping = {node_id + "." + key: value for key, value in variable_mapping.items()}

         return variable_mapping

@@ -210,8 +204,7 @@ def _calculate_rest_token(
         context: Optional[str],
     ) -> int:
         prompt_transform = AdvancedPromptTransform(with_variable_tmpl=True)
-        prompt_template = self._get_prompt_template(
-            node_data, query, None, 2000)
+        prompt_template = self._get_prompt_template(node_data, query, None, 2000)
         prompt_messages = prompt_transform.get_prompt(
             prompt_template=prompt_template,
             inputs={},
@@ -224,15 +217,13 @@ def _calculate_rest_token(
         )

         rest_tokens = 2000
-        model_context_tokens = model_config.model_schema.model_properties.get(
-            ModelPropertyKey.CONTEXT_SIZE)
+        model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)
         if model_context_tokens:
             model_instance = ModelInstance(
                 provider_model_bundle=model_config.provider_model_bundle, model=model_config.model
             )

-            curr_message_tokens = model_instance.get_llm_num_tokens(
-                prompt_messages)
+            curr_message_tokens = model_instance.get_llm_num_tokens(prompt_messages)

             max_tokens = 0
             for parameter_rule in model_config.model_schema.parameter_rules:
@@ -273,8 +264,7 @@ def _get_prompt_template(
         prompt_messages: list[LLMNodeChatModelMessage] = []
         if model_mode == ModelMode.CHAT:
             system_prompt_messages = LLMNodeChatModelMessage(
-                role=PromptMessageRole.SYSTEM, text=QUESTION_CLASSIFIER_SYSTEM_PROMPT.format(
-                    histories=memory_str)
+                role=PromptMessageRole.SYSTEM, text=QUESTION_CLASSIFIER_SYSTEM_PROMPT.format(histories=memory_str)
             )
             prompt_messages.append(system_prompt_messages)
             user_prompt_message_1 = LLMNodeChatModelMessage(
@@ -315,5 +305,4 @@ def _get_prompt_template(
             )

         else:
-            raise InvalidModelTypeError(
-                f"Model mode {model_mode} not support.")
+            raise InvalidModelTypeError(f"Model mode {model_mode} not support.")
diff --git a/api/pytest.ini b/api/pytest.ini
index 993da4c9a78935..b08cca524030b7 100644
--- a/api/pytest.ini
+++ b/api/pytest.ini
@@ -7,6 +7,12 @@ env =
     CODE_EXECUTION_API_KEY = dify-sandbox
    CODE_EXECUTION_ENDPOINT = http://127.0.0.1:8194
    CODE_MAX_STRING_LENGTH = 80000
+    PLUGIN_API_KEY=lYkiYYT6owG+71oLerGzA7GXCgOT++6ovaezWAjpCjf+Sjc3ZtU+qUEi
+    PLUGIN_DAEMON_URL=http://127.0.0.1:5002
+    PLUGIN_MAX_PACKAGE_SIZE=15728640
+    INNER_API_KEY_FOR_PLUGIN=QaHbTe77CtuXmsfyhR7+vRjI/+XbV1AaFy691iy+kGDv2Jvy0/eAh8Y1
+    MARKETPLACE_ENABLED=true
+    MARKETPLACE_API_URL=https://marketplace.dify.ai
    FIRECRAWL_API_KEY = fc-
    FIREWORKS_API_KEY = fw_aaaaaaaaaaaaaaaaaaaa
    GOOGLE_API_KEY = abcdefghijklmnopqrstuvwxyz
diff --git a/api/tests/unit_tests/core/workflow/nodes/http_request/test_http_request_executor.py b/api/tests/unit_tests/core/workflow/nodes/http_request/test_http_request_executor.py
index 285384ee6e4093..58b910e17bae4a 100644
--- a/api/tests/unit_tests/core/workflow/nodes/http_request/test_http_request_executor.py
+++ b/api/tests/unit_tests/core/workflow/nodes/http_request/test_http_request_executor.py
@@ -68,8 +68,7 @@ def test_executor_with_json_body_and_object_variable():
         system_variables={},
         user_inputs={},
     )
-    variable_pool.add(["pre_node_id", "object"], {
-                      "name": "John Doe", "age": 30, "email": "john@example.com"})
+    variable_pool.add(["pre_node_id", "object"], {"name": "John Doe", "age": 30, "email": "john@example.com"})

     # Prepare the node data
     node_data = HttpRequestNodeData(
@@ -124,8 +123,7 @@ def test_executor_with_json_body_and_nested_object_variable():
         system_variables={},
         user_inputs={},
     )
-    variable_pool.add(["pre_node_id", "object"], {
-                      "name": "John Doe", "age": 30, "email": "john@example.com"})
+    variable_pool.add(["pre_node_id", "object"], {"name": "John Doe", "age": 30, "email": "john@example.com"})

     # Prepare the node data
     node_data = HttpRequestNodeData(
diff --git a/api/tests/unit_tests/core/workflow/nodes/http_request/test_http_request_node.py b/api/tests/unit_tests/core/workflow/nodes/http_request/test_http_request_node.py
index 1375d835d3af97..97bacada74572d 100644
--- a/api/tests/unit_tests/core/workflow/nodes/http_request/test_http_request_node.py
+++ b/api/tests/unit_tests/core/workflow/nodes/http_request/test_http_request_node.py
@@ -18,14 +18,6 @@
 from models.workflow import WorkflowNodeExecutionStatus, WorkflowType


-def test_plain_text_to_dict():
-    assert _plain_text_to_dict("aa\n cc:") == {"aa": "", "cc": ""}
-    assert _plain_text_to_dict("aa:bb\n cc:dd") == {"aa": "bb", "cc": "dd"}
-    assert _plain_text_to_dict("aa:bb\n cc:dd\n") == {"aa": "bb", "cc": "dd"}
-    assert _plain_text_to_dict("aa:bb\n\n cc : dd\n\n") == {
-        "aa": "bb", "cc": "dd"}
-
-
 def test_http_request_node_binary_file(monkeypatch):
     data = HttpRequestNodeData(
         title="test",
@@ -191,8 +183,7 @@ def test_http_request_node_form_with_file(monkeypatch):

     def attr_checker(*args, **kwargs):
         assert kwargs["data"] == {"name": "test"}
-        assert kwargs["files"] == {
-            "file": (None, b"test", "application/octet-stream")}
+        assert kwargs["files"] == {"file": (None, b"test", "application/octet-stream")}
         return httpx.Response(200, content=b"")

     monkeypatch.setattr(