Skip to content

Commit

Permalink
fix support bug (#142)
Browse files Browse the repository at this point in the history
  • Loading branch information
yaojin3616 authored Nov 12, 2023
2 parents 477bf79 + a2555db commit 06678fc
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 30 deletions.
2 changes: 1 addition & 1 deletion docker/bisheng/config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ redis_url: "redis://redis:6379/0"

environment:
env: dev
uns_support: ['doc', 'docx', 'ppt', 'pptx', 'xls', 'xlsx', 'txt', 'md', 'html']
uns_support: ['png','jpg','jpeg','bmp','doc', 'docx', 'ppt', 'pptx', 'xls', 'xlsx', 'txt', 'md', 'html', 'pdf']

# admin 用户配置
admin:
Expand Down
16 changes: 15 additions & 1 deletion src/backend/bisheng/api/v1/endpoints.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import copy
import json
from typing import Optional

import yaml
from bisheng import settings
from bisheng.api.v1 import knowledge
from bisheng.api.v1.schemas import ProcessResponse, UploadFileResponse
from bisheng.cache.redis import redis_client
from bisheng.cache.utils import save_uploaded_file
Expand All @@ -29,7 +31,19 @@ def get_all():

@router.get('/env')
def getn_env():
return {'data': settings.settings.environment}
uns_support = ['png', 'jpg', 'jpeg', 'bmp', 'doc', 'docx', 'ppt',
'pptx', 'xls', 'xlsx', 'txt', 'md', 'html', 'pdf']
env = {}
if isinstance(settings.settings.environment, str):
env['env'] = settings.settings.environment
else:
env = copy.deepcopy(settings.settings.environment)
if settings.settings.get_knowledge().get('unstructured_api_url'):
if not env.get('uns_support'):
env['uns_support'] = uns_support
else:
env['uns_support'] = list(knowledge.filetype_load_map.keys())
return {'data': env}


@router.get('/config')
Expand Down
29 changes: 16 additions & 13 deletions src/backend/bisheng/api/v1/knowledge.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,33 @@
from bisheng.utils.logger import logger
from bisheng_langchain.document_loaders.elem_unstrcutured_loader import ElemUnstructuredLoader
from bisheng_langchain.embeddings.host_embedding import HostEmbeddings
from bisheng_langchain.text_splitter import ElemCharacterTextSplitter
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile
from fastapi.encoders import jsonable_encoder
from fastapi_jwt_auth import AuthJWT
from langchain.document_loaders import (BSHTMLLoader, PyPDFLoader, TextLoader,
UnstructuredMarkdownLoader, UnstructuredPowerPointLoader,
UnstructuredWordDocumentLoader)
from langchain.embeddings.base import Embeddings
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Milvus
from langchain.vectorstores.base import VectorStore
from sqlalchemy import func, or_
from sqlmodel import Session, select

# build router
router = APIRouter(prefix='/knowledge', tags=['Skills'])
# Maps a file extension (the text after the last '.', without the dot) to the
# langchain document loader class used for that file type.
# _read_chunk_text checks membership in this table when no
# 'unstructured_api_url' is configured in the knowledge settings, and the
# /env endpoint reports these keys as the supported upload types in that case.
# Keys are lowercase; NOTE(review): the visible lookup does not lowercase the
# extension first, so 'PDF' would not match - confirm whether that is intended.
filetype_load_map = {
'txt': TextLoader,
'pdf': PyPDFLoader,
'html': BSHTMLLoader,
'md': UnstructuredMarkdownLoader,
# .doc and .docx share the Word loader
'doc': UnstructuredWordDocumentLoader,
'docx': UnstructuredWordDocumentLoader,
# .ppt and .pptx share the PowerPoint loader
'ppt': UnstructuredPowerPointLoader,
'pptx': UnstructuredPowerPointLoader,
}


@router.post('/upload', response_model=UploadFileResponse, status_code=201)
Expand Down Expand Up @@ -335,9 +350,9 @@ async def addEmbedding(collection_name, model: str, chunk_size: int, separator:

for index, path in enumerate(file_paths):
knowledge_file = knowledge_files[index]
session = next(get_session())
try:
# 存储 mysql
session = next(get_session())
db_file = session.get(KnowledgeFile, knowledge_file.id)
setattr(db_file, 'status', 2)
setattr(db_file, 'object_name', knowledge_file.file_name)
Expand All @@ -363,7 +378,6 @@ async def addEmbedding(collection_name, model: str, chunk_size: int, separator:
session.refresh(db_file)
except Exception as e:
logger.exception(e)
session = next(get_session())
db_file = session.get(KnowledgeFile, knowledge_file.id)
setattr(db_file, 'status', 3)
setattr(db_file, 'remark', str(e)[:500])
Expand All @@ -373,17 +387,6 @@ async def addEmbedding(collection_name, model: str, chunk_size: int, separator:


def _read_chunk_text(input_file, file_name, size, chunk_overlap, separator):
from langchain.document_loaders import (PyPDFLoader, BSHTMLLoader, TextLoader,
UnstructuredMarkdownLoader)
from langchain.text_splitter import CharacterTextSplitter
from bisheng_langchain.text_splitter import ElemCharacterTextSplitter
filetype_load_map = {
'txt': TextLoader,
'pdf': PyPDFLoader,
'html': BSHTMLLoader,
'md': UnstructuredMarkdownLoader,
}

if not settings.get_knowledge().get('unstructured_api_url'):
file_type = file_name.split('.')[-1]
if file_type not in filetype_load_map:
Expand Down
15 changes: 7 additions & 8 deletions src/backend/bisheng/database/models/flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,9 @@

from bisheng.database.models.base import SQLModelSerializable
# if TYPE_CHECKING:
from bisheng.database.models.flow_style import FlowStyle, FlowStyleRead
from pydantic import validator
from sqlalchemy import Column, DateTime, text
from sqlmodel import JSON, Field, Relationship
from sqlmodel import JSON, Field


class FlowBase(SQLModelSerializable):
Expand Down Expand Up @@ -48,11 +47,11 @@ def validate_json(v):
class Flow(FlowBase, table=True):
id: UUID = Field(default_factory=uuid4, primary_key=True, unique=True)
data: Optional[Dict] = Field(default=None, sa_column=Column(JSON))
style: Optional['FlowStyle'] = Relationship(
back_populates='flow',
# use "uselist=False" to make it a one-to-one relationship
sa_relationship_kwargs={'uselist': False},
)
# style: Optional['FlowStyle'] = Relationship(
# back_populates='flow',
# # use "uselist=False" to make it a one-to-one relationship
# sa_relationship_kwargs={'uselist': False},
# )


class FlowCreate(FlowBase):
Expand All @@ -65,7 +64,7 @@ class FlowRead(FlowBase):


class FlowReadWithStyle(FlowRead):
style: Optional['FlowStyleRead'] = None
# style: Optional['FlowStyleRead'] = None
total: Optional[int] = None


Expand Down
10 changes: 3 additions & 7 deletions src/backend/bisheng/database/models/flow_style.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,20 @@
# Path: src/backend/bisheng/database/models/flow_style.py

from typing import TYPE_CHECKING, Optional
from typing import Optional
from uuid import UUID, uuid4

from bisheng.database.models.base import SQLModelSerializable
from sqlmodel import Field, Relationship

if TYPE_CHECKING:
from bisheng.database.models.flow import Flow
from sqlmodel import Field


class FlowStyleBase(SQLModelSerializable):
color: str
emoji: str
flow_id: UUID = Field(default=None, foreign_key='flow.id')
flow_id: UUID = Field(default=None)


class FlowStyle(FlowStyleBase, table=True):
id: UUID = Field(default_factory=uuid4, primary_key=True, unique=True)
flow: 'Flow' = Relationship(back_populates='style')


class FlowStyleUpdate(SQLModelSerializable):
Expand Down

0 comments on commit 06678fc

Please sign in to comment.