From d6fa55bc87b9de6d13278b148946b9e4d54bceb9 Mon Sep 17 00:00:00 2001 From: "Jonathan C. McKinney" Date: Thu, 31 Oct 2024 09:38:55 -0700 Subject: [PATCH] Protection --- src/gpt_langchain.py | 20 ++++++++++++-------- src/version.py | 2 +- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/gpt_langchain.py b/src/gpt_langchain.py index 07afebd64..3eb7cd920 100644 --- a/src/gpt_langchain.py +++ b/src/gpt_langchain.py @@ -4815,14 +4815,18 @@ def file_to_doc(file, docs1.extend(docs1a) if len(docs1) == 0 and have_playwright or do_playwright: # then something went wrong, try another loader: - from langchain_community.document_loaders import PlaywrightURLLoader - docs1a = asyncio.run(PlaywrightURLLoader(urls=final_urls).aload()) - # docs1 = PlaywrightURLLoader(urls=[file]).load() - docs1a = [x for x in docs1a if - x.page_content and x.page_content != '403 Forbidden' and not x.page_content.startswith( - 'Access Denied')] - add_parser(docs1a, 'PlaywrightURLLoader') - docs1.extend(docs1a) + try: + from langchain_community.document_loaders import PlaywrightURLLoader + docs1a = asyncio.run(PlaywrightURLLoader(urls=final_urls).aload()) + # docs1 = PlaywrightURLLoader(urls=[file]).load() + docs1a = [x for x in docs1a if + x.page_content and x.page_content != '403 Forbidden' and not x.page_content.startswith( + 'Access Denied')] + add_parser(docs1a, 'PlaywrightURLLoader') + docs1.extend(docs1a) + except Exception as e0: + traceback.print_exc() + print("playwright failed: %s: %s" % (str(e0), traceback.format_exc()), flush=True) if len(docs1) == 0 and have_selenium or do_selenium: # then something went wrong, try another loader: # but requires Chrome binary, else get: selenium.common.exceptions.WebDriverException: diff --git a/src/version.py b/src/version.py index 15ed4b002..f9168f1ec 100644 --- a/src/version.py +++ b/src/version.py @@ -1 +1 @@ -__version__ = "f5a3cf5b09f5845a7177d1fff1c97b5267804202" +__version__ = "f3ce8a2491b1387b727280424a61680be896013b"