Skip to content

Commit

Permalink
Resolve
Browse files: browse the repository at this point in the history
  • Loading branch information
pseudotensor committed Oct 31, 2024
2 parents b575c39 + d6fa55b commit fcb1cf3
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 9 deletions.
20 changes: 12 additions & 8 deletions src/gpt_langchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -4815,14 +4815,18 @@ def file_to_doc(file,
docs1.extend(docs1a)
if len(docs1) == 0 and have_playwright or do_playwright:
# then something went wrong, try another loader:
from langchain_community.document_loaders import PlaywrightURLLoader
docs1a = asyncio.run(PlaywrightURLLoader(urls=final_urls).aload())
# docs1 = PlaywrightURLLoader(urls=[file]).load()
docs1a = [x for x in docs1a if
x.page_content and x.page_content != '403 Forbidden' and not x.page_content.startswith(
'Access Denied')]
add_parser(docs1a, 'PlaywrightURLLoader')
docs1.extend(docs1a)
try:
from langchain_community.document_loaders import PlaywrightURLLoader
docs1a = asyncio.run(PlaywrightURLLoader(urls=final_urls).aload())
# docs1 = PlaywrightURLLoader(urls=[file]).load()
docs1a = [x for x in docs1a if
x.page_content and x.page_content != '403 Forbidden' and not x.page_content.startswith(
'Access Denied')]
add_parser(docs1a, 'PlaywrightURLLoader')
docs1.extend(docs1a)
except Exception as e0:
traceback.print_exc()
print("playwright failed: %s: %s" % (str(e0), traceback.print_exception(e0)), flush=True)
if len(docs1) == 0 and have_selenium or do_selenium:
# then something went wrong, try another loader:
# but requires Chrome binary, else get: selenium.common.exceptions.WebDriverException:
Expand Down
2 changes: 1 addition & 1 deletion src/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "5a8d993bc86b9b9e1f376a9d71fc9ed193c51e3a"
__version__ = "b575c3954deaa72eb3b52b64b78f2ac7d39461c2"

0 comments on commit fcb1cf3

Please sign in to comment.