Permission denied error "PermissionError: [Errno 13] Permission denied: 'C:\\conda_tmp\\tmpmssmgevw'" #12

Rohit2387 · 2024-07-18T16:15:03Z

Describe the bug
PermissionError: [Errno 13] Permission denied: 'C:\conda_tmp\tmpmssmgevw'

PermissionError Traceback (most recent call last)
Cell In[5], line 13
11 if local_path:
12 loader = UnstructuredPDFLoader(file_path=local_path)
---> 13 data = loader.load()
14 else:
15 print("Upload a PDF file")

File ~\anaconda3\Lib\site-packages\langchain_core\document_loaders\base.py:30, in BaseLoader.load(self)
28 def load(self) -> List[Document]:
29 """Load data into Document objects."""
---> 30 return list(self.lazy_load())

File ~\anaconda3\Lib\site-packages\langchain_community\document_loaders\unstructured.py:89, in UnstructuredBaseLoader.lazy_load(self)
87 def lazy_load(self) -> Iterator[Document]:
88 """Load file."""
---> 89 elements = self._get_elements()
90 self._post_process_elements(elements)
91 if self.mode == "elements":

File ~\anaconda3\Lib\site-packages\langchain_community\document_loaders\pdf.py:73, in UnstructuredPDFLoader._get_elements(self)
70 def _get_elements(self) -> List:
71 from unstructured.partition.pdf import partition_pdf
---> 73 return partition_pdf(filename=self.file_path, **self.unstructured_kwargs)

File ~\anaconda3\Lib\site-packages\unstructured\documents\elements.py:593, in process_metadata..decorator..wrapper(*args, **kwargs)
591 @functools.wraps(func)
592 def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> list[Element]:
--> 593 elements = func(*args, **kwargs)
594 call_args = get_call_args_applying_defaults(func, *args, **kwargs)
596 regex_metadata: dict["str", "str"] = call_args.get("regex_metadata", {})

File ~\anaconda3\Lib\site-packages\unstructured\file_utils\filetype.py:626, in add_filetype..decorator..wrapper(*args, **kwargs)
624 @functools.wraps(func)
625 def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> List[Element]:
--> 626 elements = func(*args, **kwargs)
627 params = get_call_args_applying_defaults(func, *args, **kwargs)
628 include_metadata = params.get("include_metadata", True)

File ~\anaconda3\Lib\site-packages\unstructured\file_utils\filetype.py:582, in add_metadata..wrapper(*args, **kwargs)
580 @functools.wraps(func)
581 def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> List[Element]:
--> 582 elements = func(*args, **kwargs)
583 call_args = get_call_args_applying_defaults(func, *args, **kwargs)
584 include_metadata = call_args.get("include_metadata", True)

File ~\anaconda3\Lib\site-packages\unstructured\chunking\dispatch.py:74, in add_chunking_strategy..wrapper(*args, **kwargs)
71 """The decorated function is replaced with this one."""
73 # -- call the partitioning function to get the elements --
---> 74 elements = func(*args, **kwargs)
76 # -- look for a chunking-strategy argument --
77 call_args = get_call_args_applying_defaults(func, *args, **kwargs)

File ~\anaconda3\Lib\site-packages\unstructured\partition\pdf.py:202, in partition_pdf(filename, file, include_page_breaks, strategy, infer_table_structure, ocr_languages, languages, include_metadata, metadata_filename, metadata_last_modified, chunking_strategy, hi_res_model_name, extract_images_in_pdf, extract_image_block_types, extract_image_block_output_dir, extract_image_block_to_payload, date_from_file_object, starting_page_number, extract_forms, form_extraction_skip_tables, **kwargs)
198 exactly_one(filename=filename, file=file)
200 languages = check_language_args(languages or [], ocr_languages) or ["eng"]
--> 202 return partition_pdf_or_image(
203 filename=filename,
204 file=file,
205 include_page_breaks=include_page_breaks,
206 strategy=strategy,
207 infer_table_structure=infer_table_structure,
208 languages=languages,
209 metadata_last_modified=metadata_last_modified,
210 hi_res_model_name=hi_res_model_name,
211 extract_images_in_pdf=extract_images_in_pdf,
212 extract_image_block_types=extract_image_block_types,
213 extract_image_block_output_dir=extract_image_block_output_dir,
214 extract_image_block_to_payload=extract_image_block_to_payload,
215 date_from_file_object=date_from_file_object,
216 starting_page_number=starting_page_number,
217 extract_forms=extract_forms,
218 form_extraction_skip_tables=form_extraction_skip_tables,
219 **kwargs,
220 )

File ~\anaconda3\Lib\site-packages\unstructured\partition\pdf.py:341, in partition_pdf_or_image(filename, file, is_image, include_page_breaks, strategy, infer_table_structure, ocr_languages, languages, metadata_last_modified, hi_res_model_name, extract_images_in_pdf, extract_image_block_types, extract_image_block_output_dir, extract_image_block_to_payload, date_from_file_object, starting_page_number, extract_forms, form_extraction_skip_tables, **kwargs)
330 with warnings.catch_warnings():
331 elements = _partition_pdf_or_image_with_ocr(
332 filename=filename,
333 file=file,
(...)
339 **kwargs,
340 )
--> 341 out_elements = _process_uncategorized_text_elements(elements)
343 return out_elements

File ~\anaconda3\Lib\site-packages\unstructured\partition\pdf.py:920, in _process_uncategorized_text_elements(elements)
918 for el in elements:
919 if hasattr(el, "category") and el.category == ElementType.UNCATEGORIZED_TEXT:
--> 920 new_el = element_from_text(cast(Text, el).text)
921 new_el.metadata = el.metadata
922 else:

File ~\anaconda3\Lib\site-packages\unstructured\partition\text.py:294, in element_from_text(text, coordinates, coordinate_system)
288 elif is_possible_numbered_list(text):
289 return ListItem(
290 text=text,
291 coordinates=coordinates,
292 coordinate_system=coordinate_system,
293 )
--> 294 elif is_possible_narrative_text(text):
295 return NarrativeText(
296 text=text,
297 coordinates=coordinates,
298 coordinate_system=coordinate_system,
299 )
300 elif is_possible_title(text):

File ~\anaconda3\Lib\site-packages\unstructured\partition\text_type.py:80, in is_possible_narrative_text(text, cap_threshold, non_alpha_threshold, languages, language_checks)
75 # NOTE(robinson): it gets read in from the environment as a string so we need to
76 # cast it to a float
77 cap_threshold = float(
78 os.environ.get("UNSTRUCTURED_NARRATIVE_TEXT_CAP_THRESHOLD", cap_threshold),
79 )
---> 80 if exceeds_cap_ratio(text, threshold=cap_threshold):
81 trace_logger.detail(f"Not narrative. Text exceeds cap ratio {cap_threshold}:\n\n{text}") # type: ignore # noqa: E501
82 return False

File ~\anaconda3\Lib\site-packages\unstructured\partition\text_type.py:276, in exceeds_cap_ratio(text, threshold)
263 """Checks the title ratio in a section of text. If a sufficient proportion of the words
264 are capitalized, that can be indicated on non-narrative text (i.e. "1A. Risk Factors").
265
(...)
272 the function returns True
273 """
274 # NOTE(robinson) - Currently limiting this to only sections of text with one sentence.
275 # The assumption is that sections with multiple sentences are not titles.
--> 276 if sentence_count(text, 3) > 1:
277 return False
279 if text.isupper():

File ~\anaconda3\Lib\site-packages\unstructured\partition\text_type.py:225, in sentence_count(text, min_length)
214 def sentence_count(text: str, min_length: Optional[int] = None) -> int:
215 """Checks the sentence count for a section of text. Titles should not be more than one
216 sentence.
217
(...)
223 The min number of words a section needs to be for it to be considered a sentence.
224 """
--> 225 sentences = sent_tokenize(text)
226 count = 0
227 for sentence in sentences:

File ~\anaconda3\Lib\site-packages\unstructured\nlp\tokenize.py:136, in sent_tokenize(text)
133 @lru_cache(maxsize=CACHE_MAX_SIZE)
134 def sent_tokenize(text: str) -> List[str]:
135 """A wrapper around the NLTK sentence tokenizer with LRU caching enabled."""
--> 136 _download_nltk_packages_if_not_present()
137 return _sent_tokenize(text)

File ~\anaconda3\Lib\site-packages\unstructured\nlp\tokenize.py:130, in _download_nltk_packages_if_not_present()
125 tokenizer_available = check_for_nltk_package(
126 package_category="tokenizers", package_name="punkt"
127 )
129 if not (tokenizer_available and tagger_available):
--> 130 download_nltk_packages()

File ~\anaconda3\Lib\site-packages\unstructured\nlp\tokenize.py:88, in download_nltk_packages()
86 with tempfile.NamedTemporaryFile() as tmp_file:
87 tgz_file = tmp_file.name
---> 88 urllib.request.urlretrieve(NLTK_DATA_URL, tgz_file)
90 file_hash = sha256_checksum(tgz_file)
91 if file_hash != NLTK_DATA_SHA256:

File ~\anaconda3\Lib\urllib\request.py:251, in urlretrieve(url, filename, reporthook, data)
249 # Handle temporary file setup.
250 if filename:
--> 251 tfp = open(filename, 'wb')
252 else:
253 tfp = tempfile.NamedTemporaryFile(delete=False)

PermissionError: [Errno 13] Permission denied: 'C:\conda_tmp\tmpmssmgevw'
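Calling `UnstructuredPDFLoader.load()` on Windows fails with `PermissionError: [Errno 13]` while `unstructured` is downloading its NLTK data. Per the traceback, `download_nltk_packages()` creates a file with `tempfile.NamedTemporaryFile()` and, while that file is still open, passes its name to `urllib.request.urlretrieve()`, which tries to `open(filename, 'wb')` again. On Windows a still-open `NamedTemporaryFile` generally cannot be opened a second time by name, which is the likely cause of the error. Note also that the failing path is under `C:\conda_tmp` even though the code below sets `TMPDIR` to a different directory.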

Code used to reproduce the error:

import os
!echo %TESSDATA_PREFIX%
!echo %TMPDIR%
os.environ['TESSDATA_PREFIX'] = r'C:\Users\Rohit\anaconda3\envs\ocr_env\share\tessdata'
local_path = "WEF_The_Global_Cooperation_Barometer_2024.pdf"

os.environ['TMPDIR'] = r'C:\Users\Aditi Rohit\AppData\Local\Temp'
!echo %TESSDATA_PREFIX%
!echo %TMPDIR%

Local PDF file uploads

if local_path:
loader = UnstructuredPDFLoader(file_path=local_path)
data = loader.load()
else:
print("Upload a PDF file")

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Permission denied error "PermissionError: [Errno 13] Permission denied: 'C:\\conda_tmp\\tmpmssmgevw'" #12

Permission denied error "PermissionError: [Errno 13] Permission denied: 'C:\\conda_tmp\\tmpmssmgevw'" #12

Rohit2387 commented Jul 18, 2024

Permission denied error "PermissionError: [Errno 13] Permission denied: 'C:\\conda_tmp\\tmpmssmgevw'" #12

Permission denied error "PermissionError: [Errno 13] Permission denied: 'C:\\conda_tmp\\tmpmssmgevw'" #12

Comments

Rohit2387 commented Jul 18, 2024

Local PDF file uploads