diff --git a/README.md b/README.md index f439854..297db83 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@
_Extractous offers a fast and efficient solution for extracting content and metadata from various documents types such as PDF, Word, HTML, and [many other formats](#supported-file-formats). -Our goal is to deliver a fast and efficient comprehensive solution in Rust with bindings for many programming +Our goal is to deliver a fast and efficient comprehensive solution in Rust with bindings for many programming languages._
@@ -27,7 +27,7 @@ languages._ --- **Demo**: showing that [Extractous 🚀](https://github.com/yobix-ai/extractous) is **25x faster** than the popular -[unstructured-io](https://github.com/Unstructured-IO/unstructured) library ($65m in funding and 8.5k GitHub stars). +[unstructured-io](https://github.com/Unstructured-IO/unstructured) library ($65m in funding and 8.5k GitHub stars). For complete benchmarking details please consult our [benchmarking repository](https://github.com/yobix-ai/extractous-benchmarks) ![unstructured_vs_extractous](https://github.com/yobix-ai/extractous-benchmarks/raw/main/docs/extractous_vs_unstructured.gif) @@ -55,7 +55,7 @@ With Extractous, the need for external services or APIs is eliminated, making da * High-performance unstructured data extraction optimized for speed and low memory usage. * Clear and simple API for extracting text and metadata content. * Automatically identifies document types and extracts content accordingly -* Supports [many file formats](#supported-file-formats) (most formats supported by Apache Tika). +* Supports [many file formats](#supported-file-formats) (most formats supported by Apache Tika). * Extracts text from images and scanned documents with OCR through [tesseract-ocr](https://github.com/tesseract-ocr/tesseract). * Core engine written in Rust with bindings for [Python](https://pypi.org/project/extractous/) and upcoming support for JavaScript/TypeScript. * Detailed documentation and examples to help you get started quickly and efficiently. 
@@ -77,13 +77,20 @@ extractor.set_extract_string_max_length(1000) result = extractor.extract_file_to_string("README.md") print(result) ``` -* Extracting a file to a buffered stream: +* Extracting a file(URL / bytearray) to a buffered stream: ```python from extractous import Extractor extractor = Extractor() +# for file reader = extractor.extract_file("tests/quarkus.pdf") +# for url +# reader = extractor.extract_url("https://www.google.com") +# for bytearray +# with open("tests/quarkus.pdf", "rb") as file: +# buffer = bytearray(file.read()) +# reader = extractor.extract_bytes(buffer) result = "" buffer = reader.read(4096) @@ -122,9 +129,10 @@ fn main() { } ``` -* Extract a content of a file to a `StreamReader` and perform buffered reading +* Extract a content of a file(URL/ bytes) to a `StreamReader` and perform buffered reading ```rust -use std::io::Read; +use std::io::{BufReader, Read}; +// use std::fs::File; use for bytes use extractous::Extractor; fn main() { @@ -135,17 +143,25 @@ fn main() { // Extract the provided file content to a string let extractor = Extractor::new(); let stream = extractor.extract_file(file_path).unwrap(); + // Extract url + // let stream = extractor.extract_url("https://www.google.com/").unwrap(); + // Extract bytes + // let mut file = File::open(file_path)?; + // let mut buffer = Vec::new(); + // file.read_to_end(&mut buffer)?; + // let stream= extractor.extract_bytes(&file_bytes); // Because stream implements std::io::Read trait we can perform buffered reading // For example we can use it to create a BufReader + let mut reader = BufReader::new(stream); let mut buffer = Vec::new(); - stream.read_to_end(&mut buffer).unwrap(); + reader.read_to_end(&mut buffer).unwrap(); println!("{}", String::from_utf8(buffer).unwrap()) } ``` -* Extract content of PDF with OCR. +* Extract content of PDF with OCR. You need to have Tesseract installed with the language pack. 
For example on debian `sudo apt install tesseract-ocr tesseract-ocr-deu` @@ -154,7 +170,7 @@ use extractous::Extractor; fn main() { let file_path = "../test_files/documents/deu-ocr.pdf"; - + let extractor = Extractor::new() .set_ocr_config(TesseractOcrConfig::new().set_language("deu")) .set_pdf_config(PdfParserConfig::new().set_ocr_strategy(PdfOcrStrategy::OCR_ONLY)); @@ -204,4 +220,4 @@ fn main() { Contributions are welcome! Please open an issue or submit a pull request if you have any improvements or new features to propose. ## 🕮 License -This project is licensed under the Apache License 2.0. See the LICENSE file for details. \ No newline at end of file +This project is licensed under the Apache License 2.0. See the LICENSE file for details. diff --git a/bindings/extractous-python/README.md b/bindings/extractous-python/README.md index 9aa83ee..998bbae 100644 --- a/bindings/extractous-python/README.md +++ b/bindings/extractous-python/README.md @@ -1,6 +1,6 @@ # Extractous Python Bindings -This project provides Python bindings for the Extractous library, allowing you to use extractous functionality in +This project provides Python bindings for the Extractous library, allowing you to use extractous functionality in your Python applications. 
## Installation @@ -25,13 +25,20 @@ result = extractor.extract_file_to_string("README.md") print(result) ``` -Extracting a file to a buffered stream: +Extracting a file(URL / bytearray) to a buffered stream: ```python from extractous import Extractor extractor = Extractor() +# for file reader = extractor.extract_file("tests/quarkus.pdf") +# for url +# reader = extractor.extract_url("https://www.google.com") +# for bytearray +# with open("tests/quarkus.pdf", "rb") as file: +# buffer = bytearray(file.read()) +# reader = extractor.extract_bytes(buffer) result = "" buffer = reader.read(4096) @@ -51,4 +58,4 @@ extractor = Extractor().set_ocr_config(TesseractOcrConfig().set_language("deu")) result = extractor.extract_file_to_string("../../test_files/documents/eng-ocr.pdf") print(result) -``` \ No newline at end of file +``` diff --git a/bindings/extractous-python/examples/extract_to_stream.py b/bindings/extractous-python/examples/extract_to_stream.py new file mode 100755 index 0000000..8068f14 --- /dev/null +++ b/bindings/extractous-python/examples/extract_to_stream.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 +import os +import sys + +from extractous import Extractor, PdfOcrStrategy, PdfParserConfig + + +def extract_to_stream(file_path: str): + + # Extract the file + extractor = Extractor() + reader = extractor.extract_file(in_file) + + buffer = bytearray(4096 * 4096) + while True: + bytes_read = reader.readinto(buffer) + # If no more data, exit the loop + if bytes_read == 0: + break + # Decode the valid portion of the buffer and append it to the result + chunk = buffer[:bytes_read].decode('utf-8') + print(chunk) + + +if __name__ == '__main__': + # Pare input args + if len(sys.argv) != 2: + print(f"Usage: '{sys.argv[0]}' ") + sys.exit(1) + in_file = sys.argv[1] + if not os.path.isfile(in_file): + raise FileNotFoundError(f"No such file: '{in_file}'") + + extract_to_stream(in_file) diff --git a/bindings/extractous-python/src/extractor.rs 
b/bindings/extractous-python/src/extractor.rs index 7376cca..ed95e7b 100644 --- a/bindings/extractous-python/src/extractor.rs +++ b/bindings/extractous-python/src/extractor.rs @@ -75,6 +75,18 @@ impl StreamReader { ))), } } + + /// Reads into the specified buffer + pub fn readinto<'py>(&mut self, buf: Bound<'py, PyByteArray>) -> PyResult { + let bs = unsafe { buf.as_bytes_mut() }; + + let bytes_read = self.reader.read(bs) + .map_err(|e| PyErr::new::( + format!("{}", e)) + )?; + Ok(bytes_read) + } + } /// `Extractor` is the entry for all extract APIs @@ -147,6 +159,39 @@ impl Extractor { .map_err(|e| PyErr::new::(format!("{:?}", e))) } + /// Extracts text from a bytearray. Returns a stream of the extracted text + /// the stream is decoded using the extractor's `encoding` + pub fn extract_bytes(&self, buffer: &Bound<'_, PyByteArray>) -> PyResult { + let slice = buffer.to_vec(); + let reader = self + .0 + .extract_bytes(&slice) + .map_err(|e| PyErr::new::(format!("{:?}", e)))?; + + // Create a new `StreamReader` with initial buffer capacity of ecore::DEFAULT_BUF_SIZE bytes + Ok(StreamReader { + reader, + buffer: Vec::with_capacity(ecore::DEFAULT_BUF_SIZE), + py_bytes: None, + }) + } + + /// Extracts text from a url. 
Returns a string that is of maximum length + /// of the extractor's `extract_string_max_length` + pub fn extract_url(&self, url: &str) -> PyResult { + let reader = self + .0 + .extract_url(&url) + .map_err(|e| PyErr::new::(format!("{:?}", e)))?; + + // Create a new `StreamReader` with initial buffer capacity of ecore::DEFAULT_BUF_SIZE bytes + Ok(StreamReader { + reader, + buffer: Vec::with_capacity(ecore::DEFAULT_BUF_SIZE), + py_bytes: None, + }) + } + fn __repr__(&self) -> String { format!("{:?}", self.0) } diff --git a/bindings/extractous-python/tests/test_extract_bytes_to_stream.py b/bindings/extractous-python/tests/test_extract_bytes_to_stream.py new file mode 100644 index 0000000..32be6a7 --- /dev/null +++ b/bindings/extractous-python/tests/test_extract_bytes_to_stream.py @@ -0,0 +1,40 @@ +import pytest + +from extractous import Extractor +from utils import cosine_similarity, read_to_string, read_file_to_bytearray + +TEST_CASES = [ + ("2022_Q3_AAPL.pdf", 0.9), + ("science-exploration-1p.pptx", 0.9), + ("simple.odt", 0.9), + ("table-multi-row-column-cells-actual.csv", 0.9), + ("vodafone.xlsx", 0.4), + ("category-level.docx", 0.9), + ("simple.doc", 0.9), + ("simple.pptx", 0.9), + ("table-multi-row-column-cells.png", -1.0), + ("winter-sports.epub", 0.9), + ("bug_16.docx", 0.9), + #("eng-ocr.pdf", 0.9), +] + + +@pytest.mark.parametrize("file_name, target_dist", TEST_CASES) +def test_extract_bytes_to_stream(file_name, target_dist): + """Test the extraction from bytes of various file types.""" + original_filepath = f"../../test_files/documents/{file_name}" + expected_result_filepath = f"../../test_files/expected_result/{file_name}.txt" + + file_bytes = read_file_to_bytearray(original_filepath) + + extractor = Extractor() + reader = extractor.extract_bytes(file_bytes) + result = read_to_string(reader) + + # Expected + with open(expected_result_filepath, "r", encoding="utf8") as file: + expected = file.read() + + assert cosine_similarity(result, expected) > 
target_dist, \ + f"Cosine similarity is less than {target_dist} for file: {file_name}" + diff --git a/bindings/extractous-python/tests/test_extract_file_to_string.py b/bindings/extractous-python/tests/test_extract_file_to_string.py index ed3dbe8..95b5bbb 100644 --- a/bindings/extractous-python/tests/test_extract_file_to_string.py +++ b/bindings/extractous-python/tests/test_extract_file_to_string.py @@ -15,9 +15,10 @@ ("table-multi-row-column-cells.png", -1.0), ("winter-sports.epub", 0.9), ("bug_16.docx", 0.9), - ("deu-ocr.pdf", 0.9), + #("eng-ocr.pdf", 0.9), ] + @pytest.mark.parametrize("file_name, target_dist", TEST_CASES) def test_extract_file_to_string(file_name, target_dist): """Test the extraction and comparison of various file types.""" diff --git a/bindings/extractous-python/tests/test_extract_url.py b/bindings/extractous-python/tests/test_extract_url.py new file mode 100644 index 0000000..b6f4158 --- /dev/null +++ b/bindings/extractous-python/tests/test_extract_url.py @@ -0,0 +1,10 @@ +from extractous import Extractor +from utils import read_to_string + +def test_extract_url(): + extractor = Extractor() + + reader = extractor.extract_url("https://www.google.com") + result = read_to_string(reader) + + assert "Google" in result diff --git a/bindings/extractous-python/tests/test_ocr.py b/bindings/extractous-python/tests/test_ocr.py index 7f4de09..4baaf76 100644 --- a/bindings/extractous-python/tests/test_ocr.py +++ b/bindings/extractous-python/tests/test_ocr.py @@ -1,19 +1,20 @@ from extractous import Extractor, PdfOcrStrategy, PdfParserConfig, TesseractOcrConfig from utils import cosine_similarity + def test_ara_ocr_png(): ocr_config = TesseractOcrConfig().set_language("ara") extractor = Extractor().set_ocr_config(ocr_config) result = extractor.extract_file_to_string("../../test_files/documents/ara-ocr.png") - with open("../../test_files/expected_result/ara-ocr.png.txt", "r", encoding="utf8") as file: + with 
open("../../test_files/expected_result/ara-ocr.png.txt", "r", encoding="utf8") as file: expected = file.read() - assert cosine_similarity(result, expected) + assert cosine_similarity(result, expected) > 0.9 -def test_ocr_only_strategy_extract_deu_ocr_pdf_to_string(): - test_file = "../../test_files/documents/eng-ocr.pdf" +def test_extract_file_to_string_ocr_only_strategy_deu_ocr_pdf(): + test_file = "../../test_files/documents/deu-ocr.pdf" expected_result_file = "../../test_files/expected_result/deu-ocr.pdf.txt" pdf_config = PdfParserConfig().set_ocr_strategy(PdfOcrStrategy.OCR_ONLY) @@ -26,12 +27,13 @@ def test_ocr_only_strategy_extract_deu_ocr_pdf_to_string(): result = extractor.extract_file_to_string(test_file) - with open(expected_result_file, "r", encoding="utf8") as file: + with open(expected_result_file, "r", encoding="utf8") as file: expected = file.read() - assert cosine_similarity(result, expected) + assert cosine_similarity(result, expected) > 0.9 + -def test_no_ocr_strategy_extract_deu_ocr_pdf_to_string(): +def test_extract_file_to_string_no_ocr_strategy_deu_ocr_pdf(): test_file = "../../test_files/documents/deu-ocr.pdf" pdf_config = PdfParserConfig() @@ -39,8 +41,8 @@ ocr_config = TesseractOcrConfig() ocr_config = ocr_config.set_language("deu") - extractor = Extractor().set_ocr_config(ocr_config).set_pdf_config(PdfParserConfig().set_ocr_strategy(PdfOcrStrategy.NO_OCR)) + extractor = Extractor().set_ocr_config(ocr_config).set_pdf_config(pdf_config) result = extractor.extract_file_to_string(test_file) - assert result.strip() == "" \ No newline at end of file + assert result.strip() == "" diff --git a/bindings/extractous-python/tests/test_pdf.py b/bindings/extractous-python/tests/test_pdf.py index 5e85f3c..a14d9ed 100644 --- a/bindings/extractous-python/tests/test_pdf.py +++ b/bindings/extractous-python/tests/test_pdf.py @@ -1,4 +1,5 @@ from extractous import Extractor +from utils import 
read_to_string def expected_result(): @@ -12,16 +13,23 @@ def test_extract_file_to_string(): #print(result) assert result == expected_result() - def test_extract_file(): extractor = Extractor() reader = extractor.extract_file("tests/quarkus.pdf") - result = "" - b = reader.read(4096) - while len(b) > 0: - result += b.decode("utf-8") - b = reader.read(4096) + result = read_to_string(reader) #print(result) - assert result == expected_result() \ No newline at end of file + assert result == expected_result() + +def test_extract_bytes(): + extractor = Extractor() + + with open("tests/quarkus.pdf", "rb") as file: + buffer = bytearray(file.read()) + reader = extractor.extract_bytes(buffer) + + result = read_to_string(reader) + + #print(result) + assert result == expected_result() diff --git a/bindings/extractous-python/tests/utils.py b/bindings/extractous-python/tests/utils.py index 30c3944..b153895 100644 --- a/bindings/extractous-python/tests/utils.py +++ b/bindings/extractous-python/tests/utils.py @@ -1,6 +1,7 @@ from sklearn.feature_extraction.text import CountVectorizer from sklearn.metrics.pairwise import cosine_similarity as cosine_sim + def cosine_similarity(text1, text2): """Calculate the cosine similarity between two texts.""" @@ -10,4 +11,37 @@ def cosine_similarity(text1, text2): # Calculate cosine similarity between the two vectors cos_sim = cosine_sim(vectors) - return cos_sim[0][1] \ No newline at end of file + return cos_sim[0][1] + + +# def read_to_string(reader): +# """Read from stream to string.""" +# result = "" +# b = reader.read(4096) +# while len(b) > 0: +# result += b.decode("utf-8") +# b = reader.read(4096) +# return result + +def read_to_string(reader): + """Read from stream to string.""" + utf8_string = [] + buffer = bytearray(4096) + + while True: + bytes_read = reader.readinto(buffer) + # If no more data, exit the loop + if bytes_read == 0: + break + # Decode the valid portion of the buffer and append it to the result + 
utf8_string.append(buffer[:bytes_read].decode('utf-8')) + + # Join all parts into a single string + return ''.join(utf8_string) + + +def read_file_to_bytearray(file_path: str): + """Read file to bytes array.""" + with open(file_path, 'rb') as file: + file_content = bytearray(file.read()) + return file_content diff --git a/extractous-core/README.md b/extractous-core/README.md index 4e04bbb..3e55a42 100644 --- a/extractous-core/README.md +++ b/extractous-core/README.md @@ -49,8 +49,9 @@ fn main() { } ``` -* Extract a content of a file to a `StreamReader` and perform buffered reading +* Extract a content of a file(URL/ bytes) to a `StreamReader` and perform buffered reading ```rust +// use std::fs::File; use for bytes use std::io::{BufReader, Read}; use extractous::Extractor; @@ -62,6 +63,13 @@ fn main() { // Extract the provided file content to a string let extractor = Extractor::new(); let stream = extractor.extract_file(file_path).unwrap(); + // Extract url + // let stream = extractor.extract_url("https://www.google.com/").unwrap(); + // Extract bytes + // let mut file = File::open(file_path)?; + // let mut buffer = Vec::new(); + // file.read_to_end(&mut buffer)?; + // let stream= extractor.extract_bytes(&file_bytes); // Because stream implements std::io::Read trait we can perform buffered reading // For example we can use it to create a BufReader @@ -80,7 +88,7 @@ use extractous::Extractor; fn main() { let file_path = "../test_files/documents/deu-ocr.pdf"; - + let extractor = Extractor::new() .set_ocr_config(TesseractOcrConfig::new().set_language("deu")) .set_pdf_config(PdfParserConfig::new().set_ocr_strategy(PdfOcrStrategy::OCR_ONLY)); @@ -94,11 +102,11 @@ fn main() { ## Building ### Requirements -* Extractous uses [Apache Tika](https://tika.apache.org/) for file formats that are not natively supported in Rust. - However, to achieve one of Extractous goals, which is speed and efficiency, we do not set up any Tika as a servers or - run any Java code. 
We instead, compile [Apache Tika](https://tika.apache.org/) as native shared libraries and use - them on our Rust core as ffi. [GraalVm](https://www.graalvm.org/) is required to build Tika as native libs. -* The provided build script already takes care of installing the required GraalVM JDK. However, if you want to use a +* Extractous uses [Apache Tika](https://tika.apache.org/) for file formats that are not natively supported in Rust. + However, to achieve one of Extractous goals, which is speed and efficiency, we do not set up any Tika as a servers or + run any Java code. We instead, compile [Apache Tika](https://tika.apache.org/) as native shared libraries and use + them on our Rust core as ffi. [GraalVm](https://www.graalvm.org/) is required to build Tika as native libs. +* The provided build script already takes care of installing the required GraalVM JDK. However, if you want to use a specific local version, you can do so by setting the GRAALVM_HOME environment variable * We recommend using [sdkman](https://sdkman.io/install) to install GraalVM JDKs * `sdk install java 22.0.1-graalce` @@ -112,16 +120,18 @@ OpenJDK 64-Bit Server VM Liberica-NIK-24.0.1-1 (build 22.0.1+10, mixed mode, sha * On macOS the official GraalVM JDKs fail to work with code that use java awt. On macOS, we recommend using Bellsoft Liberica NIK * `sdk install java 24.0.1.r22-nik` -* Extractous supports OCR through [tesseract](https://github.com/tesseract-ocr/tesseract), make sure tesseract is +* Extractous supports OCR through [tesseract](https://github.com/tesseract-ocr/tesseract), make sure tesseract is installed on your system because some of the OCR tests will fail if no tesseract is found. * `sudo apt install tesseract-ocr` -* Install any language extensions you want. for example to install German and Arabic: +* Install any language extensions you want. 
for example to install German and Arabic: * `sudo apt install tesseract-ocr-deu tesseract-ocr-ara` +* On Mac +* `brew install tesseract tesseract-lang` ### Building Extractous -* To build Extractous, just run: +* To build Extractous, just run: * `cargo build` ### Running Tests * To run tests, just run: -* `cargo test` \ No newline at end of file +* `cargo test` diff --git a/extractous-core/examples/extract_to_stream.rs b/extractous-core/examples/extract_to_stream.rs index 7c99f85..9bbb142 100644 --- a/extractous-core/examples/extract_to_stream.rs +++ b/extractous-core/examples/extract_to_stream.rs @@ -1,4 +1,5 @@ use extractous::Extractor; +// use std::fs::File; use for bytes use std::io::{BufReader, Read}; fn main() { @@ -9,6 +10,14 @@ fn main() { // Extract the provided file content to a string let extractor = Extractor::new(); let stream = extractor.extract_file(file_path).unwrap(); + // Extract url + // let stream = extractor.extract_url("https://www.google.com/").unwrap(); + // Extract bytes + // let mut file = File::open(file_path)?; + // let mut buffer = Vec::new(); + // file.read_to_end(&mut buffer)?; + // let stream= extractor.extract_bytes(&file_bytes).unwrap(); + // Because stream implements std::io::Read trait we can perform buffered reading // For example we can use it to create a BufReader let mut reader = BufReader::new(stream); diff --git a/extractous-core/src/extractor.rs b/extractous-core/src/extractor.rs index 113e303..9917afa 100644 --- a/extractous-core/src/extractor.rs +++ b/extractous-core/src/extractor.rs @@ -124,6 +124,30 @@ impl Extractor { ) } + /// Extracts text from a byte buffer. Returns a stream of the extracted text + /// the stream is decoded using the extractor's `encoding` + pub fn extract_bytes(&self, buffer: &[u8]) -> ExtractResult { + tika::parse_bytes( + buffer, + &self.encoding, + &self.pdf_config, + &self.office_config, + &self.ocr_config, + ) + } + + /// Extracts text from a url. 
Returns a stream of the extracted text + /// the stream is decoded using the extractor's `encoding` + pub fn extract_url(&self, url: &str) -> ExtractResult { + tika::parse_url( + url, + &self.encoding, + &self.pdf_config, + &self.office_config, + &self.ocr_config, + ) + } + /// Extracts text from a file path. Returns a string that is of maximum length /// of the extractor's `extract_string_max_length` pub fn extract_file_to_string(&self, file_path: &str) -> ExtractResult { @@ -141,10 +165,13 @@ impl Extractor { mod tests { use crate::Extractor; use std::fs::File; - use std::io::prelude::*; use std::io::BufReader; + use std::io::{self, Read}; + + use super::StreamReader; const TEST_FILE: &str = "README.md"; + const TEST_URL: &str = "https://www.google.com/"; fn expected_content() -> String { let mut file = File::open(TEST_FILE).unwrap(); @@ -153,6 +180,15 @@ mod tests { content } + fn read_content_from_stream(stream: StreamReader) -> String { + let mut reader = BufReader::new(stream); + let mut buffer = Vec::new(); + reader.read_to_end(&mut buffer).unwrap(); + + let content = String::from_utf8(buffer).unwrap(); + content + } + #[test] fn extract_file_test() { // Prepare expected_content @@ -161,17 +197,8 @@ mod tests { // Parse the files using extractous let extractor = Extractor::new(); let result = extractor.extract_file(TEST_FILE); - let mut reader = BufReader::new(result.unwrap()); - let mut buffer = Vec::new(); - reader.read_to_end(&mut buffer).unwrap(); - - let content = String::from_utf8(buffer).unwrap(); + let content = read_content_from_stream(result.unwrap()); assert_eq!(content.trim(), expected_content.trim()); - - // let mut reader = BufReader::new(result.unwrap()); - // let mut line = String::new(); - // let _len = reader.read_line(&mut line).unwrap(); - //assert_eq!("# Extractous", line.trim()); } #[test] @@ -185,4 +212,33 @@ mod tests { let content = result.unwrap(); assert_eq!(content.trim(), expected_content.trim()); } + + fn 
read_file_as_bytes(path: &str) -> io::Result<Vec<u8>> { + let mut file = File::open(path)?; + let mut buffer = Vec::new(); + file.read_to_end(&mut buffer)?; + Ok(buffer) + } + + #[test] + fn extract_bytes_test() { + // Prepare expected_content + let expected_content = expected_content(); + + // Parse the bytes using extractous + let file_bytes = read_file_as_bytes(TEST_FILE).unwrap(); + let extractor = Extractor::new(); + let result = extractor.extract_bytes(&file_bytes); + let content = read_content_from_stream(result.unwrap()); + assert_eq!(content.trim(), expected_content.trim()); + } + + #[test] + fn extract_url_test() { + // Parse url by extractous + let extractor = Extractor::new(); + let result = extractor.extract_url(&TEST_URL); + let content = read_content_from_stream(result.unwrap()); + assert!(content.contains("Google")); + } } diff --git a/extractous-core/src/tika/jni_utils.rs b/extractous-core/src/tika/jni_utils.rs index 3eb9de6..a99bae2 100644 --- a/extractous-core/src/tika/jni_utils.rs +++ b/extractous-core/src/tika/jni_utils.rs @@ -1,11 +1,23 @@ use std::os::raw::{c_char, c_void}; use jni::errors::jni_error_code_to_result; -use jni::objects::{JObject, JString, JValue, JValueOwned}; +use jni::objects::{JByteBuffer, JObject, JString, JValue, JValueOwned}; use jni::{sys, JNIEnv, JavaVM}; use crate::errors::{Error, ExtractResult}; +/// Creates a JNI direct ByteBuffer that wraps the given raw memory without copying it. +/// The caller must keep the backing memory alive and unmoved while the buffer is in use. +pub fn jni_new_direct_buffer<'local>( + env: &mut JNIEnv<'local>, + data: *mut u8, + len: usize, +) -> ExtractResult<JByteBuffer<'local>> { + let direct_byte_buffer = unsafe { env.new_direct_byte_buffer(data, len) } + .map_err(|_e| Error::JniEnvCall("Failed to create direct byte buffer"))?; + + Ok(direct_byte_buffer) +} + /// Calls a static method and prints any thrown exceptions to stderr pub fn jni_call_static_method<'local>( env: &mut JNIEnv<'local>, @@ -99,20 +111,23 @@ pub fn jni_check_exception(env: &mut JNIEnv) -> ExtractResult { /// linked in by the build script. 
pub fn create_vm_isolate() -> JavaVM { unsafe { - // let mut option0 = sys::JavaVMOption { - // optionString: "-Djava.awt.headless=true".as_ptr() as *mut c_char, - // extraInfo: std::ptr::null_mut(), - // }; - - // Set java.library.path to be able to load libawt.so, which must be in the same dir as libtika_native.so - let mut options = sys::JavaVMOption { - optionString: "-Djava.library.path=.".as_ptr() as *mut c_char, - extraInfo: std::ptr::null_mut(), - }; + let vm_options: Vec<sys::JavaVMOption> = vec![ + // Set java.library.path to be able to load libawt.so, which must be in the same dir as libtika_native.so + sys::JavaVMOption { + optionString: "-Djava.library.path=.".as_ptr() as *mut c_char, + extraInfo: std::ptr::null_mut(), + }, + // enable awt headless mode + sys::JavaVMOption { + optionString: "-Djava.awt.headless=true".as_ptr() as *mut c_char, + extraInfo: std::ptr::null_mut(), + }, + ]; + let mut args = sys::JavaVMInitArgs { version: sys::JNI_VERSION_1_8, - nOptions: 1, - options: &mut options, + nOptions: vm_options.len() as sys::jint, + options: vm_options.as_ptr() as *mut sys::JavaVMOption, ignoreUnrecognized: sys::JNI_TRUE, }; let mut ptr: *mut sys::JavaVM = std::ptr::null_mut(); diff --git a/extractous-core/src/tika/parse.rs b/extractous-core/src/tika/parse.rs index a019e9b..8766d27 100644 --- a/extractous-core/src/tika/parse.rs +++ b/extractous-core/src/tika/parse.rs @@ -1,7 +1,7 @@ use std::sync::OnceLock; use jni::objects::JValue; -use jni::JavaVM; +use jni::{AttachGuard, JavaVM}; use crate::errors::ExtractResult; use crate::tika::jni_utils::*; @@ -17,18 +17,23 @@ pub(crate) fn vm() -> &'static JavaVM { GRAAL_VM.get_or_init(create_vm_isolate) } -pub fn parse_file( - file_path: &str, +fn get_vm_attach_current_thread<'local>() -> ExtractResult<AttachGuard<'local>> { + // Attaching a thread that is already attached is a no-op. 
Good to have this in case this method + // is called from another thread + let env = vm().attach_current_thread()?; + Ok(env) +} + +fn parse_to_stream( + mut env: AttachGuard, + data_source_val: JValue, char_set: &CharSet, pdf_conf: &PdfParserConfig, office_conf: &OfficeParserConfig, ocr_conf: &TesseractOcrConfig, + method_name: &str, + signature: &str, ) -> ExtractResult { - // Attaching a thead that is already attached is a no-op. Good to have this in case this method - // is called from another thread - let mut env = vm().attach_current_thread()?; - - let file_path_val = jni_new_string_as_jvalue(&mut env, file_path)?; let charset_name_val = jni_new_string_as_jvalue(&mut env, &char_set.to_string())?; let j_pdf_conf = JPDFParserConfig::new(&mut env, pdf_conf)?; let j_office_conf = JOfficeParserConfig::new(&mut env, office_conf)?; @@ -38,15 +43,10 @@ pub fn parse_file( let call_result = jni_call_static_method( &mut env, "ai/yobix/TikaNativeMain", - "parseFile", - "(Ljava/lang/String;\ - Ljava/lang/String;\ - Lorg/apache/tika/parser/pdf/PDFParserConfig;\ - Lorg/apache/tika/parser/microsoft/OfficeParserConfig;\ - Lorg/apache/tika/parser/ocr/TesseractOCRConfig;\ - )Lai/yobix/ReaderResult;", + method_name, + signature, &[ - (&file_path_val).into(), + data_source_val, (&charset_name_val).into(), (&j_pdf_conf.internal).into(), (&j_office_conf.internal).into(), @@ -62,6 +62,33 @@ pub fn parse_file( Ok(StreamReader { inner: j_reader }) } +pub fn parse_file( + file_path: &str, + char_set: &CharSet, + pdf_conf: &PdfParserConfig, + office_conf: &OfficeParserConfig, + ocr_conf: &TesseractOcrConfig, +) -> ExtractResult { + let mut env = get_vm_attach_current_thread()?; + + let file_path_val = jni_new_string_as_jvalue(&mut env, file_path)?; + parse_to_stream( + env, + (&file_path_val).into(), + char_set, + pdf_conf, + office_conf, + ocr_conf, + "parseFile", + "(Ljava/lang/String;\ + Ljava/lang/String;\ + Lorg/apache/tika/parser/pdf/PDFParserConfig;\ + 
Lorg/apache/tika/parser/microsoft/OfficeParserConfig;\ + Lorg/apache/tika/parser/ocr/TesseractOCRConfig;\ + )Lai/yobix/ReaderResult;", + ) +} + /// Parses a file to a string using the Apache Tika library. pub fn parse_file_to_string( file_path: &str, @@ -70,9 +97,7 @@ pub fn parse_file_to_string( office_conf: &OfficeParserConfig, ocr_conf: &TesseractOcrConfig, ) -> ExtractResult { - // Attaching a thead that is already attached is a no-op. Good to have this in case this method - // is called from another thread - let mut env = vm().attach_current_thread()?; + let mut env = get_vm_attach_current_thread()?; // Create a new Java string from the Rust string let file_path_val = jni_new_string_as_jvalue(&mut env, file_path)?; @@ -102,3 +127,62 @@ pub fn parse_file_to_string( Ok(result.content) } + +pub fn parse_bytes( + buffer: &[u8], + char_set: &CharSet, + pdf_conf: &PdfParserConfig, + office_conf: &OfficeParserConfig, + ocr_conf: &TesseractOcrConfig, +) -> ExtractResult { + let mut env = get_vm_attach_current_thread()?; + + // Because we know the buffer is used for reading only, cast it to *mut u8 to satisfy the + // jni_new_direct_buffer call, which requires a mutable pointer + let mut_ptr: *mut u8 = buffer.as_ptr() as *mut u8; + + let byte_buffer = jni_new_direct_buffer(&mut env, mut_ptr, buffer.len())?; + + parse_to_stream( + env, + (&byte_buffer).into(), + char_set, + pdf_conf, + office_conf, + ocr_conf, + "parseBytes", + "(Ljava/nio/ByteBuffer;\ + Ljava/lang/String;\ + Lorg/apache/tika/parser/pdf/PDFParserConfig;\ + Lorg/apache/tika/parser/microsoft/OfficeParserConfig;\ + Lorg/apache/tika/parser/ocr/TesseractOCRConfig;\ + )Lai/yobix/ReaderResult;", + ) +} + +pub fn parse_url( + url: &str, + char_set: &CharSet, + pdf_conf: &PdfParserConfig, + office_conf: &OfficeParserConfig, + ocr_conf: &TesseractOcrConfig, +) -> ExtractResult { + let mut env = get_vm_attach_current_thread()?; + + let url_val = jni_new_string_as_jvalue(&mut env, url)?; + parse_to_stream( + env, + 
(&url_val).into(), + char_set, + pdf_conf, + office_conf, + ocr_conf, + "parseUrl", + "(Ljava/lang/String;\ + Ljava/lang/String;\ + Lorg/apache/tika/parser/pdf/PDFParserConfig;\ + Lorg/apache/tika/parser/microsoft/OfficeParserConfig;\ + Lorg/apache/tika/parser/ocr/TesseractOCRConfig;\ + )Lai/yobix/ReaderResult;", + ) +} diff --git a/extractous-core/tests/extract_to_stream_tests.rs b/extractous-core/tests/extract_to_stream_tests.rs new file mode 100644 index 0000000..c29d089 --- /dev/null +++ b/extractous-core/tests/extract_to_stream_tests.rs @@ -0,0 +1,74 @@ +extern crate test_case; +extern crate textdistance; + +use extractous::{Extractor, PdfOcrStrategy, PdfParserConfig, TesseractOcrConfig}; +use std::fs; +use std::io::Read; +use test_case::test_case; +use textdistance::nstr::cosine; + +#[test_case("2022_Q3_AAPL.pdf", 0.9; "Test PDF file")] +#[test_case("science-exploration-1p.pptx", 0.9; "Test PPTX file")] +#[test_case("simple.odt", 0.8; "Test ODT file")] +#[test_case("table-multi-row-column-cells-actual.csv", 0.8; "Test CSV file")] +#[test_case("vodafone.xlsx", 0.4; "Test XLSX file")] +#[test_case("category-level.docx", 0.9; "Test DOCX file")] +#[test_case("simple.doc", 0.9; "Test DOC file")] +#[test_case("simple.pptx", 0.9; "Test another PPTX file")] +#[test_case("table-multi-row-column-cells.png", -1.0; "Test PNG file")] +#[test_case("winter-sports.epub", 0.9; "Test EPUB file")] +#[test_case("bug_16.docx", 0.9; "Test bug16 DOCX file")] +//#[test_case("eng-ocr.pdf", 0.9; "Test eng-ocr PDF file")] +fn test_extract_bytes_to_stream(file_name: &str, target_dist: f64) { + let extractor = Extractor::new(); + + let bytes = fs::read(&format!("../test_files/documents/{}", file_name)).unwrap(); + let mut stream = extractor.extract_bytes(&bytes).unwrap(); + + let mut buffer = Vec::new(); + stream.read_to_end(&mut buffer).unwrap(); + let extracted = String::from_utf8_lossy(&buffer); + + // read expected string + let expected = + 
fs::read_to_string(format!("../test_files/expected_result/{}.txt", file_name)).unwrap(); + + let dist = cosine(&expected, &extracted); + assert!( + dist > target_dist, + "Cosine similarity is less than {} for file: {}, dist: {}", + target_dist, + file_name, + dist + ); + println!("{}: {}", file_name, dist); +} + +#[test] +fn test_extract_bytes_to_stream_ara_ocr_png() { + let extractor = Extractor::new() + .set_ocr_config(TesseractOcrConfig::new().set_language("ara")) + .set_pdf_config(PdfParserConfig::new().set_ocr_strategy(PdfOcrStrategy::NO_OCR)); + + // extract file with extractor + let bytes = fs::read(&"../test_files/documents/ara-ocr.png".to_string()).unwrap(); + let mut stream = extractor.extract_bytes(&bytes).unwrap(); + + let mut buffer = Vec::new(); + stream.read_to_end(&mut buffer).unwrap(); + let extracted = String::from_utf8_lossy(&buffer); + + println!("{}", extracted); + + // read expected string + let expected = + fs::read_to_string("../test_files/expected_result/ara-ocr.png.txt".to_string()).unwrap(); + + let dist = cosine(&expected, &extracted); + assert!( + dist > 0.9, + "Cosine similarity is less than 0.9 for file: ara-ocr.png, dist: {}", + dist + ); + println!("{}: {}", "ara-ocr.png", dist); +} diff --git a/extractous-core/tests/extractor_test.rs b/extractous-core/tests/extract_to_string_tests.rs similarity index 87% rename from extractous-core/tests/extractor_test.rs rename to extractous-core/tests/extract_to_string_tests.rs index 5322c3f..7456442 100644 --- a/extractous-core/tests/extractor_test.rs +++ b/extractous-core/tests/extract_to_string_tests.rs @@ -17,7 +17,7 @@ use textdistance::nstr::cosine; #[test_case("table-multi-row-column-cells.png", -1.0; "Test PNG file")] #[test_case("winter-sports.epub", 0.9; "Test EPUB file")] #[test_case("bug_16.docx", 0.9; "Test bug16 DOCX file")] -#[test_case("eng-ocr.pdf", 0.9; "Test eng-ocr PDF file")] +//#[test_case("eng-ocr.pdf", 0.9; "Test eng-ocr PDF file")] fn 
test_extract_file_to_string(file_name: &str, target_dist: f64) { let extractor = Extractor::new().set_extract_string_max_length(1000000); // extract file with extractor @@ -40,7 +40,7 @@ fn test_extract_file_to_string(file_name: &str, target_dist: f64) { } #[test] -fn test_extract_ara_ocr_png_to_string() { +fn test_extract_file_to_string_ara_ocr_png() { let extractor = Extractor::new() .set_ocr_config(TesseractOcrConfig::new().set_language("ara")) .set_pdf_config(PdfParserConfig::new().set_ocr_strategy(PdfOcrStrategy::NO_OCR)); @@ -61,18 +61,18 @@ fn test_extract_ara_ocr_png_to_string() { "Cosine similarity is less than 0.9 for file: ara-ocr.png, dist: {}", dist ); - println!("{}: {}", "ara-ocr.png", dist); } +#[cfg(not(target_os = "macos"))] #[test] -fn test_ocr_only_strategy_extract_deu_ocr_pdf_to_string() { +fn test_extract_file_to_string_ocr_only_strategy_deu_ocr_pdf() { let extractor = Extractor::new() .set_ocr_config(TesseractOcrConfig::new().set_language("deu")) .set_pdf_config( PdfParserConfig::new() - .set_ocr_strategy(PdfOcrStrategy::OCR_ONLY) - .set_extract_inline_images(true) - .set_extract_unique_inline_images_only(true), + .set_ocr_strategy(PdfOcrStrategy::OCR_AND_TEXT_EXTRACTION) + .set_extract_inline_images(false) + .set_extract_unique_inline_images_only(false), ); // extract file with extractor let extracted = extractor @@ -89,11 +89,11 @@ fn test_ocr_only_strategy_extract_deu_ocr_pdf_to_string() { "Cosine similarity is less than 0.9 for file: ara-ocr.png, dist: {}", dist ); - println!("{}: {}", "ara-ocr.png", dist); } +#[cfg(not(target_os = "macos"))] #[test] -fn test_no_ocr_strategy_extract_deu_ocr_pdf_to_string() { +fn test_test_extract_file_to_string_no_ocr_strategy_deu_ocr_pdf() { let extractor = Extractor::new() .set_ocr_config(TesseractOcrConfig::new().set_language("deu")) .set_pdf_config(PdfParserConfig::new().set_ocr_strategy(PdfOcrStrategy::NO_OCR)); diff --git a/extractous-core/tika-native/build.gradle 
b/extractous-core/tika-native/build.gradle index d153548..793ae26 100644 --- a/extractous-core/tika-native/build.gradle +++ b/extractous-core/tika-native/build.gradle @@ -66,6 +66,7 @@ graalvmNative { buildArgs.addAll( "-H:+AddAllCharsets", // Very important to get UTF8 working + "--enable-https", // Very important to get https working "-O3", "--parallelism=$numThreads", "-march=compatibility" // VERY IMPORTANT to use compatibility flag. If not the libs will use the cpu arch of the build machine and will notwork on other CPUs if distributed diff --git a/extractous-core/tika-native/src/main/java/ai/yobix/ByteBufferInputStream.java b/extractous-core/tika-native/src/main/java/ai/yobix/ByteBufferInputStream.java new file mode 100644 index 0000000..9abf3a2 --- /dev/null +++ b/extractous-core/tika-native/src/main/java/ai/yobix/ByteBufferInputStream.java @@ -0,0 +1,90 @@ +package ai.yobix; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; + +public class ByteBufferInputStream extends InputStream { + + private ByteBuffer bb; + + public ByteBufferInputStream(ByteBuffer bb) { + this.bb = bb; + } + + @Override + public int read() throws IOException { + if (bb == null) { + throw new IOException("read on a closed InputStream"); + } + + if (bb.remaining() == 0) { + return -1; + } + + return (bb.get() & 0xFF); // need to be in the range 0 to 255 + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + + if (bb == null) { + throw new IOException("read on a closed InputStream"); + } + + if (b == null) { + throw new NullPointerException(); + } else if (off < 0 || len < 0 || len > b.length - off) { + throw new IndexOutOfBoundsException(); + } else if (len == 0) { + return 0; + } + + int length = Math.min(bb.remaining(), len); + if (length == 0) { + return -1; + } + + bb.get(b, off, length); + return length; + } + + @Override + public long skip(long n) throws IOException { + + if (bb == null) { + throw new IOException("skip 
on a closed InputStream"); + } + + if (n <= 0) { + return 0; + } + + /* + * ByteBuffers have at most an int, so lose the upper bits. + * The contract allows this. + */ + int nInt = (int) n; + int skip = Math.min(bb.remaining(), nInt); + + bb.position(bb.position() + skip); + + return nInt; + } + + @Override + public int available() throws IOException { + + if (bb == null) { + throw new IOException("available on a closed InputStream"); + } + + return bb.remaining(); + } + + @Override + public void close() throws IOException { + bb = null; + } + +} diff --git a/extractous-core/tika-native/src/main/java/ai/yobix/TikaNativeMain.java b/extractous-core/tika-native/src/main/java/ai/yobix/TikaNativeMain.java index ba83662..b524b40 100644 --- a/extractous-core/tika-native/src/main/java/ai/yobix/TikaNativeMain.java +++ b/extractous-core/tika-native/src/main/java/ai/yobix/TikaNativeMain.java @@ -1,34 +1,22 @@ package ai.yobix; import org.apache.commons.io.input.ReaderInputStream; -import org.apache.tika.exception.WriteLimitReachedException; -import org.apache.tika.parser.ParsingReader; -import org.apache.tika.sax.BodyContentHandler; -import org.apache.tika.sax.WriteOutContentHandler; import org.apache.tika.Tika; import org.apache.tika.config.TikaConfig; import org.apache.tika.exception.TikaException; - -import java.io.IOException; -import java.io.InputStream; -import java.io.Reader; -import java.net.MalformedURLException; -import java.net.URI; -import java.net.URISyntaxException; -import java.net.URL; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; -import java.nio.file.Path; -import java.nio.file.Paths; - +import org.apache.tika.exception.WriteLimitReachedException; +import org.apache.tika.io.TemporaryResources; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.AutoDetectParser; import org.apache.tika.parser.ParseContext; import org.apache.tika.parser.Parser; +import 
org.apache.tika.parser.ParsingReader; import org.apache.tika.parser.microsoft.OfficeParserConfig; import org.apache.tika.parser.ocr.TesseractOCRConfig; import org.apache.tika.parser.pdf.PDFParserConfig; +import org.apache.tika.sax.BodyContentHandler; +import org.apache.tika.sax.WriteOutContentHandler; import org.graalvm.nativeimage.IsolateThread; import org.graalvm.nativeimage.c.function.CEntryPoint; import org.graalvm.nativeimage.c.type.CCharPointer; @@ -36,6 +24,19 @@ import org.graalvm.nativeimage.c.type.CTypeConversion; import org.xml.sax.SAXException; +import java.io.IOException; +import java.io.InputStream; +import java.io.Reader; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.ByteBuffer; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.nio.file.Paths; + public class TikaNativeMain { private static final Tika tika = new Tika(); @@ -196,15 +197,17 @@ public static ReaderResult parseUrl( * @return ReaderResult */ public static ReaderResult parseBytes( - byte[] data, + ByteBuffer data, String charsetName, PDFParserConfig pdfConfig, OfficeParserConfig officeConfig, TesseractOCRConfig tesseractConfig ) { + final Metadata metadata = new Metadata(); - final TikaInputStream stream = TikaInputStream.get(data, metadata); + final ByteBufferInputStream inStream = new ByteBufferInputStream(data); + final TikaInputStream stream = TikaInputStream.get(inStream, new TemporaryResources(), metadata); return parse(stream, metadata, charsetName, pdfConfig, officeConfig, tesseractConfig); } diff --git a/extractous-core/tika-native/src/main/resources/META-INF/native-image/jni-config.json b/extractous-core/tika-native/src/main/resources/META-INF/native-image/jni-config.json index 288d373..496d5d3 100644 --- a/extractous-core/tika-native/src/main/resources/META-INF/native-image/jni-config.json +++ 
b/extractous-core/tika-native/src/main/resources/META-INF/native-image/jni-config.json @@ -55,7 +55,7 @@ { "name": "parseBytes", "parameterTypes": [ - "byte[]", + "java.nio.ByteBuffer", "java.lang.String", "org.apache.tika.parser.pdf.PDFParserConfig", "org.apache.tika.parser.microsoft.OfficeParserConfig",