Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ name = "openparse"
description = "Streamlines the process of preparing documents for LLM's."
readme = "README.md"
requires-python = ">=3.8"
version = "0.5.4"
version = "0.5.5"
authors = [{name = "Sergey Filimonov", email = "hello@sergey.fyi"}]
dependencies = [
"PyMuPDF >= 1.23.2",
Expand Down
File renamed without changes.
4 changes: 2 additions & 2 deletions src/openparse/doc_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from openparse import tables, text, consts
from openparse.pdf import Pdf
from openparse.types import NOT_GIVEN, NotGiven
from openparse._types import NOT_GIVEN, NotGiven
from openparse.processing import (
IngestionPipeline,
BasicIngestionPipeline,
Expand Down Expand Up @@ -34,7 +34,7 @@ class PyMuPDFArgsDict(TypedDict, total=False):


def _table_args_dict_to_model(
args_dict: Union[TableTransformersArgsDict, PyMuPDFArgsDict]
args_dict: Union[TableTransformersArgsDict, PyMuPDFArgsDict],
) -> Union[tables.TableTransformersArgs, tables.PyMuPDFArgs]:
if args_dict["parsing_algorithm"] == "table-transformers":
return tables.TableTransformersArgs(**args_dict)
Expand Down
10 changes: 5 additions & 5 deletions src/openparse/pdf.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import random
import tempfile
import io
from pathlib import Path
from typing import Iterator, List, Literal, Optional, Union, Tuple, Any
from pydantic import BaseModel

from pydantic import BaseModel
from pdfminer.high_level import extract_pages
from pdfminer.layout import LTPage
from pypdf import PdfReader, PdfWriter
Expand Down Expand Up @@ -115,9 +115,9 @@ def to_pymupdf_doc(self):
if not self.writer.pages:
return fitz.open(self.file_path)

with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmpfile:
self.writer.write(tmpfile.name)
return fitz.open(tmpfile.name)
byte_stream = io.BytesIO()
self.writer.write(byte_stream)
return fitz.open(None, byte_stream)

def _draw_bboxes(
self,
Expand Down
2 changes: 1 addition & 1 deletion src/openparse/version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
OPEN_PARSE_VERSION = "0.5.4"
OPEN_PARSE_VERSION = "0.5.5"


def version_info() -> str:
Expand Down