Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docling/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,12 @@ def convert( # noqa: C901
..., help="If enabled, the bitmap content will be processed using OCR."
),
] = True,
layout: Annotated[
bool,
typer.Option(
..., help=("If enabled document layout analysis processing will be done.")
),
] = True,
force_ocr: Annotated[
bool,
typer.Option(
Expand Down Expand Up @@ -733,6 +739,7 @@ def convert( # noqa: C901
enable_remote_services=enable_remote_services,
accelerator_options=accelerator_options,
do_ocr=ocr,
do_layout=layout,
ocr_options=ocr_options,
do_table_structure=tables,
do_code_enrichment=enrich_code,
Expand Down
9 changes: 9 additions & 0 deletions docling/datamodel/pipeline_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -1416,6 +1416,15 @@ class PdfPipelineOptions(PaginatedPipelineOptions):
)
),
] = True
do_layout: Annotated[
bool,
Field(
description=(
"Enable document layout analysis to detect and classify page regions such as text blocks, headings, figures, "
"tables, and other structural elements. Required for accurate content segmentation and reading-order reconstruction."
)
),
] = True
do_code_enrichment: Annotated[
bool,
Field(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class ThreadedLayoutVlmPipelineOptions(PaginatedPipelineOptions):
vlm_options: Union[InlineVlmOptions, ApiVlmOptions] = GRANITEDOCLING_TRANSFORMERS

# Layout model configuration
do_layout: bool = True
layout_options: LayoutOptions = LayoutOptions(
model_spec=DOCLING_LAYOUT_HERON, skip_cell_assignment=True
)
Expand Down
2 changes: 2 additions & 0 deletions docling/experimental/models/table_crops_layout_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,12 @@ def __init__(
artifacts_path: Optional[Path],
accelerator_options: AcceleratorOptions,
options: TableCropsLayoutOptions,
enabled: bool,
enable_remote_services: bool = False,
):
_ = enable_remote_services
self.options = options
self.enabled = enabled
self.artifacts_path = artifacts_path
self.accelerator_options = accelerator_options

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ def _init_models(self) -> None:
artifacts_path=art_path,
accelerator_options=self.pipeline_options.accelerator_options,
options=self.pipeline_options.layout_options,
enabled=self.pipeline_options.do_layout,
)

# VLM model based on options type
Expand Down
6 changes: 6 additions & 0 deletions docling/models/base_layout_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
class BaseLayoutModel(BasePageModel, BaseModelWithOptions, ABC):
"""Shared interface for layout models."""

enabled: bool

@classmethod
@abstractmethod
def get_options_type(cls) -> Type[BaseLayoutOptions]:
Expand All @@ -31,6 +33,10 @@ def __call__(
conv_res: ConversionResult,
page_batch: Iterable[Page],
) -> Iterable[Page]:
if not self.enabled:
yield from page_batch
return

pages = list(page_batch)
predictions = self.predict_layout(conv_res, pages)

Expand Down
2 changes: 2 additions & 0 deletions docling/models/stages/layout/layout_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,14 @@ def __init__(
artifacts_path: Optional[Path],
accelerator_options: AcceleratorOptions,
options: LayoutOptions,
enabled: bool,
enable_remote_services: bool = False,
):
from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor

_ = enable_remote_services
self.options = options
self.enabled = enabled

device = decide_device(accelerator_options.device)
layout_model_config = options.model_spec
Expand Down
2 changes: 2 additions & 0 deletions docling/models/stages/layout/layout_object_detection_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,11 @@ def __init__(
artifacts_path: Optional[Path],
accelerator_options: AcceleratorOptions,
options: LayoutObjectDetectionOptions,
enabled: bool,
enable_remote_services: bool = False,
) -> None:
self.options = options
self.enabled = enabled

self.engine: BaseObjectDetectionEngine = create_object_detection_engine(
options=options.engine_options,
Expand Down
7 changes: 6 additions & 1 deletion docling/models/stages/page_assemble/page_assemble_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,9 @@ class PageAssembleOptions(BaseModel):


class PageAssembleModel(BasePageModel):
def __init__(self, options: PageAssembleOptions):
def __init__(self, options: PageAssembleOptions, enabled: bool):
self.options = options
self.enabled = enabled

def sanitize_text(self, lines):
if len(lines) == 0:
Expand Down Expand Up @@ -88,6 +89,10 @@ def sanitize_text(self, lines):
def __call__(
self, conv_res: ConversionResult, page_batch: Iterable[Page]
) -> Iterable[Page]:
if not self.enabled:
yield from page_batch
return

for page in page_batch:
assert page._backend is not None
if not page._backend.is_valid():
Expand Down
5 changes: 4 additions & 1 deletion docling/pipeline/legacy_standard_pdf_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def __init__(self, pipeline_options: PdfPipelineOptions):
)
layout_model = layout_factory.create_instance(
options=pipeline_options.layout_options,
enabled=pipeline_options.do_layout,
artifacts_path=self.artifacts_path,
accelerator_options=pipeline_options.accelerator_options,
enable_remote_services=pipeline_options.enable_remote_services,
Expand Down Expand Up @@ -93,7 +94,9 @@ def __init__(self, pipeline_options: PdfPipelineOptions):
# Table structure model
table_model,
# Page assemble
PageAssembleModel(options=PageAssembleOptions()),
PageAssembleModel(
options=PageAssembleOptions(), enabled=pipeline_options.do_layout
),
]

self.enrichment_pipe = [
Expand Down
5 changes: 4 additions & 1 deletion docling/pipeline/standard_pdf_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,7 @@ def _init_models(self) -> None:
)
self.layout_model = layout_factory.create_instance(
options=self.pipeline_options.layout_options,
enabled=self.pipeline_options.do_layout,
artifacts_path=art_path,
accelerator_options=self.pipeline_options.accelerator_options,
enable_remote_services=self.pipeline_options.enable_remote_services,
Expand All @@ -502,7 +503,9 @@ def _init_models(self) -> None:
accelerator_options=self.pipeline_options.accelerator_options,
enable_remote_services=self.pipeline_options.enable_remote_services,
)
self.assemble_model = PageAssembleModel(options=PageAssembleOptions())
self.assemble_model = PageAssembleModel(
options=PageAssembleOptions(), enabled=self.pipeline_options.do_layout
)
self.reading_order_model = ReadingOrderModel(options=ReadingOrderOptions())

# --- optional enrichment ------------------------------------------------
Expand Down
30 changes: 30 additions & 0 deletions docs/examples/custom_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,22 @@ def main():
# }
# )

# PyPdfium without EasyOCR and Layout Processing
# --------------------
# pipeline_options = PdfPipelineOptions()
# pipeline_options.do_ocr = False
# pipeline_options.do_layout = False
# pipeline_options.do_table_structure = True
# pipeline_options.table_structure_options = TableStructureOptions(do_cell_matching=False)

# doc_converter = DocumentConverter(
# format_options={
# InputFormat.PDF: PdfFormatOption(
# pipeline_options=pipeline_options, backend=PyPdfiumDocumentBackend
# )
# }
# )

# PyPdfium with EasyOCR
# -----------------
# pipeline_options = PdfPipelineOptions()
Expand Down Expand Up @@ -104,6 +120,20 @@ def main():
# }
# )

# Docling Parse without EasyOCR and Layout Processing
# -------------------------
# pipeline_options = PdfPipelineOptions()
# pipeline_options.do_ocr = False
# pipeline_options.do_layout = False
# pipeline_options.do_table_structure = True
# pipeline_options.table_structure_options = TableStructureOptions(do_cell_matching=True)

# doc_converter = DocumentConverter(
# format_options={
# InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)
# }
# )

# Docling Parse with EasyOCR (default)
# -------------------------------
# Enables OCR and table structure with EasyOCR, using automatic device
Expand Down
2 changes: 1 addition & 1 deletion tests/test_page_assemble_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

@pytest.fixture
def model() -> PageAssembleModel:
return PageAssembleModel(options=PageAssembleOptions())
return PageAssembleModel(options=PageAssembleOptions(), enabled=True)


class TestSanitizeTextLigatures:
Expand Down