def create_demo_image(path):
    """Draw a small black-on-white test bitmap and save it to ``path``.

    The 320x180 RGB image holds an outlined rectangle, an outlined ellipse,
    a diagonal line, and a short caption string.
    """
    ink = "black"
    bitmap = Image.new("RGB", (320, 180), "white")
    pen = ImageDraw.Draw(bitmap)

    # Border rectangle around the whole drawing.
    pen.rectangle([20, 20, 300, 160], outline=ink, width=3)
    # Ellipse on the left, diagonal stroke on the right.
    pen.ellipse([55, 45, 145, 135], outline=ink, width=4)
    pen.line([180, 140, 285, 45], fill=ink, width=4)
    # Caption near the bottom edge of the border rectangle.
    pen.text((45, 145), "Embedded bitmap image", fill=ink)

    bitmap.save(path)
# Write the demo bitmap up front; build_pdf embeds the file at DEMO_IMAGE_PATH.
create_demo_image(DEMO_IMAGE_PATH)
def _draw_wrapped_block(pdf, x, y, text, wrap_width):
    """Draw ``text`` as a 10 pt Helvetica block with 13 pt leading at (x, y).

    The text is wrapped with ``textwrap.wrap`` at ``wrap_width`` characters
    and emitted one line at a time through a text object.
    """
    block = pdf.beginText(x, y)
    block.setFont("Helvetica", 10)
    block.setLeading(13)
    for wrapped in textwrap.wrap(text, width=wrap_width):
        block.textLine(wrapped)
    pdf.drawText(block)


def _draw_first_page(pdf, page_w, page_h):
    """Draw page 1: title, intro, two text columns, vector shapes, and a table."""
    # Title.
    pdf.setFont("Helvetica-Bold", 20)
    pdf.drawString(60, page_h - 70, "Docling Parse Advanced PDF Parsing Tutorial")

    # Intro paragraph; the text object picks up the canvas font set just above it.
    pdf.setFont("Helvetica", 11)
    intro = (
        "This generated document is designed for testing text extraction, coordinate parsing, "
        "line grouping, vector path detection, bitmap resources, and layout-aware reconstruction."
    )
    intro_block = pdf.beginText(60, page_h - 105)
    intro_block.setLeading(15)
    for wrapped in textwrap.wrap(intro, width=90):
        intro_block.textLine(wrapped)
    pdf.drawText(intro_block)

    # Section 1: two side-by-side paragraphs sharing the same top y.
    pdf.setFont("Helvetica-Bold", 14)
    pdf.drawString(60, page_h - 170, "1. Two-column text region")
    left_para = (
        "The left column contains compact explanatory text. A parser should expose words, "
        "characters, and line-level cells along with coordinates. These coordinates allow us "
        "to reconstruct reading order and inspect the spatial structure of a page."
    )
    right_para = (
        "The right column contains a separate paragraph. In document AI pipelines, layout "
        "features are useful for retrieval, table extraction, chunking, and downstream RAG "
        "applications where page position can matter."
    )
    columns_top = page_h - 200
    for column_x, paragraph in ((60, left_para), (325, right_para)):
        _draw_wrapped_block(pdf, column_x, columns_top, paragraph, 42)

    # Blue frames, one around each column.
    pdf.setStrokeColor(colors.darkblue)
    pdf.setLineWidth(2)
    for frame_x in (55, 320):
        pdf.rect(frame_x, page_h - 315, 225, 130, stroke=1, fill=0)

    # Extra vector content: a green circle and a diagonal line.
    pdf.setStrokeColor(colors.darkgreen)
    pdf.setLineWidth(3)
    pdf.circle(140, page_h - 390, 40, stroke=1, fill=0)
    pdf.line(220, page_h - 430, 310, page_h - 355)

    # Section 2: a gridded table with a grey, bold header row.
    pdf.setFont("Helvetica-Bold", 14)
    pdf.setFillColor(colors.black)
    pdf.drawString(60, page_h - 470, "2. Simple table-like structure")
    rows = [
        ["Section", "Signal", "Expected parser behavior"],
        ["Text", "Words and lines", "Return text cells with coordinates"],
        ["Vector", "Boxes and lines", "Expose page path/vector resources"],
        ["Bitmap", "Embedded image", "Expose or render image resources"],
    ]
    grid_style = TableStyle([
        ("BACKGROUND", (0, 0), (-1, 0), colors.lightgrey),
        ("GRID", (0, 0), (-1, -1), 0.7, colors.black),
        ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
        ("FONTSIZE", (0, 0), (-1, -1), 9),
        ("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
    ])
    grid = Table(rows, colWidths=[100, 130, 260])
    grid.setStyle(grid_style)
    grid.wrapOn(pdf, page_w, page_h)
    grid.drawOn(pdf, 60, page_h - 590)

    # Footer.
    pdf.setFont("Helvetica", 9)
    pdf.drawString(60, 55, "Page 1: generated programmatic PDF with text, table-like layout, and vector paths.")


def _draw_second_page(pdf, page_h):
    """Draw page 2: title, four repeated text blocks, the framed bitmap, and captions."""
    # Title.
    pdf.setFont("Helvetica-Bold", 18)
    pdf.drawString(60, page_h - 70, "Page 2: Bitmap, Dense Text, and Reading Order")

    pdf.setFont("Helvetica", 10)
    dense = (
        "This page includes an embedded bitmap image and several short blocks of text. "
        "We use it to test whether rendering works, whether the parser preserves page-level "
        "coordinates, and whether our own reconstruction logic can group words into lines."
    )
    # Four numbered copies of the same paragraph, each starting 70 pt lower.
    # NOTE(review): the last block starts at page_h - 315, the same y as the
    # top edge of the frame drawn below (page_h - 525 + 210), so its text
    # shares space with the frame/image area -- confirm this is intended.
    block_y = page_h - 105
    for number in range(1, 5):
        _draw_wrapped_block(pdf, 60, block_y, f"Block {number}: {dense}", 92)
        block_y -= 70

    # Embedded bitmap read from DEMO_IMAGE_PATH, with a red rounded frame around it.
    pdf.drawImage(
        str(DEMO_IMAGE_PATH),
        110,
        page_h - 510,
        width=320,
        height=180,
        preserveAspectRatio=True,
    )
    pdf.setStrokeColor(colors.red)
    pdf.setLineWidth(2)
    pdf.roundRect(95, page_h - 525, 350, 210, 10, stroke=1, fill=0)

    # Caption under the frame.
    pdf.setFillColor(colors.black)
    pdf.setFont("Helvetica-Bold", 12)
    pdf.drawString(60, page_h - 570, "Coordinate-aware extraction lets us keep page, text, and position together.")

    # Footer.
    pdf.setFont("Helvetica", 9)
    pdf.drawString(60, 55, "Page 2: embedded bitmap image and multiple text blocks.")


def build_pdf(pdf_path):
    """Write a two-page A4 demo PDF to ``pdf_path``.

    Page 1 carries a title, an intro paragraph, two framed text columns,
    a circle and a line, and a small table. Page 2 carries four text blocks,
    the bitmap stored at ``DEMO_IMAGE_PATH`` inside a rounded frame, and
    captions. The image file must already exist when this is called.
    """
    pdf = canvas.Canvas(str(pdf_path), pagesize=A4)
    page_w, page_h = A4

    _draw_first_page(pdf, page_w, page_h)
    pdf.showPage()  # finish page 1; everything after this lands on page 2
    _draw_second_page(pdf, page_h)

    pdf.save()
# Generate the demo PDF at PDF_PATH and report where it was written.
build_pdf(PDF_PATH)
print("Created PDF:", PDF_PATH)