TextMap.vb
- ''
- '' This code is part of Document Solutions for PDF demos.
- '' Copyright (c) MESCIUS inc. All rights reserved.
- ''
- Imports System.IO
- Imports System.Drawing
- Imports System.Numerics
- Imports System.Collections.Generic
- Imports System.Linq
- Imports GrapeCity.Documents.Text
- Imports GrapeCity.Documents.Drawing
- Imports GrapeCity.Documents.Pdf
- Imports GrapeCity.Documents.Pdf.Annotations
- Imports GrapeCity.Documents.Pdf.Graphics
- Imports GrapeCity.Documents.Pdf.TextMap
-
- '' This sample shows how to use the text map for a page in a PDF
- '' to find geometric positions of text lines on the page,
- '' and to locate the text at a specific position.
- '' The PDF used in this sample was created by TimeSheet.
Public Class TextMap
    '' Builds the demo PDF and writes it to 'stream'.
    ''
    '' The method:
    ''  1. creates a new document with an explanatory note on its first page;
    ''  2. loads Resources/PDFs/TimeSheet.pdf into a temporary document and
    ''     gets the text map of its first page;
    ''  3. uses the map's HitTest method to fetch the text between two known points;
    ''  4. lists every fragment in the map along with its coordinates;
    ''  5. renders the collected text (splitting it over as many pages as needed),
    ''     appends the loaded PDF for reference, and saves the result.
    ''
    '' Parameters:
    ''   stream - the stream that receives the generated PDF.
    '' Returns:
    ''   the number of pages in the generated document.
    Function CreatePDF(ByVal stream As Stream) As Integer
        Dim doc = New GcPdfDocument()
        Dim page = doc.NewPage()

        Dim rc = Util.AddNote(
            "This sample loads the PDF created by the TimeSheet sample into a temporary GcPdfDocument, " +
            "gets the text map for the first page, and prints out the coordinates and texts of all " +
            "line fragments in the map. " +
            "It also uses the map's HitTest method to find the text at specific coordinates in the PDF " +
            "and prints the result. " +
            "The original TimeSheet.pdf used by this sample (consisting of 1 page) is appended for reference.",
            page)

        '' Setup text formatting and layout.
        '' 'tf' is used for captions, 'tfFound' highlights the texts found via the map:
        Dim tf = New TextFormat() With
        {
            .Font = StandardFonts.Times,
            .FontSize = 13
        }
        Dim tfFound = New TextFormat() With
        {
            .Font = StandardFonts.TimesBold,
            .FontSize = 14,
            .ForeColor = Color.DarkBlue
        }
        '' The layout spans the whole page; its top margin is set 36 units
        '' below the bottom of the note rectangle so the output starts under the note:
        Dim tl = New TextLayout(72) With
        {
            .MaxWidth = doc.PageSize.Width,
            .MaxHeight = doc.PageSize.Height,
            .MarginAll = rc.Left,
            .MarginTop = rc.Bottom + 36,
            .TabStops = New List(Of TabStop)() From {New TabStop(72 * 2)}
        }
        '' On continuation pages there is no note, so the top margin
        '' falls back to the same value as the other margins:
        Dim tso = New TextSplitOptions(tl) With
        {
            .MinLinesInFirstParagraph = 2,
            .MinLinesInLastParagraph = 2,
            .RestMarginTop = rc.Left
        }

        '' Open an arbitrary PDF, load it into a temp document and use the map to find some texts.
        '' The file stream stays open until the result is saved, since 'doc1' is merged into 'doc' below:
        Using fs = New FileStream(Path.Combine("Resources", "PDFs", "TimeSheet.pdf"), FileMode.Open, FileAccess.Read)
            Dim doc1 = New GcPdfDocument()
            doc1.Load(fs)
            Dim tmap = doc1.Pages(0).GetTextMap()

            '' We retrieve the text at a specific (known to us) geometric location on the page.
            '' (tx0, ty0) is the top left and (tx1, ty1) the bottom right corner of the
            '' rectangle, in inches; the values are multiplied by 72 for the HitTest calls:
            Dim tx0 = 2.1F, ty0 = 3.37F, tx1 = 3.1F, ty1 = 3.5F
            Dim htiFrom = tmap.HitTest(tx0 * 72, ty0 * 72)
            '' The second hit test must use the rectangle's right X (tx1), not its top Y:
            Dim htiTo = tmap.HitTest(tx1 * 72, ty1 * 72)
            Dim range1 As TextMapFragment = Nothing, text1 As String = Nothing
            tmap.GetFragment(htiFrom.Pos, htiTo.Pos, range1, text1)
            '' Note: no space is allowed between ':' and the format specifier, otherwise
            '' the specifier is treated as a custom format and printed literally:
            tl.AppendLine($"Looked for text inside rectangle x={tx0:F2}"", y = {ty0:F2}"", width={tx1 - tx0:F2}"", height = {ty1 - ty0:F2}"", found:", tf)
            tl.AppendLine(text1, tfFound)
            tl.AppendLine()

            '' Get all text fragments and their locations on the page:
            tl.AppendLine("List of all texts found on the page", tf)
            Dim range As TextMapFragment = Nothing, text As String = Nothing
            tmap.GetFragment(range, text)
            For Each tlf In range
                Dim coords = tmap.GetCoords(tlf)
                '' Coordinates are divided by 72 to print them in inches:
                tl.Append($"Text at ({coords.B.X / 72:F2}"", {coords.B.Y / 72:F2}""):{vbTab}", tf)
                tl.AppendLine(tmap.GetText(tlf), tfFound)
            Next

            '' Print the results, adding pages while there is text that did not fit:
            tl.PerformLayout(True)
            While True
                '' 'rest' will accept the text that did not fit:
                Dim rest As TextLayout = Nothing
                Dim splitResult = tl.Split(tso, rest)
                doc.Pages.Last.Graphics.DrawTextLayout(tl, PointF.Empty)
                If splitResult <> SplitResult.Split Then
                    Exit While
                End If
                '' Continue with the remainder on a fresh page:
                tl = rest
                doc.NewPage()
            End While

            '' Append the original document for reference:
            doc.MergeWithDocument(doc1, New MergeDocumentOptions())

            '' Done:
            doc.Save(stream)
        End Using
        Return doc.Pages.Count
    End Function
End Class
-