TextMap.vb
  1. ''
  2. '' This code is part of Document Solutions for PDF demos.
  3. '' Copyright (c) MESCIUS inc. All rights reserved.
  4. ''
  5. Imports System.IO
  6. Imports System.Drawing
  7. Imports System.Numerics
  8. Imports System.Collections.Generic
  9. Imports System.Linq
  10. Imports GrapeCity.Documents.Text
  11. Imports GrapeCity.Documents.Drawing
  12. Imports GrapeCity.Documents.Pdf
  13. Imports GrapeCity.Documents.Pdf.Annotations
  14. Imports GrapeCity.Documents.Pdf.Graphics
  15. Imports GrapeCity.Documents.Pdf.TextMap
  16.  
  17. '' This sample shows how to use the text map for a page in a PDF
  18. '' to find geometric positions of text lines on the page,
  19. '' and to locate the text at a specific position.
  20. '' The PDF used in this sample was created by TimeSheet.
  Public Class TextMap
      '' Builds the demo PDF and writes it to 'stream'.
      ''
      '' The method loads Resources/PDFs/TimeSheet.pdf into a temporary document,
      '' gets the text map of its first page, and then:
      ''  - prints the text found between two known locations (via HitTest),
      ''  - prints every text line fragment in the map with its coordinates,
      ''  - appends the loaded document to the output for reference.
      ''
      '' Parameters:
      ''   stream - the stream that receives the generated PDF.
      '' Returns:
      ''   the number of pages in the generated document.
      Function CreatePDF(ByVal stream As Stream) As Integer
          Dim doc = New GcPdfDocument()
          Dim page = doc.NewPage()

          '' The value returned by AddNote is used below to position the
          '' report text (its Left and Bottom drive the margins).
          Dim rc = Util.AddNote(
              "This sample loads the PDF created by the TimeSheet sample into a temporary GcPdfDocument, " +
              "gets the text map for the first page, and prints out the coordinates and texts of all " +
              "line fragments in the map. " +
              "It also uses the map's HitTest method to find the text at specific coordinates in the PDF " +
              "and prints the result. " +
              "The original TimeSheet.pdf used by this sample (consisting of 1 page) is appended for reference.",
              page)

          '' Setup text formatting and layout:
          '' 'tf' is used for labels, 'tfFound' for the texts extracted from the PDF.
          Dim tf = New TextFormat() With
          {
              .Font = StandardFonts.Times,
              .FontSize = 13
          }
          Dim tfFound = New TextFormat() With
          {
              .Font = StandardFonts.TimesBold,
              .FontSize = 14,
              .ForeColor = Color.DarkBlue
          }
          Dim tl = New TextLayout(72) With
          {
              .MaxWidth = doc.PageSize.Width,
              .MaxHeight = doc.PageSize.Height,
              .MarginAll = rc.Left,
              .MarginTop = rc.Bottom + 36,
              .TabStops = New List(Of TabStop)() From {New TabStop(72 * 2)}
          }
          '' Options used when the report text has to be split across pages;
          '' continuation pages use rc.Left as their top margin.
          Dim tso = New TextSplitOptions(tl) With
          {
              .MinLinesInFirstParagraph = 2,
              .MinLinesInLastParagraph = 2,
              .RestMarginTop = rc.Left
          }

          '' Open an arbitrary PDF, load it into a temp document and use the map to find some texts:
          Using fs = New FileStream(Path.Combine("Resources", "PDFs", "TimeSheet.pdf"), FileMode.Open, FileAccess.Read)
              Dim doc1 = New GcPdfDocument()
              doc1.Load(fs)
              Dim tmap = doc1.Pages(0).GetTextMap()

              '' We retrieve the text at a specific (known to us) geometric location on the page.
              '' (tx0, ty0) and (tx1, ty1) are the two corners of the search rectangle; the values
              '' are printed below with inch marks and are scaled by 72 before hit-testing.
              Dim tx0 = 2.1F, ty0 = 3.37F, tx1 = 3.1F, ty1 = 3.5F
              Dim htiFrom = tmap.HitTest(tx0 * 72, ty0 * 72)
              '' The end point must use tx1 as its X coordinate (not ty0), otherwise the
              '' extracted range does not correspond to the rectangle reported below.
              Dim htiTo = tmap.HitTest(tx1 * 72, ty1 * 72)
              Dim range1 As TextMapFragment = Nothing, text1 As String = Nothing
              tmap.GetFragment(htiFrom.Pos, htiTo.Pos, range1, text1)
              '' Note: the format specifier must follow the colon immediately. A leading space
              '' (": f2") makes it a custom format string that prints " f2" instead of the value.
              tl.AppendLine($"Looked for text inside rectangle x={tx0:F2}"", y = {ty0:F2}"", width={tx1 - tx0:F2}"", height = {ty1 - ty0:F2}"", found:", tf)
              tl.AppendLine(text1, tfFound)
              tl.AppendLine()

              '' Get all text fragments and their locations on the page:
              tl.AppendLine("List of all texts found on the page", tf)
              Dim range As TextMapFragment = Nothing, text As String = Nothing
              tmap.GetFragment(range, text)
              For Each tlf In range
                  Dim coords = tmap.GetCoords(tlf)
                  '' Coordinates are divided by 72 so that they are printed in the same
                  '' units (with inch marks) as the search rectangle above.
                  tl.Append($"Text at ({coords.B.X / 72:F2}"", {coords.B.Y / 72:F2}""):{vbTab}", tf)
                  tl.AppendLine(tmap.GetText(tlf), tfFound)
              Next

              '' Print the results, adding pages until all of the text has been rendered:
              tl.PerformLayout(True)
              While True
                  '' 'rest' will accept the text that did not fit:
                  Dim rest As TextLayout = Nothing
                  Dim splitResult = tl.Split(tso, rest)
                  doc.Pages.Last.Graphics.DrawTextLayout(tl, PointF.Empty)
                  If splitResult <> SplitResult.Split Then
                      Exit While
                  End If
                  '' Continue with the overflow on a fresh page:
                  tl = rest
                  doc.NewPage()
              End While

              '' Append the original document for reference:
              doc.MergeWithDocument(doc1, New MergeDocumentOptions())

              '' Done. The document is saved inside the Using block, i.e. while the
              '' file stream that doc1 was loaded from is still open.
              doc.Save(stream)
          End Using
          Return doc.Pages.Count
      End Function
  End Class
  111.