TextMap.cs
  1. //
  2. // This code is part of Document Solutions for PDF demos.
  3. // Copyright (c) MESCIUS inc. All rights reserved.
  4. //
  5. using System;
  6. using System.IO;
  7. using System.Drawing;
  8. using System.Numerics;
  9. using System.Collections.Generic;
  10. using System.Linq;
  11. using GrapeCity.Documents.Text;
  12. using GrapeCity.Documents.Drawing;
  13. using GrapeCity.Documents.Pdf;
  14. using GrapeCity.Documents.Pdf.Annotations;
  15. using GrapeCity.Documents.Pdf.Graphics;
  16. using GrapeCity.Documents.Pdf.TextMap;
  17.  
  18. namespace DsPdfWeb.Demos
  19. {
  // Demonstrates the text map of a PDF page: the map is used to list the
  // geometric positions of the text line fragments found on the page,
  // and to fetch the text located between two known points.
  // The PDF used in this sample was created by TimeSheet.
  public class TextMap
  {
      // Builds the sample PDF and saves it to 'stream'.
      // The result starts with a note and the report text, followed by
      // the pages of the loaded TimeSheet.pdf.
      // Returns the number of pages in the saved document.
      public int CreatePDF(Stream stream)
      {
          // Factor between the inch values printed in the report and the
          // page coordinates exchanged with the text map.
          const int PointsPerInch = 72;

          var doc = new GcPdfDocument();
          var notePage = doc.NewPage();

          var noteBounds = Common.Util.AddNote(
              "This sample loads the PDF created by the TimeSheet sample into a temporary GcPdfDocument, " +
              "gets the text map for the first page, and prints out the coordinates and texts of all " +
              "line fragments in the map. " +
              "It also uses the map's HitTest method to find the text at specific coordinates in the PDF " +
              "and prints the result. " +
              "The original TimeSheet.pdf used by this sample (consisting of 1 page) is appended for reference.",
              notePage);

          // Formats for the descriptive text and for the texts found via the map:
          var plainFormat = CreatePlainFormat();
          var foundFormat = CreateFoundFormat();

          // The report layout spans the page and starts below the note;
          // MarginTop is assigned after MarginAll so that it overrides the top margin only.
          var layout = new TextLayout(72)
          {
              MaxWidth = doc.PageSize.Width,
              MaxHeight = doc.PageSize.Height,
              MarginAll = noteBounds.Left,
              MarginTop = noteBounds.Bottom + 36,
              TabStops = new List<TabStop>() { new TabStop(PointsPerInch * 2) },
          };
          var splitOptions = new TextSplitOptions(layout)
          {
              MinLinesInFirstParagraph = 2,
              MinLinesInLastParagraph = 2,
              RestMarginTop = noteBounds.Left,
          };

          // Load the source PDF into a temporary document and get the text map of its first page.
          // The file stream is kept open until this method returns, i.e. past the final Save():
          using var sourceFile = File.OpenRead(Path.Combine("Resources", "PDFs", "TimeSheet.pdf"));
          var sourceDoc = new GcPdfDocument();
          sourceDoc.Load(sourceFile);
          var map = sourceDoc.Pages[0].GetTextMap();

          // Fetch the text between two points (given in inches) that are known to us in advance:
          float left = 2.1f, top = 3.37f, right = 3.1f, bottom = 3.4f;
          var hitStart = map.HitTest(left * PointsPerInch, top * PointsPerInch);
          var hitEnd = map.HitTest(right * PointsPerInch, bottom * PointsPerInch);
          map.GetFragment(hitStart.Pos, hitEnd.Pos, out TextMapFragment _, out string hitText);
          layout.AppendLine($"Looked for text in rectangle x={left:F2}\", y={top:F2}\", width={right - left:F2}\", height={bottom - top:F2}\", found:", plainFormat);
          layout.AppendLine(hitText, foundFormat);
          layout.AppendLine();

          // List every line fragment of the page together with the position of its 'B' corner:
          layout.AppendLine("List of all texts found on the page", plainFormat);
          map.GetFragment(out TextMapFragment allLines, out string _);
          foreach (TextLineFragment line in allLines)
          {
              var quad = map.GetCoords(line);
              layout.Append($"Text at ({quad.B.X / PointsPerInch:F2}\",{quad.B.Y / PointsPerInch:F2}\"):\t", plainFormat);
              layout.AppendLine(map.GetText(line), foundFormat);
          }

          // Render the collected text, adding pages as needed:
          layout.PerformLayout(true);
          DrawAcrossPages(doc, layout, splitOptions);

          // Append the original document for reference:
          doc.MergeWithDocument(sourceDoc, new MergeDocumentOptions());

          // Done:
          doc.Save(stream);
          return doc.Pages.Count;
      }

      // Format used for the descriptive parts of the report.
      private static TextFormat CreatePlainFormat()
      {
          return new TextFormat()
          {
              Font = StandardFonts.Times,
              FontSize = 13
          };
      }

      // Format used to show the texts retrieved from the text map.
      private static TextFormat CreateFoundFormat()
      {
          return new TextFormat()
          {
              Font = StandardFonts.TimesBold,
              FontSize = 14,
              ForeColor = Color.DarkBlue
          };
      }

      // Draws 'layout' on the last page of 'doc'. While Split() reports that
      // some text did not fit, a new page is added and the remainder is drawn on it.
      private static void DrawAcrossPages(GcPdfDocument doc, TextLayout layout, TextSplitOptions splitOptions)
      {
          TextLayout current = layout;
          SplitResult outcome;
          do
          {
              // 'remainder' receives the text that did not fit into 'current':
              outcome = current.Split(splitOptions, out TextLayout remainder);
              doc.Pages.Last.Graphics.DrawTextLayout(current, PointF.Empty);
              if (outcome == SplitResult.Split)
              {
                  current = remainder;
                  doc.NewPage();
              }
          }
          while (outcome == SplitResult.Split);
      }
  }
  112. }
  113.