ReadTagsShowParas.cs
  1. //
  2. // This code is part of Document Solutions for PDF demos.
  3. // Copyright (c) MESCIUS inc. All rights reserved.
  4. //
  5. using System;
  6. using System.IO;
  7. using System.Drawing;
  8. using System.Linq;
  9. using System.Collections.Generic;
  10. using GrapeCity.Documents.Pdf;
  11. using GrapeCity.Documents.Text;
  12. using GrapeCity.Documents.Pdf.TextMap;
  13. using GrapeCity.Documents.Pdf.Annotations;
  14. using GrapeCity.Documents.Pdf.Structure;
  15. using GrapeCity.Documents.Pdf.Recognition.Structure;
  16.  
  17. namespace DsPdfWeb.Demos
  18. {
  /// <summary>
  /// Demo: loads a PDF with structure tags, keeps only its leading pages, then outlines every
  /// paragraph reachable through the document's logical structure and creates a text
  /// annotation (sticky note) carrying that paragraph's text.
  /// </summary>
  public class ReadTagsShowParas
  {
      /// <summary>
      /// Builds the demo PDF and writes it to <paramref name="stream"/>.
      /// </summary>
      /// <param name="stream">Destination stream that receives the saved document.</param>
      /// <returns>The number of pages in the saved document.</returns>
      public int CreatePDF(Stream stream)
      {
          // Pages with an index at or beyond this value are dropped from the loaded document.
          const int pagesToKeep = 5;

          var user = "DsPdfWeb Demo";

          // Magenta with alpha 64, used to outline each paragraph.
          var outlineColor = Color.FromArgb(64, Color.Magenta);

          var doc = new GcPdfDocument();
          // The source stream is declared with 'using var', so it stays open until this
          // method returns, i.e. until after the document has been saved.
          using var s = File.OpenRead(Path.Combine("Resources", "PDFs", "C1Olap-QuickStart.pdf"));
          doc.Load(s);

          // Step 1: walk the pages from the end, dropping each surplus page together with
          // the structure nodes whose default page it is.
          for (int pageIndex = doc.Pages.Count - 1; pageIndex >= pagesToKeep; --pageIndex)
          {
              var surplusPage = doc.Pages[pageIndex];
              PruneTagsForPage(doc.StructTreeRoot.Children, surplusPage);
              doc.Pages.RemoveAt(pageIndex);
          }

          // Step 2: fetch the logical structure and mark up every paragraph found in it.
          LogicalStructure structure = doc.GetLogicalStructure();
          MarkParagraphs(structure.Elements);

          // Done:
          doc.Save(stream);
          return doc.Pages.Count;

          // Removes from 'nodes' every structure element whose DefaultPage is 'page';
          // elements that stay are searched recursively through their Children.
          void PruneTagsForPage(StructElementCollection nodes, Page page)
          {
              // Iterate backwards so that RemoveAt does not shift the indices still to visit.
              int idx = nodes.Count;
              while (--idx >= 0)
              {
                  var node = nodes[idx];
                  if (node.DefaultPage == page)
                  {
                      nodes.RemoveAt(idx);
                  }
                  else
                  {
                      PruneTagsForPage(node.Children, page);
                  }
              }
          }

          // Depth-first walk over the logical structure: for each content item that yields
          // a paragraph, create a note for it and outline the paragraph on its page.
          void MarkParagraphs(IReadOnlyList<Element> elements)
          {
              foreach (var element in elements)
              {
                  if (element.HasContentItems)
                  {
                      foreach (var item in element.ContentItems)
                      {
                          if (!(item is ContentItem content))
                          {
                              continue;
                          }

                          var para = content.GetParagraph();
                          if (para == null)
                          {
                              continue;
                          }

                          // Note rectangle: start from the paragraph's rectangle, shift it
                          // right by its own width, then shrink it to a fixed 16 x 12 size.
                          var noteRect = para.GetCoords().ToRect();
                          noteRect.Offset(noteRect.Width, 0);
                          noteRect.Size = new SizeF(16, 12);

                          // NOTE(review): the annotation is never referenced again; assigning
                          // Page presumably is what attaches it to the page - confirm against
                          // the DsPdf API documentation.
                          var note = new TextAnnotation()
                          {
                              UserName = user,
                              Rect = noteRect,
                              Page = content.Page,
                              Contents = para.GetText(),
                              Color = Color.Yellow,
                          };

                          content.Page.Graphics.DrawPolygon(para.GetCoords(), outlineColor, 1, null);
                      }
                  }

                  if (element.HasChildren)
                  {
                      MarkParagraphs(element.Children);
                  }
              }
          }
      }
  }
  92. }
  93.