ReadTagsToOutlines.cs
  1. //
  2. // This code is part of Document Solutions for PDF demos.
  3. // Copyright (c) MESCIUS inc. All rights reserved.
  4. //
  5. using System;
  6. using System.IO;
  7. using System.Drawing;
  8. using System.Linq;
  9. using System.Collections.Generic;
  10. using GrapeCity.Documents.Pdf;
  11. using GrapeCity.Documents.Text;
  12. using GrapeCity.Documents.Pdf.TextMap;
  13. using GrapeCity.Documents.Pdf.Structure;
  14. using GrapeCity.Documents.Pdf.Recognition.Structure;
  15.  
  16. namespace DsPdfWeb.Demos
  17. {
  18. // Find tables and read their data using structure tags.
  19. public class ReadTagsToOutlines
  20. {
  21. public int CreatePDF(Stream stream)
  22. {
  23. var doc = new GcPdfDocument();
  24. using var s = File.OpenRead(Path.Combine("Resources", "PDFs", "C1Olap-QuickStart.pdf"));
  25. doc.Load(s);
  26.  
  27. // Get the LogicalStructure and top parent element:
  28. LogicalStructure ls = doc.GetLogicalStructure();
  29. Element root = ls.Elements[0];
  30.  
  31. // Iterate over elements and select all heading elements (H1, H2, H3 etc.):
  32. OutlineNodeCollection outlines = doc.Outlines;
  33. int outlinesLevel = 1;
  34. foreach (Element e in root.Children)
  35. {
  36. string type = e.StructElement.Type;
  37. if (string.IsNullOrEmpty(type) || !type.StartsWith("H"))
  38. continue;
  39. // Note: topmost level is 1:
  40. if (!int.TryParse(type.Substring(1), out int headingLevel) || headingLevel < 1)
  41. continue;
  42. // Get the element text:
  43. string text = e.GetText();
  44. // Find the target page:
  45. var page = FindPage(e.StructElement);
  46. if (page != null)
  47. {
  48. var o = new OutlineNode(text, new DestinationFit(page));
  49. if (headingLevel > outlinesLevel)
  50. {
  51. ++outlinesLevel;
  52. outlines = outlines.Last().Children;
  53. }
  54. else if (headingLevel < outlinesLevel)
  55. {
  56. --outlinesLevel;
  57. var p = ((OutlineNode)outlines.Owner).Parent;
  58. outlines = p == null ? doc.Outlines : p.Children;
  59. }
  60. outlines.Add(o);
  61. }
  62. }
  63. doc.Save(stream);
  64. return doc.Pages.Count;
  65. }
  66.  
  67. private Page FindPage(StructElement se)
  68. {
  69. if (se.DefaultPage != null)
  70. return se.DefaultPage;
  71. if (se.HasChildren)
  72. foreach (var child in se.Children)
  73. {
  74. var p = FindPage(child);
  75. if (p != null)
  76. return p;
  77. }
  78. return null;
  79. }
  80. }
  81. }
  82.