ReadTagsToOutlines.cs
- //
- // This code is part of Document Solutions for PDF demos.
- // Copyright (c) MESCIUS inc. All rights reserved.
- //
- using System;
- using System.IO;
- using System.Drawing;
- using System.Linq;
- using System.Collections.Generic;
- using GrapeCity.Documents.Pdf;
- using GrapeCity.Documents.Text;
- using GrapeCity.Documents.Pdf.TextMap;
- using GrapeCity.Documents.Pdf.Structure;
- using GrapeCity.Documents.Pdf.Recognition.Structure;
-
namespace DsPdfWeb.Demos
{
    // Builds document outlines (bookmarks) from the heading structure tags
    // (H1, H2, H3 etc.) found in a tagged PDF.
    public class ReadTagsToOutlines
    {
        // Loads the sample PDF, adds an outline node for every heading element
        // that is a direct child of the first top-level structure element,
        // saves the resulting document to 'stream' and returns its page count.
        public int CreatePDF(Stream stream)
        {
            var doc = new GcPdfDocument();
            using var s = File.OpenRead(Path.Combine("Resources", "PDFs", "C1Olap-QuickStart.pdf"));
            doc.Load(s);

            // Get the LogicalStructure and top parent element:
            LogicalStructure ls = doc.GetLogicalStructure();
            Element root = ls.Elements[0];

            // Chain of outline nodes that are currently "open", innermost on top,
            // each paired with the heading level it was created from. Tracking the
            // actual heading level (rather than just the nesting depth) keeps the
            // tree correct when the headings skip levels in either direction,
            // e.g. H1, H2, H3, H1 or H1, H3, H3.
            var openHeadings = new Stack<(int Level, OutlineNode Node)>();

            // Iterate over elements and select all heading elements (H1, H2, H3 etc.):
            foreach (Element e in root.Children)
            {
                // Note: topmost level is 1:
                if (!TryGetHeadingLevel(e.StructElement.Type, out int headingLevel))
                    continue;
                // Get the element text:
                string text = e.GetText();
                // Find the target page; headings without one are skipped:
                var page = FindPage(e.StructElement);
                if (page == null)
                    continue;

                // Close all headings of the same or a deeper level, so that
                // the innermost remaining one (if any) is the parent:
                while (openHeadings.Count > 0 && openHeadings.Peek().Level >= headingLevel)
                    openHeadings.Pop();

                // With no open parent the node goes to the document's top-level outlines.
                // This also covers a document whose first heading is not H1:
                OutlineNodeCollection siblings = openHeadings.Count > 0
                    ? openHeadings.Peek().Node.Children
                    : doc.Outlines;

                var o = new OutlineNode(text, new DestinationFit(page));
                siblings.Add(o);
                openHeadings.Push((headingLevel, o));
            }
            doc.Save(stream);
            return doc.Pages.Count;
        }

        // Parses a structure element type of the form "H<n>" with n >= 1.
        // Returns true and sets 'level' to n on success; otherwise returns false
        // (this includes null/empty types and a bare "H" without a number).
        private static bool TryGetHeadingLevel(string type, out int level)
        {
            level = 0;
            return !string.IsNullOrEmpty(type)
                && type.StartsWith("H", StringComparison.Ordinal)
                && int.TryParse(type.Substring(1), out level)
                && level >= 1;
        }

        // Returns the DefaultPage of 'se' if it is set; otherwise searches the
        // children depth-first and returns the first page found, or null if
        // neither the element nor any of its descendants has a DefaultPage.
        private static Page FindPage(StructElement se)
        {
            if (se.DefaultPage != null)
                return se.DefaultPage;
            if (se.HasChildren)
            {
                foreach (var child in se.Children)
                {
                    var p = FindPage(child);
                    if (p != null)
                        return p;
                }
            }
            return null;
        }
    }
}
-