GetTableData.cs
  1. //
  2. // This code is part of Document Solutions for PDF demos.
  3. // Copyright (c) MESCIUS inc. All rights reserved.
  4. //
  5. using System;
  6. using System.IO;
  7. using System.Drawing;
  8. using System.Collections.Generic;
  9. using GrapeCity.Documents.Pdf;
  10. using GrapeCity.Documents.Pdf.Recognition;
  11. using GrapeCity.Documents.Text;
  12. using GrapeCity.Documents.Common;
  13. using GCTEXT = GrapeCity.Documents.Text;
  14. using GCDRAW = GrapeCity.Documents.Drawing;
  15.  
  16. namespace DsPdfWeb.Demos
  17. {
  /// <summary>
  /// Sample that reads a table out of an existing PDF (a sample invoice),
  /// prints the extracted rows and cells as text into a new PDF, and then
  /// appends the source invoice to that PDF for reference.
  /// </summary>
  public class GetTableData
  {
      /// <summary>
      /// Loads a font file from the "Resources/Fonts" folder.
      /// </summary>
      /// <param name="fileName">File name of the font, e.g. "segoeui.ttf".</param>
      /// <returns>The font returned by <c>GCTEXT.Font.FromFile</c>.</returns>
      private static GCTEXT.Font LoadFont(string fileName)
      {
          return GCTEXT.Font.FromFile(Path.Combine("Resources", "Fonts", fileName));
      }

      /// <summary>
      /// Builds the sample PDF and saves it to <paramref name="stream"/>.
      /// </summary>
      /// <param name="stream">Destination that the generated PDF is saved to.</param>
      /// <returns>The page count of the generated document (read after the source PDF has been merged in).</returns>
      public int CreatePDF(Stream stream)
      {
          const float DPI = 72;
          const float margin = 36;
          var doc = new GcPdfDocument();

          // Three text formats: regular body text, bold blue headers, and red for problems.
          var bodyFormat = new TextFormat()
          {
              Font = LoadFont("segoeui.ttf"),
              FontSize = 9,
              ForeColor = Color.Black
          };
          var headerFormat = new TextFormat(bodyFormat)
          {
              Font = LoadFont("segoeuib.ttf"),
              FontSize = 11,
              ForeColor = Color.DarkBlue
          };
          var errorFormat = new TextFormat(bodyFormat) { ForeColor = Color.Red };

          // NOTE(review): the source stream is kept open until after Save();
          // presumably the loaded document still reads from it - confirm before narrowing this scope.
          using (var source = File.OpenRead(Path.Combine("Resources", "PDFs", "zugferd-invoice.pdf")))
          {
              // Approximate area of the table on the source page, expressed in multiples of DPI.
              var tableArea = new RectangleF(0, 3 * DPI, 8.5f * DPI, 3.75f * DPI);

              var firstPage = doc.NewPage();
              firstPage.Landscape = true;
              var graphics = firstPage.Graphics;

              // Explanatory note at the top of the first page, inset by the margin on all sides.
              const string noteText =
                  "This sample loads a PDF that contains a table (a sample invoice), " +
                  "and extracts the table from the PDF using the Page.GetTable() method. " +
                  "The extracted data is printed as a list of rows and cells. " +
                  "The sample invoice with the table is appended to the generated PDF for reference.";
              var noteArea = new RectangleF(
                  margin,
                  margin,
                  firstPage.Bounds.Width - margin * 2,
                  firstPage.Bounds.Height - margin * 2);
              var noteRect = Common.Util.AddNote(noteText, firstPage, noteArea);

              // The text layout spans the whole page; its top margin starts below the note.
              var layout = graphics.CreateTextLayout();
              layout.MaxWidth = firstPage.Bounds.Width;
              layout.MaxHeight = firstPage.Bounds.Height;
              layout.MarginAll = margin;
              layout.MarginTop = noteRect.Bottom;
              layout.DefaultTabStops = 150;
              layout.LineSpacingScaleFactor = 1.2f;

              var sourceDoc = new GcPdfDocument();
              sourceDoc.Load(source);

              // Ask the first page of the source PDF for a table within the given area.
              var table = sourceDoc.Pages[0].GetTable(tableArea);

              if (table != null)
              {
                  var columnCount = table.Cols.Count;
                  var rowCount = table.Rows.Count;

                  layout.Append($"\nThe table has {columnCount} column(s) and {rowCount} row(s), table data is:", headerFormat);
                  layout.AppendParagraphBreak();

                  for (int r = 0; r < rowCount; ++r)
                  {
                      // The first row is printed with the header format.
                      TextFormat rowFormat;
                      if (r == 0)
                      {
                          rowFormat = headerFormat;
                      }
                      else
                      {
                          rowFormat = bodyFormat;
                      }

                      for (int c = 0; c < columnCount; ++c)
                      {
                          var cell = table.GetCell(r, c);

                          // Cells within a row are separated by tabs.
                          if (c > 0)
                          {
                              layout.Append("\t", rowFormat);
                          }

                          if (cell != null)
                          {
                              layout.Append(cell.Text, rowFormat);
                          }
                          else
                          {
                              layout.Append("<no cell>", errorFormat);
                          }
                      }
                      layout.AppendLine();
                  }
              }
              else
              {
                  layout.AppendLine($"No table was found at the specified coordinates.", errorFormat);
              }

              // Draw the text, adding a new landscape page for each part that did not fit.
              var splitOptions = new TextSplitOptions(layout)
              {
                  RestMarginTop = margin,
                  MinLinesInFirstParagraph = 2,
                  MinLinesInLastParagraph = 2
              };
              layout.PerformLayout(true);

              SplitResult outcome;
              do
              {
                  outcome = layout.Split(splitOptions, out TextLayout remainder);
                  doc.Pages.Last.Graphics.DrawTextLayout(layout, PointF.Empty);
                  if (outcome == SplitResult.Split)
                  {
                      layout = remainder;
                      doc.NewPage().Landscape = true;
                  }
              }
              while (outcome == SplitResult.Split);

              // Append the original document for reference:
              doc.MergeWithDocument(sourceDoc);

              doc.Save(stream);
              return doc.Pages.Count;
          }
      }
  }
  116. }
  117.