''
'' This code is part of Document Solutions for PDF demos.
'' Copyright (c) MESCIUS inc. All rights reserved.
''
Imports System.IO
Imports System.Drawing
Imports GrapeCity.Documents.Text
Imports GrapeCity.Documents.Pdf
Imports GCTEXT = GrapeCity.Documents.Text
Imports GCDRAW = GrapeCity.Documents.Drawing
'' This sample demonstrates how to extract text from an existing PDF.
'' It loads an arbitrary PDF into a temporary GcPdfDocument, then
'' retrieves text from each page of that document using the Page.GetText() method,
'' adds all those texts to a TextLayout and renders it into the current document.
'' An alternative to Page.GetText() is the method GcPdfDocument.GetText()
'' which retrieves the text from the whole document at once.
Public Class ExtractText
    '' Generates the sample PDF: writes an explanatory note on the first page,
    '' then extracts the text of every page of Resources/PDFs/Wetlands.pdf and
    '' renders those texts (each preceded by a caption) across as many pages
    '' as needed.
    ''
    '' stream: the stream the generated document is saved to.
    '' Returns the number of pages in the generated document.
    Function CreatePDF(ByVal stream As Stream) As Integer
        Dim doc = New GcPdfDocument()
        Dim firstPage = doc.NewPage()

        '' The note is drawn by the Util.AddNote helper (defined elsewhere in the project);
        '' the returned rectangle is used below to position the extracted text
        '' (presumably it is the area occupied by the note - confirm against Util).
        Dim noteText =
            "This sample loads an arbitrary PDF into a temporary GcPdfDocument, " +
            "then retrieves text from each page of the loaded document using the Page.GetText() method, " +
            "adds all those texts to a TextLayout and renders it into the current document. " +
            "An alternative to Page.GetText() is the method GcPdfDocument.GetText() " +
            "which retrieves the text from the whole document at once."
        Dim noteRect = Util.AddNote(noteText, firstPage)

        '' Format used for the "Text from page N" captions:
        Dim captionFormat = New TextFormat()
        captionFormat.Font = GCTEXT.Font.FromFile(Path.Combine("Resources", "Fonts", "yumin.ttf"))
        captionFormat.FontSize = 14
        captionFormat.ForeColor = Color.Blue

        '' Layout that receives all captions and page texts. It is sized to the page;
        '' MarginTop is assigned after MarginAll so that the first page's text
        '' starts 36 units below the bottom of the note rectangle.
        Dim layout = New TextLayout(72)
        With layout
            .DefaultFormat.Font = StandardFonts.Times
            .DefaultFormat.FontSize = 12
            .MaxWidth = doc.PageSize.Width
            .MaxHeight = doc.PageSize.Height
            .MarginAll = noteRect.Left
            .MarginTop = noteRect.Bottom + 36
        End With

        '' Split options for widow/orphan control. RestMarginTop is set to the
        '' same value as the side margins, so continuation layouts are not
        '' pushed down by the note's height.
        Dim splitOptions = New TextSplitOptions(layout)
        splitOptions.MinLinesInFirstParagraph = 2
        splitOptions.MinLinesInLastParagraph = 2
        splitOptions.RestMarginTop = noteRect.Left

        '' Load the source PDF into a temporary document; the file stays open
        '' until all rendering below has completed.
        Using pdfFile As New FileStream(Path.Combine("Resources", "PDFs", "Wetlands.pdf"), FileMode.Open, FileAccess.Read)
            Dim loaded = New GcPdfDocument()
            loaded.Load(pdfFile)

            '' Collect the text of each page of the loaded document, in page order:
            Dim pageTexts = New List(Of String)()
            For Each srcPage In loaded.Pages
                pageTexts.Add(srcPage.GetText())
            Next

            '' Append a caption (1-based page number) followed by the page's text:
            Dim pageNumber = 0
            For Each pageText In pageTexts
                pageNumber += 1
                layout.AppendLine(String.Format("Text from page {0} of the loaded document:", pageNumber), captionFormat)
                layout.AppendLine(pageText)
            Next
            layout.PerformLayout(True)

            '' Draw the layout on the last page; while Split reports that some text
            '' did not fit, continue with the remainder on a newly added page.
            Do
                '' 'remainder' will accept the text that did not fit:
                Dim remainder As TextLayout = Nothing
                Dim outcome = layout.Split(splitOptions, remainder)
                doc.Pages.Last.Graphics.DrawTextLayout(layout, PointF.Empty)
                If outcome = SplitResult.Split Then
                    layout = remainder
                    doc.NewPage()
                Else
                    Exit Do
                End If
            Loop
        End Using

        '' Done:
        doc.Save(stream)
        Return doc.Pages.Count
    End Function
End Class