''
'' This code is part of Document Solutions for PDF demos.
'' Copyright (c) MESCIUS inc. All rights reserved.
''
Imports System.IO
Imports System.Drawing
Imports GrapeCity.Documents.Pdf
Imports GrapeCity.Documents.Pdf.TextMap
Imports GrapeCity.Documents.Text
Imports GrapeCity.Documents.Common
Imports GrapeCity.Documents.Pdf.Annotations
'' This sample loads an existing PDF and, using a predefined list of key words,
'' builds an alphabetical index of those words linked to pages where they occur
'' in the document. The generated index pages are appended to the original document,
'' and saved in a new PDF.
'' The index is rendered in two balanced columns, using the technique
'' demonstrated in the BalancedColumns sample.
''
'' NOTE: if you download this sample and run it locally on your own system
'' without a valid DsPdf license, only the first five pages of the sample PDF
'' will be loaded, and the index will be generated for those five pages only.
Public Class WordIndex
'' Collection of the fonts used by the text layouts in this sample:
Private _fc As New FontCollection()
'' Name of the font family used for all text (the name is not case-sensitive):
Private Const _fontFamily As String = "segoe ui"
'' Main sample entry.
'' Loads the source PDF, appends the generated word index to it, saves the result
'' to 'stream', and returns the number of pages in the saved document.
Function CreatePDF(ByVal stream As Stream) As Integer
    '' Register the fonts found in the "Resources"/"Fonts" directory:
    _fc.RegisterDirectory(Path.Combine("Resources", "Fonts"))
    '' Path of the PDF that the index is added to:
    Dim sourcePath = Path.Combine("Resources", "PDFs", "CompleteJavaScriptBook.pdf")
    '' Key words for the index: duplicates (ignoring case) are dropped first,
    '' then null/empty entries are filtered out:
    Dim uniqueWords = _keywords.Distinct(StringComparer.InvariantCultureIgnoreCase)
    Dim words = uniqueWords.Where(Function(w_) Not String.IsNullOrEmpty(w_))
    Dim totalPages As Integer
    '' The source file stays open until the document has been saved:
    Using input As New FileStream(sourcePath, FileMode.Open, FileAccess.Read)
        Dim pdfDoc As New GcPdfDocument()
        pdfDoc.Load(input)
        '' Pages are appended at the end, so the original page count
        '' is the index of the first index page:
        Dim firstIndexPage = pdfDoc.Pages.Count
        '' Build and add the index:
        AddWordIndex(pdfDoc, words)
        '' Open document on the first index page by default
        '' (may not work in browser viewers, but works in Acrobat):
        pdfDoc.OpenAction = New DestinationFit(firstIndexPage)
        '' Done:
        pdfDoc.Save(stream)
        totalPages = pdfDoc.Pages.Count
    End Using
    Return totalPages
End Function
'' Key words that the index is built on. The list may contain repeated entries
'' (e.g. "API"); CreatePDF removes duplicates before the search is run:
Private ReadOnly _keywords As String() = New String() {
    "JavaScript", "Framework", "MVC", "npm", "URL", "CDN",
    "HTML5", "CSS", "ES2015", "web", "Node.js", "API",
    "model", "view", "controller", "data management", "UI", "HTML",
    "API", "function", "var", "component", "design pattern", "React.js",
    "Angular", "AJAX", "DOM", "TypeScript", "ECMAScript", "CLI",
    "Wijmo", "CoffeeScript", "Elm", "plugin", "VueJS", "Knockout",
    "event", "AngularJS", "pure JS", "data binding", "OOP", "GrapeCity",
    "gauge", "JSX", "mobile", "desktop", "Vue", "template",
    "server-side", "client-side", "SPEC", "RAM", "ECMA"
}
'' Calling FindText() on a document or a page builds text maps for each page on the fly.
'' Reusing cached text maps speeds things up a lot.
'' This method runs the search described by 'tp' on each of the passed text maps,
'' and returns the sorted set of indices of the pages on which something was found.
Private Function FindTextPages(ByVal maps As ITextMap(), ByVal tp As FindTextParams) As SortedSet(Of Integer)
    Dim pagesHit As New SortedSet(Of Integer)()
    '' Index of the page currently being searched; read by the callback below:
    Dim pageIdx = -1
    For m = 0 To maps.Length - 1
        pageIdx = maps(m).Page.Index
        '' The callback runs for each find; the set ignores repeated page indices:
        maps(m).FindText(tp, Function(found_) pagesHit.Add(pageIdx))
    Next
    Return pagesHit
End Function
'' Adds a word index to the end of the passed document.
''
'' Parameters:
''   doc   - the PDF to index; the generated index pages are appended to doc.Pages.
''   words - the key words to look for. Each word must be a non-empty string,
''           as its first character is used to group the index entries by letter.
''
'' The method works in three stages:
''   1. each word is searched for in the cached text maps of all pages;
''   2. the whole index is built in a single TextLayout;
''   3. the layout is split into two-column pages, and a link annotation
''      is added over every rendered page number.
Private Sub AddWordIndex(ByVal doc As GcPdfDocument, ByVal words As IEnumerable(Of String))
    '' Time stamp printed in the header of each index page:
    Dim tStart = Util.TimeNow()
    '' Build text maps for all pages to speed up FindText() calls
    Dim textMaps(doc.Pages.Count - 1) As ITextMap
    For i = 0 To doc.Pages.Count - 1
        textMaps(i) = doc.Pages(i).GetTextMap()
    Next
    '' Words and page indices where they occur, sorted on words:
    Dim index = New SortedDictionary(Of String, List(Of Integer))()
    '' Here the main loop building the index is on key words.
    '' An alternative would be to loop over the pages.
    '' Depending on the relative sizes of the keyword dictionary vs
    '' the number of pages in the document, one or the other might be better,
    '' but this is beyond the scope of this sample.
    For Each word In words
        '' wholeWord is True only for keys that do not contain a space:
        Dim wholeWord As Boolean = word.IndexOf(" "c) = -1
        Dim pgs = FindTextPages(textMaps, New FindTextParams(word, wholeWord, False))
        '' A very simplistic way of also finding plurals. The check is ordinal,
        '' so that it does not depend on the current culture:
        If wholeWord AndAlso Not word.EndsWith("s", StringComparison.Ordinal) Then
            pgs.UnionWith(FindTextPages(textMaps, New FindTextParams(word & "s", wholeWord, False)))
        End If
        '' Only words that were actually found get into the index:
        If pgs.Any() Then
            index.Add(word, pgs.ToList())
        End If
    Next
    '' Prepare to render the index. The whole index is built
    '' in a single TextLayout instance, set up to render it
    '' in two columns per page.
    '' The main rendering loop uses the TextLayout.SplitAndBalance method,
    '' using the approach demonstrated in the BalancedColumns sample.
    '' The complication here is that we need to associate a link to the
    '' relevant page with each page number rendered, see linkIndices below.
    '' Set up the TextLayout:
    Const margin = 72.0F
    Dim pageWidth = doc.PageSize.Width
    Dim pageHeight = doc.PageSize.Height
    '' Width of the page area between the left and right margins:
    Dim cW = pageWidth - margin * 2
    '' Caption (index letter) format:
    Dim tfCap = New TextFormat() With {
        .FontName = _fontFamily,
        .FontBold = True,
        .FontSize = 16,
        .LineGap = 24
    }
    '' Index word and pages format:
    Dim tfRun = New TextFormat() With {
        .FontName = _fontFamily,
        .FontSize = 10
    }
    '' Page headers/footers:
    Dim tfHdr = New TextFormat() With {
        .FontName = _fontFamily,
        .FontItalic = True,
        .FontSize = 10
    }
    '' FirstLineIndent = -18 sets up hanging indent:
    Dim tl = New TextLayout(72) With {
        .FontCollection = _fc,
        .FirstLineIndent = -18,
        .MaxWidth = pageWidth,
        .MaxHeight = pageHeight,
        .MarginLeft = margin,
        .MarginRight = margin,
        .MarginBottom = margin,
        .MarginTop = margin,
        .ColumnWidth = cW * 0.46F,
        .TextAlignment = TextAlignment.Leading,
        .ParagraphSpacing = 4,
        .LineGapBeforeFirstLine = False
    }
    '' The list of text runs created for page numbers,
    '' each paired with the index of the page that it refers to:
    Dim pgnumRuns = New List(Of Tuple(Of TextRun, Integer))()
    '' This loop builds the index on the TextLayout, saving the text runs
    '' created for each page number rendered. Note that at this point
    '' (prior to the PerformLayout(true) call) the text runs do not contain any info
    '' about their code points and render locations, so we can only save the text runs here.
    '' Later they will be used to add links to referenced pages in the PDF:
    Dim litera As Char = " "c
    For Each kvp In index
        Dim word = kvp.Key
        Dim pageIndices = kvp.Value
        '' Start a new caption whenever the (upper-cased) first letter changes:
        If Char.ToUpper(word(0)) <> litera Then
            litera = Char.ToUpper(word(0))
            tl.Append($"{litera}{ChrW(&H2029)}", tfCap)
        End If
        tl.Append(word, tfRun)
        tl.Append(" ", tfRun)
        For i = 0 To pageIndices.Count - 1
            Dim from_ = pageIndices(i)
            '' Page indices are 0-based, the numbers shown in the index are 1-based:
            Dim tr = tl.Append((from_ + 1).ToString(), tfRun)
            pgnumRuns.Add(Tuple.Create(Of TextRun, Integer)(tr, from_))
            '' We merge sequential pages into "..-M".
            '' k is the position of the last page of the run of consecutive pages starting at i:
            Dim k = i
            For j = i + 1 To pageIndices.Count - 1
                If pageIndices(j) <> pageIndices(j - 1) + 1 Then
                    Exit For
                End If
                k = j
            Next
            '' Only runs of three or more consecutive pages are collapsed into a range:
            If k > i + 1 Then
                tl.Append("-", tfRun)
                Dim to_ = pageIndices(k)
                tr = tl.Append((to_ + 1).ToString(), tfRun)
                pgnumRuns.Add(Tuple.Create(Of TextRun, Integer)(tr, to_))
                '' Fast forward (the loop counter is changed on purpose,
                '' so that the pages inside the range are skipped):
                i = k
            End If
            If i < pageIndices.Count - 1 Then
                tl.Append(", ", tfRun)
            Else
                tl.AppendLine(tfRun)
            End If
        Next
    Next
    '' This calculates the glyphs and lays out the whole index.
    '' The tl.SplitAndBalance() call in the loop below does not require redoing the layout:
    tl.PerformLayout(True)
    ''
    '' Now we are ready to split and render the text layout, and also add links to page numbers.
    ''
    '' Split areas and options - see BalancedColumns for details:
    Dim psas() As PageSplitArea = {
        New PageSplitArea(tl) With {.MarginLeft = tl.MarginLeft + (cW * 0.54F)}
    }
    Dim tso = New TextSplitOptions(tl) With {
        .KeepParagraphLinesTogether = True
    }
    '' First original code point index in the current column:
    Dim cpiStart = 0
    '' Max+1 original code point index in the current column:
    Dim cpiEnd = 0
    '' Current index in pgnumRuns:
    Dim pgnumRunsIdx = 0
    '' Method to add links to actual pages over page numbers in the current column.
    '' It must be called once per rendered column, in rendering order, as it advances
    '' cpiStart, cpiEnd and pgnumRunsIdx:
    Dim linkIndices As Action(Of TextLayout, Page) =
        Sub(tl_, page_)
            cpiEnd += tl_.CodePointCount
            While pgnumRunsIdx < pgnumRuns.Count
                Dim run = pgnumRuns(pgnumRunsIdx)
                Dim numRun = run.Item1
                Dim cpi = numRun.CodePointIndex
                '' This run and all following ones belong to later columns:
                If cpi >= cpiEnd Then
                    Exit While
                End If
                '' Convert the original code point index to an index within the current column:
                cpi -= cpiStart
                Dim rects = tl_.GetTextRects(cpi, numRun.CodePointCount)
                Debug.Assert(rects.Count > 0)
                '' In builds without asserts, skip the link rather than fail if no rectangle is returned:
                If rects.Count > 0 Then
                    page_.Annotations.Add(New LinkAnnotation(rects(0).ToRectangleF(), New DestinationFit(run.Item2)))
                End If
                pgnumRunsIdx += 1
            End While
            cpiStart += tl_.CodePointCount
        End Sub
    '' Split and render the index in 2 columns:
    Dim indexPage = doc.Pages.Add()
    While True
        Dim g = indexPage.Graphics
        '' Add a simple page header (cW is the page width less the side margins):
        g.DrawString($"Index generated by DsPdf on {tStart:R}", tfHdr,
            New RectangleF(margin, 0, cW, margin),
            TextAlignment.Center, ParagraphAlignment.Center, False)
        '' 'rest' will accept the text that did not fit on this page:
        Dim rest As TextLayout = Nothing
        Dim res = tl.SplitAndBalance(psas, tso, rest)
        '' Render text:
        g.DrawTextLayout(tl, PointF.Empty)
        g.DrawTextLayout(psas(0).TextLayout, PointF.Empty)
        '' Add links from page numbers to pages:
        linkIndices(tl, indexPage)
        linkIndices(psas(0).TextLayout, indexPage)
        '' Are we done yet?
        If res <> SplitResult.Split Then
            Exit While
        End If
        '' Continue with the remaining text on a new page:
        tl = rest
        indexPage = doc.Pages.Add()
    End While
    '' Done:
End Sub
'' Creates a sample document with 100 pages of 'lorem ipsum' in a temporary file,
'' and returns the path of that file:
Private Function MakeDocumentToIndex() As String
    Const pageCount = 100
    Dim tempPath = Path.GetTempFileName()
    Using output As New FileStream(tempPath, FileMode.Open, FileAccess.ReadWrite)
        Dim sampleDoc As New GcPdfDocument()
        '' See StartEndDoc for details on StartDoc/EndDoc mode:
        sampleDoc.StartDoc(output)
        '' Prep a TextLayout to hold/format the text; the same instance is reused for all pages.
        '' The layout covers the whole page, including the margins:
        Dim layout As New TextLayout(72)
        With layout
            .FontCollection = _fc
            .DefaultFormat.FontName = _fontFamily
            .DefaultFormat.FontSize = 12
            .MaxHeight = sampleDoc.PageSize.Height
            .MaxWidth = sampleDoc.PageSize.Width
            .MarginAll = 72
            .FirstLineIndent = 72 / 2
        End With
        '' Generate the document:
        For pageNo = 1 To pageCount
            layout.Append(Util.LoremIpsum(1))
            layout.PerformLayout(True)
            Dim g = sampleDoc.NewPage().Graphics
            g.DrawTextLayout(layout, PointF.Empty)
            '' Empty the layout so that it can be filled again for the next page:
            layout.Clear()
        Next
        sampleDoc.EndDoc()
    End Using
    Return tempPath
End Function
End Class