Custom Extraction Options

This example shows how to configure the Table Extractor options programmatically and extract table data from a predefined region without user interaction. Unlike spreadsheets, PDFs do not contain explicit table structures, so tables must be inferred from text positioning. The Table Extractor options let you fine-tune table detection so that it adapts to various document layouts and formatting styles.

/*
 * Shared viewer state, assigned by loadPdfViewer().
 * Deliberately declared with `var`: in a classic (non-module) script this keeps
 * the bindings available as properties of the global object, as before.
 */
var React, viewer, tableDataExtractor;

/**
 * Coordinates defining the selected area for table data extraction.
 * Format: [x1, y1, x2, y2]
 * Origin is at the bottom-left corner of the page.
 */
const selectionBounds = [29.259600728808035, 334.3295250560871, 688.918658156098, 490.80681277047995];

/**
 * Logs extracted table data information.
 *
 * When an array is passed, only its first element is inspected. Anything that
 * lacks `cols` or `rows` (including an empty array, `null` or `undefined`) is
 * reported as "No table data extracted.".
 *
 * @param {Object|Object[]} data - The extracted table data, or an array of such objects.
 */
function logTableData(data) {
  // Work on a local instead of reassigning the parameter.
  const table = Array.isArray(data) && data.length > 0 ? data[0] : data;

  if (table?.cols && table?.rows) {
    console.log(`Extracted columns: ${table.cols.length}, rows: ${table.rows.length}`);
  } else {
    console.log("No table data extracted.");
  }
}

/**
 * Extracts table data from `selectionBounds` on page index 0 with the given
 * extraction options and logs a summary of the result.
 *
 * Failures are caught and reported with `console.error`, so the returned
 * promise never rejects and callers may safely ignore it (the toolbar click
 * handlers do exactly that).
 *
 * @param {Object} extractOptions - The options to customize table extraction.
 * @returns {Promise<void>} Settles once the result (or the error) has been logged.
 */
async function applyExtraction(extractOptions) {
  try {
    // The third argument is intentionally left undefined, as in the original sample.
    // NOTE(review): presumably this selects the extractor's default behaviour; confirm against the API reference.
    const extractedData = await tableDataExtractor.extractTableData(
      0,
      selectionBounds,
      undefined,
      { extractOptions },
    );
    logTableData(extractedData);
  } catch (err) {
    console.error("Table extraction failed:", err);
  }
}

/**
 * Creates a styled toolbar button.
 *
 * The button is rendered disabled, with no click handler, whenever
 * `viewer.canEditDocument` is falsy at the time this function runs.
 *
 * @param {string} text - Button label; also used as the tooltip and as the React key.
 * @param {Function} callback - Function to execute on click.
 * @returns {Object} React element representing the button.
 */
function createToolbarButton(text, callback) {
  const disabled = !viewer.canEditDocument;
  return React.createElement(
    "button",
    {
      // Buttons are returned to the viewer as an array, so each one needs a stable key.
      key: text,
      className: "gc-btn gc-btn--accent",
      disabled,
      style: { padding: "0 10px" },
      onClick: disabled ? null : callback,
      title: text,
    },
    text,
  );
}

/**
 * Initializes and loads the PDF viewer.
 *
 * Creates the viewer, activates the table data extractor, adds and expands the
 * table extraction panel, registers a custom second toolbar whose buttons each
 * re-run the extraction with one option changed, then opens the document.
 *
 * @param {string} selector - The DOM selector for the viewer container.
 * @param {string} pdfUrlToOpen - URL of the PDF document to open.
 */
function loadPdfViewer(selector, pdfUrlToOpen) {
  // [button label, extraction options] pairs, in display order.
  const extractionPresets = [
    ["Column spacing: 0.2", { CoefMinimumDistanceBetweenCols: 0.2 }],
    ["Column spacing: 1.0", { CoefMinimumDistanceBetweenCols: 1.0 }],
    ["Row spacing: 0.5", { CoefMinimumDistanceBetweenRows: 0.5 }],
    ["Row spacing: 0.9", { CoefMinimumDistanceBetweenRows: 0.9 }],
    ["Min row height: 1.0", { MinimumRowHeight: 1.0 }],
    ["Min row height: 20.0", { MinimumRowHeight: 20.0 }],
    ["Min col width: 1.0", { MinimumColWidth: 1.0 }],
    ["Min col width: 30.0", { MinimumColWidth: 30.0 }],
  ];

  viewer = new DsPdfViewer(selector, { supportApi: getSupportApiSettings() });
  React = viewer.getType("React");
  tableDataExtractor = viewer.tableDataExtractor;
  tableDataExtractor.activate();

  const panelHandle = viewer.addTableExtractionPanel();
  viewer.expandPanel(panelHandle);

  // Configure a custom second toolbar for table extraction settings
  viewer.options.secondToolbar = {
    render(toolbarKey) {
      if (toolbarKey !== "sample-toolbar") {
        return null;
      }
      return extractionPresets.map(([label, extractOptions]) =>
        createToolbarButton(label, () => {
          // applyExtraction reports its own errors, so the promise is safe to drop.
          void applyExtraction(extractOptions);
        }),
      );
    },
  };

  viewer.addDefaultPanels();
  viewer
    .open(pdfUrlToOpen)
    .then(() => {
      // NOTE(review): 1 is presumably the "fit page width" zoom mode; confirm against the viewer's ZoomMode enum.
      viewer.zoomMode = 1;
    })
    .catch((err) => {
      console.error("Failed to open PDF document:", err);
    });

  // Display the custom toolbar
  viewer.showSecondToolbar("sample-toolbar");
}

// Start the sample once the page and its scripts have finished loading.
window.onload = function() {
  //DsPdfViewer.LicenseKey = "***key***";
  var pdfUrlToOpen = "/document-solutions/javascript-pdf-viewer/demos/product-bundles/assets/pdf/table-data-one-page.pdf";
  loadPdfViewer("#viewer", pdfUrlToOpen);
}
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <!-- Asks legacy Internet Explorer to render with its newest available engine. -->
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <title>Custom Extraction Options.</title>
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <link rel="stylesheet" href="./src/styles.css">
  <!--
    The scripts below have no async/defer attribute, so they execute in the
    order listed and ./src/app.js runs last.
    NOTE(review): app.js presumably relies on globals (e.g. DsPdfViewer,
    getSupportApiSettings) provided by the scripts loaded before it; confirm.
  -->
  <script src="/document-solutions/javascript-pdf-viewer/demos/product-bundles/build/dspdfviewer.js"></script>
  <script src="/document-solutions/javascript-pdf-viewer/demos/product-bundles/build/wasmSupportApi.js"></script>
  <script src="/document-solutions/javascript-pdf-viewer/demos/resource/js/init.js"></script>
  <script src="./src/app.js"></script>
</head>
<body>
  <!-- Host element for the viewer; the sample script targets it with the "#viewer" selector. -->
  <div id="viewer"></div>
</body>
</html>
/*
 * Makes the element with id "viewer" as tall as its containing block.
 * NOTE(review): a percentage height only takes effect when the parent has a
 * definite height; confirm html/body are sized elsewhere.
 */
#viewer { height: 100%; }