Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added Examples/Data/Source/sample.sqlite
Binary file not shown.
7 changes: 6 additions & 1 deletion Examples/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,12 @@
<dependency>
<groupId>com.groupdocs</groupId>
<artifactId>groupdocs-parser</artifactId>
<version>18.7</version>
<version>18.9</version>
</dependency>
<dependency>
<groupId>org.xerial</groupId>
<artifactId>sqlite-jdbc</artifactId>
<version>3.23.1</version>
</dependency>
</dependencies>
<repositories>
Expand Down
11 changes: 11 additions & 0 deletions Examples/src/main/java/com/groupdocs/parser/examples/Common.java
Original file line number Diff line number Diff line change
Expand Up @@ -96,4 +96,15 @@ public static void useDynabicMeteredAccount() {

}

/**
 * Builds the JDBC connection string for a SQLite database file.
 *
 * @param inputFileName
 *            name of the database file, appended to {@code STORAGE_PATH}
 *            after a "/" separator
 * @return the string {@code "jdbc:sqlite:" + STORAGE_PATH + "/" + inputFileName}
 */
public static String getConnectionString(String inputFileName) {
    // Plain concatenation needs no try/catch. The former handler returned
    // the exception message in place of a connection string, which would
    // only have surfaced later as a confusing JDBC error.
    return "jdbc:sqlite:" + STORAGE_PATH + "/" + inputFileName;
}

}
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
package com.groupdocs.parser.examples;

import java.io.BufferedReader;
import java.io.InputStream;

import com.groupdocs.parser.CellsMediaTypeDetector;
import com.groupdocs.parser.Container;
import com.groupdocs.parser.ContainerEnumerator;
import com.groupdocs.parser.DbContainer;
import com.groupdocs.parser.EmailConnectionInfo;
import com.groupdocs.parser.EmailContainer;
import com.groupdocs.parser.EmailTextExtractor;
Expand All @@ -22,7 +24,7 @@ public class Containers {
// ExStart:SourceOSTDocumentFilePath
private final static String OST_FILE_PATH = "sample.ost";
private final static String ZIP_FILE_PATH = "sample.zip";

private final static String DB_FILE_PATH = "sample.sqlite";
// ExEnd:SourceOSTZIPDocumentFilePath
/**
* Creates containers
Expand Down Expand Up @@ -347,4 +349,52 @@ public static void retrieveAllEntitiesInZIPArchiveByName() {
exp.printStackTrace();
}
}

/**
 * Reads text from a database.
 *
 * Opens a JDBC connection for {@code DB_FILE_PATH}, wraps it in a
 * {@link DbContainer} and, for every entity in the container, prints its
 * name, its media type and then the content of the stream returned by
 * {@code openStream()} line by line. Any exception is reported on standard
 * output together with its stack trace; nothing is rethrown.
 */
public static void extractTextFromDatabase() {
    try {
        // ExStart:extractTextFromDatabase_18.9
        String connectionString = Common.getConnectionString(DB_FILE_PATH);
        // try-with-resources guarantees the JDBC connection is released even
        // when the container cannot be created or extraction fails. Closing
        // an already closed connection is a no-op, so this stays safe if
        // dispose() closes it as well.
        try (java.sql.Connection connection = java.sql.DriverManager.getConnection(connectionString)) {
            DbContainer container = new DbContainer(connection);
            try {
                // Iterate over entities
                for (Container.Entity entity : container.getEntities()) {
                    // Print a table name
                    System.out.println(entity.getName());
                    // Print a media type
                    System.out.println(entity.getMediaType());
                    // Create a reader for the CSV document: openStream
                    // converts a table to a CSV file and returns it as a
                    // stream. Closing the BufferedReader also closes the
                    // wrapped stream reader.
                    try (BufferedReader br = new BufferedReader(
                            new java.io.InputStreamReader(entity.openStream()))) {
                        String line;
                        // Loop to the end of the file
                        while ((line = br.readLine()) != null) {
                            // Print a line from the document
                            System.out.println(line);
                        }
                    }
                }
            } finally {
                container.dispose();
            }
        }
        // ExEnd:extractTextFromDatabase_18.9
    } catch (Exception exp) {
        System.out.println("Exception: " + exp.getMessage());
        exp.printStackTrace();
    }
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import com.groupdocs.parser.examples.TextFormatters.Markdown;
import com.groupdocs.parser.examples.TextFormatters.PlainText;
import com.groupdocs.parser.examples.Tools.EncodingDetector;
import com.groupdocs.parser.examples.Tools.Indexer;
import com.groupdocs.parser.examples.Tools.Loggers;
import com.groupdocs.parser.examples.Tools.MediaTypeDetectors;

Expand Down Expand Up @@ -92,6 +93,7 @@ public static void main(String[] args) throws Throwable {
//// Working with PDF documents
// PDFDocuments.extractTextAsWhole();
// PDFDocuments.extractTextByLines();
//PDFDocuments.extractDataFromPDFForms();

//// Working with OneNote documents
// OneNoteDocuments.extractTextAsWhole();
Expand Down Expand Up @@ -144,20 +146,30 @@ public static void main(String[] args) throws Throwable {
// MediaTypeDetectors.detectMediaTypeByContent();
// MediaTypeDetectors.detectMediaTypeByExtension();
// MediaTypeDetectors.IsMediaTypeSupportedByDetector();


// This feature allows providing a password for protected documents on-demand
//ExStart:requestPasswordForProtectedDocument_usage_18.9
// Indexer indexer = new Indexer();
// indexer.process(new java.io.File(Common.STORAGE_PATH.toString()));
//ExEnd:requestPasswordForProtectedDocument_usage_18.9

//// Containers
// Containers.createEmailConnectionInfo();
// Containers.enumerateAllEntitiesOfGroupOfContainers();
// Containers.extractMessagesFromOST();
// Containers.getListOfEmailsFromEWS();
// Containers.retrieveAnEmailFromEWS();
//Containers.extractTextFromDatabase();

//// Structured handlers
// StructuredHandlers.Headers hd = new Headers();
// hd.extractHeader(new java.io.FileInputStream(Common.mapSourceFilePath("sample.docx")));

// StructuredHandlers.Hyperlinks hl = new StructuredHandlers.Hyperlinks();
// hl.extract(new java.io.FileInputStream(Common.mapSourceFilePath("sample.docx")));
// hd.extractHeader(new
//// java.io.FileInputStream(Common.mapSourceFilePath("sample.docx")));

// StructuredHandlers.Hyperlinks hl = new
// StructuredHandlers.Hyperlinks();
// hl.extract(new
// java.io.FileInputStream(Common.mapSourceFilePath("sample.docx")));

System.out.println("Operation Completed...");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,33 @@ public static void extractTextByLines() {
}
}

/**
 * Prints every form field of a PDF document as "name: value".
 *
 * The extractor is created for the source file {@code FILE_PATH} and is
 * always disposed once it has been created. Any exception is reported on
 * standard output together with its stack trace.
 */
public static void extractDataFromPDFForms() {
    try {
        // ExStart:extractDataFromPDFForms_18.9
        // Text extractor bound to the PDF source document
        PdfTextExtractor pdfExtractor = new PdfTextExtractor(Common.mapSourceFilePath(FILE_PATH));
        try {
            // Form data keyed by field name
            java.util.Dictionary<String, String> formData = pdfExtractor.getFormData();
            // Walk all field names and print each one with its value
            for (java.util.Enumeration<String> names = formData.keys(); names.hasMoreElements();) {
                String fieldName = names.nextElement();
                String fieldValue = formData.get(fieldName);
                System.out.println(String.format("%s: %s", fieldName, fieldValue));
            }
        } finally {
            pdfExtractor.dispose();
        }
        // ExEnd:extractDataFromPDFForms_18.9
    } catch (Exception ex) {
        System.out.println("Exception: " + ex.getMessage());
        ex.printStackTrace();
    }
}

}
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
package com.groupdocs.parser.examples.TextExtractors;

import com.groupdocs.parser.MarkdownDocumentFormatter;
import com.groupdocs.parser.Rectangle;
import com.groupdocs.parser.SlidesFormattedTextExtractor;
import com.groupdocs.parser.SlidesTextExtractor;
import com.groupdocs.parser.TextArea;
import com.groupdocs.parser.TextAreaSearchOptions;
import com.groupdocs.parser.examples.Common;

public class PresentationDocuments {
Expand Down Expand Up @@ -76,5 +79,37 @@ public static void extractFormattedText() {
exp.printStackTrace();
}
}

/**
 * Extracts a text area from a presentation document.
 *
 * Searches for text matching the 'Published: XXXX.XX.XX' pattern inside the
 * rectangle (10, 10, 300, 150) and prints the text of every area found.
 * Any exception is reported on standard output together with its stack
 * trace.
 */
public static void extractTextAreaFromPresentationDocument() {
    try {
        // ExStart:ExtractTextAreaFromPresentationDocument_18.9
        // Create a text extractor for presentations. The document is an
        // input, so it is resolved with mapSourceFilePath like the other
        // extractor examples, not with the output path.
        try (SlidesTextExtractor extractor = new SlidesTextExtractor(
                Common.mapSourceFilePath(FILE_PATH))) {
            // Create search options
            TextAreaSearchOptions searchOptions = new TextAreaSearchOptions();
            // Set a regular expression to search 'Published: XXXX.XX.XX' text
            searchOptions.setExpression("\\s?Published\\:\\s?[0-9]{4}\\.[0-9]{2}\\.[0-9]{2}");
            // Limit the search with a rectangle
            searchOptions.setRectangle(new Rectangle(10, 10, 300, 150));

            // Get text areas (first argument 0: presumably the index of the
            // first slide -- confirm against the library documentation)
            java.util.List<TextArea> texts = extractor.getDocumentContent().getTextAreas(0, searchOptions);

            // Iterate over a list
            for (TextArea area : texts) {
                // Print a text
                System.out.println(area.getText());
            }
        }
        // ExEnd:ExtractTextAreaFromPresentationDocument_18.9
    } catch (Exception exp) {
        System.out.println("Exception: " + exp.getMessage());
        exp.printStackTrace();
    }
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
import com.groupdocs.parser.CellsSheetInfo;
import com.groupdocs.parser.CellsTextExtractor;
import com.groupdocs.parser.MarkdownDocumentFormatter;
import com.groupdocs.parser.Rectangle;
import com.groupdocs.parser.TextArea;
import com.groupdocs.parser.TextAreaSearchOptions;
import com.groupdocs.parser.examples.Common;

public class SpreadsheetDocuments {
Expand Down Expand Up @@ -155,4 +158,35 @@ public static void extractSelectedColumnsByRows() {
exp.printStackTrace();
}
}
/**
 * Prints the text areas of a spreadsheet document that match an
 * 'Invoice # XXX' pattern within a bounding rectangle.
 *
 * Any exception is reported on standard output together with its stack
 * trace.
 */
public static void extractTextAreaFromSpreadsheetDocument() {
    try {
        // ExStart:extractTextAreaFromSpreadsheetDocument_18.9
        // Extractor for the spreadsheet source file; closed automatically
        try (CellsTextExtractor cellsExtractor = new CellsTextExtractor(Common.mapSourceFilePath(FILE_PATH))) {
            // Search options: what to look for and where
            TextAreaSearchOptions options = new TextAreaSearchOptions();
            // Regular expression matching 'Invoice # XXX' text
            final String invoicePattern = "\\s?INVOICE\\s?#\\s?[0-9]+";
            options.setExpression(invoicePattern);
            // Restrict the search to this rectangle
            Rectangle searchBounds = new Rectangle(10, 10, 300, 150);
            options.setRectangle(searchBounds);

            // Collect the matching text areas
            java.util.List<TextArea> matches = cellsExtractor.getDocumentContent().getTextAreas(0, options);

            // Print the text of each match
            for (TextArea match : matches) {
                String text = match.getText();
                System.out.println(text);
            }
        }
        // ExEnd:extractTextAreaFromSpreadsheetDocument_18.9
    } catch (Exception ex) {
        System.out.println("Exception: " + ex.getMessage());
        ex.printStackTrace();
    }
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package com.groupdocs.parser.examples.TextExtractors;

import com.groupdocs.parser.MarkdownDocumentFormatter;
import com.groupdocs.parser.Rectangle;
import com.groupdocs.parser.TextArea;
import com.groupdocs.parser.TextAreaSearchOptions;
import com.groupdocs.parser.WordsFormattedTextExtractor;
import com.groupdocs.parser.WordsTextExtractor;
import com.groupdocs.parser.examples.Common;
Expand Down Expand Up @@ -76,4 +79,36 @@ public static void extractFormattedText() {
exp.printStackTrace();
}
}

/**
 * Prints the text areas of a text document that match an 'Invoice # XXX'
 * pattern within a bounding rectangle.
 *
 * Any exception is reported on standard output together with its stack
 * trace.
 */
public static void extractTextAreaFromTextDocument() {
    try {
        // ExStart:extractTextAreaFromTextDocument_18.9
        // Extractor for the text source file; closed automatically
        try (WordsTextExtractor wordsExtractor = new WordsTextExtractor(Common.mapSourceFilePath(FILE_PATH))) {
            // Search options: what to look for and where
            TextAreaSearchOptions options = new TextAreaSearchOptions();
            // Regular expression matching 'Invoice # XXX' text
            final String invoicePattern = "\\s?INVOICE\\s?#\\s?[0-9]+";
            options.setExpression(invoicePattern);
            // Restrict the search to this rectangle
            Rectangle searchBounds = new Rectangle(10, 10, 300, 150);
            options.setRectangle(searchBounds);

            // Collect the matching text areas
            java.util.List<TextArea> matches = wordsExtractor.getDocumentContent().getTextAreas(0, options);

            // Print the text of each match
            for (TextArea match : matches) {
                String text = match.getText();
                System.out.println(text);
            }
        }
        // ExEnd:extractTextAreaFromTextDocument_18.9
    } catch (Exception ex) {
        System.out.println("Exception: " + ex.getMessage());
        ex.printStackTrace();
    }
}
}
Loading