Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 58 additions & 48 deletions Examples/pom.xml
Original file line number Diff line number Diff line change
@@ -1,55 +1,65 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>GroupDocs.Text-for-Java</groupId>
<artifactId>GroupDocs.Text-for-Java</artifactId>
<version>1.0-SNAPSHOT</version>
<groupId>GroupDocs.Text-for-Java</groupId>
<artifactId>GroupDocs.Text-for-Java</artifactId>
<version>1.0-SNAPSHOT</version>

<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
</properties>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
</properties>

<dependencies>
<dependency>
<groupId>commons-net</groupId>
<artifactId>commons-net</artifactId>
<version>3.5</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.4</version>
</dependency>
<dependency>
<groupId>com.microsoft.azure</groupId>
<artifactId>azure-keyvault</artifactId>
<version>0.9.4</version>
</dependency>
<dependency>
<groupId>com.microsoft.azure</groupId>
<artifactId>azure-storage</artifactId>
<version>4.3.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-io -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-io</artifactId>
<version>1.3.2</version>
</dependency>
<dependencies>
<dependency>
<groupId>commons-net</groupId>
<artifactId>commons-net</artifactId>
<version>3.5</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.4</version>
</dependency>
<dependency>
<groupId>com.microsoft.azure</groupId>
<artifactId>azure-keyvault</artifactId>
<version>0.9.4</version>
</dependency>
<dependency>
<groupId>com.microsoft.azure</groupId>
<artifactId>azure-storage</artifactId>
<version>4.3.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-io -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-io</artifactId>
<version>1.3.2</version>
</dependency>
<dependency>
<groupId>com.groupdocs</groupId>
<artifactId>groupdocs-parser</artifactId>
<version>18.7</version>
</dependency>
</dependencies>
<repositories>
<!-- Extra artifact repository; presumably where the com.groupdocs:groupdocs-parser
     dependency declared in this POM is resolved from (TODO confirm). -->
<!-- NOTE(review): the URL below uses plain http://. Recent Maven releases block
     non-HTTPS external repositories by default; confirm whether an https endpoint
     is available before relying on this entry. -->
<repository>
<id>GroupDocsJavaAPI</id>
<name>GroupDocs Java API</name>
<url>http://artifact.groupdocs.com/repo/</url>
</repository>
</repositories>
<build>
<resources>
<resource>
<directory>src/main/resources</directory>
<filtering>true</filtering>
</resource>
</resources>

</dependencies>
<build>
<resources>
<resource>
<directory>src/main/resources</directory>
<filtering>true</filtering>
</resource>
</resources>

</build>
</build>

</project>
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,13 @@
import com.groupdocs.parser.MediaTypeDetector;
import com.groupdocs.parser.MediaTypeNames;
import com.groupdocs.parser.MetadataNames;
import com.groupdocs.parser.PdfTextExtractor;
import com.groupdocs.parser.PersonalStorageContainer;
import com.groupdocs.parser.Rectangle;
import com.groupdocs.parser.SearchHighlightOptions;
import com.groupdocs.parser.SearchOptions;
import com.groupdocs.parser.TextArea;
import com.groupdocs.parser.TextAreaSearchOptions;
import com.groupdocs.parser.TextExtractor;
import com.groupdocs.parser.WordsFormattedTextExtractor;
import com.groupdocs.parser.WordsMediaTypeDetector;
Expand All @@ -33,6 +37,7 @@ public class TextExtraction {
private final static String DOC_FILE_PATH = "sample.docx";
private final static String EXCEL_FILE_PATH = "sample.xlsx";
private final static String OST_FILE_PATH = "sample.ost";
private final static String PDF_FILE_PATH = "sample.pdf";
// ExEnd:SourceDocumentFilePath

/**
Expand Down Expand Up @@ -807,7 +812,7 @@ public static void extractTextFromPasswordProtectedDocument() {
WordsTextExtractor extractor = null;
try {
// Create a text extractor for the password-protected document
extractor = new WordsTextExtractor("document.doc", loadOptions);
extractor = new WordsTextExtractor(Common.mapSourceFilePath(DOC_FILE_PATH), loadOptions);
// Extract all the text from the document
System.out.println(extractor.extractAll());
} catch (InvalidPasswordException ex) {
Expand All @@ -816,4 +821,36 @@ public static void extractTextFromPasswordProtectedDocument() {
}
// ExEnd:extractTextFromPasswordProtectedDocument
}

/**
 * Extracts text areas from a PDF document and prints them.
 *
 * <p>Opens the sample PDF ({@code PDF_FILE_PATH} resolved through
 * {@code Common.mapSourceFilePath}), searches for text areas that match an
 * "INVOICE # NNN" regular expression inside a bounding rectangle, and prints
 * the text of every match to standard output.
 *
 * <p>Any exception raised while opening or searching the document is caught
 * and its message is printed to standard output; nothing is propagated to
 * the caller.
 */
public static void extractTextAreaFromDocument() {
    // ExStart:extractTextAreaFromDocument_18.7
    try {
        // Create a text extractor for the sample PDF
        PdfTextExtractor extractor = new PdfTextExtractor(Common.mapSourceFilePath(PDF_FILE_PATH));

        // Create search options
        TextAreaSearchOptions searchOptions = new TextAreaSearchOptions();
        // Set a regular expression to search 'Invoice # XXX' text
        searchOptions.setExpression("\\s?INVOICE\\s?#\\s?[0-9]+");
        // Limit the search with a rectangle
        searchOptions.setRectangle(new Rectangle(10, 10, 300, 150));

        // Get text areas (first argument is presumably the page index -- confirm against the API docs)
        java.util.List<TextArea> texts = extractor.getDocumentContent().getTextAreas(0, searchOptions);

        // Iterate over the matches
        for (TextArea area : texts) {
            // Print the matched text
            System.out.println(area.getText());
        }
    } catch (Exception ex) {
        // No password is involved in this example, so report the real failure
        // instead of the misleading "Invalid password." message.
        System.out.println("Failed to extract text areas: " + ex.getMessage());
    }
    // ExEnd:extractTextAreaFromDocument_18.7
}
}