|
6 | 6 | import java.io.FileWriter; |
7 | 7 | import java.io.IOException; |
8 | 8 | import java.util.ArrayList; |
| 9 | +import java.util.HashMap; |
| 10 | +import java.util.Iterator; |
9 | 11 | import java.util.List; |
| 12 | +import java.util.Map; |
10 | 13 |
|
11 | 14 | import org.apache.commons.cli.CommandLine; |
12 | 15 | import org.apache.commons.cli.CommandLineParser; |
@@ -34,15 +37,16 @@ public class CommandLineApp { |
34 | 37 | private static String BANNER = "\nTabula helps you extract tables from PDFs\n\n"; |
35 | 38 |
|
36 | 39 | private Appendable defaultOutput; |
37 | | - private Rectangle pageArea; |
| 40 | + |
| 41 | + private List<Pair<Integer, Rectangle>> pageAreas; |
38 | 42 | private List<Integer> pages; |
39 | 43 | private OutputFormat outputFormat; |
40 | 44 | private String password; |
41 | 45 | private TableExtractor tableExtractor; |
42 | 46 |
|
43 | 47 | public CommandLineApp(Appendable defaultOutput, CommandLine line) throws ParseException { |
44 | 48 | this.defaultOutput = defaultOutput; |
45 | | - this.pageArea = CommandLineApp.whichArea(line); |
| 49 | + this.pageAreas = CommandLineApp.whichAreas(line); |
46 | 50 | this.pages = CommandLineApp.whichPages(line); |
47 | 51 | this.outputFormat = CommandLineApp.whichOutputFormat(line); |
48 | 52 | this.tableExtractor = CommandLineApp.createExtractor(line); |
@@ -156,11 +160,13 @@ private void extractFile(File pdfFile, Appendable outFile) throws ParseException |
156 | 160 | while (pageIterator.hasNext()) { |
157 | 161 | Page page = pageIterator.next(); |
158 | 162 |
|
159 | | - if (pageArea != null) { |
160 | | - page = page.getArea(pageArea); |
| 163 | + if (pageAreas != null) { |
| 164 | + for (Pair<Integer, Rectangle> areaPair : pageAreas) { |
| 165 | + tables.addAll(tableExtractor.extractTables(page.getArea(areaPair.getRight(), areaPair.getLeft()))); |
| 166 | + } |
| 167 | + } else { |
| 168 | + tables.addAll(tableExtractor.extractTables(page)); |
161 | 169 | } |
162 | | - |
163 | | - tables.addAll(tableExtractor.extractTables(page)); |
164 | 170 | } |
165 | 171 | writeTables(tables, outFile); |
166 | 172 | } catch (IOException e) { |
@@ -200,16 +206,28 @@ private static OutputFormat whichOutputFormat(CommandLine line) throws ParseExce |
200 | 206 | } |
201 | 207 | } |
202 | 208 |
|
203 | | - private static Rectangle whichArea(CommandLine line) throws ParseException { |
| 209 | + private static List<Pair<Integer, Rectangle>> whichAreas(CommandLine line) throws ParseException { |
204 | 210 | if (!line.hasOption('a')) { |
205 | 211 | return null; |
206 | 212 | } |
207 | | - |
208 | | - List<Float> f = parseFloatList(line.getOptionValue('a')); |
209 | | - if (f.size() != 4) { |
210 | | - throw new ParseException("area parameters must be top,left,bottom,right"); |
| 213 | + |
| 214 | + String[] optionValues = line.getOptionValues('a'); |
| 215 | + |
| 216 | + List<Pair<Integer, Rectangle>> areaList = new ArrayList<Pair<Integer, Rectangle>>(); |
| 217 | + for (String optionValue: optionValues) { |
| 218 | + int areaCalculationMode = Page.ABSOLUTE_AREA_CALCULATION_MODE; |
| 219 | + int startIndex = 0; |
| 220 | + if (optionValue.startsWith("%")) { |
| 221 | + startIndex = 1; |
| 222 | + areaCalculationMode = Page.RELATIVE_AREA_CALCULATION_MODE; |
| 223 | + } |
| 224 | + List<Float> f = parseFloatList(optionValue.substring(startIndex)); |
| 225 | + if (f.size() != 4) { |
| 226 | + throw new ParseException("area parameters must be top,left,bottom,right optionally preceded by %"); |
| 227 | + } |
| 228 | + areaList.add(new Pair<Integer, Rectangle>(areaCalculationMode, new Rectangle(f.get(0), f.get(1), f.get(3) - f.get(1), f.get(2) - f.get(0)))); |
211 | 229 | } |
212 | | - return new Rectangle(f.get(0), f.get(1), f.get(3) - f.get(1), f.get(2) - f.get(0)); |
| 230 | + return areaList; |
213 | 231 | } |
214 | 232 |
|
215 | 233 | private static List<Integer> whichPages(CommandLine line) throws ParseException { |
@@ -307,7 +325,9 @@ public static Options buildOptions() { |
307 | 325 | .build()); |
308 | 326 | o.addOption(Option.builder("a") |
309 | 327 | .longOpt("area") |
310 | | - .desc("Portion of the page to analyze (top,left,bottom,right). Example: --area 269.875,12.75,790.5,561. Default is entire page") |
| 328 | + .desc("-a/--area = Portion of the page to analyze. Accepts top,left,bottom,right . Example: --area 269.875,12.75,790.5,561. " |
| 329 | + + "If all values are between 0-100 (inclusive) and preceded by '%', input will be taken as % of actual height or width of the page. " |
| 330 | + + "Example: --area %0,0,100,50. To specify multiple areas, -a option should be repeated. Default is entire page") |
311 | 331 | .hasArg() |
312 | 332 | .argName("AREA") |
313 | 333 | .build()); |
|
0 commit comments