|
3 | 3 | import static org.junit.Assert.*; |
4 | 4 |
|
5 | 5 | import java.awt.geom.Point2D; |
| 6 | +import java.io.File; |
6 | 7 | import java.io.IOException; |
7 | | -import java.util.Arrays; |
8 | | -import java.util.Collections; |
9 | | -import java.util.List; |
10 | | -import java.util.Map; |
| 8 | +import java.nio.charset.Charset; |
| 9 | +import java.util.*; |
11 | 10 |
|
12 | 11 | import org.apache.commons.csv.CSVFormat; |
| 12 | +import org.apache.commons.csv.CSVParser; |
13 | 13 | import org.apache.commons.csv.CSVRecord; |
14 | 14 | import org.junit.Test; |
15 | 15 |
|
|
21 | 21 | import technology.tabula.writers.CSVWriter; |
22 | 22 | import technology.tabula.UtilsForTesting; |
23 | 23 | import technology.tabula.writers.JSONWriter; |
24 | | -import java.text.Normalizer; |
25 | 24 |
|
26 | 25 | public class TestSpreadsheetExtractor { |
27 | 26 |
|
28 | | - private static final Cell[] CELLS = new Cell[] { |
29 | | - new Cell(40.0f, 18.0f, 208.0f, 4.0f), |
30 | | - new Cell(44.0f, 18.0f, 52.0f, 6.0f), |
31 | | - new Cell(50.0f, 18.0f, 52.0f, 4.0f), |
32 | | - new Cell(54.0f, 18.0f, 52.0f, 6.0f), |
33 | | - new Cell(60.0f, 18.0f, 52.0f, 4.0f), |
34 | | - new Cell(64.0f, 18.0f, 52.0f, 6.0f), |
35 | | - new Cell(70.0f, 18.0f, 52.0f, 4.0f), |
36 | | - new Cell(74.0f, 18.0f, 52.0f, 6.0f), |
37 | | - new Cell(90.0f, 18.0f, 52.0f, 4.0f), |
38 | | - new Cell(94.0f, 18.0f, 52.0f, 6.0f), |
39 | | - new Cell(100.0f, 18.0f, 52.0f, 28.0f), |
40 | | - new Cell(128.0f, 18.0f, 52.0f, 4.0f), |
41 | | - new Cell(132.0f, 18.0f, 52.0f, 64.0f), |
42 | | - new Cell(196.0f, 18.0f, 52.0f, 66.0f), |
43 | | - new Cell(262.0f, 18.0f, 52.0f, 4.0f), |
44 | | - new Cell(266.0f, 18.0f, 52.0f, 84.0f), |
45 | | - new Cell(350.0f, 18.0f, 52.0f, 4.0f), |
46 | | - new Cell(354.0f, 18.0f, 52.0f, 32.0f), |
47 | | - new Cell(386.0f, 18.0f, 52.0f, 38.0f), |
48 | | - new Cell(424.0f, 18.0f, 52.0f, 18.0f), |
49 | | - new Cell(442.0f, 18.0f, 52.0f, 74.0f), |
50 | | - new Cell(516.0f, 18.0f, 52.0f, 28.0f), |
51 | | - new Cell(544.0f, 18.0f, 52.0f, 4.0f), |
52 | | - new Cell(44.0f, 70.0f, 156.0f, 6.0f), |
53 | | - new Cell(50.0f, 70.0f, 156.0f, 4.0f), |
54 | | - new Cell(54.0f, 70.0f, 156.0f, 6.0f), |
55 | | - new Cell(60.0f, 70.0f, 156.0f, 4.0f), |
56 | | - new Cell(64.0f, 70.0f, 156.0f, 6.0f), |
57 | | - new Cell(70.0f, 70.0f, 156.0f, 4.0f), |
58 | | - new Cell(74.0f, 70.0f, 156.0f, 6.0f), |
59 | | - new Cell(84.0f, 70.0f, 2.0f, 6.0f), |
60 | | - new Cell(90.0f, 70.0f, 156.0f, 4.0f), |
61 | | - new Cell(94.0f, 70.0f, 156.0f, 6.0f), |
62 | | - new Cell(100.0f, 70.0f, 156.0f, 28.0f), |
63 | | - new Cell(128.0f, 70.0f, 156.0f, 4.0f), |
64 | | - new Cell(132.0f, 70.0f, 156.0f, 64.0f), |
65 | | - new Cell(196.0f, 70.0f, 156.0f, 66.0f), |
66 | | - new Cell(262.0f, 70.0f, 156.0f, 4.0f), |
67 | | - new Cell(266.0f, 70.0f, 156.0f, 84.0f), |
68 | | - new Cell(350.0f, 70.0f, 156.0f, 4.0f), |
69 | | - new Cell(354.0f, 70.0f, 156.0f, 32.0f), |
70 | | - new Cell(386.0f, 70.0f, 156.0f, 38.0f), |
71 | | - new Cell(424.0f, 70.0f, 156.0f, 18.0f), |
72 | | - new Cell(442.0f, 70.0f, 156.0f, 74.0f), |
73 | | - new Cell(516.0f, 70.0f, 156.0f, 28.0f), |
74 | | - new Cell(544.0f, 70.0f, 156.0f, 4.0f), |
75 | | - new Cell(84.0f, 72.0f, 446.0f, 6.0f), |
76 | | - new Cell(90.0f, 226.0f, 176.0f, 4.0f), |
77 | | - new Cell(94.0f, 226.0f, 176.0f, 6.0f), |
78 | | - new Cell(100.0f, 226.0f, 176.0f, 28.0f), |
79 | | - new Cell(128.0f, 226.0f, 176.0f, 4.0f), |
80 | | - new Cell(132.0f, 226.0f, 176.0f, 64.0f), |
81 | | - new Cell(196.0f, 226.0f, 176.0f, 66.0f), |
82 | | - new Cell(262.0f, 226.0f, 176.0f, 4.0f), |
83 | | - new Cell(266.0f, 226.0f, 176.0f, 84.0f), |
84 | | - new Cell(350.0f, 226.0f, 176.0f, 4.0f), |
85 | | - new Cell(354.0f, 226.0f, 176.0f, 32.0f), |
86 | | - new Cell(386.0f, 226.0f, 176.0f, 38.0f), |
87 | | - new Cell(424.0f, 226.0f, 176.0f, 18.0f), |
88 | | - new Cell(442.0f, 226.0f, 176.0f, 74.0f), |
89 | | - new Cell(516.0f, 226.0f, 176.0f, 28.0f), |
90 | | - new Cell(544.0f, 226.0f, 176.0f, 4.0f), |
91 | | - new Cell(90.0f, 402.0f, 116.0f, 4.0f), |
92 | | - new Cell(94.0f, 402.0f, 116.0f, 6.0f), |
93 | | - new Cell(100.0f, 402.0f, 116.0f, 28.0f), |
94 | | - new Cell(128.0f, 402.0f, 116.0f, 4.0f), |
95 | | - new Cell(132.0f, 402.0f, 116.0f, 64.0f), |
96 | | - new Cell(196.0f, 402.0f, 116.0f, 66.0f), |
97 | | - new Cell(262.0f, 402.0f, 116.0f, 4.0f), |
98 | | - new Cell(266.0f, 402.0f, 116.0f, 84.0f), |
99 | | - new Cell(350.0f, 402.0f, 116.0f, 4.0f), |
100 | | - new Cell(354.0f, 402.0f, 116.0f, 32.0f), |
101 | | - new Cell(386.0f, 402.0f, 116.0f, 38.0f), |
102 | | - new Cell(424.0f, 402.0f, 116.0f, 18.0f), |
103 | | - new Cell(442.0f, 402.0f, 116.0f, 74.0f), |
104 | | - new Cell(516.0f, 402.0f, 116.0f, 28.0f), |
105 | | - new Cell(544.0f, 402.0f, 116.0f, 4.0f), |
106 | | - new Cell(84.0f, 518.0f, 246.0f, 6.0f), |
107 | | - new Cell(90.0f, 518.0f, 186.0f, 4.0f), |
108 | | - new Cell(94.0f, 518.0f, 186.0f, 6.0f), |
109 | | - new Cell(100.0f, 518.0f, 186.0f, 28.0f), |
110 | | - new Cell(128.0f, 518.0f, 186.0f, 4.0f), |
111 | | - new Cell(132.0f, 518.0f, 186.0f, 64.0f), |
112 | | - new Cell(196.0f, 518.0f, 186.0f, 66.0f), |
113 | | - new Cell(262.0f, 518.0f, 186.0f, 4.0f), |
114 | | - new Cell(266.0f, 518.0f, 186.0f, 84.0f), |
115 | | - new Cell(350.0f, 518.0f, 186.0f, 4.0f), |
116 | | - new Cell(354.0f, 518.0f, 186.0f, 32.0f), |
117 | | - new Cell(386.0f, 518.0f, 186.0f, 38.0f), |
118 | | - new Cell(424.0f, 518.0f, 186.0f, 18.0f), |
119 | | - new Cell(442.0f, 518.0f, 186.0f, 74.0f), |
120 | | - new Cell(516.0f, 518.0f, 186.0f, 28.0f), |
121 | | - new Cell(544.0f, 518.0f, 186.0f, 4.0f), |
122 | | - new Cell(90.0f, 704.0f, 60.0f, 4.0f), |
123 | | - new Cell(94.0f, 704.0f, 60.0f, 6.0f), |
124 | | - new Cell(100.0f, 704.0f, 60.0f, 28.0f), |
125 | | - new Cell(128.0f, 704.0f, 60.0f, 4.0f), |
126 | | - new Cell(132.0f, 704.0f, 60.0f, 64.0f), |
127 | | - new Cell(196.0f, 704.0f, 60.0f, 66.0f), |
128 | | - new Cell(262.0f, 704.0f, 60.0f, 4.0f), |
129 | | - new Cell(266.0f, 704.0f, 60.0f, 84.0f), |
130 | | - new Cell(350.0f, 704.0f, 60.0f, 4.0f), |
131 | | - new Cell(354.0f, 704.0f, 60.0f, 32.0f), |
132 | | - new Cell(386.0f, 704.0f, 60.0f, 38.0f), |
133 | | - new Cell(424.0f, 704.0f, 60.0f, 18.0f), |
134 | | - new Cell(442.0f, 704.0f, 60.0f, 74.0f), |
135 | | - new Cell(516.0f, 704.0f, 60.0f, 28.0f), |
136 | | - new Cell(544.0f, 704.0f, 60.0f, 4.0f), |
137 | | - new Cell(84.0f, 764.0f, 216.0f, 6.0f), |
138 | | - new Cell(90.0f, 764.0f, 216.0f, 4.0f), |
139 | | - new Cell(94.0f, 764.0f, 216.0f, 6.0f), |
140 | | - new Cell(100.0f, 764.0f, 216.0f, 28.0f), |
141 | | - new Cell(128.0f, 764.0f, 216.0f, 4.0f), |
142 | | - new Cell(132.0f, 764.0f, 216.0f, 64.0f), |
143 | | - new Cell(196.0f, 764.0f, 216.0f, 66.0f), |
144 | | - new Cell(262.0f, 764.0f, 216.0f, 4.0f), |
145 | | - new Cell(266.0f, 764.0f, 216.0f, 84.0f), |
146 | | - new Cell(350.0f, 764.0f, 216.0f, 4.0f), |
147 | | - new Cell(354.0f, 764.0f, 216.0f, 32.0f), |
148 | | - new Cell(386.0f, 764.0f, 216.0f, 38.0f), |
149 | | - new Cell(424.0f, 764.0f, 216.0f, 18.0f), |
150 | | - new Cell(442.0f, 764.0f, 216.0f, 74.0f), |
151 | | - new Cell(516.0f, 764.0f, 216.0f, 28.0f), |
152 | | - new Cell(544.0f, 764.0f, 216.0f, 4.0f) }; |
153 | | - |
| 27 | + |
154 | 28 | public static final Rectangle[] EXPECTED_RECTANGLES = { |
155 | 29 | new Rectangle(40.0f, 18.0f, 208.0f, 40.0f), |
156 | 30 | new Rectangle(84.0f, 18.0f, 962.0f, 464.0f) |
@@ -282,9 +156,23 @@ public void testDetectTwoSingleCells() { |
282 | 156 | } |
283 | 157 |
|
284 | 158 | @Test |
285 | | - public void testFindSpreadsheetsFromCells() { |
| 159 | + public void testFindSpreadsheetsFromCells() throws IOException { |
| 160 | + |
| 161 | + CSVParser parse = org.apache.commons.csv.CSVParser.parse(new File("src/test/resources/technology/tabula/csv/TestSpreadsheetExtractor-CELLS.csv"), |
| 162 | + Charset.forName("utf-8"), |
| 163 | + CSVFormat.DEFAULT); |
| 164 | + |
| 165 | + List<Cell> cells = new ArrayList<Cell>(); |
| 166 | + |
| 167 | + for (CSVRecord record: parse) { |
| 168 | + cells.add(new Cell(Float.parseFloat(record.get(0)), |
| 169 | + Float.parseFloat(record.get(1)), |
| 170 | + Float.parseFloat(record.get(2)), |
| 171 | + Float.parseFloat(record.get(3)))); |
| 172 | + } |
| 173 | + |
| 174 | + |
286 | 175 | SpreadsheetExtractionAlgorithm se = new SpreadsheetExtractionAlgorithm(); |
287 | | - List<? extends Rectangle> cells = Arrays.asList(CELLS); |
288 | 176 | List<Rectangle> expected = Arrays.asList(EXPECTED_RECTANGLES); |
289 | 177 | Collections.sort(expected); |
290 | 178 | List<Rectangle> foundRectangles = se.findSpreadsheetsFromCells(cells); |
|
0 commit comments