Skip to content

Commit a1775ec

Browse files
committed
Moved test fixture to a CSV file
1 parent cbe1787 commit a1775ec

2 files changed

Lines changed: 145 additions & 133 deletions

File tree

src/test/java/technology/tabula/TestSpreadsheetExtractor.java

Lines changed: 21 additions & 133 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,13 @@
33
import static org.junit.Assert.*;
44

55
import java.awt.geom.Point2D;
6+
import java.io.File;
67
import java.io.IOException;
7-
import java.util.Arrays;
8-
import java.util.Collections;
9-
import java.util.List;
10-
import java.util.Map;
8+
import java.nio.charset.Charset;
9+
import java.util.*;
1110

1211
import org.apache.commons.csv.CSVFormat;
12+
import org.apache.commons.csv.CSVParser;
1313
import org.apache.commons.csv.CSVRecord;
1414
import org.junit.Test;
1515

@@ -21,136 +21,10 @@
2121
import technology.tabula.writers.CSVWriter;
2222
import technology.tabula.UtilsForTesting;
2323
import technology.tabula.writers.JSONWriter;
24-
import java.text.Normalizer;
2524

2625
public class TestSpreadsheetExtractor {
2726

28-
private static final Cell[] CELLS = new Cell[] {
29-
new Cell(40.0f, 18.0f, 208.0f, 4.0f),
30-
new Cell(44.0f, 18.0f, 52.0f, 6.0f),
31-
new Cell(50.0f, 18.0f, 52.0f, 4.0f),
32-
new Cell(54.0f, 18.0f, 52.0f, 6.0f),
33-
new Cell(60.0f, 18.0f, 52.0f, 4.0f),
34-
new Cell(64.0f, 18.0f, 52.0f, 6.0f),
35-
new Cell(70.0f, 18.0f, 52.0f, 4.0f),
36-
new Cell(74.0f, 18.0f, 52.0f, 6.0f),
37-
new Cell(90.0f, 18.0f, 52.0f, 4.0f),
38-
new Cell(94.0f, 18.0f, 52.0f, 6.0f),
39-
new Cell(100.0f, 18.0f, 52.0f, 28.0f),
40-
new Cell(128.0f, 18.0f, 52.0f, 4.0f),
41-
new Cell(132.0f, 18.0f, 52.0f, 64.0f),
42-
new Cell(196.0f, 18.0f, 52.0f, 66.0f),
43-
new Cell(262.0f, 18.0f, 52.0f, 4.0f),
44-
new Cell(266.0f, 18.0f, 52.0f, 84.0f),
45-
new Cell(350.0f, 18.0f, 52.0f, 4.0f),
46-
new Cell(354.0f, 18.0f, 52.0f, 32.0f),
47-
new Cell(386.0f, 18.0f, 52.0f, 38.0f),
48-
new Cell(424.0f, 18.0f, 52.0f, 18.0f),
49-
new Cell(442.0f, 18.0f, 52.0f, 74.0f),
50-
new Cell(516.0f, 18.0f, 52.0f, 28.0f),
51-
new Cell(544.0f, 18.0f, 52.0f, 4.0f),
52-
new Cell(44.0f, 70.0f, 156.0f, 6.0f),
53-
new Cell(50.0f, 70.0f, 156.0f, 4.0f),
54-
new Cell(54.0f, 70.0f, 156.0f, 6.0f),
55-
new Cell(60.0f, 70.0f, 156.0f, 4.0f),
56-
new Cell(64.0f, 70.0f, 156.0f, 6.0f),
57-
new Cell(70.0f, 70.0f, 156.0f, 4.0f),
58-
new Cell(74.0f, 70.0f, 156.0f, 6.0f),
59-
new Cell(84.0f, 70.0f, 2.0f, 6.0f),
60-
new Cell(90.0f, 70.0f, 156.0f, 4.0f),
61-
new Cell(94.0f, 70.0f, 156.0f, 6.0f),
62-
new Cell(100.0f, 70.0f, 156.0f, 28.0f),
63-
new Cell(128.0f, 70.0f, 156.0f, 4.0f),
64-
new Cell(132.0f, 70.0f, 156.0f, 64.0f),
65-
new Cell(196.0f, 70.0f, 156.0f, 66.0f),
66-
new Cell(262.0f, 70.0f, 156.0f, 4.0f),
67-
new Cell(266.0f, 70.0f, 156.0f, 84.0f),
68-
new Cell(350.0f, 70.0f, 156.0f, 4.0f),
69-
new Cell(354.0f, 70.0f, 156.0f, 32.0f),
70-
new Cell(386.0f, 70.0f, 156.0f, 38.0f),
71-
new Cell(424.0f, 70.0f, 156.0f, 18.0f),
72-
new Cell(442.0f, 70.0f, 156.0f, 74.0f),
73-
new Cell(516.0f, 70.0f, 156.0f, 28.0f),
74-
new Cell(544.0f, 70.0f, 156.0f, 4.0f),
75-
new Cell(84.0f, 72.0f, 446.0f, 6.0f),
76-
new Cell(90.0f, 226.0f, 176.0f, 4.0f),
77-
new Cell(94.0f, 226.0f, 176.0f, 6.0f),
78-
new Cell(100.0f, 226.0f, 176.0f, 28.0f),
79-
new Cell(128.0f, 226.0f, 176.0f, 4.0f),
80-
new Cell(132.0f, 226.0f, 176.0f, 64.0f),
81-
new Cell(196.0f, 226.0f, 176.0f, 66.0f),
82-
new Cell(262.0f, 226.0f, 176.0f, 4.0f),
83-
new Cell(266.0f, 226.0f, 176.0f, 84.0f),
84-
new Cell(350.0f, 226.0f, 176.0f, 4.0f),
85-
new Cell(354.0f, 226.0f, 176.0f, 32.0f),
86-
new Cell(386.0f, 226.0f, 176.0f, 38.0f),
87-
new Cell(424.0f, 226.0f, 176.0f, 18.0f),
88-
new Cell(442.0f, 226.0f, 176.0f, 74.0f),
89-
new Cell(516.0f, 226.0f, 176.0f, 28.0f),
90-
new Cell(544.0f, 226.0f, 176.0f, 4.0f),
91-
new Cell(90.0f, 402.0f, 116.0f, 4.0f),
92-
new Cell(94.0f, 402.0f, 116.0f, 6.0f),
93-
new Cell(100.0f, 402.0f, 116.0f, 28.0f),
94-
new Cell(128.0f, 402.0f, 116.0f, 4.0f),
95-
new Cell(132.0f, 402.0f, 116.0f, 64.0f),
96-
new Cell(196.0f, 402.0f, 116.0f, 66.0f),
97-
new Cell(262.0f, 402.0f, 116.0f, 4.0f),
98-
new Cell(266.0f, 402.0f, 116.0f, 84.0f),
99-
new Cell(350.0f, 402.0f, 116.0f, 4.0f),
100-
new Cell(354.0f, 402.0f, 116.0f, 32.0f),
101-
new Cell(386.0f, 402.0f, 116.0f, 38.0f),
102-
new Cell(424.0f, 402.0f, 116.0f, 18.0f),
103-
new Cell(442.0f, 402.0f, 116.0f, 74.0f),
104-
new Cell(516.0f, 402.0f, 116.0f, 28.0f),
105-
new Cell(544.0f, 402.0f, 116.0f, 4.0f),
106-
new Cell(84.0f, 518.0f, 246.0f, 6.0f),
107-
new Cell(90.0f, 518.0f, 186.0f, 4.0f),
108-
new Cell(94.0f, 518.0f, 186.0f, 6.0f),
109-
new Cell(100.0f, 518.0f, 186.0f, 28.0f),
110-
new Cell(128.0f, 518.0f, 186.0f, 4.0f),
111-
new Cell(132.0f, 518.0f, 186.0f, 64.0f),
112-
new Cell(196.0f, 518.0f, 186.0f, 66.0f),
113-
new Cell(262.0f, 518.0f, 186.0f, 4.0f),
114-
new Cell(266.0f, 518.0f, 186.0f, 84.0f),
115-
new Cell(350.0f, 518.0f, 186.0f, 4.0f),
116-
new Cell(354.0f, 518.0f, 186.0f, 32.0f),
117-
new Cell(386.0f, 518.0f, 186.0f, 38.0f),
118-
new Cell(424.0f, 518.0f, 186.0f, 18.0f),
119-
new Cell(442.0f, 518.0f, 186.0f, 74.0f),
120-
new Cell(516.0f, 518.0f, 186.0f, 28.0f),
121-
new Cell(544.0f, 518.0f, 186.0f, 4.0f),
122-
new Cell(90.0f, 704.0f, 60.0f, 4.0f),
123-
new Cell(94.0f, 704.0f, 60.0f, 6.0f),
124-
new Cell(100.0f, 704.0f, 60.0f, 28.0f),
125-
new Cell(128.0f, 704.0f, 60.0f, 4.0f),
126-
new Cell(132.0f, 704.0f, 60.0f, 64.0f),
127-
new Cell(196.0f, 704.0f, 60.0f, 66.0f),
128-
new Cell(262.0f, 704.0f, 60.0f, 4.0f),
129-
new Cell(266.0f, 704.0f, 60.0f, 84.0f),
130-
new Cell(350.0f, 704.0f, 60.0f, 4.0f),
131-
new Cell(354.0f, 704.0f, 60.0f, 32.0f),
132-
new Cell(386.0f, 704.0f, 60.0f, 38.0f),
133-
new Cell(424.0f, 704.0f, 60.0f, 18.0f),
134-
new Cell(442.0f, 704.0f, 60.0f, 74.0f),
135-
new Cell(516.0f, 704.0f, 60.0f, 28.0f),
136-
new Cell(544.0f, 704.0f, 60.0f, 4.0f),
137-
new Cell(84.0f, 764.0f, 216.0f, 6.0f),
138-
new Cell(90.0f, 764.0f, 216.0f, 4.0f),
139-
new Cell(94.0f, 764.0f, 216.0f, 6.0f),
140-
new Cell(100.0f, 764.0f, 216.0f, 28.0f),
141-
new Cell(128.0f, 764.0f, 216.0f, 4.0f),
142-
new Cell(132.0f, 764.0f, 216.0f, 64.0f),
143-
new Cell(196.0f, 764.0f, 216.0f, 66.0f),
144-
new Cell(262.0f, 764.0f, 216.0f, 4.0f),
145-
new Cell(266.0f, 764.0f, 216.0f, 84.0f),
146-
new Cell(350.0f, 764.0f, 216.0f, 4.0f),
147-
new Cell(354.0f, 764.0f, 216.0f, 32.0f),
148-
new Cell(386.0f, 764.0f, 216.0f, 38.0f),
149-
new Cell(424.0f, 764.0f, 216.0f, 18.0f),
150-
new Cell(442.0f, 764.0f, 216.0f, 74.0f),
151-
new Cell(516.0f, 764.0f, 216.0f, 28.0f),
152-
new Cell(544.0f, 764.0f, 216.0f, 4.0f) };
153-
27+
15428
public static final Rectangle[] EXPECTED_RECTANGLES = {
15529
new Rectangle(40.0f, 18.0f, 208.0f, 40.0f),
15630
new Rectangle(84.0f, 18.0f, 962.0f, 464.0f)
@@ -282,9 +156,23 @@ public void testDetectTwoSingleCells() {
282156
}
283157

284158
@Test
285-
public void testFindSpreadsheetsFromCells() {
159+
public void testFindSpreadsheetsFromCells() throws IOException {
160+
161+
CSVParser parse = org.apache.commons.csv.CSVParser.parse(new File("src/test/resources/technology/tabula/csv/TestSpreadsheetExtractor-CELLS.csv"),
162+
Charset.forName("utf-8"),
163+
CSVFormat.DEFAULT);
164+
165+
List<Cell> cells = new ArrayList<Cell>();
166+
167+
for (CSVRecord record: parse) {
168+
cells.add(new Cell(Float.parseFloat(record.get(0)),
169+
Float.parseFloat(record.get(1)),
170+
Float.parseFloat(record.get(2)),
171+
Float.parseFloat(record.get(3))));
172+
}
173+
174+
286175
SpreadsheetExtractionAlgorithm se = new SpreadsheetExtractionAlgorithm();
287-
List<? extends Rectangle> cells = Arrays.asList(CELLS);
288176
List<Rectangle> expected = Arrays.asList(EXPECTED_RECTANGLES);
289177
Collections.sort(expected);
290178
List<Rectangle> foundRectangles = se.findSpreadsheetsFromCells(cells);
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
40.0,18.0,208.0,4.0
2+
44.0,18.0,52.0,6.0
3+
50.0,18.0,52.0,4.0
4+
54.0,18.0,52.0,6.0
5+
60.0,18.0,52.0,4.0
6+
64.0,18.0,52.0,6.0
7+
70.0,18.0,52.0,4.0
8+
74.0,18.0,52.0,6.0
9+
90.0,18.0,52.0,4.0
10+
94.0,18.0,52.0,6.0
11+
100.0,18.0,52.0,28.0
12+
128.0,18.0,52.0,4.0
13+
132.0,18.0,52.0,64.0
14+
196.0,18.0,52.0,66.0
15+
262.0,18.0,52.0,4.0
16+
266.0,18.0,52.0,84.0
17+
350.0,18.0,52.0,4.0
18+
354.0,18.0,52.0,32.0
19+
386.0,18.0,52.0,38.0
20+
424.0,18.0,52.0,18.0
21+
442.0,18.0,52.0,74.0
22+
516.0,18.0,52.0,28.0
23+
544.0,18.0,52.0,4.0
24+
44.0,70.0,156.0,6.0
25+
50.0,70.0,156.0,4.0
26+
54.0,70.0,156.0,6.0
27+
60.0,70.0,156.0,4.0
28+
64.0,70.0,156.0,6.0
29+
70.0,70.0,156.0,4.0
30+
74.0,70.0,156.0,6.0
31+
84.0,70.0,2.0,6.0
32+
90.0,70.0,156.0,4.0
33+
94.0,70.0,156.0,6.0
34+
100.0,70.0,156.0,28.0
35+
128.0,70.0,156.0,4.0
36+
132.0,70.0,156.0,64.0
37+
196.0,70.0,156.0,66.0
38+
262.0,70.0,156.0,4.0
39+
266.0,70.0,156.0,84.0
40+
350.0,70.0,156.0,4.0
41+
354.0,70.0,156.0,32.0
42+
386.0,70.0,156.0,38.0
43+
424.0,70.0,156.0,18.0
44+
442.0,70.0,156.0,74.0
45+
516.0,70.0,156.0,28.0
46+
544.0,70.0,156.0,4.0
47+
84.0,72.0,446.0,6.0
48+
90.0,226.0,176.0,4.0
49+
94.0,226.0,176.0,6.0
50+
100.0,226.0,176.0,28.0
51+
128.0,226.0,176.0,4.0
52+
132.0,226.0,176.0,64.0
53+
196.0,226.0,176.0,66.0
54+
262.0,226.0,176.0,4.0
55+
266.0,226.0,176.0,84.0
56+
350.0,226.0,176.0,4.0
57+
354.0,226.0,176.0,32.0
58+
386.0,226.0,176.0,38.0
59+
424.0,226.0,176.0,18.0
60+
442.0,226.0,176.0,74.0
61+
516.0,226.0,176.0,28.0
62+
544.0,226.0,176.0,4.0
63+
90.0,402.0,116.0,4.0
64+
94.0,402.0,116.0,6.0
65+
100.0,402.0,116.0,28.0
66+
128.0,402.0,116.0,4.0
67+
132.0,402.0,116.0,64.0
68+
196.0,402.0,116.0,66.0
69+
262.0,402.0,116.0,4.0
70+
266.0,402.0,116.0,84.0
71+
350.0,402.0,116.0,4.0
72+
354.0,402.0,116.0,32.0
73+
386.0,402.0,116.0,38.0
74+
424.0,402.0,116.0,18.0
75+
442.0,402.0,116.0,74.0
76+
516.0,402.0,116.0,28.0
77+
544.0,402.0,116.0,4.0
78+
84.0,518.0,246.0,6.0
79+
90.0,518.0,186.0,4.0
80+
94.0,518.0,186.0,6.0
81+
100.0,518.0,186.0,28.0
82+
128.0,518.0,186.0,4.0
83+
132.0,518.0,186.0,64.0
84+
196.0,518.0,186.0,66.0
85+
262.0,518.0,186.0,4.0
86+
266.0,518.0,186.0,84.0
87+
350.0,518.0,186.0,4.0
88+
354.0,518.0,186.0,32.0
89+
386.0,518.0,186.0,38.0
90+
424.0,518.0,186.0,18.0
91+
442.0,518.0,186.0,74.0
92+
516.0,518.0,186.0,28.0
93+
544.0,518.0,186.0,4.0
94+
90.0,704.0,60.0,4.0
95+
94.0,704.0,60.0,6.0
96+
100.0,704.0,60.0,28.0
97+
128.0,704.0,60.0,4.0
98+
132.0,704.0,60.0,64.0
99+
196.0,704.0,60.0,66.0
100+
262.0,704.0,60.0,4.0
101+
266.0,704.0,60.0,84.0
102+
350.0,704.0,60.0,4.0
103+
354.0,704.0,60.0,32.0
104+
386.0,704.0,60.0,38.0
105+
424.0,704.0,60.0,18.0
106+
442.0,704.0,60.0,74.0
107+
516.0,704.0,60.0,28.0
108+
544.0,704.0,60.0,4.0
109+
84.0,764.0,216.0,6.0
110+
90.0,764.0,216.0,4.0
111+
94.0,764.0,216.0,6.0
112+
100.0,764.0,216.0,28.0
113+
128.0,764.0,216.0,4.0
114+
132.0,764.0,216.0,64.0
115+
196.0,764.0,216.0,66.0
116+
262.0,764.0,216.0,4.0
117+
266.0,764.0,216.0,84.0
118+
350.0,764.0,216.0,4.0
119+
354.0,764.0,216.0,32.0
120+
386.0,764.0,216.0,38.0
121+
424.0,764.0,216.0,18.0
122+
442.0,764.0,216.0,74.0
123+
516.0,764.0,216.0,28.0
124+
544.0,764.0,216.0,4.0

0 commit comments

Comments
 (0)