Skip to content

Commit 7ed21be

Browse files
committed
adding dictionary option for table loading
1 parent 6c9ae42 commit 7ed21be

3 files changed

Lines changed: 48 additions & 24 deletions

File tree

core/src/processing/core/PApplet.java

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6249,22 +6249,28 @@ public Table loadTable(String filename) {
62496249

62506250

62516251
/**
6252-
* @param options may contain "header", "tsv", "csv", or "bin" separated by commas
6252+
* Options may contain "header", "tsv", "csv", or "bin" separated by commas.
6253+
*
6254+
* Another option is "dictionary=filename.tsv", which allows users to
6255+
* specify a "dictionary" file that contains a mapping of the column titles
6256+
* and the data types used in the table file. This can be far more efficient
6257+
* (in terms of speed and memory usage) for loading and parsing tables. The
6258+
* dictionary file must contain tab-separated values; it is always parsed as
6259+
* TSV, regardless of its file extension. This option was added in Processing 2.0.2.
62536260
*/
62546261
public Table loadTable(String filename, String options) {
62556262
try {
6256-
// String ext = checkExtension(filename);
6257-
// if (ext != null) {
6258-
// if (ext.equals("csv") || ext.equals("tsv") || ext.equals("bin")) {
6259-
// if (options == null) {
6260-
// options = ext;
6261-
// } else {
6262-
// options = ext + "," + options;
6263-
// }
6264-
// }
6265-
// }
6266-
return new Table(createInput(filename),
6267-
Table.extensionOptions(true, filename, options));
6263+
String optionStr = Table.extensionOptions(true, filename, options);
6264+
String[] optionList = split(optionStr, ',');
6265+
6266+
Table dictionary = null;
6267+
for (String opt : optionList) {
6268+
if (opt.startsWith("dictionary=")) {
6269+
dictionary = loadTable(opt.substring(opt.indexOf('=') + 1), "tsv");
6270+
return dictionary.typedParse(createInput(filename), optionStr);
6271+
}
6272+
}
6273+
return new Table(createInput(filename), optionStr);
62686274

62696275
} catch (IOException e) {
62706276
e.printStackTrace();

core/src/processing/data/Table.java

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ public Table(File file) throws IOException {
119119
*/
120120
public Table(File file, String options) throws IOException {
121121
// uses createInput() to handle .gz (and eventually .bz2) files
122+
init();
122123
parse(PApplet.createInput(file),
123124
extensionOptions(true, file.getName(), options));
124125
}
@@ -146,9 +147,11 @@ public Table(InputStream input) throws IOException {
146147
* @throws IOException
147148
*/
148149
public Table(InputStream input, String options) throws IOException {
150+
init();
149151
parse(input, options);
150152
}
151153

154+
152155
/**
153156
* @nowebref
154157
*/
@@ -210,6 +213,14 @@ public Table(ResultSet rs) {
210213
}
211214

212215

216+
public Table typedParse(InputStream input, String options) throws IOException {
217+
Table table = new Table();
218+
table.setColumnTypes(this);
219+
table.parse(input, options);
220+
return table;
221+
}
222+
223+
213224
protected void init() {
214225
columns = new Object[0];
215226
columnTypes = new int[0];
@@ -272,7 +283,7 @@ static public String extensionOptions(boolean loading, String filename, String o
272283

273284

274285
protected void parse(InputStream input, String options) throws IOException {
275-
init();
286+
//init();
276287

277288
boolean awfulCSV = false;
278289
boolean header = false;
@@ -302,6 +313,8 @@ protected void parse(InputStream input, String options) throws IOException {
302313
header = true;
303314
} else if (opt.startsWith(sheetParam)) {
304315
worksheet = opt.substring(sheetParam.length());
316+
} else if (opt.startsWith("dictionary=")) {
317+
// ignore option, this is only handled by PApplet
305318
} else {
306319
throw new IllegalArgumentException("'" + opt + "' is not a valid option for loading a Table");
307320
}
@@ -352,24 +365,25 @@ protected void parseBasic(BufferedReader reader,
352365
row++;
353366
}
354367

355-
/*
356-
// this is problematic unless we're going to calculate rowCount first
357-
if (row % 10000 == 0) {
368+
// the (disabled) progress printout below is problematic unless we're going to calculate rowCount first
369+
if (row % 10000 == 0) {
370+
/*
358371
if (row < rowCount) {
359372
int pct = (100 * row) / rowCount;
360373
if (pct != prev) { // also prevents "0%" from showing up
361374
System.out.println(pct + "%");
362375
prev = pct;
363376
}
364377
}
365-
try {
366-
Thread.sleep(5);
367-
} catch (InterruptedException e) {
368-
e.printStackTrace();
378+
*/
379+
try {
380+
// Sleep this thread so that the GC can catch up
381+
Thread.sleep(10);
382+
} catch (InterruptedException e) {
383+
e.printStackTrace();
384+
}
369385
}
370386
}
371-
*/
372-
}
373387
} catch (Exception e) {
374388
throw new RuntimeException("Error reading table on line " + row, e);
375389
}

core/todo.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,11 @@ _ draw(s) doesn't work on the returned PShape
7070

7171
table
7272
X add sort() to Table
73-
_ implement version of Table that takes a dictionary file
73+
X implement version of Table that takes a dictionary file
74+
X dictionary=blah.tsv
75+
X tsv only, ignores extension
76+
X if the extension were honored, we couldn't use .dict instead
77+
X and that's probably the most useful
7478
_ addRow() is not efficient, probably need to do the doubling
7579
_ or have a setIncrement() function?
7680
_ it would default to 1 on tables loaded from a file

0 commit comments

Comments
 (0)