|
51 | 51 | :accessor dimension-metadata |
52 | 52 | :type list |
53 | 53 | :initform '() |
54 | | - :documentation "An alist that stores some useful information, e.g. the hashtable (for equality) for a category dimension." |
55 | | - ))) |
| 54 | + :documentation "An alist that stores some useful information, e.g. the hashtable (for equality) for a category dimension."))) |
| 55 | + |
56 | 56 |
|
;;; Condition type derived from SIMPLE-ERROR that additionally records a
;;; DIMENSION object, supplied through the :DIMENSION initarg.
;;; NOTE(review): judging by the name, this is presumably signalled when a
;;; dimension's type is not recognised -- confirm against the signalling sites.
(define-condition dimension-unknown-type-error (simple-error)
  ((dimension
    :initarg :dimension))
  (:documentation
   "A SIMPLE-ERROR that also carries the offending dimension in its DIMENSION slot."))
|
492 | 492 | (csv-delimiter #\,) |
493 | 493 | (missing-value-check t) |
494 | 494 | missing-values-list) |
495 | | - "Reads CSV data from file. The normal convention is first line is column name. |
496 | | -However if CSV-HEADER-P is a list of strings then CSV-HEADER-P specifies the column names" |
497 | | - (assert (member type '(:sexp :csv))) |
| 495 | + "Reads an unspecialized dataset from file. |
| 496 | + - If TYPE is :SEXP or NIL (the default), a list is read from a s-expression |
| 497 | +file. The first element is a list of column names and the rest of the elements |
| 498 | +are the data. EXTERNAL-FORMAT defaults to :DEFAULT for opening the file. |
| 499 | + - If TYPE is :CSV, the data is read as csv. The normal convention is first |
| 500 | +line is column name. However if CSV-HEADER-P is a list of strings then |
| 501 | +CSV-HEADER-P specifies the column names. EXTERNAL-FORMAT defaults to shift-jis." |
| 502 | + (when (and (not external-format-p) |
| 503 | + (eql type :csv)) |
| 504 | + (setf external-format clml.utility.csv::*csv-default-external-format*)) |
| 505 | + (with-open-file (stream filename :external-format external-format) |
| 506 | + #+allegro (setf (excl:eol-convention f) :anynl-dos) |
| 507 | + (read-data-from-stream stream |
| 508 | + :type type |
| 509 | + :csv-type-spec csv-type-spec |
| 510 | + :csv-header-p csv-header-p |
| 511 | + :csv-delimiter csv-delimiter |
| 512 | + :missing-value-check missing-value-check |
| 513 | + :missing-values-list missing-values-list))) |
498 | 514 |
|
499 | | - (ecase type |
500 | | - ((:sexp nil) |
501 | | - (let (tmp) |
502 | | - (with-open-file (f filename :external-format external-format) |
503 | | - (with-standard-io-syntax |
504 | | - (let ((*read-eval* nil) |
505 | | - (*read-default-float-format* 'double-float)) |
506 | | - (setf tmp (read f))))) |
507 | | - |
508 | | - (make-unspecialized-dataset |
509 | | - (first tmp) |
510 | | - (map 'vector |
511 | | - (lambda (p) |
512 | | - (coerce p 'vector)) |
513 | | - (rest tmp)) |
514 | | - :missing-value-check missing-value-check |
515 | | - :missing-values-list missing-values-list))) |
516 | | - (:csv |
517 | | - (multiple-value-bind (data header) |
518 | | - (clml.utility.csv:read-csv-file filename :header csv-header-p :type-spec csv-type-spec :delimiter csv-delimiter |
519 | | - :external-format (if external-format-p external-format |
520 | | - #+allegro :932 |
521 | | - #+ccl :Windows-31j |
522 | | - #+sbcl :sjis |
523 | | - #+lispworks :sjis |
524 | | - )) |
525 | | - (make-unspecialized-dataset (coerce header 'list) data |
526 | | - |
527 | | - :missing-value-check missing-value-check |
528 | | - :missing-values-list missing-values-list))))) |
529 | 515 |
|
530 | 516 | ;;;; read and process data |
531 | 517 | ;;@ function-type: string -> unspecialized-dataset |
(defun read-data-from-stream (stream &key
                                       (type :csv)
                                       csv-type-spec
                                       (csv-header-p t)
                                       (csv-delimiter #\,)
                                       (missing-value-check t)
                                       (missing-values-list nil missing-values-list-p))
  "Reads an unspecialized dataset from STREAM.
  - If TYPE is :SEXP or NIL, a single list is read from STREAM with READ
(under standard I/O syntax, with *READ-EVAL* disabled and floats read as
DOUBLE-FLOAT).  The first element is a list of column names and the rest of
the elements are the data rows.
  - If TYPE is :CSV (the default), the data is read as csv.  The normal
convention is first line is column name.  However if CSV-HEADER-P is a list
of strings then CSV-HEADER-P specifies the column names.  CSV-TYPE-SPEC and
CSV-DELIMITER are passed on to the csv reader.
  - If TYPE is :ARFF, the data is read as arff.  By default, NIL and \"?\"
are used as missing values with arff format.
MISSING-VALUE-CHECK and MISSING-VALUES-LIST are passed on to
MAKE-UNSPECIALIZED-DATASET.  Any other TYPE signals an error."
  ;; NIL must be listed here: the ECASE below (and the docstring) treat NIL
  ;; as a synonym for :SEXP, so rejecting it would make that clause dead code.
  (assert (member type '(:sexp nil :csv :arff)))
  (flet ((build-dataset (column-names rows
                         &optional (missing-values missing-values-list))
           ;; Single construction point shared by every input format.
           (make-unspecialized-dataset
            column-names rows
            :missing-value-check missing-value-check
            :missing-values-list missing-values)))
    (ecase type
      ((:sexp nil)
       (let ((tmp (with-standard-io-syntax
                    ;; WITH-STANDARD-IO-SYNTAX rebinds *READ-EVAL* to T and
                    ;; the float format to SINGLE-FLOAT, so override both
                    ;; inside it: never evaluate #. forms from data, and
                    ;; read floats as doubles.
                    (let ((*read-eval* nil)
                          (*read-default-float-format* 'double-float))
                      (read stream)))))
         (build-dataset (first tmp)
                        (map 'vector
                             (lambda (p)
                               (coerce p 'vector))
                             (rest tmp)))))
      (:csv
       (multiple-value-bind (data header)
           (clml.utility.csv:read-csv-stream stream
                                             :header csv-header-p
                                             :type-spec csv-type-spec
                                             :delimiter csv-delimiter)
         (build-dataset (coerce header 'list) data)))
      (:arff
       (multiple-value-bind (data header)
           (clml.utility.arff:read-arff-stream stream)
         ;; arff uses ? as a missing value symbol, so use that unless the
         ;; caller explicitly supplied MISSING-VALUES-LIST.
         (build-dataset (coerce header 'list) data
                        (if missing-values-list-p
                            missing-values-list
                            '(nil "?"))))))))
548 | 564 |
|
549 | 565 | ;;; function-type: unspecialized-dataset -> specialized-dataset |
550 | 566 |
|
|
0 commit comments