Skip to content

Commit b20e4b0

Browse files
More tests
1 parent d11cda0 commit b20e4b0

File tree

5 files changed

+107
-33
lines changed

5 files changed

+107
-33
lines changed

r/R/RcppExports.R

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -105,16 +105,16 @@ ChunkedArray__chunk <- function(chunked_array, i) {
105105
.Call(`_arrow_ChunkedArray__chunk`, chunked_array, i)
106106
}
107107

108-
ChunkedArray__chunks <- function(chunked_array, i) {
109-
.Call(`_arrow_ChunkedArray__chunks`, chunked_array, i)
108+
ChunkedArray__chunks <- function(chunked_array) {
109+
.Call(`_arrow_ChunkedArray__chunks`, chunked_array)
110110
}
111111

112112
ChunkedArray__type <- function(chunked_array) {
113113
.Call(`_arrow_ChunkedArray__type`, chunked_array)
114114
}
115115

116-
ChunkedArray_as_vector <- function(chunked_array) {
117-
.Call(`_arrow_ChunkedArray_as_vector`, chunked_array)
116+
ChunkedArray__as_vector <- function(chunked_array) {
117+
.Call(`_arrow_ChunkedArray__as_vector`, chunked_array)
118118
}
119119

120120
RecordBatch_to_dataframe <- function(batch) {
@@ -173,6 +173,10 @@ Column__type <- function(column) {
173173
.Call(`_arrow_Column__type`, column)
174174
}
175175

176+
Column__data <- function(column) {
177+
.Call(`_arrow_Column__data`, column)
178+
}
179+
176180
Field_initialize <- function(name, type, nullable = TRUE) {
177181
.Call(`_arrow_Field_initialize`, name, type, nullable)
178182
}

r/R/array.R

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,8 @@ read_record_batch <- function(path){
9595
public = list(
9696
length = function() Column__length(self),
9797
null_count = function() Column__null_count(self),
98-
type = function() `arrow::DataType`$dispatch(Column__type(self))
98+
type = function() `arrow::DataType`$dispatch(Column__type(self)),
99+
data = function() `arrow::ChunkedArray`$new(Column__data(self))
99100
)
100101
)
101102

@@ -106,7 +107,8 @@ read_record_batch <- function(path){
106107
num_chunks = function() ChunkedArray__num_chunks(self),
107108
chunk = function(i) `arrow::Array`$new(ChunkedArray__chunk(self, i)),
108109
chunks = function() purrr::map(ChunkedArray__chunks(self), `arrow::Array`$new),
109-
type = function() `arrow::DataType`$dispatch(ChunkedArray__type(self))
110+
type = function() `arrow::DataType`$dispatch(ChunkedArray__type(self)),
111+
as_vector = function() ChunkedArray__as_vector(self)
110112
)
111113
)
112114

r/src/RcppExports.cpp

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -299,14 +299,13 @@ BEGIN_RCPP
299299
END_RCPP
300300
}
301301
// ChunkedArray__chunks
302-
List ChunkedArray__chunks(const std::shared_ptr<arrow::ChunkedArray>& chunked_array, int i);
303-
RcppExport SEXP _arrow_ChunkedArray__chunks(SEXP chunked_arraySEXP, SEXP iSEXP) {
302+
List ChunkedArray__chunks(const std::shared_ptr<arrow::ChunkedArray>& chunked_array);
303+
RcppExport SEXP _arrow_ChunkedArray__chunks(SEXP chunked_arraySEXP) {
304304
BEGIN_RCPP
305305
Rcpp::RObject rcpp_result_gen;
306306
Rcpp::RNGScope rcpp_rngScope_gen;
307307
Rcpp::traits::input_parameter< const std::shared_ptr<arrow::ChunkedArray>& >::type chunked_array(chunked_arraySEXP);
308-
Rcpp::traits::input_parameter< int >::type i(iSEXP);
309-
rcpp_result_gen = Rcpp::wrap(ChunkedArray__chunks(chunked_array, i));
308+
rcpp_result_gen = Rcpp::wrap(ChunkedArray__chunks(chunked_array));
310309
return rcpp_result_gen;
311310
END_RCPP
312311
}
@@ -321,14 +320,14 @@ BEGIN_RCPP
321320
return rcpp_result_gen;
322321
END_RCPP
323322
}
324-
// ChunkedArray_as_vector
325-
SEXP ChunkedArray_as_vector(const std::shared_ptr<arrow::ChunkedArray>& chunked_array);
326-
RcppExport SEXP _arrow_ChunkedArray_as_vector(SEXP chunked_arraySEXP) {
323+
// ChunkedArray__as_vector
324+
SEXP ChunkedArray__as_vector(const std::shared_ptr<arrow::ChunkedArray>& chunked_array);
325+
RcppExport SEXP _arrow_ChunkedArray__as_vector(SEXP chunked_arraySEXP) {
327326
BEGIN_RCPP
328327
Rcpp::RObject rcpp_result_gen;
329328
Rcpp::RNGScope rcpp_rngScope_gen;
330329
Rcpp::traits::input_parameter< const std::shared_ptr<arrow::ChunkedArray>& >::type chunked_array(chunked_arraySEXP);
331-
rcpp_result_gen = Rcpp::wrap(ChunkedArray_as_vector(chunked_array));
330+
rcpp_result_gen = Rcpp::wrap(ChunkedArray__as_vector(chunked_array));
332331
return rcpp_result_gen;
333332
END_RCPP
334333
}
@@ -489,6 +488,17 @@ BEGIN_RCPP
489488
return rcpp_result_gen;
490489
END_RCPP
491490
}
491+
// Column__data
492+
std::shared_ptr<arrow::ChunkedArray> Column__data(const std::shared_ptr<arrow::Column>& column);
493+
RcppExport SEXP _arrow_Column__data(SEXP columnSEXP) {
494+
BEGIN_RCPP
495+
Rcpp::RObject rcpp_result_gen;
496+
Rcpp::RNGScope rcpp_rngScope_gen;
497+
Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Column>& >::type column(columnSEXP);
498+
rcpp_result_gen = Rcpp::wrap(Column__data(column));
499+
return rcpp_result_gen;
500+
END_RCPP
501+
}
492502
// Field_initialize
493503
std::shared_ptr<arrow::Field> Field_initialize(const std::string& name, const std::shared_ptr<arrow::DataType>& type, bool nullable);
494504
RcppExport SEXP _arrow_Field_initialize(SEXP nameSEXP, SEXP typeSEXP, SEXP nullableSEXP) {
@@ -1077,9 +1087,9 @@ static const R_CallMethodDef CallEntries[] = {
10771087
{"_arrow_ChunkedArray__null_count", (DL_FUNC) &_arrow_ChunkedArray__null_count, 1},
10781088
{"_arrow_ChunkedArray__num_chunks", (DL_FUNC) &_arrow_ChunkedArray__num_chunks, 1},
10791089
{"_arrow_ChunkedArray__chunk", (DL_FUNC) &_arrow_ChunkedArray__chunk, 2},
1080-
{"_arrow_ChunkedArray__chunks", (DL_FUNC) &_arrow_ChunkedArray__chunks, 2},
1090+
{"_arrow_ChunkedArray__chunks", (DL_FUNC) &_arrow_ChunkedArray__chunks, 1},
10811091
{"_arrow_ChunkedArray__type", (DL_FUNC) &_arrow_ChunkedArray__type, 1},
1082-
{"_arrow_ChunkedArray_as_vector", (DL_FUNC) &_arrow_ChunkedArray_as_vector, 1},
1092+
{"_arrow_ChunkedArray__as_vector", (DL_FUNC) &_arrow_ChunkedArray__as_vector, 1},
10831093
{"_arrow_RecordBatch_to_dataframe", (DL_FUNC) &_arrow_RecordBatch_to_dataframe, 1},
10841094
{"_arrow_dataframe_to_Table", (DL_FUNC) &_arrow_dataframe_to_Table, 1},
10851095
{"_arrow_Table_num_columns", (DL_FUNC) &_arrow_Table_num_columns, 1},
@@ -1094,6 +1104,7 @@ static const R_CallMethodDef CallEntries[] = {
10941104
{"_arrow_Column__length", (DL_FUNC) &_arrow_Column__length, 1},
10951105
{"_arrow_Column__null_count", (DL_FUNC) &_arrow_Column__null_count, 1},
10961106
{"_arrow_Column__type", (DL_FUNC) &_arrow_Column__type, 1},
1107+
{"_arrow_Column__data", (DL_FUNC) &_arrow_Column__data, 1},
10971108
{"_arrow_Field_initialize", (DL_FUNC) &_arrow_Field_initialize, 3},
10981109
{"_arrow_Field_ToString", (DL_FUNC) &_arrow_Field_ToString, 1},
10991110
{"_arrow_Field_name", (DL_FUNC) &_arrow_Field_name, 1},

r/src/buffer.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ std::shared_ptr<arrow::Array> ChunkedArray__chunk(const std::shared_ptr<arrow::C
231231
}
232232

233233
// [[Rcpp::export]]
234-
List ChunkedArray__chunks(const std::shared_ptr<arrow::ChunkedArray>& chunked_array, int i){
234+
List ChunkedArray__chunks(const std::shared_ptr<arrow::ChunkedArray>& chunked_array){
235235
return wrap(chunked_array->chunks());
236236
}
237237

@@ -241,7 +241,7 @@ std::shared_ptr<arrow::DataType> ChunkedArray__type(const std::shared_ptr<arrow:
241241
}
242242

243243
// [[Rcpp::export]]
244-
SEXP ChunkedArray_as_vector(const std::shared_ptr<arrow::ChunkedArray>& chunked_array){
244+
SEXP ChunkedArray__as_vector(const std::shared_ptr<arrow::ChunkedArray>& chunked_array){
245245
switch(chunked_array->type()->id()){
246246
case Type::INT8: return simple_ChunkedArray_to_Vector<RAWSXP>(chunked_array);
247247
case Type::INT32: return simple_ChunkedArray_to_Vector<INTSXP>(chunked_array);
@@ -369,7 +369,7 @@ List Table_to_dataframe(const std::shared_ptr<arrow::Table>& table){
369369
CharacterVector names(nc);
370370
for(int i=0; i<nc; i++) {
371371
auto column = table->column(i);
372-
tbl[i] = ChunkedArray_as_vector(column->data());
372+
tbl[i] = ChunkedArray__as_vector(column->data());
373373
names[i] = column->name();
374374
}
375375
tbl.attr("names") = names;
@@ -397,3 +397,8 @@ int Column__null_count(const std::shared_ptr<arrow::Column>& column) {
397397
std::shared_ptr<arrow::DataType> Column__type(const std::shared_ptr<arrow::Column>& column) {
398398
return column->type();
399399
}
400+
401+
// [[Rcpp::export]]
402+
std::shared_ptr<arrow::ChunkedArray> Column__data(const std::shared_ptr<arrow::Column>& column) {
403+
return column->data();
404+
}

r/tests/testthat/test-read-write.R

Lines changed: 66 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,24 +18,72 @@
1818
context("test-read-write")
1919

2020
test_that("arrow::table round trip", {
21-
tbl <- tibble(int = 1:10, dbl = as.numeric(1:10), raw = as.raw(1:10))
21+
tbl <- tibble::tibble(
22+
int = 1:10,
23+
dbl = as.numeric(1:10),
24+
raw = as.raw(1:10)
25+
)
2226

2327
tab <- arrow::table(tbl)
2428
expect_equal(tab$num_columns(), 3L)
2529
expect_equal(tab$num_rows(), 10L)
2630

27-
expect_equal(tab$column(0)$length(), 10L)
28-
expect_equal(tab$column(1)$length(), 10L)
29-
expect_equal(tab$column(2)$length(), 10L)
30-
31-
expect_equal(tab$column(0)$null_count(), 0L)
32-
expect_equal(tab$column(1)$null_count(), 0L)
33-
expect_equal(tab$column(2)$null_count(), 0L)
34-
35-
expect_equal(tab$column(0)$type(), int32())
36-
expect_equal(tab$column(1)$type(), float64())
37-
expect_equal(tab$column(2)$type(), int8())
38-
31+
# arrow::Column
32+
col_int <- tab$column(0)
33+
expect_equal(col_int$length(), 10L)
34+
expect_equal(col_int$null_count(), 0L)
35+
expect_equal(col_int$type(), int32())
36+
37+
# arrow::ChunkedArray
38+
chunked_array_int <- col_int$data()
39+
expect_equal(chunked_array_int$length(), 10L)
40+
expect_equal(chunked_array_int$null_count(), 0L)
41+
expect_equal(chunked_array_int$as_vector(), tbl$int)
42+
43+
# arrow::Array
44+
chunks_int <- chunked_array_int$chunks()
45+
expect_equal(length(chunks_int), chunked_array_int$num_chunks())
46+
for( i in seq_along(chunks_int)){
47+
expect_equal(chunked_array_int$chunk(i-1L), chunks_int[[i]])
48+
}
49+
50+
# arrow::Column
51+
col_dbl <- tab$column(1)
52+
expect_equal(col_dbl$length(), 10L)
53+
expect_equal(col_dbl$null_count(), 0L)
54+
expect_equal(col_dbl$type(), float64())
55+
56+
# arrow::ChunkedArray
57+
chunked_array_dbl <- col_dbl$data()
58+
expect_equal(chunked_array_dbl$length(), 10L)
59+
expect_equal(chunked_array_dbl$null_count(), 0L)
60+
expect_equal(chunked_array_dbl$as_vector(), tbl$dbl)
61+
62+
# arrow::Array
63+
chunks_dbl <- chunked_array_dbl$chunks()
64+
expect_equal(length(chunks_dbl), chunked_array_dbl$num_chunks())
65+
for( i in seq_along(chunks_dbl)){
66+
expect_equal(chunked_array_dbl$chunk(i-1L), chunks_dbl[[i]])
67+
}
68+
69+
# arrow::Column
70+
col_raw <- tab$column(2)
71+
expect_equal(col_raw$length(), 10L)
72+
expect_equal(col_raw$null_count(), 0L)
73+
expect_equal(col_raw$type(), int8())
74+
75+
# arrow::ChunkedArray
76+
chunked_array_raw <- col_raw$data()
77+
expect_equal(chunked_array_raw$length(), 10L)
78+
expect_equal(chunked_array_raw$null_count(), 0L)
79+
expect_equal(chunked_array_raw$as_vector(), tbl$raw)
80+
81+
# arrow::Array
82+
chunks_raw <- chunked_array_raw$chunks()
83+
expect_equal(length(chunks_raw), chunked_array_raw$num_chunks())
84+
for( i in seq_along(chunks_raw)){
85+
expect_equal(chunked_array_raw$chunk(i-1L), chunks_raw[[i]])
86+
}
3987
tf <- tempfile(); on.exit(unlink(tf))
4088
write_arrow(tbl, path = tf)
4189

@@ -44,7 +92,11 @@ test_that("arrow::table round trip", {
4492
})
4593

4694
test_that("arrow::table round trip handles NA in integer and numeric", {
47-
tbl <- tibble(int = c(NA, 2:10), dbl = as.numeric(c(1:5, NA, 7:9, NA)), raw = as.raw(1:10))
95+
tbl <- tibble::tibble(
96+
int = c(NA, 2:10),
97+
dbl = as.numeric(c(1:5, NA, 7:9, NA)),
98+
raw = as.raw(1:10)
99+
)
48100

49101
tab <- arrow::table(tbl)
50102
expect_equal(tab$num_columns(), 3L)

0 commit comments

Comments
 (0)