Skip to content

Commit 97659ff

Browse files
+ as_tibble.arrow::RecordBatch
1 parent fa4ee22 commit 97659ff

File tree

8 files changed

+126
-3
lines changed

8 files changed

+126
-3
lines changed

r/DESCRIPTION

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ Imports:
2121
glue,
2222
R6,
2323
vctrs,
24-
fs
24+
fs,
25+
tibble
2526
Roxygen: list(markdown = TRUE)
2627
RoxygenNote: 6.0.1.9000
2728
Suggests:
@@ -32,5 +33,6 @@ Collate:
3233
'RcppExports.R'
3334
'array.R'
3435
'memory_pool.R'
36+
'reexports-tibble.R'
3537
'status.R'
3638
'zzz.R'

r/NAMESPACE

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,15 @@ S3method("$","arrow-enum")
77
S3method("==","arrow::Array")
88
S3method("==","arrow::DataType")
99
S3method("==","arrow::Field")
10+
S3method(as_tibble,"arrow::RecordBatch")
1011
S3method(length,"arrow::Array")
1112
S3method(print,"arrow-enum")
1213
export(DateUnit)
1314
export(StatusCode)
1415
export(TimeUnit)
1516
export(Type)
1617
export(array)
18+
export(as_tibble)
1719
export(boolean)
1820
export(date32)
1921
export(date64)
@@ -52,4 +54,5 @@ importFrom(rlang,dots_n)
5254
importFrom(rlang,quo_name)
5355
importFrom(rlang,seq2)
5456
importFrom(rlang,set_names)
57+
importFrom(tibble,as_tibble)
5558
useDynLib(arrow, .registration = TRUE)

r/R/RcppExports.R

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,14 @@ RecordBatch_schema <- function(x) {
7777
.Call(`_arrow_RecordBatch_schema`, x)
7878
}
7979

80+
# Thin R-side wrapper: forwards `batch` and `i` unchanged to the registered
# native routine `_arrow_RecordBatch_column` through .Call() and returns
# whatever that routine returns.
RecordBatch_column <- function(batch, i) {
81+
.Call(`_arrow_RecordBatch_column`, batch, i)
82+
}
83+
84+
# Thin R-side wrapper: forwards `batch` unchanged to the registered native
# routine `_arrow_RecordBatch_to_dataframe` through .Call() and returns its
# result.
RecordBatch_to_dataframe <- function(batch) {
85+
.Call(`_arrow_RecordBatch_to_dataframe`, batch)
86+
}
87+
8088
# Thin R-side wrapper: forwards `tbl` unchanged to the registered native
# routine `_arrow_dataframe_to_Table` through .Call() and returns its result.
dataframe_to_Table <- function(tbl) {
8189
.Call(`_arrow_dataframe_to_Table`, tbl)
8290
}

r/R/array.R

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
type_id = function() Array_type_id(self),
4040
Equals = function(other) Array_Equals(self, other),
4141
# Delegates to Array_ApproxEquals(self, other); the formal argument must be
# named `other` because that is the name the call below refers to.
ApproxEquals = function(other) Array_ApproxEquals(self, other),
42-
data = function() Array_data(self)
42+
data = function() `arrow::ArrayData`$new(Array_data(self))
4343
)
4444
)
4545

@@ -66,10 +66,17 @@ array <- function(...){
6666
num_columns = function() RecordBatch_num_columns(self),
6767
num_rows = function() RecordBatch_num_rows(self),
6868
schema = function() `arrow::Schema`$new(RecordBatch_schema(self)),
69-
to_file = function(path) invisible(RecordBatch_to_file(self, fs::path_abs(path)))
69+
to_file = function(path) invisible(RecordBatch_to_file(self, fs::path_abs(path))),
70+
column = function(i) `arrow::Array`$new(RecordBatch_column(self, i))
7071
)
7172
)
7273

74+
# S3 method of as_tibble() for objects of class "arrow::RecordBatch".
# Delegates the whole conversion to RecordBatch_to_dataframe(x); the `...`
# arguments are accepted for compatibility with the generic but are not used.
#' @export
75+
`as_tibble.arrow::RecordBatch` <- function(x, ...){
76+
RecordBatch_to_dataframe(x)
77+
}
78+
79+
7380
#' Create an arrow::RecordBatch from a data frame
7481
#'
7582
#' @param .data a data frame

r/R/reexports-tibble.R

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
#' @importFrom tibble as_tibble
19+
#' @export
20+
tibble::as_tibble

r/man/reexports.Rd

Lines changed: 16 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

r/src/RcppExports.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,29 @@ BEGIN_RCPP
219219
return rcpp_result_gen;
220220
END_RCPP
221221
}
222+
// RecordBatch_column
// Glue between R's .Call() interface and the C++ function RecordBatch_column():
// the two SEXP arguments are converted to the C++ parameter types, the C++
// function is called, and its result is converted back with Rcpp::wrap().
// NOTE(review): this file looks auto-generated (RcppExports.cpp); manual
// comments would presumably be lost on regeneration - confirm before relying
// on them.
223+
std::shared_ptr<arrow::Array> RecordBatch_column(const std::shared_ptr<arrow::RecordBatch>& batch, int i);
224+
RcppExport SEXP _arrow_RecordBatch_column(SEXP batchSEXP, SEXP iSEXP) {
225+
BEGIN_RCPP
226+
Rcpp::RObject rcpp_result_gen;
227+
Rcpp::RNGScope rcpp_rngScope_gen;
228+
Rcpp::traits::input_parameter< const std::shared_ptr<arrow::RecordBatch>& >::type batch(batchSEXP);
229+
Rcpp::traits::input_parameter< int >::type i(iSEXP);
230+
rcpp_result_gen = Rcpp::wrap(RecordBatch_column(batch, i));
231+
return rcpp_result_gen;
232+
END_RCPP
233+
}
234+
// RecordBatch_to_dataframe
// Glue between R's .Call() interface and the C++ function
// RecordBatch_to_dataframe(): the SEXP argument is converted to the C++
// parameter type, the C++ function is called, and its result is converted back
// with Rcpp::wrap().
// NOTE(review): this file looks auto-generated (RcppExports.cpp); manual
// comments would presumably be lost on regeneration - confirm before relying
// on them.
235+
List RecordBatch_to_dataframe(const std::shared_ptr<arrow::RecordBatch>& batch);
236+
RcppExport SEXP _arrow_RecordBatch_to_dataframe(SEXP batchSEXP) {
237+
BEGIN_RCPP
238+
Rcpp::RObject rcpp_result_gen;
239+
Rcpp::RNGScope rcpp_rngScope_gen;
240+
Rcpp::traits::input_parameter< const std::shared_ptr<arrow::RecordBatch>& >::type batch(batchSEXP);
241+
rcpp_result_gen = Rcpp::wrap(RecordBatch_to_dataframe(batch));
242+
return rcpp_result_gen;
243+
END_RCPP
244+
}
222245
// dataframe_to_Table
223246
std::shared_ptr<arrow::Table> dataframe_to_Table(DataFrame tbl);
224247
RcppExport SEXP _arrow_dataframe_to_Table(SEXP tblSEXP) {
@@ -866,6 +889,8 @@ static const R_CallMethodDef CallEntries[] = {
866889
{"_arrow_RecordBatch_num_columns", (DL_FUNC) &_arrow_RecordBatch_num_columns, 1},
867890
{"_arrow_RecordBatch_num_rows", (DL_FUNC) &_arrow_RecordBatch_num_rows, 1},
868891
{"_arrow_RecordBatch_schema", (DL_FUNC) &_arrow_RecordBatch_schema, 1},
892+
{"_arrow_RecordBatch_column", (DL_FUNC) &_arrow_RecordBatch_column, 2},
893+
{"_arrow_RecordBatch_to_dataframe", (DL_FUNC) &_arrow_RecordBatch_to_dataframe, 1},
869894
{"_arrow_dataframe_to_Table", (DL_FUNC) &_arrow_dataframe_to_Table, 1},
870895
{"_arrow_Table_num_columns", (DL_FUNC) &_arrow_Table_num_columns, 1},
871896
{"_arrow_Table_num_rows", (DL_FUNC) &_arrow_Table_num_rows, 1},

r/src/buffer.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,48 @@ std::shared_ptr<arrow::Schema> RecordBatch_schema(const std::shared_ptr<arrow::R
115115
return x->schema();
116116
}
117117

118+
// [[Rcpp::export]]
119+
std::shared_ptr<arrow::Array> RecordBatch_column(const std::shared_ptr<arrow::RecordBatch>& batch, int i){
120+
return batch->column(i);
121+
}
122+
123+
template <int RTYPE>
124+
inline SEXP simple_Array_to_Vector(const std::shared_ptr<arrow::Array>& array ){
125+
// ignoring null buffer for now
126+
using stored_type = typename Rcpp::Vector<RTYPE>::stored_type;
127+
auto start = reinterpret_cast<const stored_type*>(array->data()->buffers[1]->data());
128+
129+
return Rcpp::wrap(start, start + array->length());
130+
}
131+
132+
// Convert an arrow::Array into an R vector, dispatching on the array's type id.
//
// Supported types and the R vector type they are copied into:
//   INT8   -> RAWSXP
//   INT32  -> INTSXP
//   DOUBLE -> REALSXP
// Any other type id raises an R error via stop().
SEXP Array_to_R(const std::shared_ptr<arrow::Array>& array){
  const auto id = array->type_id();

  if (id == Type::INT8) {
    return simple_Array_to_Vector<RAWSXP>(array);
  }
  if (id == Type::INT32) {
    return simple_Array_to_Vector<INTSXP>(array);
  }
  if (id == Type::DOUBLE) {
    return simple_Array_to_Vector<REALSXP>(array);
  }

  stop(tfm::format("cannot handle Array of type %d", id));
  // Not reached when stop() raises; kept so every path returns a value.
  return R_NilValue;
}
144+
145+
// [[Rcpp::export]]
146+
List RecordBatch_to_dataframe(const std::shared_ptr<arrow::RecordBatch>& batch){
147+
int nc = batch->num_columns();
148+
int nr = batch->num_rows();
149+
List tbl(nc);
150+
for(int i=0; i<nc; i++) {
151+
tbl[i] = Array_to_R(batch->column(i));
152+
}
153+
tbl.attr("class") = CharacterVector::create("tbf_df", "tbl", "data.frame");
154+
tbl.attr("row.names") = IntegerVector::create(NA_INTEGER, -nr);
155+
return tbl;
156+
}
157+
158+
159+
118160
// [[Rcpp::export]]
119161
std::shared_ptr<arrow::Table> dataframe_to_Table(DataFrame tbl){
120162
auto rb = dataframe_to_RecordBatch(tbl);

0 commit comments

Comments
 (0)