Skip to content

Commit a8ad3a4

Browse files
committed
PARQUET-639: Do not export DCHECK in public headers
I added a test so that DCHECK does not leak into the public headers. I prefer this to renaming the macro. Author: Wes McKinney <[email protected]>. Closes apache#127 from wesm/no-export-dcheck and squashes the following commits: 52a2d22 [Wes McKinney] Remove exposure of DCHECK macros from publicly-visible headers. Change-Id: Ib79d09fe31de928fe55ffa1f04d34a84092f5494
1 parent 427d0a2 commit a8ad3a4

6 files changed

Lines changed: 170 additions & 109 deletions

File tree

cpp/src/parquet/column/levels.cc

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#include "parquet/column/levels.h"

#include <cstdint>
#include <cstring>

#include "parquet/util/rle-encoding.h"
23+
24+
namespace parquet {
25+
26+
// Construction and destruction are defined here rather than in the header:
// the header only forward-declares RleEncoder and BitWriter, while this
// translation unit includes their definitions via rle-encoding.h.
LevelEncoder::LevelEncoder() = default;
LevelEncoder::~LevelEncoder() = default;
28+
29+
// Prepares the encoder to write levels into the caller-supplied buffer.
//
// encoding            - Encoding::RLE or Encoding::BIT_PACKED; any other value
//                       raises ParquetException.
// max_level           - largest level value to be encoded; the bit width used
//                       is BitUtil::Log2(max_level + 1).
// num_buffered_values - number of levels expected; only used to size the
//                       BIT_PACKED writer.
// data, data_size     - output buffer; data_size is forwarded to the RLE
//                       encoder only.
//
// bit_width_ and encoding_ are committed only after the matching encoder has
// been created. A failed Init() therefore leaves a previously initialized
// encoder in its old, consistent state instead of pointing encoding_ at an
// encoder that was never built.
void LevelEncoder::Init(Encoding::type encoding, int16_t max_level,
    int num_buffered_values, uint8_t* data, int data_size) {
  const int bit_width = BitUtil::Log2(max_level + 1);
  switch (encoding) {
    case Encoding::RLE: {
      rle_encoder_.reset(new RleEncoder(data, data_size, bit_width));
      break;
    }
    case Encoding::BIT_PACKED: {
      // The bit-packed writer is sized from the expected number of values,
      // rounded up to whole bytes.
      const int num_bytes = BitUtil::Ceil(num_buffered_values * bit_width, 8);
      bit_packed_encoder_.reset(new BitWriter(data, num_bytes));
      break;
    }
    default:
      throw ParquetException("Unknown encoding type for levels.");
  }
  // Only reached when an encoder was successfully created above.
  bit_width_ = bit_width;
  encoding_ = encoding;
}
47+
48+
// Returns the number of bytes a caller should reserve to encode
// num_buffered_values levels whose largest value is max_level.
// Throws ParquetException for encodings other than RLE and BIT_PACKED.
int LevelEncoder::MaxBufferSize(
    Encoding::type encoding, int16_t max_level, int num_buffered_values) {
  const int level_bits = BitUtil::Log2(max_level + 1);

  if (encoding == Encoding::RLE) {
    // TODO: Due to the way we currently check if the buffer is full enough,
    // we need to have MinBufferSize as head room.
    return RleEncoder::MaxBufferSize(level_bits, num_buffered_values) +
           RleEncoder::MinBufferSize(level_bits);
  }

  if (encoding == Encoding::BIT_PACKED) {
    // Total bits for all values, rounded up to whole bytes.
    return BitUtil::Ceil(num_buffered_values * level_bits, 8);
  }

  throw ParquetException("Unknown encoding type for levels.");
}
69+
70+
int LevelEncoder::Encode(int batch_size, const int16_t* levels) {
71+
int num_encoded = 0;
72+
if (!rle_encoder_ && !bit_packed_encoder_) {
73+
throw ParquetException("Level encoders are not initialized.");
74+
}
75+
76+
if (encoding_ == Encoding::RLE) {
77+
for (int i = 0; i < batch_size; ++i) {
78+
if (!rle_encoder_->Put(*(levels + i))) { break; }
79+
++num_encoded;
80+
}
81+
rle_encoder_->Flush();
82+
rle_length_ = rle_encoder_->len();
83+
} else {
84+
for (int i = 0; i < batch_size; ++i) {
85+
if (!bit_packed_encoder_->PutValue(*(levels + i), bit_width_)) { break; }
86+
++num_encoded;
87+
}
88+
bit_packed_encoder_->Flush();
89+
}
90+
return num_encoded;
91+
}
92+
93+
LevelDecoder::LevelDecoder()
94+
: num_values_remaining_(0) {}
95+
96+
LevelDecoder::~LevelDecoder() {}
97+
98+
// Points the decoder at a new run of encoded levels and returns the number
// of bytes of `data` that belong to it.
//
// encoding            - Encoding::RLE or Encoding::BIT_PACKED; any other value
//                       raises ParquetException.
// max_level           - largest level value; the bit width used is
//                       BitUtil::Log2(max_level + 1).
// num_buffered_values - number of levels available to later Decode() calls.
// data                - start of the encoded levels. For RLE the first four
//                       bytes hold the byte length of the run that follows.
//
// An existing decoder object is reused through Reset() instead of being
// reallocated.
int LevelDecoder::SetData(Encoding::type encoding, int16_t max_level,
    int num_buffered_values, const uint8_t* data) {
  uint32_t num_bytes = 0;
  encoding_ = encoding;
  num_values_remaining_ = num_buffered_values;
  bit_width_ = BitUtil::Log2(max_level + 1);
  switch (encoding) {
    case Encoding::RLE: {
      // `data` is a plain byte pointer with no alignment guarantee, so the
      // length prefix is copied out rather than read through a casted
      // uint32_t pointer. The value is interpreted in host byte order,
      // exactly as before.
      std::memcpy(&num_bytes, data, sizeof(num_bytes));
      const uint8_t* decoder_data = data + sizeof(uint32_t);
      if (!rle_decoder_) {
        rle_decoder_.reset(new RleDecoder(decoder_data, num_bytes, bit_width_));
      } else {
        rle_decoder_->Reset(decoder_data, num_bytes, bit_width_);
      }
      // Bytes consumed: the length prefix plus the run itself.
      return static_cast<int>(sizeof(uint32_t) + num_bytes);
    }
    case Encoding::BIT_PACKED: {
      // No length prefix: the size follows from the value count and width.
      num_bytes = BitUtil::Ceil(num_buffered_values * bit_width_, 8);
      if (!bit_packed_decoder_) {
        bit_packed_decoder_.reset(new BitReader(data, num_bytes));
      } else {
        bit_packed_decoder_->Reset(data, num_bytes);
      }
      return static_cast<int>(num_bytes);
    }
    default:
      throw ParquetException("Unknown encoding type for levels.");
  }
  // Not reached: every case above returns or throws.
  return -1;
}
129+
130+
int LevelDecoder::Decode(int batch_size, int16_t* levels) {
131+
int num_decoded = 0;
132+
133+
int num_values = std::min(num_values_remaining_, batch_size);
134+
if (encoding_ == Encoding::RLE) {
135+
num_decoded = rle_decoder_->GetBatch(levels, num_values);
136+
} else {
137+
for (int i = 0; i < num_values; ++i) {
138+
if (!bit_packed_decoder_->GetValue(bit_width_, levels + i)) { break; }
139+
++num_decoded;
140+
}
141+
}
142+
num_values_remaining_ -= num_decoded;
143+
return num_decoded;
144+
}
145+
146+
} // namespace parquet

cpp/src/parquet/column/levels.h

Lines changed: 14 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -23,79 +23,28 @@
2323

2424
#include "parquet/exception.h"
2525
#include "parquet/types.h"
26-
#include "parquet/util/rle-encoding.h"
2726

2827
namespace parquet {
2928

29+
class BitReader;
30+
class BitWriter;
31+
class RleDecoder;
32+
class RleEncoder;
33+
3034
class LevelEncoder {
3135
public:
32-
LevelEncoder() {}
36+
LevelEncoder();
37+
~LevelEncoder();
3338

3439
static int MaxBufferSize(
35-
Encoding::type encoding, int16_t max_level, int num_buffered_values) {
36-
int bit_width = BitUtil::Log2(max_level + 1);
37-
int num_bytes = 0;
38-
switch (encoding) {
39-
case Encoding::RLE: {
40-
// TODO: Due to the way we currently check if the buffer is full enough,
41-
// we need to have MinBufferSize as head room.
42-
num_bytes = RleEncoder::MaxBufferSize(bit_width, num_buffered_values) +
43-
RleEncoder::MinBufferSize(bit_width);
44-
break;
45-
}
46-
case Encoding::BIT_PACKED: {
47-
num_bytes = BitUtil::Ceil(num_buffered_values * bit_width, 8);
48-
break;
49-
}
50-
default:
51-
throw ParquetException("Unknown encoding type for levels.");
52-
}
53-
return num_bytes;
54-
}
40+
Encoding::type encoding, int16_t max_level, int num_buffered_values);
5541

5642
// Initialize the LevelEncoder.
5743
void Init(Encoding::type encoding, int16_t max_level, int num_buffered_values,
58-
uint8_t* data, int data_size) {
59-
bit_width_ = BitUtil::Log2(max_level + 1);
60-
encoding_ = encoding;
61-
switch (encoding) {
62-
case Encoding::RLE: {
63-
rle_encoder_.reset(new RleEncoder(data, data_size, bit_width_));
64-
break;
65-
}
66-
case Encoding::BIT_PACKED: {
67-
int num_bytes = BitUtil::Ceil(num_buffered_values * bit_width_, 8);
68-
bit_packed_encoder_.reset(new BitWriter(data, num_bytes));
69-
break;
70-
}
71-
default:
72-
throw ParquetException("Unknown encoding type for levels.");
73-
}
74-
}
44+
uint8_t* data, int data_size);
7545

7646
// Encodes a batch of levels from an array and returns the number of levels encoded
77-
int Encode(int batch_size, const int16_t* levels) {
78-
int num_encoded = 0;
79-
if (!rle_encoder_ && !bit_packed_encoder_) {
80-
throw ParquetException("Level encoders are not initialized.");
81-
}
82-
83-
if (encoding_ == Encoding::RLE) {
84-
for (int i = 0; i < batch_size; ++i) {
85-
if (!rle_encoder_->Put(*(levels + i))) { break; }
86-
++num_encoded;
87-
}
88-
rle_encoder_->Flush();
89-
rle_length_ = rle_encoder_->len();
90-
} else {
91-
for (int i = 0; i < batch_size; ++i) {
92-
if (!bit_packed_encoder_->PutValue(*(levels + i), bit_width_)) { break; }
93-
++num_encoded;
94-
}
95-
bit_packed_encoder_->Flush();
96-
}
97-
return num_encoded;
98-
}
47+
int Encode(int batch_size, const int16_t* levels);
9948

10049
int32_t len() {
10150
if (encoding_ != Encoding::RLE) {
@@ -114,58 +63,16 @@ class LevelEncoder {
11463

11564
class LevelDecoder {
11665
public:
117-
LevelDecoder() : num_values_remaining_(0) {}
66+
LevelDecoder();
67+
~LevelDecoder();
11868

11969
// Initialize the LevelDecoder state with new data
12070
// and return the number of bytes consumed
12171
int SetData(Encoding::type encoding, int16_t max_level, int num_buffered_values,
122-
const uint8_t* data) {
123-
uint32_t num_bytes = 0;
124-
encoding_ = encoding;
125-
num_values_remaining_ = num_buffered_values;
126-
bit_width_ = BitUtil::Log2(max_level + 1);
127-
switch (encoding) {
128-
case Encoding::RLE: {
129-
num_bytes = *reinterpret_cast<const uint32_t*>(data);
130-
const uint8_t* decoder_data = data + sizeof(uint32_t);
131-
if (!rle_decoder_) {
132-
rle_decoder_.reset(new RleDecoder(decoder_data, num_bytes, bit_width_));
133-
} else {
134-
rle_decoder_->Reset(decoder_data, num_bytes, bit_width_);
135-
}
136-
return sizeof(uint32_t) + num_bytes;
137-
}
138-
case Encoding::BIT_PACKED: {
139-
num_bytes = BitUtil::Ceil(num_buffered_values * bit_width_, 8);
140-
if (!bit_packed_decoder_) {
141-
bit_packed_decoder_.reset(new BitReader(data, num_bytes));
142-
} else {
143-
bit_packed_decoder_->Reset(data, num_bytes);
144-
}
145-
return num_bytes;
146-
}
147-
default:
148-
throw ParquetException("Unknown encoding type for levels.");
149-
}
150-
return -1;
151-
}
72+
const uint8_t* data);
15273

15374
// Decodes a batch of levels into an array and returns the number of levels decoded
154-
int Decode(int batch_size, int16_t* levels) {
155-
int num_decoded = 0;
156-
157-
int num_values = std::min(num_values_remaining_, batch_size);
158-
if (encoding_ == Encoding::RLE) {
159-
num_decoded = rle_decoder_->GetBatch(levels, num_values);
160-
} else {
161-
for (int i = 0; i < num_values; ++i) {
162-
if (!bit_packed_decoder_->GetValue(bit_width_, levels + i)) { break; }
163-
++num_decoded;
164-
}
165-
}
166-
num_values_remaining_ -= num_decoded;
167-
return num_decoded;
168-
}
75+
int Decode(int batch_size, int16_t* levels);
16976

17077
private:
17178
int bit_width_;

cpp/src/parquet/public-api-test.cc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "parquet/api/io.h"
2121
#include "parquet/api/reader.h"
2222
#include "parquet/api/schema.h"
23+
#include "parquet/api/writer.h"
2324

2425
namespace parquet {
2526

@@ -29,4 +30,10 @@ TEST(TestPublicAPI, DoesNotIncludeThrift) {
2930
#endif
3031
}
3132

33+
// Fails when the DCHECK macro is visible after including the public API
// headers at the top of this file. Its presence means the internal logging
// header has leaked into a public header.
TEST(TestPublicAPI, DoesNotExportDCHECK) {
#if defined(DCHECK)
  FAIL() << "parquet/util/logging.h should not be transitively included";
#endif
}
38+
3239
} // namespace parquet

cpp/src/parquet/util/input.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
#include "parquet/exception.h"
2626
#include "parquet/util/buffer.h"
27+
#include "parquet/util/logging.h"
2728

2829
namespace parquet {
2930

cpp/src/parquet/util/mem-allocator.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,7 @@
1818
#ifndef PARQUET_UTIL_MEMORY_POOL_H
1919
#define PARQUET_UTIL_MEMORY_POOL_H
2020

21-
#include "parquet/util/logging.h"
22-
#include "parquet/util/bit-util.h"
21+
#include <cstdint>
2322

2423
namespace parquet {
2524

cpp/src/parquet/util/output.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
#include "parquet/exception.h"
2424
#include "parquet/util/buffer.h"
25+
#include "parquet/util/logging.h"
2526

2627
namespace parquet {
2728

0 commit comments

Comments
 (0)