Skip to content

Commit

Permalink
ARROW-3075: [C++] Incorporate parquet-cpp codebase into Arrow C++ build
Browse files Browse the repository at this point in the history
system. Add unit test label granularity options, ability to add component group
targets like 'make parquet' that build libraries and tests
  • Loading branch information
wesm committed Sep 6, 2018
1 parent e5662a3 commit 23093d7
Show file tree
Hide file tree
Showing 92 changed files with 852 additions and 868 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "cpp/submodules/parquet-testing"]
path = cpp/submodules/parquet-testing
url = https://github.com/apache/parquet-testing.git
6 changes: 4 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ matrix:
- ARROW_TRAVIS_ORC=1
- ARROW_TRAVIS_CLANG_FORMAT=1
- ARROW_TRAVIS_COVERAGE=1
- ARROW_TRAVIS_PARQUET=1
- ARROW_TRAVIS_PYTHON_DOCS=1
- ARROW_BUILD_WARNING_LEVEL=CHECKIN
- ARROW_TRAVIS_PYTHON_JVM=1
Expand All @@ -77,11 +78,11 @@ matrix:
- $TRAVIS_BUILD_DIR/ci/travis_install_clang_tools.sh
- $TRAVIS_BUILD_DIR/ci/travis_lint.sh
# If either C++ or Python changed, we must install the C++ libraries
- git submodule update --init
- $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh
script:
# All test steps are required for accurate C++ coverage info
- $TRAVIS_BUILD_DIR/ci/travis_script_cpp.sh
- $TRAVIS_BUILD_DIR/ci/travis_build_parquet_cpp.sh
# Build Arrow Java to test the pyarrow<->JVM in-process bridge
- $TRAVIS_BUILD_DIR/ci/travis_script_java.sh
# Only run Plasma tests with valgrind in one of the Python builds because
Expand All @@ -102,14 +103,15 @@ matrix:
- ARROW_TRAVIS_USE_TOOLCHAIN=1
- ARROW_TRAVIS_PLASMA=1
- ARROW_TRAVIS_ORC=1
- ARROW_TRAVIS_PARQUET=1
- ARROW_BUILD_WARNING_LEVEL=CHECKIN
before_script:
- if [ $ARROW_CI_PYTHON_AFFECTED != "1" ]; then exit; fi
# If either C++ or Python changed, we must install the C++ libraries
- git submodule update --init
- $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh
script:
- if [ $ARROW_CI_CPP_AFFECTED == "1" ]; then $TRAVIS_BUILD_DIR/ci/travis_script_cpp.sh; fi
- $TRAVIS_BUILD_DIR/ci/travis_build_parquet_cpp.sh
- $TRAVIS_BUILD_DIR/ci/travis_script_python.sh 2.7
- $TRAVIS_BUILD_DIR/ci/travis_script_python.sh 3.6
# [manylinux1] Python
Expand Down
5 changes: 2 additions & 3 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ environment:
GENERATOR: Ninja
CONFIGURATION: "Release"
APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
BOOST_ROOT: C:\Libraries\boost_1_64_0
- JOB: "Toolchain"
GENERATOR: Visual Studio 14 2015 Win64
CONFIGURATION: "Release"
Expand All @@ -67,8 +66,8 @@ environment:
USE_CLCACHE: false

MSVC_DEFAULT_OPTIONS: ON
BOOST_ROOT: C:\Libraries\boost_1_63_0
BOOST_LIBRARYDIR: C:\Libraries\boost_1_63_0\lib64-msvc-14.0
BOOST_ROOT: C:\Libraries\boost_1_67_0
BOOST_LIBRARYDIR: C:\Libraries\boost_1_67_0\lib64-msvc-14.0
APPVEYOR_SAVE_CACHE_ON_ERROR: true

install:
Expand Down
34 changes: 12 additions & 22 deletions ci/cpp-python-msvc-build.bat
Original file line number Diff line number Diff line change
Expand Up @@ -68,16 +68,20 @@ if "%JOB%" == "Build_Debug" (
exit /B 0
)

conda create -n arrow -q -y python=%PYTHON% ^
conda create -n arrow -q -y -c conda-forge ^
python=%PYTHON% ^
six pytest setuptools numpy pandas cython ^
thrift-cpp=0.11.0
thrift-cpp=0.11.0 boost-cpp

call activate arrow

@rem Use Boost from conda-forge
set BOOST_ROOT=%CONDA_PREFIX%\Library
set BOOST_LIBRARYDIR=%CONDA_PREFIX%\Library\lib

if "%JOB%" == "Toolchain" (
@rem Install pre-built "toolchain" packages for faster builds
conda install -q -y -c conda-forge ^
boost-cpp ^
brotli ^
cmake ^
flatbuffers ^
Expand All @@ -94,6 +98,10 @@ if "%JOB%" == "Toolchain" (

set ARROW_HOME=%CONDA_PREFIX%\Library

@rem Retrieve git submodules, configure env var for Parquet unit tests
git submodule update --init
set PARQUET_TEST_DATA=%CD%\cpp\submodules\parquet-testing\data

@rem Build and test Arrow C++ libraries

mkdir cpp\build
Expand All @@ -104,6 +112,7 @@ cmake -G "%GENERATOR%" ^
-DARROW_BOOST_USE_SHARED=OFF ^
-DCMAKE_BUILD_TYPE=%CONFIGURATION% ^
-DARROW_CXXFLAGS="/WX /MP" ^
-DARROW_PARQUET=ON ^
-DARROW_PYTHON=ON ^
.. || exit /B
cmake --build . --target install --config %CONFIGURATION% || exit /B
Expand All @@ -117,26 +126,7 @@ ctest -VV || exit /B
set PYTHONHOME=%OLD_PYTHONHOME%
popd

@rem Build parquet-cpp

git clone https://github.com/apache/parquet-cpp.git || exit /B
mkdir parquet-cpp\build
pushd parquet-cpp\build

set PARQUET_BUILD_TOOLCHAIN=%CONDA_PREFIX%\Library
set PARQUET_HOME=%CONDA_PREFIX%\Library
cmake -G "%GENERATOR%" ^
-DCMAKE_INSTALL_PREFIX=%PARQUET_HOME% ^
-DCMAKE_BUILD_TYPE=%CONFIGURATION% ^
-DPARQUET_BOOST_USE_SHARED=OFF ^
-DPARQUET_BUILD_TESTS=OFF ^
.. || exit /B
cmake --build . --target install --config %CONFIGURATION% || exit /B
popd

@rem Build and install pyarrow
@rem parquet-cpp has some additional runtime dependencies that we need to figure out
@rem see PARQUET-1018

pushd python

Expand Down
4 changes: 4 additions & 0 deletions ci/travis_before_script_cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ if [ $ARROW_TRAVIS_ORC == "1" ]; then
CMAKE_COMMON_FLAGS="$CMAKE_COMMON_FLAGS -DARROW_ORC=ON"
fi

# Enable the Parquet libraries in the Arrow C++ build when the Travis job
# requests it. The expansion is quoted so that a job which leaves
# ARROW_TRAVIS_PARQUET unset or empty evaluates the test to false, instead of
# making `[` fail with "unary operator expected".
if [ "$ARROW_TRAVIS_PARQUET" == "1" ]; then
  CMAKE_COMMON_FLAGS="$CMAKE_COMMON_FLAGS -DARROW_PARQUET=ON"
fi

if [ $ARROW_TRAVIS_VALGRIND == "1" ]; then
CMAKE_COMMON_FLAGS="$CMAKE_COMMON_FLAGS -DARROW_TEST_MEMCHECK=ON"
fi
Expand Down
50 changes: 0 additions & 50 deletions ci/travis_build_parquet_cpp.sh

This file was deleted.

4 changes: 2 additions & 2 deletions ci/travis_env_common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,6 @@ export ARROW_CPP_BUILD_DIR=$TRAVIS_BUILD_DIR/cpp-build
export ARROW_C_GLIB_INSTALL_AUTOTOOLS=$TRAVIS_BUILD_DIR/c-glib-install-autotools
export ARROW_C_GLIB_INSTALL_MESON=$TRAVIS_BUILD_DIR/c-glib-install-meson

export ARROW_PYTHON_PARQUET_HOME=$TRAVIS_BUILD_DIR/parquet-env

export CMAKE_EXPORT_COMPILE_COMMANDS=1

export ARROW_BUILD_TYPE=${ARROW_BUILD_TYPE:=debug}
Expand All @@ -70,3 +68,5 @@ fi
if [ $TRAVIS_OS_NAME == "osx" ]; then
export GOPATH=$TRAVIS_BUILD_DIR/gopath
fi

export PARQUET_TEST_DATA=$TRAVIS_BUILD_DIR/cpp/submodules/parquet-testing/data
4 changes: 0 additions & 4 deletions ci/travis_install_toolchain.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,4 @@ if [ ! -e $CPP_TOOLCHAIN ]; then
thrift-cpp=0.11.0 \
zlib \
zstd

# HACK(wesm): We started experiencing OpenSSL failures when Miniconda was
# updated sometime on October 2 or October 3
# conda update -y -q -p $CPP_TOOLCHAIN ca-certificates -c defaults
fi
4 changes: 2 additions & 2 deletions ci/travis_script_python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh
source $TRAVIS_BUILD_DIR/ci/travis_install_conda.sh

export ARROW_HOME=$ARROW_CPP_INSTALL
export PARQUET_HOME=$ARROW_PYTHON_PARQUET_HOME
export LD_LIBRARY_PATH=$ARROW_HOME/lib:$PARQUET_HOME/lib:$LD_LIBRARY_PATH
export PARQUET_HOME=$ARROW_CPP_INSTALL
export LD_LIBRARY_PATH=$ARROW_HOME/lib:$LD_LIBRARY_PATH
export PYARROW_CXXFLAGS="-Werror"

PYARROW_PYTEST_FLAGS=" -r sxX --durations=15 --parquet"
Expand Down
25 changes: 21 additions & 4 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,20 @@ Pass multiple labels by dividing with semicolons")
"Build Arrow with statically linked CRT"
OFF)
endif()

# Parquet-related build options

# Master switch for the Parquet libraries. Off by default; this file adds the
# src/parquet subdirectory to the build only when it is enabled.
option(ARROW_PARQUET
"Build the Parquet libraries"
OFF)

# Off by default. Per its help text, requests a libparquet build that depends
# only on third-party headers and is always treated as OFF when binaries are
# built.
# NOTE(review): the code that consumes this option is not visible here -
# confirm where the "always OFF" rule is enforced.
option(PARQUET_MINIMAL_DEPENDENCY
"Depend only on Thirdparty headers to build libparquet. \
Always OFF if building binaries"
OFF)

# String cache entry (default "shared") selecting how Arrow is linked into
# libparquet. The help text lists static|shared as the accepted values; no
# validation of the value is visible in this section.
set(PARQUET_ARROW_LINKAGE "shared" CACHE STRING
"How to link Arrow libraries with libparquet.so. static|shared (default shared)")

endif()

if(ARROW_BUILD_TESTS OR ARROW_BUILD_BENCHMARKS)
Expand Down Expand Up @@ -584,6 +598,8 @@ endif(UNIX)
############################################################

set(ARROW_LINK_LIBS)

# Libraries to link statically with libarrow.so
set(ARROW_STATIC_LINK_LIBS)

if (ARROW_WITH_BROTLI)
Expand Down Expand Up @@ -643,11 +659,8 @@ set(ARROW_BENCHMARK_LINK_LIBS
gtest
${ARROW_STATIC_LINK_LIBS})

set(ARROW_LINK_LIBS
${ARROW_STATIC_LINK_LIBS}
${ARROW_LINK_LIBS})

set(ARROW_SHARED_PRIVATE_LINK_LIBS
${ARROW_STATIC_LINK_LIBS}
${BOOST_SYSTEM_LIBRARY}
${BOOST_FILESYSTEM_LIBRARY}
${BOOST_REGEX_LIBRARY})
Expand Down Expand Up @@ -729,3 +742,7 @@ endif()
if(ARROW_HIVESERVER2)
add_subdirectory(src/arrow/dbi/hiveserver2)
endif()

# Descend into the Parquet sources only when the ARROW_PARQUET option is
# enabled.
if(ARROW_PARQUET)
add_subdirectory(src/parquet)
endif()
44 changes: 26 additions & 18 deletions cpp/build-support/lint_cpp_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import argparse
import re
import os
import sys
import traceback

parser = argparse.ArgumentParser(
description="Check for illegal headers for C++/CLI applications")
Expand Down Expand Up @@ -59,23 +61,29 @@ def lint_file(path):
'arrow/util/macros.h',
'arrow/python/iterators.h',
'arrow/util/parallel.h',
'arrow/io/hdfs-internal.h'
'arrow/io/hdfs-internal.h',
'parquet/arrow/test-util.h',
'parquet/encoding-internal.h',
'parquet/test-util.h'
]


for dirpath, _, filenames in os.walk(arguments.source_path):
for filename in filenames:
full_path = os.path.join(dirpath, filename)

exclude = False
for exclusion in EXCLUSIONS:
if exclusion in full_path:
exclude = True
break

if exclude:
continue

# Only run on header files
if filename.endswith('.h'):
lint_file(full_path)
try:
    # Walk the source tree and lint every header that is not excluded.
    for dirpath, _, filenames in os.walk(arguments.source_path):
        for filename in filenames:
            full_path = os.path.join(dirpath, filename)

            # Skip any path that contains one of the excluded fragments.
            if any(exclusion in full_path for exclusion in EXCLUSIONS):
                continue

            # Only run on header files
            if filename.endswith('.h'):
                lint_file(full_path)
except Exception:
    # Print the traceback and exit with status 1 on any failure.
    traceback.print_exc()
    sys.exit(1)
Loading

0 comments on commit 23093d7

Please sign in to comment.