Skip to content

Commit

Permalink
add testing for azure
Browse files Browse the repository at this point in the history
  • Loading branch information
samansmink committed Jun 27, 2024
1 parent 49810da commit aa0b52b
Show file tree
Hide file tree
Showing 12 changed files with 426 additions and 60 deletions.
80 changes: 80 additions & 0 deletions .github/workflows/CloudTesting.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Functional tests that run the built extension against a real Azure storage
# account. All Azure credentials are read from repository secrets.
name: Cloud functional tests
on: [push, repository_dispatch]
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }}
  cancel-in-progress: true
defaults:
  run:
    shell: bash

jobs:
  azure-tests-linux:
    name: Azure tests (Linux)
    runs-on: ubuntu-latest
    env:
      VCPKG_TARGET_TRIPLET: x64-linux
      VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake
      GEN: Ninja
      DUCKDB_PLATFORM: linux_amd64

    steps:
      - name: Install required ubuntu packages
        run: |
          sudo apt-get update -y -qq
          sudo apt-get install -y -qq software-properties-common
          # -y: never wait for interactive confirmation on a CI runner
          sudo add-apt-repository -y ppa:git-core/ppa
          sudo apt-get update -y -qq
          sudo apt-get install -y -qq ninja-build make gcc-multilib g++-multilib zip unzip build-essential checkinstall curl libz-dev openssh-client

      - uses: actions/checkout@v3
        with:
          fetch-depth: 0
          submodules: 'true'

      - name: Setup Ccache
        uses: hendrikmuhs/ccache-action@v1.2.11 # Note: pinned due to GLIBC incompatibility in later releases
        with:
          # This job has no build matrix, so key the cache on the job-level platform instead
          key: ${{ github.job }}-${{ env.DUCKDB_PLATFORM }}

      - name: Setup vcpkg
        uses: lukka/run-vcpkg@v11.1
        with:
          vcpkgGitCommitId: a1a1cbc975abf909a6c8985a6a2b8fe20bbd9bd6

      - name: Setup Rust
        uses: dtolnay/rust-toolchain@stable

      - name: Build extension
        env:
          GEN: ninja
        run: |
          make release

      # Scenario 1: the service principal credentials are exposed as environment variables
      - name: Test with Service Principal (SPN) in env vars
        env:
          AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
          AZURE_CLIENT_SECRET: ${{ secrets.AZURE_CLIENT_SECRET }}
          AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
          AZURE_STORAGE_ACCOUNT: ${{ secrets.AZURE_STORAGE_ACCOUNT }}
        run: |
          python3 duckdb/scripts/run_tests_one_by_one.py ./build/release/test/unittest "*test/sql/cloud/*"

      # Scenario 2: the service principal is logged in through azure-cli.
      # The login is a separate step so the secrets reach the shell as quoted
      # environment variables (not text pasted into the script) and are not
      # part of the environment of the test step below.
      - name: Log in to azure-cli with SPN
        env:
          SPN_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
          SPN_CLIENT_SECRET: ${{ secrets.AZURE_CLIENT_SECRET }}
          SPN_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
        run: |
          az login --service-principal -u "${SPN_CLIENT_ID}" -p "${SPN_CLIENT_SECRET}" --tenant "${SPN_TENANT_ID}"

      - name: Test with SPN logged in in azure-cli
        env:
          AZURE_STORAGE_ACCOUNT: ${{ secrets.AZURE_STORAGE_ACCOUNT }}
          DUCKDB_AZ_CLI_LOGGED_IN: '1'
        run: |
          python3 duckdb/scripts/run_tests_one_by_one.py ./build/release/test/unittest "*test/sql/cloud/*"

      - name: Log out azure-cli
        if: always()
        run: |
          az logout

      # Scenario 3: runs after the logout above, with no credentials in the environment
      - name: Tests that focus on public non-authenticated requests
        env:
          AZURE_STORAGE_ACCOUNT: ${{ secrets.AZURE_STORAGE_ACCOUNT }}
          DUCKDB_AZURE_PUBLIC_CONTAINER_AVAILABLE: '1'
        run: |
          python3 duckdb/scripts/run_tests_one_by_one.py ./build/release/test/unittest "*test/sql/cloud/*"
66 changes: 66 additions & 0 deletions .github/workflows/LocalTesting.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Functional tests that run the built extension against Azurite, a local Azure
# storage emulator started inside the job's container.
name: Local functional tests
on: [push, pull_request, repository_dispatch]
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }}
  cancel-in-progress: true
defaults:
  run:
    shell: bash

jobs:
  azurite-tests-linux:
    name: Azurite (local azure test server) tests (Linux)
    runs-on: ubuntu-latest
    container: 'quay.io/pypa/manylinux2014_x86_64'
    env:
      VCPKG_TARGET_TRIPLET: 'x64-linux'
      GEN: Ninja
      VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake
      # Same devstoreaccount1 connection string that scripts/upload_test_files_to_azurite.sh uses
      AZURE_STORAGE_CONNECTION_STRING: 'DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;TableEndpoint=http://127.0.0.1:10002/devstoreaccount1;'
      AZURE_STORAGE_ACCOUNT: devstoreaccount1

    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0
          submodules: 'true'

      # Azurite comes from npm; azure-cli (used by the upload script) comes from Microsoft's yum repo
      - name: install Azure test service
        run: |
          yum install -y nodejs npm
          npm install -g azurite
          echo -e "[azure-cli]\nname=Azure CLI\nbaseurl=https://packages.microsoft.com/yumrepos/azure-cli\nenabled=1\ngpgcheck=1\ngpgkey=https://packages.microsoft.com/keys/microsoft.asc" | tee /etc/yum.repos.d/azure-cli.repo
          yum install -y azure-cli

      - name: Setup ManyLinux2014
        run: |
          ./duckdb/scripts/setup_manylinux2014.sh general aws-cli ccache ssh openssl python_alias

      - name: Setup vcpkg
        uses: lukka/run-vcpkg@v11.1
        with:
          vcpkgGitCommitId: a1a1cbc975abf909a6c8985a6a2b8fe20bbd9bd6

      # Build extension
      - name: Build extension
        env:
          GEN: ninja
        run: |
          make release

      # NOTE(review): scripts/upload_test_files_to_azurite.sh reads its files from
      # ./build/debug/... while this job runs `make release` - confirm the data
      # directory exists after a release-only build.
      - name: Launch & populate Azure test service
        run: |
          azurite > azurite_log.txt 2>&1 &
          # Poll the blob endpoint (up to 60s) instead of sleeping for a fixed
          # time, so a slow start does not race the upload and a fast start
          # does not waste time.
          azurite_ready=false
          for _ in $(seq 1 60); do
            if (exec 3<>/dev/tcp/127.0.0.1/10000) 2>/dev/null; then
              azurite_ready=true
              break
            fi
            sleep 1
          done
          if [ "${azurite_ready}" != "true" ]; then
            echo "Azurite did not start listening on 127.0.0.1:10000 within 60s" >&2
            exit 1
          fi
          ./scripts/upload_test_files_to_azurite.sh

      - name: Test extension
        run: |
          make test

      # Always print the emulator log so failures can be diagnosed
      - name: Azure test server log
        if: always()
        shell: bash
        run: |
          echo "## azurite"
          cat azurite_log.txt
5 changes: 1 addition & 4 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,4 @@
[submodule "extension-ci-tools"]
path = extension-ci-tools
url = git@github.com:duckdb/extension-ci-tools.git
branch = main
[submodule "duckdb_azure"]
path = duckdb_azure
url = https://github.com/duckdb/duckdb_azure.git
branch = main
1 change: 0 additions & 1 deletion duckdb_azure
Submodule duckdb_azure deleted from 097bb1
4 changes: 3 additions & 1 deletion extension_config.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ duckdb_extension_load(httpfs)

# Build the azure extension to test with azure
duckdb_extension_load(azure
SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}/duckdb_azure
LOAD_TESTS
GIT_URL https://github.com/duckdb/duckdb_azure
GIT_TAG 49b63dc8cd166952a0a34dfd54e6cfe5b823e05e
)

# Build the tpch and tpcds extension for testing/benchmarking
Expand Down
21 changes: 21 additions & 0 deletions scripts/upload_test_files_to_azurite.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/bash
#
# Uploads every file below the generated DAT reader-test directory to a running
# Azurite instance, once into a private and once into a public container.
#
# Usage: upload_test_files_to_azurite.sh [DATA_DIR]
#   DATA_DIR  directory whose files are uploaded; defaults to the debug-build
#             location used so far.
#
# Exit status is non-zero when a container cannot be created, DATA_DIR cannot
# be entered, or any upload fails.

# Default Azurite connection string (see: https://github.com/Azure/Azurite)
conn_string="DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;TableEndpoint=http://127.0.0.1:10002/devstoreaccount1;"

data_dir="${1:-./build/debug/rust/src/delta_kernel/acceptance/tests/dat/out/reader_tests/generated}"

# Create containers; without them no upload can succeed, so failure is fatal
az storage container create -n delta-testing-private --connection-string "${conn_string}" || exit 1
az storage container create -n delta-testing-public --connection-string "${conn_string}" --public-access blob || exit 1

# Upload local file $1 as blob $2 into both containers.
# Both uploads are always attempted; returns non-zero if either one failed.
copy_file() {
    local from="${1}"
    local to="${2}"
    local container
    local status=0
    for container in "delta-testing-private" "delta-testing-public"; do
        az storage blob upload --file "${from}" --name "${to}" --container-name "${container}" --connection-string "${conn_string}" || status=1
    done
    return "${status}"
}

# If the cd fails, `find .` below would walk (and upload) whatever the current
# directory happens to be, so stop here instead.
if ! cd "${data_dir}"; then
    echo "error: test data directory not found: ${data_dir}" >&2
    exit 1
fi

failed_uploads=0
# IFS= and -r keep leading/trailing whitespace and backslashes in file names intact
while IFS= read -r filepath; do
    # find prints paths as ./a/b; drop the leading "./" and prefix with dat/
    remote_filepath="dat/${filepath#./}"
    copy_file "${filepath}" "${remote_filepath}" || failed_uploads=$((failed_uploads + 1))
done < <(find . -type f)

if [ "${failed_uploads}" -gt 0 ]; then
    echo "error: ${failed_uploads} file(s) could not be uploaded completely" >&2
    exit 1
fi
114 changes: 60 additions & 54 deletions src/functions/delta_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,16 +66,25 @@ static void visit_callback(ffi::NullableCvoid engine_context, struct ffi::Kernel
ffi::visit_scan_data(engine_data, selection_vec, engine_context, visit_callback);
}

std::string parseFromConnectionString(const std::string& connectionString, const std::string& key) {
std::regex pattern(key + "=([^;]+);");
// Extracts the account name from an endpoint URL: the text between the
// "https://" prefix and the first '.' that follows it.
// Returns "" when the endpoint does not start with "https://" or when no '.'
// occurs after the prefix.
string ParseAccountNameFromEndpoint(const string& endpoint) {
	const string scheme_prefix = "https://";
	if (!StringUtil::StartsWith(endpoint, scheme_prefix)) {
		return "";
	}
	// The account name starts right after the scheme prefix
	const auto name_begin = scheme_prefix.size();
	const auto first_dot = endpoint.find('.', name_begin);
	if (first_dot != string::npos) {
		return endpoint.substr(name_begin, first_dot - name_begin);
	}
	return "";
}

// Returns the value stored under `key` in a ';'-separated `key=value`
// connection string, or "" when the key is absent or has no value.
//
// The key must begin a segment (start of the string or directly after a ';',
// optionally preceded by whitespace), so looking up "AccountName" does not
// match inside a longer key such as "MyAccountName". The value runs up to the
// next ';' or the end of the string, so a trailing ';' is optional and values
// may themselves contain '=' (e.g. base64 padding).
//
// `key` is inserted into the regular expression verbatim; callers are expected
// to pass plain identifiers without regex metacharacters.
string parseFromConnectionString(const string& connectionString, const string& key) {
	std::regex pattern("(?:^|;)\\s*" + key + "=([^;]+)");
	std::smatch matches;
	if (std::regex_search(connectionString, matches, pattern) && matches.size() > 1) {
		// Capture group 1 holds the value
		return matches[1].str();
	}
	return "";
}

static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &path) {
Expand Down Expand Up @@ -169,75 +178,72 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_region"), KernelUtils::ToDeltaString(region));

} else if (secret_type == "azure") {

// azure seems to be super complicated as we need to cover duckdb azure plugin and delta RS builder
// and both require different settings

auto connection_string = kv_secret.TryGetValue("connection_string").ToString();
auto account_name = kv_secret.TryGetValue("account_name").ToString();
auto endpoint = kv_secret.TryGetValue("endpoint").ToString();
auto client_id = kv_secret.TryGetValue("client_id").ToString();
auto client_secret = kv_secret.TryGetValue("client_secret").ToString();
auto tenant_id = kv_secret.TryGetValue("tenant_id").ToString();
auto certificate_path = kv_secret.TryGetValue("certificate_path").ToString();
auto http_proxy = kv_secret.TryGetValue("http_proxy").ToString();
auto proxy_user_name = kv_secret.TryGetValue("proxy_user_name").ToString();
auto proxy_password = kv_secret.TryGetValue("proxy_password").ToString();
auto chain = kv_secret.TryGetValue("chain").ToString();
auto provider = kv_secret.GetProvider();

if (account_name == "devstoreaccount1" || connection_string.find("devstoreaccount1") != string::npos) {
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("use_emulator"), KernelUtils::ToDeltaString("true")); //needed for delta RS builder
}
if (provider == "credential_chain") {
// Authentication option 1a: using the cli authentication
if (chain.find("cli") != std::string::npos) {
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("use_azure_cli"), KernelUtils::ToDeltaString("true"));
}
// Authentication option 1b: non-cli credential chains will just "hope for the best" technically since we are using the default
// credential chain provider duckDB and delta-kernel-rs should find the same auth
} else if (!connection_string.empty() && connection_string != "NULL") {

if (!connection_string.empty() && connection_string != "NULL") {
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_storage_connection_string"), KernelUtils::ToDeltaString(connection_string)); //needed for duckdb azure plugin
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("connection_string"), KernelUtils::ToDeltaString(connection_string)); //needed for duckdb azure plugin
// Authentication option 2: a connection string based on account key
auto account_key = parseFromConnectionString(connection_string, "AccountKey");
account_name = parseFromConnectionString(connection_string, "AccountName");
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("access_key"), KernelUtils::ToDeltaString(parseFromConnectionString(connection_string, "AccountKey"))); //needed for delta RS builder
// Authentication option 2: a connection string based on account key
if (!account_name.empty() && !account_key.empty()) {
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("account_key"),
KernelUtils::ToDeltaString(account_key));
} else {
// Authentication option 2b: a connection string based on SAS token
endpoint = parseFromConnectionString(connection_string, "BlobEndpoint");
if (account_name.empty()) {
account_name = ParseAccountNameFromEndpoint(endpoint);
}
auto sas_token = parseFromConnectionString(connection_string, "SharedAccessSignature");
if (!sas_token.empty()) {
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("sas_token"),
KernelUtils::ToDeltaString(sas_token));
}
}
} else if (provider == "service_principal") {
if (!client_id.empty() && client_id != "NULL") {
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_id"), KernelUtils::ToDeltaString(client_id));
}
if (!client_secret.empty() && client_secret != "NULL") {
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_secret"), KernelUtils::ToDeltaString(client_secret));
}
if (!tenant_id.empty() && tenant_id != "NULL") {
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_tenant_id"), KernelUtils::ToDeltaString(tenant_id));
}
} else {
// Authentication option 3: no authentication, just an account name
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_skip_signature"), KernelUtils::ToDeltaString("true"));
}
// Set the use_emulator option for when the azurite test server is used
if (account_name == "devstoreaccount1" || connection_string.find("devstoreaccount1") != string::npos) {
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("use_emulator"), KernelUtils::ToDeltaString("true"));
}
if (!account_name.empty() && account_name != "NULL") {
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_account_name"), KernelUtils::ToDeltaString(account_name)); //needed for duckdb azure plugin
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("account_name"), KernelUtils::ToDeltaString(account_name)); //needed for delta RS builder
}
if (!endpoint.empty() && endpoint != "NULL") {
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_endpoint"), KernelUtils::ToDeltaString(endpoint));
} else {
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_endpoint"), KernelUtils::ToDeltaString("https://" + account_name + ".blob.core.windows.net/")); //needed? Does that work with dfs files system?
}
if (!chain.empty() && chain != "NULL") {
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("provider"), KernelUtils::ToDeltaString("credential_chain")); //needed for duckdb azure plugin

if (chain.find("cli") != std::string::npos) {
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("use_azure_cli"), KernelUtils::ToDeltaString("true")); //dont know if that is the right way, but we need to tell delta RS builder to authenticate with azure cli
}

ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_credential_chain"), KernelUtils::ToDeltaString(chain)); //needed for duckdb azure plugin, dont know if all three are necessary
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("credential_chain"), KernelUtils::ToDeltaString(chain)); //needed for duckdb azure plugin, dont know if all three are necessary
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("chain"), KernelUtils::ToDeltaString(chain)); //needed for duckdb azure plugin, dont know if all three are necessary
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_endpoint"), KernelUtils::ToDeltaString("https://" + account_name + ".blob.core.windows.net/"));
}
if (!client_id.empty() && client_id != "NULL") {
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_id"), KernelUtils::ToDeltaString(client_id)); //untested
}
if (!client_secret.empty() && client_secret != "NULL") {
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_secret"), KernelUtils::ToDeltaString(client_secret)); //untested
}
if (!tenant_id.empty() && tenant_id != "NULL") {
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_tenant_id"), KernelUtils::ToDeltaString(tenant_id)); //needed for duckdb azure plugin
}
if (!certificate_path.empty() && certificate_path != "NULL") {
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_certificate_path"), KernelUtils::ToDeltaString(certificate_path)); //untested
}
if (!http_proxy.empty() && http_proxy != "NULL") {
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("http_proxy"), KernelUtils::ToDeltaString(http_proxy)); //untested
}
if (!proxy_user_name.empty() && proxy_user_name != "NULL") {
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("proxy_user_name"), KernelUtils::ToDeltaString(proxy_user_name)); //untested
}
if (!proxy_password.empty() && proxy_password != "NULL") {
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("proxy_password"), KernelUtils::ToDeltaString(proxy_password)); //untested
}
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("container_name"), KernelUtils::ToDeltaString(bucket)); // needed ?

ffi::set_builder_option(builder, KernelUtils::ToDeltaString("container_name"), KernelUtils::ToDeltaString(bucket));
}
return builder;
}
Expand Down
37 changes: 37 additions & 0 deletions test/sql/cloud/azure/cli_auth.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# name: test/sql/cloud/azure/cli_auth.test
# description: confirm queried data is correct
# group: [azure]

require azure

require parquet

require delta

require-env DUCKDB_AZ_CLI_LOGGED_IN

require-env AZURE_STORAGE_ACCOUNT

statement ok
set allow_persistent_secrets=false

statement ok
CREATE SECRET az1 (
TYPE AZURE,
PROVIDER CREDENTIAL_CHAIN,
CHAIN 'cli',
ACCOUNT_NAME '${AZURE_STORAGE_ACCOUNT}'
)

mode output_result

# Run a remote DAT test
query I rowsort all_primitive_types
SELECT *
FROM delta_scan('azure://delta-testing-private/dat/all_primitive_types/delta')
----

query I rowsort all_primitive_types
SELECT *
FROM parquet_scan('azure://delta-testing-private/dat/all_primitive_types/expected/latest/**/*.parquet')
----
Loading

0 comments on commit aa0b52b

Please sign in to comment.