Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(spark-lineage): simplified jars, config, auto publish to maven #3924

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions .github/workflows/publish-datahub-jars.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Publishes the datahub-client and datahub-spark jars.
# Runs after a successful "build & test" workflow, on published/edited releases,
# or when dispatched manually. Publishing is skipped when no signing key is configured.
name: Publish Datahub Client

on:
  workflow_run:
    workflows: ["build & test"]
    types:
      - completed

  release:
    types: [published, edited]

  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

jobs:

  # Exposes whether the SIGNING_KEY secret is available (it is not on forks),
  # so the publish job can be skipped instead of failing.
  check-secret:
    runs-on: ubuntu-latest
    # Only gate on the upstream result when triggered by workflow_run. Release and manual
    # dispatch events carry no workflow_run payload, so an unconditional check on
    # workflow_run.conclusion would always skip this job (and publish with it).
    if: ${{ github.event_name != 'workflow_run' || github.event.workflow_run.conclusion == 'success' }}
    outputs:
      publish-enabled: ${{ steps.publish-enabled.outputs.defined }}
    steps:
      - id: publish-enabled
        if: "${{ env.SIGNING_KEY != '' }}"
        run: echo "::set-output name=defined::true"
        env:
          SIGNING_KEY: ${{ secrets.SIGNING_KEY }}

  publish:
    runs-on: ubuntu-latest
    needs: [check-secret]
    if: needs.check-secret.outputs.publish-enabled == 'true'
    steps:
      - uses: actions/checkout@v2
        with:
          # Full history is fetched; the Gradle build derives the version from git tags.
          fetch-depth: 0
      - name: Set up JDK 1.8
        uses: actions/setup-java@v1
        with:
          java-version: 1.8
      - uses: actions/setup-python@v2
        with:
          python-version: "3.6"
      - name: checkout upstream repo
        run: |
          git remote add upstream https://github.com/linkedin/datahub.git
          git fetch upstream --tags
      - name: publish datahub-client jar
        env:
          RELEASE_USERNAME: ${{ secrets.RELEASE_USERNAME }}
          RELEASE_PASSWORD: ${{ secrets.RELEASE_PASSWORD }}
          SIGNING_PASSWORD: ${{ secrets.SIGNING_PASSWORD }}
          SIGNING_KEY: ${{ secrets.SIGNING_KEY }}
          NEXUS_USERNAME: ${{ secrets.NEXUS_USERNAME }}
          NEXUS_PASSWORD: ${{ secrets.NEXUS_PASSWORD }}
        run: |
          echo signingKey=$SIGNING_KEY >> gradle.properties
          ./gradlew :metadata-integration:java:datahub-client:printVersion
          ./gradlew :metadata-integration:java:datahub-client:publishToMavenLocal
          #./gradlew :metadata-integration:java:datahub-client:closeAndReleaseRepository --info
      - name: publish datahub-spark jar
        env:
          RELEASE_USERNAME: ${{ secrets.RELEASE_USERNAME }}
          RELEASE_PASSWORD: ${{ secrets.RELEASE_PASSWORD }}
          SIGNING_PASSWORD: ${{ secrets.SIGNING_PASSWORD }}
          SIGNING_KEY: ${{ secrets.SIGNING_KEY }}
          NEXUS_USERNAME: ${{ secrets.NEXUS_USERNAME }}
          NEXUS_PASSWORD: ${{ secrets.NEXUS_PASSWORD }}
        run: |
          echo signingKey=$SIGNING_KEY >> gradle.properties
          ./gradlew :metadata-integration:java:spark-lineage:printVersion
          ./gradlew :metadata-integration:java:spark-lineage:publishToMavenLocal
          # NOTE(review): the disabled command below targets datahub-client, not spark-lineage - confirm before enabling.
          #./gradlew :metadata-integration:java:datahub-client:closeAndReleaseRepository --info
2 changes: 2 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ buildscript {
classpath 'com.commercehub.gradle.plugin:gradle-avro-plugin:0.8.1'
classpath 'org.springframework.boot:spring-boot-gradle-plugin:2.1.4.RELEASE'
classpath 'com.github.jengelman.gradle.plugins:shadow:5.2.0'
classpath "io.codearte.gradle.nexus:gradle-nexus-staging-plugin:0.30.0"
classpath "com.palantir.gradle.gitversion:gradle-git-version:0.12.3"
}
}

Expand Down
4 changes: 4 additions & 0 deletions gradle.properties
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,7 @@ org.gradle.caching=false
org.gradle.internal.repository.max.retries=5
org.gradle.internal.repository.max.tentatives=5
org.gradle.internal.repository.initial.backoff=1000

# Needed to publish to Nexus from a sub-module
gnsp.disableApplyOnlyOnRootProjectEnforcement=true

94 changes: 94 additions & 0 deletions metadata-integration/java/datahub-client/build.gradle
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
apply plugin: 'java'
apply plugin: 'com.github.johnrengelman.shadow'
apply plugin: 'jacoco'
apply plugin: 'signing'
apply plugin: 'io.codearte.nexus-staging'
apply plugin: 'maven-publish'
apply plugin: 'com.palantir.git-version'
import org.apache.tools.ant.filters.ReplaceTokens

jar.enabled = false // Since we only want to build shadow jars, disabling the regular jar creation

Expand All @@ -21,6 +26,19 @@ jacocoTestReport {
dependsOn test // tests are required to run before generating the report
}


// Derive the project version from the most recent git tag reported by versionDetails().
def details = versionDetails()
version = details.lastTag
// Drop a leading "v" so a tag such as "v1.2.3" becomes "1.2.3".
version = version.startsWith("v")? version.substring(1): version
// trim version if it is of size 4 to size 3
def versionParts = version.tokenize(".")
// On a clean tag the third component is used as-is; otherwise it is incremented and
// suffixed with "-SNAPSHOT".
// NOTE(review): assumes the tag has at least three dot-separated parts and that the third is an integer - confirm.
def lastPart = details.isCleanTag? versionParts[2]: (versionParts[2].toInteger()+1).toString() + "-SNAPSHOT"
// Only the first three components are kept; any further component is discarded.
version = versionParts[0] + "." + versionParts[1] + "." + lastPart

processResources {
filter(ReplaceTokens, tokens:[fullVersion: gitVersion()])
}

test {
useJUnit()
finalizedBy jacocoTestReport
Expand Down Expand Up @@ -64,3 +82,79 @@ checkShadowJar {
assemble {
dependsOn shadowJar
}

task sourcesJar(type: Jar) {
archiveClassifier = 'sources'
from sourceSets.main.allSource
}

task javadocJar(type: Jar) {
archiveClassifier = 'javadoc'
from javadoc
}


publishing {
publications {
shadow(MavenPublication) {
publication -> project.shadow.component(publication)
pom {
name = 'Datahub Client'
group = 'io.acryl'
artifactId = 'datahub-client'
description = 'DataHub Java client for metadata integration'
url = 'https://datahubproject.io'
artifacts = [ shadowJar, javadocJar, sourcesJar ]

scm {
connection = 'scm:git:git://github.com/linkedin/datahub.git'
developerConnection = 'scm:git:ssh://github.com:linkedin/datahub.git'
url = 'https://github.com/linkedin/datahub.git'
}

licenses {
license {
name = 'The Apache License, Version 2.0'
url = 'http://www.apache.org/licenses/LICENSE-2.0.txt'
}
}

developers {
developer {
id = 'datahub'
name = 'Datahub'
email = '[email protected]'
}
}
}
}
}

repositories {
maven {
def releasesRepoUrl = "https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/"
def snapshotsRepoUrl = "https://s01.oss.sonatype.org/content/repositories/snapshots/"
def ossrhUsername = System.getenv('RELEASE_USERNAME')
def ossrhPassword = System.getenv('RELEASE_PASSWORD')
credentials {
username ossrhUsername
password ossrhPassword
}
url = version.endsWith('SNAPSHOT') ? snapshotsRepoUrl : releasesRepoUrl
}
}
}


// Signs the "shadow" publication using an in-memory PGP key.
signing {
// The key is read from the "signingKey" Gradle property, the passphrase from the
// SIGNING_PASSWORD environment variable; either may be absent (null) on a local build.
def signingKey = findProperty("signingKey")
def signingPassword = System.getenv("SIGNING_PASSWORD")
useInMemoryPgpKeys(signingKey, signingPassword)
sign publishing.publications.shadow
}

nexusStaging {
serverUrl = "https://s01.oss.sonatype.org/service/local/" //required only for projects registered in Sonatype after 2021-02-24
username = System.getenv("NEXUS_USERNAME")
password = System.getenv("NEXUS_PASSWORD")
}
14 changes: 9 additions & 5 deletions metadata-integration/java/datahub-client/scripts/check_jar.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# This script checks the shadow jar to ensure that we only have allowed classes being exposed through the jar
jar -tvf build/libs/datahub-client.jar |\
jarFiles=$(find build/libs -name "datahub-client*.jar" | grep -v sources | grep -v javadoc)
for jarFile in ${jarFiles}; do
jar -tvf $jarFile |\
grep -v "datahub/shaded" |\
grep -v "META-INF" |\
grep -v "com/linkedin" |\
Expand All @@ -9,12 +11,14 @@ jar -tvf build/libs/datahub-client.jar |\
grep -v "pegasus/" |\
grep -v "legacyPegasusSchemas/" |\
grep -v " com/$" |\
grep -v "git.properties"
grep -v "git.properties" |\
grep -v "client.properties"

if [ $? -ne 0 ]; then
echo "No other packages found. Great"
exit 0
echo "✅ No unexpected class paths found in ${jarFile}"
else
echo "Found other packages than what we were expecting"
echo "💥 Found unexpected class paths in ${jarFile}"
exit 1
fi
done
exit 0
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,17 @@
*
* Constructing a REST Emitter follows a lambda-based fluent builder pattern using the `create` method.
* e.g.
* RestEmitter emitter = RestEmitter.create(b -> b
* RestEmitter emitter = RestEmitter.create(b :: b
* .server("http://localhost:8080")
* .extraHeaders(Collections.singletonMap("Custom-Header", "custom-val"))
* );
* You can also customize the underlying
* http client by calling the `customizeHttpAsyncClient` method on the builder.
* e.g.
* RestEmitter emitter = RestEmitter.create(b -> b
* RestEmitter emitter = RestEmitter.create(b :: b
* .server("http://localhost:8080")
* .extraHeaders(Collections.singletonMap("Custom-Header", "custom-val"))
* .customizeHttpAsyncClient(c -> c.setConnectionTimeToLive(30, TimeUnit.SECONDS))
* .customizeHttpAsyncClient(c :: c.setConnectionTimeToLive(30, TimeUnit.SECONDS))
* );
*/
public class RestEmitter implements Emitter {
Expand Down Expand Up @@ -117,16 +117,16 @@ private static MetadataWriteResponse mapResponse(HttpResponse response) {
/**
* Constructing a REST Emitter follows a lambda-based fluent builder pattern using the `create` method.
* e.g.
* RestEmitter emitter = RestEmitter.create(b -> b
* RestEmitter emitter = RestEmitter.create(b :: b
* .server("http://localhost:8080") // coordinates of gms server
* .extraHeaders(Collections.singletonMap("Custom-Header", "custom-val"))
* );
* You can also customize the underlying http client by calling the `customizeHttpAsyncClient` method on the builder.
* e.g.
* RestEmitter emitter = RestEmitter.create(b -> b
* RestEmitter emitter = RestEmitter.create(b :: b
* .server("http://localhost:8080")
* .extraHeaders(Collections.singletonMap("Custom-Header", "custom-val"))
* .customizeHttpAsyncClient(c -> c.setConnectionTimeToLive(30, TimeUnit.SECONDS))
* .customizeHttpAsyncClient(c :: c.setConnectionTimeToLive(30, TimeUnit.SECONDS))
* );
* @param builderSupplier
* @return a constructed RestEmitter. Call #testConnection to make sure this emitter has a valid connection to the server
Expand Down
Original file line number Diff line number Diff line change
@@ -1,23 +1,28 @@
package datahub.client.rest;

import datahub.event.EventFormatter;
import java.io.InputStream;
import java.util.Collections;
import java.util.Map;
import java.util.Properties;
import java.util.function.Consumer;
import lombok.Builder;
import lombok.NonNull;
import lombok.Value;
import lombok.extern.slf4j.Slf4j;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.impl.nio.client.HttpAsyncClientBuilder;


@Value
@Builder
@Slf4j
public class RestEmitterConfig {

public static final int DEFAULT_CONNECT_TIMEOUT_SEC = 10;
public static final int DEFAULT_READ_TIMEOUT_SEC = 10;
public static final String DEFAULT_AUTH_TOKEN = null;
public static final String CLIENT_VERSION_PROPERTY = "clientVersion";

@Builder.Default
private final String server = "http://localhost:8080";
Expand All @@ -38,12 +43,25 @@ public class RestEmitterConfig {

public static class RestEmitterConfigBuilder {

/**
 * Reads the client version from the {@code client.properties} classpath resource.
 *
 * @return the value stored under {@link #CLIENT_VERSION_PROPERTY}, or {@code "unknown"} when the
 *     resource is missing, cannot be read, or does not define that property
 */
private String getVersion() {
  try (InputStream versionStream =
      this.getClass().getClassLoader().getResourceAsStream("client.properties")) {
    // getResourceAsStream signals a missing resource with null rather than an exception;
    // handle it explicitly instead of relying on Properties.load(null) throwing.
    if (versionStream == null) {
      log.warn("Unable to find a version for datahub-client. Will set to unknown");
      return "unknown";
    }
    Properties properties = new Properties();
    properties.load(versionStream);
    return properties.getProperty(CLIENT_VERSION_PROPERTY, "unknown");
  } catch (Exception e) {
    // Best effort: a version lookup failure must never prevent building an emitter config.
    log.warn("Unable to find a version for datahub-client. Will set to unknown", e);
    return "unknown";
  }
}

private HttpAsyncClientBuilder asyncHttpClientBuilder = HttpAsyncClientBuilder
.create()
.setDefaultRequestConfig(RequestConfig.custom()
.setConnectTimeout(DEFAULT_CONNECT_TIMEOUT_SEC * 1000)
.setSocketTimeout(DEFAULT_READ_TIMEOUT_SEC * 1000)
.build());
.build())
.setUserAgent("DataHub-RestClient/" + getVersion());

public RestEmitterConfigBuilder with(Consumer<RestEmitterConfigBuilder> builderFunction) {
builderFunction.accept(this);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
clientVersion=@fullVersion@
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Random;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
Expand Down Expand Up @@ -363,4 +364,20 @@ public void testTimeoutOnGetWithTimeout() {
Assert.assertTrue(ioe instanceof TimeoutException);
}
}

/**
 * Verifies that requests sent by the emitter carry a User-Agent header of the form
 * {@code DataHub-RestClient/<clientVersion>}, where the version is read from the
 * {@code client.properties} classpath resource.
 */
@Test
public void testUserAgentHeader() throws IOException, ExecutionException, InterruptedException {
  TestDataHubServer testDataHubServer = new TestDataHubServer();
  Integer port = testDataHubServer.getMockServer().getPort();
  RestEmitter emitter = RestEmitter.create(b -> b.server("http://localhost:" + port));
  testDataHubServer.getMockServer().reset();
  emitter.testConnection();
  Properties properties = new Properties();
  // Close the resource stream deterministically and fail with an assertion (not an NPE
  // inside Properties.load) when client.properties is missing from the classpath.
  try (java.io.InputStream versionStream =
      emitter.getClass().getClassLoader().getResourceAsStream("client.properties")) {
    Assert.assertNotNull(versionStream);
    properties.load(versionStream);
  }
  String version = properties.getProperty("clientVersion");
  Assert.assertNotNull(version);
  testDataHubServer.getMockServer().verify(
      request("/config")
          .withHeader("User-Agent", "DataHub-RestClient/" + version));
}
}
Loading