-
Notifications
You must be signed in to change notification settings - Fork 3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix(datahub-client): support utf8 encoding #4878
Changes from 2 commits
23902f6
e810a32
b2130cc
d549aa5
41c0266
ddad174
02c3238
574bb00
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,7 @@ | |
import java.io.ByteArrayOutputStream; | ||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.net.URISyntaxException; | ||
import java.util.List; | ||
import java.util.concurrent.CountDownLatch; | ||
import java.util.concurrent.ExecutionException; | ||
|
@@ -154,7 +155,12 @@ public static RestEmitter createWithDefaults() { | |
@Override | ||
public Future<MetadataWriteResponse> emit(MetadataChangeProposalWrapper mcpw, | ||
Callback callback) throws IOException { | ||
return emit(this.eventFormatter.convert(mcpw), callback); | ||
try { | ||
return emit(StringEscapeUtils.convert(mcpw), callback); | ||
} catch (URISyntaxException e) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need to catch any IO exception? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let the caller handle ioexception. We cannt do anything about exception at this point. |
||
log.error(e.toString()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Generally we try to avoid both logging and throwing simultaneously There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
throw new RuntimeException(e); | ||
} | ||
} | ||
|
||
@Override | ||
|
@@ -309,4 +315,4 @@ public void cancelled() { | |
Future<HttpResponse> requestFuture = httpClient.execute(httpPost, httpCallback); | ||
return new MetadataResponseFuture(requestFuture, responseAtomicReference, responseLatch); | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
/** | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package datahub.client.rest; | ||
|
||
import java.io.IOException; | ||
import java.io.StringWriter; | ||
import java.io.Writer; | ||
import java.net.URISyntaxException; | ||
import java.nio.charset.StandardCharsets; | ||
import java.util.Locale; | ||
|
||
import org.apache.commons.lang.UnhandledException; | ||
|
||
import com.fasterxml.jackson.annotation.JsonInclude; | ||
import com.fasterxml.jackson.databind.ObjectMapper; | ||
import com.linkedin.common.urn.Urn; | ||
import com.linkedin.data.ByteString; | ||
import com.linkedin.data.template.JacksonDataTemplateCodec; | ||
import com.linkedin.mxe.GenericAspect; | ||
import com.linkedin.mxe.MetadataChangeProposal; | ||
|
||
import datahub.event.MetadataChangeProposalWrapper; | ||
|
||
public class StringEscapeUtils { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We try to include a unit test for all Util classes. Let's add one for this! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unit test added |
||
|
||
private StringEscapeUtils() { | ||
|
||
} | ||
|
||
private final static ObjectMapper OBJECT_MAPPER = new ObjectMapper() | ||
.setSerializationInclusion(JsonInclude.Include.NON_NULL); | ||
private final static JacksonDataTemplateCodec DATA_TEMPLATE_CODEC = new JacksonDataTemplateCodec( | ||
OBJECT_MAPPER.getFactory()); | ||
|
||
public static MetadataChangeProposal convert(MetadataChangeProposalWrapper mcpw) | ||
throws IOException, URISyntaxException { | ||
String serializedAspect = escapeJava(DATA_TEMPLATE_CODEC.dataTemplateToString(mcpw.getAspect())); | ||
MetadataChangeProposal mcp = new MetadataChangeProposal().setEntityType(mcpw.getEntityType()) | ||
.setAspectName(mcpw.getAspectName()).setEntityUrn(Urn.createFromString(mcpw.getEntityUrn())) | ||
.setChangeType(mcpw.getChangeType()); | ||
|
||
mcp.setAspect(new GenericAspect().setContentType("application/json") | ||
.setValue(ByteString.unsafeWrap(serializedAspect.getBytes(StandardCharsets.UTF_8)))); | ||
return mcp; | ||
} | ||
|
||
private static void escapeJavaStyleString(Writer out, String str, boolean escapeSingleQuote, | ||
boolean escapeForwardSlash) throws IOException { | ||
if (out == null) { | ||
throw new IllegalArgumentException("The Writer must not be null"); | ||
} else if (str != null) { | ||
int sz = str.length(); | ||
|
||
for (int i = 0; i < sz; ++i) { | ||
char ch = str.charAt(i); | ||
if (ch > 4095) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This feels like fairly low-level code. Are we sure we need to manually do these substitutions? Are there any off the shelf libs to aid us in this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is an off the shelf library (Apache), but requires a little bit of customization to get it working the way we want. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note the License Header 😄 |
||
out.write("\\u" + hex(ch)); | ||
} else if (ch > 255) { | ||
out.write("\\u0" + hex(ch)); | ||
} else if (ch > 127) { | ||
out.write("\\u00" + hex(ch)); | ||
} else if (ch < ' ') { | ||
switch (ch) { | ||
case '\b': | ||
out.write(92); | ||
out.write(98); | ||
break; | ||
case '\t': | ||
out.write(92); | ||
out.write(116); | ||
break; | ||
case '\n': | ||
out.write(92); | ||
out.write(110); | ||
break; | ||
case '\u000b': | ||
|
||
case '\f': | ||
out.write(92); | ||
out.write(102); | ||
break; | ||
case '\r': | ||
out.write(92); | ||
out.write(114); | ||
break; | ||
default: | ||
if (ch > 15) { | ||
out.write("\\u00" + hex(ch)); | ||
} else { | ||
out.write("\\u000" + hex(ch)); | ||
} | ||
break; | ||
} | ||
|
||
} else { | ||
out.write(ch); | ||
} | ||
} | ||
} | ||
} | ||
|
||
private static String hex(char ch) { | ||
return Integer.toHexString(ch).toUpperCase(Locale.ENGLISH); | ||
} | ||
|
||
private static String escapeJavaStyleString(String str, boolean escapeSingleQuotes, boolean escapeForwardSlash) { | ||
if (str == null) { | ||
return null; | ||
} else { | ||
try { | ||
StringWriter writer = new StringWriter(str.length() * 2); | ||
escapeJavaStyleString(writer, str, escapeSingleQuotes, escapeForwardSlash); | ||
return writer.toString(); | ||
} catch (IOException var4) { | ||
throw new UnhandledException(var4); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why throw unhandled here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. removed unhandledexception. |
||
} | ||
} | ||
} | ||
|
||
public static String escapeJava(String str) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we have this public wrapper method? Why not just expose the underlying method directly? Why not keep the name the same? (even though arguments differ) Since this is public, let's add a basic javadoc There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is escapeJavaScript on the same lines which we have not included in our code. |
||
return escapeJavaStyleString(str, false, false); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why are we abandoning the previous member variable
eventFormatter
(classEventFormatter
) and replacing with a hard-codedStringEscapeUtils.convert
static method? Can we instead improve the existing EventFormatter implementation to add this functionality? Then the unit test can be moved to the existingEventFormatterTest
class as well.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Missed this, yeah it makes sense to continue routing through the eventFormatter and to just modify the convert method rather than pass it directly to the util.