Commit c3d2d769 authored by Andre Freyssinet's avatar Andre Freyssinet
Browse files

Fix issues about String serialization: Joram#314380.

Fix issue about serialized size of String objects.
Use UTF-8 by default to serialize String objects; a specific charset can
be configured instead.
parent 3aca56d8
/*
* Copyright (C) 2013 - 2021 ScalAgent Distributed Technologies
* Copyright (C) 2013 - 2022 ScalAgent Distributed Technologies
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
......@@ -27,7 +27,7 @@ import java.nio.ByteBuffer;
/**
* Decoder using a byte buffer.
*/
public class ByteBufferDecoder implements Decoder {
public final class ByteBufferDecoder implements Decoder {
private ByteBuffer buf;
public ByteBufferDecoder(ByteBuffer buf) {
......@@ -35,38 +35,47 @@ public class ByteBufferDecoder implements Decoder {
this.buf = buf;
}
/**
 * Reads the next two bytes of the buffer as a signed 16-bit value.
 *
 * @return the short read at the buffer's current position
 */
@Override
public short decodeSignedShort() throws Exception {
return buf.getShort();
}
/**
 * Reads the next two bytes of the buffer as a 16-bit value.
 * The raw bits are returned in a Java {@code short}, so values above
 * {@code Short.MAX_VALUE} appear negative; mask with {@code 0xFFFF} to get
 * the unsigned value.
 *
 * @return the 16 bits read at the buffer's current position
 */
@Override
public short decodeUnsignedShort() throws Exception {
return buf.getShort();
}
/**
 * Reads the next 16 bits of the buffer.
 *
 * @return the short read at the buffer's current position
 */
@Override
public short decode16() throws Exception {
return buf.getShort();
}
/**
 * Reads the next four bytes of the buffer as a signed 32-bit value.
 *
 * @return the int read at the buffer's current position
 */
@Override
public int decodeSignedInt() throws Exception {
return buf.getInt();
}
/**
 * Reads the next four bytes of the buffer as a 32-bit value.
 * The raw bits are returned in a Java {@code int}, so values above
 * {@code Integer.MAX_VALUE} appear negative.
 *
 * @return the 32 bits read at the buffer's current position
 */
@Override
public int decodeUnsignedInt() throws Exception {
return buf.getInt();
}
/**
 * Reads the next 32 bits of the buffer.
 *
 * @return the int read at the buffer's current position
 */
@Override
public int decode32() throws Exception {
return buf.getInt();
}
/**
 * Reads the next eight bytes of the buffer as a 64-bit value.
 * The raw bits are returned in a Java {@code long}, so values above
 * {@code Long.MAX_VALUE} appear negative.
 *
 * @return the 64 bits read at the buffer's current position
 */
@Override
public long decodeUnsignedLong() throws Exception {
return buf.getLong();
}
/**
 * Reads the next eight bytes of the buffer as a signed 64-bit value.
 *
 * @return the long read at the buffer's current position
 */
@Override
public long decodeSignedLong() throws Exception {
return buf.getLong();
}
/**
 * Reads the next 64 bits of the buffer.
 *
 * @return the long read at the buffer's current position
 */
@Override
public long decode64() throws Exception {
return buf.getLong();
}
......@@ -75,58 +84,68 @@ public class ByteBufferDecoder implements Decoder {
return decodeBoolean();
}
/**
 * Decodes a string that may have been encoded as null.
 * The null flag is consumed first; the string itself is only read when the
 * flag reports a non-null value.
 *
 * @return the decoded string, or {@code null} if the null flag was set
 */
@Override
public String decodeNullableString() throws Exception {
  return isNull() ? null : decodeString();
}
/**
 * Decodes a length-prefixed string: a 32-bit byte count followed by that
 * many encoded bytes.
 *
 * @return the decoded string
 */
@Override
public String decodeString() throws Exception {
  return decodeString(buf.getInt());
}
/**
 * Decodes a string stored on exactly {@code length} bytes using
 * {@code EncodableHelper.charset}, and moves the buffer position past
 * those bytes.
 *
 * @param length the number of encoded bytes to read
 * @return the decoded string
 * @throws Exception if the requested bytes cannot be read from the buffer
 */
@Override
public String decodeString(int length) throws Exception {
  String s;
  if (buf.hasArray()) {
    // Decode straight from the backing array to avoid a copy. position() is
    // relative to the buffer, not to the array: a sliced buffer has a non-zero
    // arrayOffset() that must be added to find the right bytes.
    int start = buf.arrayOffset() + ((Buffer) buf).position();
    s = new String(buf.array(), start, length, EncodableHelper.charset);
    ((Buffer) buf).position(((Buffer) buf).position() + length);
  } else {
    // No accessible backing array (e.g. direct buffer): copy the bytes out first.
    byte[] bytes = new byte[length];
    buf.get(bytes);
    s = new String(bytes, EncodableHelper.charset);
  }
  return s;
}
/**
 * Reads the next byte of the buffer.
 *
 * @return the byte read at the buffer's current position
 */
@Override
public byte decodeByte() throws Exception {
return buf.get();
}
/**
 * Decodes a byte array that may have been encoded as null.
 * The null flag is consumed first; the array is only read when the flag
 * reports a non-null value.
 *
 * @return the decoded array, or {@code null} if the null flag was set
 */
@Override
public byte[] decodeNullableByteArray() throws Exception {
  return isNull() ? null : decodeByteArray();
}
/**
 * Decodes a length-prefixed byte array: a 32-bit element count followed by
 * that many bytes.
 *
 * @return the decoded array
 */
@Override
public byte[] decodeByteArray() throws Exception {
  return decodeByteArray(buf.getInt());
}
/**
 * Copies the next {@code length} bytes of the buffer into a new array.
 *
 * @param length the number of bytes to read
 * @return a freshly allocated array holding the bytes read
 */
@Override
public byte[] decodeByteArray(int length) throws Exception {
  final byte[] data = new byte[length];
  buf.get(data);
  return data;
}
/**
 * Reads one byte and interprets it as a boolean.
 *
 * @return {@code false} if the byte read is zero, {@code true} otherwise
 */
@Override
public boolean decodeBoolean() throws Exception {
  return buf.get() != 0;
}
/**
 * Reads the next four bytes of the buffer as a float.
 *
 * @return the float read at the buffer's current position
 */
@Override
public float decodeFloat() throws Exception {
return buf.getFloat();
}
/**
 * Reads the next eight bytes of the buffer as a double.
 *
 * @return the double read at the buffer's current position
 */
@Override
public double decodeDouble() throws Exception {
return buf.getDouble();
}
......
/*
* Copyright (C) 2013 - 2021 ScalAgent Distributed Technologies
* Copyright (C) 2013 - 2022 ScalAgent Distributed Technologies
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
......@@ -26,7 +26,7 @@ import java.nio.ByteBuffer;
/**
* Encoder using a byte buffer.
*/
public class ByteBufferEncoder implements Encoder {
public final class ByteBufferEncoder implements Encoder {
private ByteBuffer buf;
public ByteBufferEncoder(ByteBuffer buf) {
......@@ -34,46 +34,57 @@ public class ByteBufferEncoder implements Encoder {
this.buf = buf;
}
/**
 * Writes a boolean as a single byte: 1 for {@code true}, 0 for {@code false}.
 *
 * @param bool the value to write
 */
@Override
public void encodeBoolean(boolean bool) throws Exception {
  final byte flag;
  if (bool) {
    flag = (byte) 1;
  } else {
    flag = (byte) 0;
  }
  buf.put(flag);
}
/**
 * Writes one byte at the buffer's current position.
 *
 * @param b the byte to write
 */
@Override
public void encodeByte(byte b) throws Exception {
buf.put(b);
}
/**
 * Writes a signed 16-bit value on two bytes.
 *
 * @param s the value to write
 */
@Override
public void encodeSignedShort(short s) throws Exception {
buf.putShort(s);
}
/**
 * Writes a 16-bit value on two bytes. The bits of {@code s} are written
 * unchanged; no sign handling is applied.
 *
 * @param s the value to write
 */
@Override
public void encodeUnsignedShort(short s) throws Exception {
buf.putShort(s);
}
/**
 * Writes 16 bits on two bytes.
 *
 * @param s the value to write
 */
@Override
public void encode16(short s) throws Exception {
buf.putShort(s);
}
/**
 * Writes a signed 32-bit value on four bytes.
 *
 * @param i the value to write
 */
@Override
public void encodeSignedInt(int i) throws Exception {
buf.putInt(i);
}
/**
 * Writes a 32-bit value on four bytes. The bits of {@code i} are written
 * unchanged; no sign handling is applied.
 *
 * @param i the value to write
 */
@Override
public void encodeUnsignedInt(int i) throws Exception {
buf.putInt(i);
}
/**
 * Writes 32 bits on four bytes.
 *
 * @param i the value to write
 */
@Override
public void encode32(int i) throws Exception {
buf.putInt(i);
}
/**
 * Writes a signed 64-bit value on eight bytes.
 *
 * @param l the value to write
 */
@Override
public void encodeSignedLong(long l) throws Exception {
buf.putLong(l);
}
/**
 * Writes a 64-bit value on eight bytes. The bits of {@code l} are written
 * unchanged; no sign handling is applied.
 *
 * @param l the value to write
 */
@Override
public void encodeUnsignedLong(long l) throws Exception {
buf.putLong(l);
}
/**
 * Writes 64 bits on eight bytes.
 *
 * @param l the value to write
 */
@Override
public void encode64(long l) throws Exception {
buf.putLong(l);
}
......@@ -86,6 +97,7 @@ public class ByteBufferEncoder implements Encoder {
}
}
@Override
public void encodeNullableString(String str) throws Exception {
encodeNullFlag(str);
if (str != null) {
......@@ -93,12 +105,14 @@ public class ByteBufferEncoder implements Encoder {
}
}
/**
 * Encodes a string as a 32-bit byte count followed by its bytes in
 * {@code EncodableHelper.charset}.
 * The prefix is the number of encoded bytes, not the number of characters,
 * so it stays correct for multi-byte charsets.
 *
 * @param str the string to encode, must not be null
 */
@Override
public void encodeString(String str) throws Exception {
  // Always encode with the configured charset; the platform default would
  // make the wire format depend on the JVM the encoder runs in.
  byte[] bytes = str.getBytes(EncodableHelper.charset);
  buf.putInt(bytes.length);
  buf.put(bytes);
}
@Override
public void encodeNullableByteArray(byte[] tab) throws Exception {
encodeNullFlag(tab);
if (tab != null) {
......@@ -106,29 +120,33 @@ public class ByteBufferEncoder implements Encoder {
}
}
/**
 * Encodes a byte array as a 32-bit length followed by its content.
 *
 * @param tab the array to write, must not be null
 */
@Override
public void encodeByteArray(byte[] tab) throws Exception {
  buf.putInt(tab.length).put(tab);
}
public void encodeNullableByteArray(byte[] tab, int offset, int length)
throws Exception {
@Override
public void encodeNullableByteArray(byte[] tab, int offset, int length) throws Exception {
encodeNullFlag(tab);
if (tab != null) {
encodeByteArray(tab, offset, length);
}
}
/**
 * Encodes a region of a byte array as a 32-bit length followed by the
 * {@code length} bytes starting at {@code offset}.
 *
 * @param tab the source array, must not be null
 * @param offset index of the first byte to write
 * @param length number of bytes to write
 */
@Override
public void encodeByteArray(byte[] tab, int offset, int length) throws Exception {
  buf.putInt(length).put(tab, offset, length);
}
/**
 * Writes a float on four bytes.
 *
 * @param f the value to write
 */
@Override
public void encodeFloat(float f) throws Exception {
buf.putFloat(f);
}
/**
 * Writes a double on eight bytes.
 *
 * @param d the value to write
 */
@Override
public void encodeDouble(double d) throws Exception {
buf.putDouble(d);
}
......
/*
* Copyright (C) 2013 - 2018 ScalAgent Distributed Technologies
* Copyright (C) 2013 - 2022 ScalAgent Distributed Technologies
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
......@@ -21,39 +21,82 @@
*/
package fr.dyade.aaa.common.encoding;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.Map.Entry;
import org.objectweb.util.monolog.api.BasicLevel;
import org.objectweb.util.monolog.api.Logger;
import fr.dyade.aaa.common.Debug;
import java.util.Properties;
import java.util.Set;
public class EncodableHelper {
import fr.dyade.aaa.common.Configuration;
public final class EncodableHelper {
static Logger logger = Debug.getLogger(EncodableHelper.class.getName());
// ############################################################
// Selection of the charset used to encode/decode String objects.
/** Name of the property giving the charset used to encode/decode String objects. */
public final static String ENCODING_CHARSET_PROPERTY = "fr.dyade.aaa.common.encoding.charset";
/** Name of the boolean property that, when true, selects the JVM default charset instead. */
public final static String ENCODING_USE_JVM_CHARSET_PROPERTY = "fr.dyade.aaa.common.encoding.useJVMcharset";
/** Charset name used when {@link #ENCODING_CHARSET_PROPERTY} is not set. */
public final static String ENCODING_CHARSET_DFLT = "UTF-8";
/** Charset used to encode/decode String objects; assigned once in the static initializer. */
final static Charset charset;
/** True when the charset's encoder may produce more than one byte per char. */
private final static boolean multibyte;
/** True when the selected charset is UTF-8, which allows a dedicated size computation. */
private final static boolean utf8;
// Chooses the String charset once, at class initialization, then derives the
// two flags used to compute encoded String sizes.
static {
if (Configuration.getBoolean(ENCODING_USE_JVM_CHARSET_PROPERTY)) {
// Explicit opt-in: use the JVM default charset.
charset = Charset.defaultCharset();
} else {
// Otherwise use the configured charset name, falling back to ENCODING_CHARSET_DFLT.
// NOTE(review): Charset.forName throws on an unknown or illegal name, which
// here would make class initialization fail - confirm this is intended.
charset = Charset.forName(Configuration.getProperty(ENCODING_CHARSET_PROPERTY, ENCODING_CHARSET_DFLT));
}
// When a char may need more than one byte, String.length() is not the encoded size.
multibyte = charset.newEncoder().maxBytesPerChar() > 1.0f;
utf8 = "UTF-8".equals(charset.name());
logger.log(BasicLevel.INFO,
"Encodable configuration: " + charset.displayName() + ", multibyte=" + multibyte + ", utf8=" + utf8);
}
//############################################################
/**
 * Returns the size of an encoded string: a 32-bit length prefix plus the
 * number of bytes the string occupies in the configured charset.
 * Strings are encoded as UTF-8 by default, so the size may be larger than
 * the number of characters.
 *
 * @param s the string to encode, must not be null
 * @return the size of the encoded string, in bytes
 */
public static final int getStringEncodedSize(String s) {
  // TODO (AF): Prevents a NPE, encodes an empty string if null
  if (!multibyte) {
    // Single-byte charset: one byte per char.
    // NOTE(review): a surrogate pair is two chars but is typically replaced by a
    // single '?' byte in such charsets, so this can over-estimate - confirm
    // that an upper bound is acceptable to callers.
    return Encodable.INT_ENCODED_SIZE + s.length();
  }
  if (utf8) {
    try {
      // Fast path: computes the UTF-8 length without allocating the byte array.
      return Encodable.INT_ENCODED_SIZE + Utf8.encodedLength(s);
    } catch (IllegalArgumentException exc) {
      // Utf8.encodedLength rejects unpaired surrogates, whereas String.getBytes
      // replaces them; fall through so the size matches what getBytes produces.
    }
  }
  // General case: measure the real encoding in the configured charset, not the
  // platform default one.
  return Encodable.INT_ENCODED_SIZE + s.getBytes(charset).length;
}
/**
 * Returns the size of an encoded string whose value may be null: one byte
 * for the null flag, plus the size of the encoded string when it is not null.
 * Strings are encoded as UTF-8 by default, so the size may be larger than
 * the number of characters.
 *
 * @param s the string to encode, may be null
 * @return the size of the encoded string, in bytes
 */
public static final int getNullableStringEncodedSize(String s) {
  int res = Encodable.BYTE_ENCODED_SIZE;
  if (s != null) {
    // Delegate so the charset-aware computation is used (and counted once).
    res += getStringEncodedSize(s);
  }
  return res;
}
/**
 * Returns the size of an encoded byte array: the integer length prefix plus
 * the number of bytes in the array.
 *
 * @param byteArray the array to encode, must not be null
 * @return the size of the encoded array
 */
public static final int getByteArrayEncodedSize(byte[] byteArray) {
return Encodable.INT_ENCODED_SIZE + byteArray.length;
}
......@@ -66,8 +109,7 @@ public class EncodableHelper {
return res;
}
public static final int getNullableByteArrayEncodedSize(byte[] byteArray,
int length) {
public static final int getNullableByteArrayEncodedSize(byte[] byteArray, int length) {
int res = Encodable.BYTE_ENCODED_SIZE;
if (byteArray != null) {
res += Encodable.INT_ENCODED_SIZE + length;
......@@ -75,6 +117,11 @@ public class EncodableHelper {
return res;
}
// ################################################################################
// Helper to encode java.util.Properties.
// Used in Destination and UserAgent for interceptors.
// ################################################################################
public static int getEncodedSize(Properties properties) throws Exception {
int res = Encodable.INT_ENCODED_SIZE;
Set<Entry<Object, Object>> entries = properties.entrySet();
......@@ -108,5 +155,4 @@ public class EncodableHelper {
}
return properties;
}
}
/*
* Copyright (C) 2013 - 2017 ScalAgent Distributed Technologies
* Copyright (C) 2013 - 2022 ScalAgent Distributed Technologies
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
......@@ -31,7 +31,7 @@ import java.io.Serializable;
* It also serves as a cache for the encoded string.
*/
@SuppressWarnings("serial")
public class EncodedString implements Serializable, Encodable {
public final class EncodedString implements Serializable, Encodable {
private String string;
private byte[] encodedString;
......@@ -47,27 +47,28 @@ public class EncodedString implements Serializable, Encodable {
return string;
}
// Note (AF): This method seems to be never used
/**
 * Writes the string to a stream as a 32-bit byte count followed by its
 * bytes in {@code EncodableHelper.charset}. The encoded form is computed
 * on first use and cached.
 *
 * @param os the stream to write to
 * @throws IOException if the stream cannot be written
 */
public void writeTo(DataOutputStream os) throws IOException {
  if (encodedString == null) {
    // Encode once with the configured charset, not the platform default.
    encodedString = string.getBytes(EncodableHelper.charset);
  }
  os.writeInt(encodedString.length);
  os.write(encodedString);
}
// Note (AF): This method seems to be never used
/**
 * Reads a string written as a 32-bit byte count followed by its bytes in
 * {@code EncodableHelper.charset}. Both the raw bytes and the decoded
 * string are kept.
 *
 * @param is the stream to read from
 * @throws IOException if the stream ends early or cannot be read
 */
public void readFrom(DataInputStream is) throws IOException {
  int length = is.readInt();
  encodedString = new byte[length];
  is.readFully(encodedString);
  // Decode with the configured charset, not the platform default.
  string = new String(encodedString, EncodableHelper.charset);
}
/**
 * Java serialization hook: writes only the string, using writeUTF; the
 * cached encoded bytes are not written.
 * NOTE(review): writeUTF fails when the modified-UTF-8 form of the string
 * exceeds 65535 bytes - confirm that such long strings cannot occur here.
 */
private void writeObject(java.io.ObjectOutputStream out) throws IOException {
out.writeUTF(string);
}
private void readObject(java.io.ObjectInputStream in) throws IOException,
ClassNotFoundException {
private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException {
string = in.readUTF();
}
......@@ -90,24 +91,28 @@ public class EncodedString implements Serializable, Encodable {
return string;
}
/**
 * Returns the class identifier registered for encoded strings.
 *
 * @return {@code EncodableFactoryRepository.ENCODED_STRING_CLASS_ID}
 */
@Override
public int getEncodableClassId() {
return EncodableFactoryRepository.ENCODED_STRING_CLASS_ID;
}
/**
 * Returns the number of bytes needed to encode this string: the length
 * prefix plus the encoded bytes.
 * The computation is delegated to {@code EncodableHelper} because the
 * character count alone under-estimates the size for multi-byte charsets.
 *
 * @return the encoded size, in bytes
 */
@Override
public int getEncodedSize() {
  return EncodableHelper.getStringEncodedSize(string);
}
/**
 * Encodes the string as a length-prefixed byte array. The bytes are
 * produced with {@code EncodableHelper.charset} on first use and cached.
 *
 * @param encoder the encoder to write to
 */
@Override
public void encode(Encoder encoder) throws Exception {
  if (encodedString == null) {
    // Encode once with the configured charset, not the platform default.
    encodedString = string.getBytes(EncodableHelper.charset);
  }
  encoder.encodeByteArray(encodedString);
}
/**
 * Decodes a length-prefixed byte array and converts it to a string with
 * {@code EncodableHelper.charset}. The raw bytes are kept as the cached
 * encoded form.
 *
 * @param decoder the decoder to read from
 */
@Override
public void decode(Decoder decoder) throws Exception {
  encodedString = decoder.decodeByteArray();
  // Decode with the configured charset, not the platform default.
  string = new String(encodedString, EncodableHelper.charset);
}
public static class Factory implements EncodableFactory {
......
/*
* Copyright (C) 2013 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*
* This code is copied from Guava common.base.Utf8 class, in order to avoid importing a large library
* for a very limited need.
* See: https://github.com/google/guava/blob/master/guava/src/com/google/common/base/Utf8.java
*/
package fr.dyade.aaa.common.encoding;
import static java.lang.Character.MAX_SURROGATE;
import static java.lang.Character.MIN_SURROGATE;
/**
 * Low-level, high-performance utility methods related to the
 * {@linkplain java.nio.charset.StandardCharsets#UTF_8 UTF-8} character encoding. UTF-8 is defined
 * in section D92 of <a href="http://www.unicode.org/versions/Unicode6.2.0/ch03.pdf">The Unicode
 * Standard Core Specification, Chapter 3</a>.
 *
 * <p>Only the length computation of the Guava class is kept here. Unlike
 * {@code String.getBytes}, which replaces ill-formed input, {@link #encodedLength} rejects
 * unpaired surrogates with an {@link IllegalArgumentException}.
 *
 * <p>Copied from Guava, where the class exists since version 16.0.
 *
 * @author Martin Buchholz
 * @author Clément Roux
 */
public final class Utf8 {
  /** Static utility class, not meant to be instantiated. */
  private Utf8() {
  }

  /**
   * Returns the number of bytes in the UTF-8-encoded form of {@code sequence}. For a well-formed
   * string, this method is equivalent to {@code string.getBytes(UTF_8).length}, but is more
   * efficient in both time and space.
   *
   * @param sequence the characters to measure
   * @return the number of bytes of the UTF-8 encoding of {@code sequence}
   * @throws IllegalArgumentException if {@code sequence} contains ill-formed UTF-16 (unpaired
   *     surrogates), or if the encoded length does not fit in an int
   */
  public static int encodedLength(CharSequence sequence) {
    // Warning to maintainers: this implementation is highly optimized.
    int utf16Length = sequence.length();
    // Every char needs at least one byte; the loops below only add the extra bytes.
    int utf8Length = utf16Length;
    int i = 0;

    // This loop optimizes for pure ASCII.
    while (i < utf16Length && sequence.charAt(i) < 0x80) {
      i++;
    }

    // This loop optimizes for chars less than 0x800.
    for (; i < utf16Length; i++) {
      char c = sequence.charAt(i);
      if (c < 0x800) {
        // Adds 1 when c > 0x7f (two-byte form), 0 otherwise.
        utf8Length += ((0x7f - c) >>> 31); // branch free!
      } else {
        // Three- and four-byte forms: hand the rest of the sequence to the general loop.
        utf8Length += encodedLengthGeneral(sequence, i);
        break;
      }
    }

    if (utf8Length < utf16Length) {
      // Necessary and sufficient condition for overflow because of maximum 3x expansion
      throw new IllegalArgumentException(
          "UTF-8 length does not fit in int: " + (utf8Length + (1L << 32)));
    }
    return utf8Length;
  }

  /**
   * Returns the number of extra bytes (beyond one per char) needed to encode {@code sequence}
   * from index {@code start} to its end.
   *
   * @throws IllegalArgumentException if an unpaired surrogate is found
   */
  private static int encodedLengthGeneral(CharSequence sequence, int start) {
    int utf16Length = sequence.length();
    int utf8Length = 0;
    for (int i = start; i < utf16Length; i++) {
      char c = sequence.charAt(i);
      if (c < 0x800) {
        utf8Length += (0x7f - c) >>> 31; // branch free!
      } else {
        // Three bytes for a BMP char; a surrogate pair is two chars for four bytes,
        // i.e. the same two extra bytes counted once for the pair.
        utf8Length += 2;
        // jdk7+: if (Character.isSurrogate(c)) {
        if (MIN_SURROGATE <= c && c <= MAX_SURROGATE) {
          // Check that we have a well-formed surrogate pair.
          if (Character.codePointAt(sequence, i) == c) {
            throw new IllegalArgumentException(unpairedSurrogateMsg(i));
          }
          // Skip the low surrogate, already accounted for.
          i++;
        }
      }
    }
    return utf8Length;
  }

  /** Builds the error message reporting an unpaired surrogate at index {@code i}. */
  private static String unpairedSurrogateMsg(int i) {
    return "Unpaired surrogate at index " + i;
  }
}
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment