Merge remote-tracking branch 'origin/GT-3347_dev747368_PR-1241_agatti_convert_char_seq'

2024-11-23 04:32:12 +00:00 · 2020-01-15 09:44:27 -05:00 · 2020-01-15 09:44:27 -05:00 · 84fda704b0
commit 84fda704b0
parent d2fc8cd6b5 3718edc935
20 changed files with 322 additions and 293 deletions
--- a/Ghidra/Features/Base/src/main/java/ghidra/app/plugin/core/codebrowser/hover/DataTypeListingHover.java
+++ b/Ghidra/Features/Base/src/main/java/ghidra/app/plugin/core/codebrowser/hover/DataTypeListingHover.java
@ -146,7 +146,7 @@ public class DataTypeListingHover extends AbstractDataTypeHover implements Listi
 				result += String.format("<br>Translated value: %s",
 					HTMLUtilities.friendlyEncodeHTML(sdi.getTranslatedValue()));
 			}
-			if (!sdi.isPascal() && !sdi.hasNullTerminator()) {
+			if (sdi.isMissingNullTerminator()) {
 				result += "<br>Missing NULL terminator.";
 			}
 			if (sdi.getStringLength() > dataInstance.getLength()) {
--- a/Ghidra/Features/Base/src/main/java/ghidra/app/plugin/core/equate/ConvertToCharAction.java
+++ b/Ghidra/Features/Base/src/main/java/ghidra/app/plugin/core/equate/ConvertToCharAction.java
@ -16,9 +16,10 @@
 package ghidra.app.plugin.core.equate;

 import ghidra.docking.settings.FormatSettingsDefinition;
+import ghidra.program.model.data.ByteDataType;
+import ghidra.program.model.data.StringDataInstance;
 import ghidra.program.model.listing.Program;
 import ghidra.program.model.scalar.Scalar;
-import ghidra.util.StringUtilities;

 public class ConvertToCharAction extends AbstractConvertAction {
 	public static final String ACTION_NAME = "Convert To Char";
@ -46,27 +47,9 @@ public class ConvertToCharAction extends AbstractConvertAction {

 	@Override
 	protected String convertToString(Program program, Scalar scalar, boolean isData) {
-		long value = scalar.getUnsignedValue();
-		if (value >= 0 && value <= 255) {
-			return StringUtilities.toQuotedString(new byte[] { (byte) value });
-		}
-
 		byte[] bytes = scalar.byteArrayValue();
-		if (!program.getMemory().isBigEndian()) {
-			// assume we want to see characters as they would appear
-			// if read from memory one byte at a time
-			reverseBytes(bytes);
-		}
-		return StringUtilities.toQuotedString(bytes);
-	}

-	private void reverseBytes(byte[] bytes) {
-		int n = bytes.length / 2;
-		int j = bytes.length - 1;
-		for (int i = 0; i < n; i++, j--) {
-			byte b = bytes[i];
-			bytes[i] = bytes[j];
-			bytes[j] = b;
-		}
+		return StringDataInstance.getCharRepresentation(ByteDataType.dataType, bytes, null,
+			program.getMemory().isBigEndian());
 	}
 }
--- a/Ghidra/Features/Base/src/main/java/ghidra/app/plugin/core/hover/AbstractScalarOperandHover.java
+++ b/Ghidra/Features/Base/src/main/java/ghidra/app/plugin/core/hover/AbstractScalarOperandHover.java
@ -221,7 +221,7 @@ public abstract class AbstractScalarOperandHover extends AbstractConfigurableHov

 	private boolean hasEncodingError(String s) {
 		return s.codePoints().anyMatch(
-			codePoint -> StringUtilities.isUnicodeReplacementCodePoint(codePoint));
+			codePoint -> codePoint == StringUtilities.UNICODE_REPLACEMENT);
 	}

 	private ByteMemBufferImpl getScalarOperandAsMemBuffer(Address addr, Scalar scalar,
--- a/Ghidra/Features/Base/src/main/java/ghidra/app/plugin/core/string/StringTableProvider.java
+++ b/Ghidra/Features/Base/src/main/java/ghidra/app/plugin/core/string/StringTableProvider.java
@ -568,8 +568,9 @@ public class StringTableProvider extends ComponentProviderAdapter implements Dom
 			FoundString foundString = stringModel.getRowObject(table.getSelectedRow());
 			MemBuffer membuf =
 				new DumbMemBufferImpl(currentProgram.getMemory(), foundString.getAddress());
-			StringDataInstance stringInstance = new StringDataInstance(foundString.getDataType(),
-				SettingsImpl.NO_SETTINGS, membuf, foundString.getLength());
+			StringDataInstance stringInstance =
+				StringDataInstance.getStringDataInstance(foundString.getDataType(), membuf,
+					SettingsImpl.NO_SETTINGS, foundString.getLength());
 			if (charOffset != 0) {
 				stringInstance = stringInstance.getCharOffcut(charOffset);
 			}
--- a/Ghidra/Features/Base/src/main/java/ghidra/app/plugin/core/strings/HasEncodingErrorColumnConstraint.java
+++ b/Ghidra/Features/Base/src/main/java/ghidra/app/plugin/core/strings/HasEncodingErrorColumnConstraint.java
@ -32,7 +32,7 @@ public class HasEncodingErrorColumnConstraint extends StringDataInstanceColumnCo
 	public boolean accepts(StringDataInstance value, TableFilterContext context) {
 		String s = value.getStringValue();
 		return (s != null) && s.chars().anyMatch(
-			codePoint -> StringUtilities.isUnicodeReplacementCodePoint(codePoint));
+			codePoint -> codePoint == StringUtilities.UNICODE_REPLACEMENT);
 	}

 	@Override
--- a/Ghidra/Features/Base/src/main/java/ghidra/app/plugin/core/strings/ViewStringsTableModel.java
+++ b/Ghidra/Features/Base/src/main/java/ghidra/app/plugin/core/strings/ViewStringsTableModel.java
@ -361,7 +361,7 @@ class ViewStringsTableModel extends AddressBasedTableModel<ProgramLocation> {
 			String s = StringDataInstance.getStringDataInstance(data).getStringValue();

 			return (s != null) && s.chars().anyMatch(
-				codePoint -> StringUtilities.isUnicodeReplacementCodePoint(codePoint));
+				codePoint -> codePoint == StringUtilities.UNICODE_REPLACEMENT);
 		}

 		@Override
--- a/Ghidra/Framework/Generic/src/main/java/ghidra/util/StringUtilities.java
+++ b/Ghidra/Framework/Generic/src/main/java/ghidra/util/StringUtilities.java
@ -1081,16 +1081,4 @@ public class StringUtilities {
 		}
 		return new String(new int[] { codePoint }, 0, 1);
 	}
-
-	/**
-	 * Returns true if the specified code point is the 'replacement' code point 0xFFFD,
-	 * which is used when decoding bytes into unicode chars and there was a bad or invalid
-	 * sequence that does not have a mapping. (ie. decoding byte char 0x80 as US-ASCII)
-	 *
-	 * @param codePoint to test
-	 * @return boolean true if the char is 0xFFFD (ie. UNICODE REPLACEMENT char)
-	 */
-	public static boolean isUnicodeReplacementCodePoint(int codePoint) {
-		return codePoint == UNICODE_REPLACEMENT;
-	}
 }
--- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/AbstractIntegerDataType.java
+++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/AbstractIntegerDataType.java
@ -18,10 +18,10 @@ package ghidra.program.model.data;
 import java.math.BigInteger;

 import ghidra.docking.settings.*;
-import ghidra.program.model.mem.ByteMemBufferImpl;
 import ghidra.program.model.mem.MemBuffer;
 import ghidra.program.model.scalar.Scalar;
 import ghidra.util.StringFormat;
+import utilities.util.ArrayUtilities;

 /**
 * Base type for integer data types such as {@link CharDataType chars}, {@link IntegerDataType ints},
@ -225,14 +225,13 @@ public abstract class AbstractIntegerDataType extends BuiltIn implements ArraySt
 			return "??";
 		}

-		boolean isBigEndian = ENDIAN.isBigEndian(settings, buf);
+		boolean isLE = !ENDIAN.isBigEndian(settings, buf);
+		if (isLE) {
+			bytes = ArrayUtilities.reverse(bytes);
+		}

-		if (!isBigEndian) {
-			byte[] flipped = new byte[size];
-			for (int i = 0; i < size; i++) {
-				flipped[i] = bytes[size - i - 1];
-			}
-			bytes = flipped;
+		if (getFormatSettingsDefinition().getFormat(settings) == FormatSettingsDefinition.CHAR) {
+			return StringDataInstance.getCharRepresentation(this, bytes, settings, !isLE);
 		}

 		return getRepresentation(new BigInteger(bytes), settings, 8 * length);
@ -240,12 +239,15 @@ public abstract class AbstractIntegerDataType extends BuiltIn implements ArraySt

 	/**
 	 * Get integer representation of the big-endian value.
+	 * <p>
+	 * Does not handle CHAR format, use {@link StringDataInstance#getCharRepresentation(DataType, byte[], Settings, boolean)}
+	 * 
 	 * @param bigInt BigInteger value with the appropriate sign
 	 * @param settings integer format settings (PADDING, FORMAT, etc.)
 	 * @param bitLength number of value bits to be used from bigInt
 	 * @return formatted integer string
 	 */
-	public String getRepresentation(BigInteger bigInt, Settings settings, int bitLength) {
+	/*package*/ String getRepresentation(BigInteger bigInt, Settings settings, int bitLength) {

 		int format = getFormatSettingsDefinition().getFormat(settings);
 		boolean padded = PADDING.isPadded(settings);
@ -257,29 +259,8 @@ public abstract class AbstractIntegerDataType extends BuiltIn implements ArraySt
 			bigInt = bigInt.add(BigInteger.valueOf(2).pow(bitLength));
 		}

-		int nominalLen;
-
-		if (format == FormatSettingsDefinition.CHAR) {
-			int charSize = Math.min(getDataOrganization().getCharSize(), getLength());
-			nominalLen = (bitLength + 7) / 8;
-			byte[] bytes = bigInt.toByteArray();
-			if (bytes.length > nominalLen) {
-				// BigInteger supplied too many bytes
-				byte[] chars = new byte[nominalLen];
-				System.arraycopy(bytes, bytes.length - nominalLen, chars, 0, nominalLen);
-				bytes = chars;
-			}
-			else if (bytes.length < nominalLen) {
-				// BigInteger supplied too few bytes
-				byte[] chars = new byte[nominalLen];
-				System.arraycopy(bytes, 0, chars, nominalLen - bytes.length, bytes.length);
-				bytes = chars;
-			}
-			MemBuffer memBuf = new ByteMemBufferImpl(null, bytes, true);
-			return new StringDataInstance(this, settings, memBuf, charSize).getCharRepresentation();
-		}
-
 		String valStr;
+		int nominalLen;

 		switch (format) {
 			default:
@ -316,7 +297,7 @@ public abstract class AbstractIntegerDataType extends BuiltIn implements ArraySt
 	public String getArrayDefaultLabelPrefix(MemBuffer buf, Settings settings, int len,
 			DataTypeDisplayOptions options) {
 		if (hasStringValue(settings) && buf.isInitializedMemory()) {
-			return new StringDataInstance(this, settings, buf, len).getLabel(
+			return new StringDataInstance(this, settings, buf, len, true).getLabel(
 				AbstractStringDataType.DEFAULT_ABBREV_PREFIX + "_",
 				AbstractStringDataType.DEFAULT_LABEL_PREFIX, AbstractStringDataType.DEFAULT_LABEL,
 				options);
@ -328,7 +309,7 @@ public abstract class AbstractIntegerDataType extends BuiltIn implements ArraySt
 	public String getArrayDefaultOffcutLabelPrefix(MemBuffer buf, Settings settings, int len,
 			DataTypeDisplayOptions options, int offcutOffset) {
 		if (hasStringValue(settings) && buf.isInitializedMemory()) {
-			return new StringDataInstance(this, settings, buf, len).getOffcutLabelString(
+			return new StringDataInstance(this, settings, buf, len, true).getOffcutLabelString(
 				AbstractStringDataType.DEFAULT_ABBREV_PREFIX + "_",
 				AbstractStringDataType.DEFAULT_LABEL_PREFIX, AbstractStringDataType.DEFAULT_LABEL,
 				options, offcutOffset);
--- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/AbstractStringDataType.java
+++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/AbstractStringDataType.java
@ -15,9 +15,9 @@
 */
 package ghidra.program.model.data;

-import static ghidra.program.model.data.CharsetSettingsDefinition.CHARSET;
-import static ghidra.program.model.data.RenderUnicodeSettingsDefinition.RENDER;
-import static ghidra.program.model.data.TranslationSettingsDefinition.TRANSLATION;
+import static ghidra.program.model.data.CharsetSettingsDefinition.*;
+import static ghidra.program.model.data.RenderUnicodeSettingsDefinition.*;
+import static ghidra.program.model.data.TranslationSettingsDefinition.*;

 import ghidra.docking.settings.*;
 import ghidra.program.model.mem.MemBuffer;
@ -28,7 +28,7 @@ import ghidra.program.model.mem.MemBuffer;
 * See {@link StringDataType} for information about string variations and configuration details.
 * <p>
 * Sub-classes generally only need to implement a constructor that calls the mega-constructor
- * {@link #AbstractStringDataType(String, String, String, String, String, String, String, DataType, StringLayoutEnum, DataTypeManager) AbstractStringDataType.SAbstractStringDataType(lots,of,params)}
+ * {@link #AbstractStringDataType(String, String, String, String, String, String, String, DataType, StringLayoutEnum, DataTypeManager) AbstractStringDataType.AbstractStringDataType(lots,of,params)}
 * and the {@link DataType#clone(DataTypeManager) } method.
 * <p>
 *
--- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/Array.java
+++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/Array.java
@ -105,9 +105,9 @@ public interface Array extends DataType {
 		ArrayStringable stringableElementType = ArrayStringable.getArrayStringable(getDataType());
 		String value =
 			(stringableElementType != null && stringableElementType.hasStringValue(settings))
-				? new StringDataInstance(stringableElementType, settings, buf,
-					length).getStringRepresentation()
-				: null;
+					? new StringDataInstance(stringableElementType, settings, buf, length,
+						true).getStringRepresentation()
+					: null;
 		return (value != null) ? value : "";
 	}

--- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/ArrayStringable.java
+++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/ArrayStringable.java
@ -47,7 +47,7 @@ public interface ArrayStringable extends DataType {
 	 */
 	public default String getArrayString(MemBuffer buf, Settings settings, int length) {
 		if (hasStringValue(settings) && buf.isInitializedMemory()) {
-			return new StringDataInstance(this, settings, buf, length).getStringValue();
+			return new StringDataInstance(this, settings, buf, length, true).getStringValue();
 		}
 		return null;
 	}
--- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/BitFieldDataType.java
+++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/BitFieldDataType.java
@ -17,11 +17,12 @@ package ghidra.program.model.data;

 import java.math.BigInteger;

-import ghidra.docking.settings.Settings;
-import ghidra.docking.settings.SettingsDefinition;
+import ghidra.docking.settings.*;
 import ghidra.program.model.mem.MemBuffer;
 import ghidra.program.model.scalar.Scalar;
+import ghidra.util.DataConverter;
 import ghidra.util.exception.AssertException;
+import utilities.util.ArrayUtilities;

 /**
 * <code>BitFieldDataType</code> provides a means of defining a minimally sized bit-field
@ -406,7 +407,23 @@ public class BitFieldDataType extends AbstractDataType {
 		if (dt instanceof Enum) {
 			return ((Enum) dt).getRepresentation(big, settings, effectiveBitSize);
 		}
-		return ((AbstractIntegerDataType) dt).getRepresentation(big, settings, effectiveBitSize);
+		AbstractIntegerDataType intDT = (AbstractIntegerDataType) dt;
+		if (intDT.getFormatSettingsDefinition().getFormat(
+			settings) == FormatSettingsDefinition.CHAR) {
+			if (big.signum() < 0) {
+				big = big.add(BigInteger.valueOf(2).pow(bitSize));
+			}
+			int bytesLen = BitFieldDataType.getMinimumStorageSize(bitSize);
+			byte[] bytes = DataConverter.getInstance(buf.isBigEndian()).getBytes(big, bytesLen);
+			if (!EndianSettingsDefinition.ENDIAN.isBigEndian(settings, buf)) {
+				bytes = ArrayUtilities.reverse(bytes);
+			}
+
+			return StringDataInstance.getCharRepresentation(this, bytes, settings,
+				buf.isBigEndian());
+		}
+
+		return intDT.getRepresentation(big, settings, effectiveBitSize);
 	}

 	@Override
--- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/StringDataInstance.java
+++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/StringDataInstance.java
@ -65,6 +65,12 @@ public class StringDataInstance {
 		return false;
 	}

+	/**
+	 * Returns true if the {@link Data} instance is one of the many 'char' data types.
+	 * 
+	 * @param data {@link Data} instance to test, null ok
+	 * @return boolean true if char data 
+	 */
 	public static boolean isChar(Data data) {
 		if (data == null) {
 			return false;
@ -74,6 +80,25 @@ public class StringDataInstance {
 			(dt instanceof WideChar16DataType) || (dt instanceof WideChar32DataType);
 	}

+	/**
+	 * Returns a string representation of the character(s) contained in the byte array, suitable
+	 * for display as a single character, or as a sequence of characters.
+	 * <p>
+	 * 
+	 * @param dataType the {@link DataType} of the element containing the bytes (most likely a ByteDataType)
+	 * @param bytes the bytes to convert
+	 * @param settings the {@link Settings} object for the location where the bytes came from, or null
+	 * @param isBigEndian boolean flag indicating data is big endian
+	 * @return formatted string (typically with quotes around the contents): single character: 'a', multiple characters: "a\x12bc"
+	 */
+	public static String getCharRepresentation(DataType dataType, byte[] bytes, Settings settings,
+			boolean isBigEndian) {
+		MemBuffer memBuf = new ByteMemBufferImpl(null, bytes, isBigEndian);
+		StringDataInstance sdi =
+			new StringDataInstance(dataType, settings, memBuf, bytes.length);
+		return sdi.getCharRepresentation();
+	}
+
 	/**
 	 * Returns a new {@link StringDataInstance} using the bytes in the data codeunit.
 	 * <p>
@ -93,7 +118,7 @@ public class StringDataInstance {
 			ArrayStringable arrayStringable =
 				ArrayStringable.getArrayStringable(((Array) dt).getDataType());
 			if (arrayStringable != null && arrayStringable.hasStringValue(data)) {
-				return new StringDataInstance(arrayStringable, data, data, data.getLength());
+				return new StringDataInstance(arrayStringable, data, data, data.getLength(), true);
 			}
 		}
 		return NULL_INSTANCE;
@ -114,13 +139,15 @@ public class StringDataInstance {
 		if (dataType instanceof AbstractStringDataType) {
 			return ((AbstractStringDataType) dataType).getStringDataInstance(buf, settings, length);
 		}
-		if (dataType instanceof Array) {
+		boolean isArray = dataType instanceof Array;
+		if (isArray) {
 			dataType = ArrayStringable.getArrayStringable(((Array) dataType).getDataType());
 		}
 		if (dataType instanceof ArrayStringable &&
 			((ArrayStringable) dataType).hasStringValue(settings) && buf.isInitializedMemory()) {

-			return new StringDataInstance(dataType, settings, buf, length);
+			// this could be either a charsequence or an array of char elements
+			return new StringDataInstance(dataType, settings, buf, length, isArray);
 		}
 		return NULL_INSTANCE;
 	}
@ -188,6 +215,26 @@ public class StringDataInstance {
 	 * of the containing field of the data instance.
 	 */
 	public StringDataInstance(DataType dataType, Settings settings, MemBuffer buf, int length) {
+		this(dataType, settings, buf, length, false);
+	}
+
+	/**
+	 * Creates a string instance using the data in the {@link MemBuffer} and the settings
+	 * pulled from the {@link AbstractStringDataType string data type}.
+	 * 
+	 * @param dataType {@link DataType} of the string, either a {@link AbstractStringDataType} derived type
+	 * or an {@link ArrayStringable} element-of-char-array type. 
+	 * @param settings {@link Settings} attached to the data location.
+	 * @param buf {@link MemBuffer} containing the data.
+	 * @param length Length passed from the caller to the datatype.  -1 indicates a 'probe'
+	 * trying to detect the length of an unknown string, otherwise it will be the length
+	 * of the containing field of the data instance.
+	 * @param isArrayElement boolean flag, true indicates that the specified dataType is an
+	 * element in an array (ie. char[] vs. just a plain char), causing the string layout
+	 * to be forced to {@link StringLayoutEnum#NULL_TERMINATED_BOUNDED}
+	 */
+	public StringDataInstance(DataType dataType, Settings settings, MemBuffer buf, int length,
+			boolean isArrayElement) {
 		settings = (settings == null) ? SettingsImpl.NO_SETTINGS : settings;
 		this.buf = buf;
 		this.charsetName = getCharsetNameFromDataTypeOrSettings(dataType, settings);
@ -196,7 +243,9 @@ public class StringDataInstance {
 		this.paddedCharSize = (dataType instanceof ArrayStringable) && (charSize == 1) //
 				? getDataOrganization(dataType).getCharSize()
 				: charSize;
-		this.stringLayout = getLayoutFromDataType(dataType);
+		this.stringLayout = isArrayElement //
+				? StringLayoutEnum.NULL_TERMINATED_BOUNDED
+				: getLayoutFromDataType(dataType);
 		this.showTranslation = TRANSLATION.isShowTranslated(settings);
 		this.translatedValue = TRANSLATION.getTranslatedValue(settings);
 		this.renderSetting = RENDER.getEnumValue(settings);
@ -206,11 +255,11 @@ public class StringDataInstance {
 	}

 	private StringDataInstance(StringDataInstance copyFrom, StringLayoutEnum newLayout,
-			MemBuffer newBuf, int newLen) {
+			MemBuffer newBuf, int newLen, String newCharsetName) {
 		this.charSize = copyFrom.charSize;
 		this.paddedCharSize = copyFrom.paddedCharSize;
 		this.translatedValue = null;
-		this.charsetName = copyFrom.charsetName;
+		this.charsetName = newCharsetName;
 		this.stringLayout = newLayout;
 		this.showTranslation = false;
 		this.renderSetting = copyFrom.renderSetting;
@ -234,11 +283,17 @@ public class StringDataInstance {
 		if (dataType instanceof AbstractStringDataType) {
 			return ((AbstractStringDataType) dataType).getStringLayout();
 		}
+		if (dataType instanceof AbstractIntegerDataType || dataType instanceof BitFieldDataType) {
+			return StringLayoutEnum.CHAR_SEQ;
+		}
 		return StringLayoutEnum.NULL_TERMINATED_BOUNDED;
 	}

 	private static String getCharsetNameFromDataTypeOrSettings(DataType dataType,
 			Settings settings) {
+		if (dataType instanceof BitFieldDataType) {
+			dataType = ((BitFieldDataType) dataType).getBaseDataType();
+		}
 		return (dataType instanceof DataTypeWithCharset)
 				? ((DataTypeWithCharset) dataType).getCharsetName(settings)
 				: DEFAULT_CHARSET_NAME;
@ -272,12 +327,7 @@ public class StringDataInstance {
 	}

 	private boolean isAlreadyDeterminedFixedLen() {
-		return length >= 0 && (stringLayout == StringLayoutEnum.FIXED_LEN);
-	}
-
-	public boolean isPascal() {
-		return stringLayout == StringLayoutEnum.PASCAL_255 ||
-			stringLayout == StringLayoutEnum.PASCAL_64k;
+		return length >= 0 && stringLayout.isFixedLen();
 	}

 	/**
@ -357,16 +407,16 @@ public class StringDataInstance {
 	}

 	/**
-	 * Returns true if the string has a trailing NULL character within the data instance's
-	 * bounds.
+	 * Returns true if the string should have a trailing NULL character and doesn't.
 	 *
-	 * @return boolean true if there is a trailing NULL character.
+	 * @return boolean true if the trailing NULL character is missing, false if string type
+	 * doesn't need a trailing NULL character or if it is present.
 	 */
-	public boolean hasNullTerminator() {
+	public boolean isMissingNullTerminator() {

-		if (!isPascal()) {
+		if (stringLayout.shouldTrimTrailingNulls()) {
 			String str = getStringValueNoTrim();
-			return (str != null) && (str.length() > 0) && str.charAt(str.length() - 1) == 0;
+			return (str != null) && (str.length() > 0) && str.charAt(str.length() - 1) != 0;
 		}
 		return false;
 	}
@ -422,7 +472,7 @@ public class StringDataInstance {
 	public String getStringValue() {
 		String str = getStringValueNoTrim();

-		return (str != null) && !isPascal() ? trimNulls(str) : str;
+		return (str != null) && stringLayout.shouldTrimTrailingNulls() ? trimNulls(str) : str;
 	}

 	private String getStringValueNoTrim() {
@ -440,7 +490,7 @@ public class StringDataInstance {
 	}

 	private byte[] getStringBytes() {
-		return isPascal() ? getPascalCharBytes() : getNormalStringCharBytes();
+		return stringLayout.isPascal() ? getPascalCharBytes() : getNormalStringCharBytes();
 	}

 	private byte[] getNormalStringCharBytes() {
@ -570,11 +620,6 @@ public class StringDataInstance {
 		return result;
 	}

-	private byte[] convertStringToBytes(String s, AdjustedCharsetInfo aci) {
-		Charset cs = Charset.isSupported(aci.charsetName) ? Charset.forName(aci.charsetName) : null;
-		return (cs != null) ? s.getBytes(cs) : null;
-	}
-
 	private static DataConverter getDataConverter(Endian endian) {
 		return endian == Endian.BIG ? BigEndianDataConverter.INSTANCE
 				: LittleEndianDataConverter.INSTANCE;
@ -628,6 +673,10 @@ public class StringDataInstance {
 	 * @return formatted String
 	 */
 	public String getStringRepresentation() {
+		return getStringRep(StringRenderBuilder.DOUBLE_QUOTE, StringRenderBuilder.DOUBLE_QUOTE);
+	}
+
+	private String getStringRep(char quoteChar, char quoteCharMulti) {

 		if (isProbe() || isBadCharSize() || !buf.isInitializedMemory()) {
 			return UNKNOWN;
@ -647,15 +696,23 @@ public class StringDataInstance {
 			return UNKNOWN_DOT_DOT_DOT;
 		}

+		if (stringValue.length() == 0 && aci.byteStartOffset != 0) {
+			// If the byteStartOffset isn't zero it means there was one char that was the unicode BOM.
+			// Asking the Charset to decode it returned nothing, so force it.
+			stringValue = BOM_RESULT_STR;
+		}
+
 		// if we get the same number of characters out that we put into the decoder,
 		// then its a good chance there is a one-to-one correspondence between original char
-		// and decoded char.
-		boolean canRecoverOriginalCharBytes =
-			(stringValue.length() - aci.byteStartOffset) == (stringBytes.length / charSize);
+		// offsets and decoded char offsets.
+		boolean isByteToStringCharEquiv =
+			stringValue.length() == ((stringBytes.length - aci.byteStartOffset) / charSize);

-		StringRenderBuilder strBuf = new StringRenderBuilder(charSize);
+		stringValue = stringLayout.shouldTrimTrailingNulls() ? trimNulls(stringValue) : stringValue;
+
+		StringRenderBuilder strBuf = new StringRenderBuilder(charSize,
+			stringValue.length() == 1 ? quoteChar : quoteCharMulti);

-		stringValue = !isPascal() ? trimNulls(stringValue) : stringValue;
 		if (stringValue.isEmpty() || (stringValue.length() == 1 && stringValue.charAt(0) == 0)) {
 			// force the string renderer into "string" mode so we get empty quotes when done.
 			strBuf.addString("");
@ -664,16 +721,13 @@ public class StringDataInstance {
 		// For each 32bit character in the java string try to add it to the StringRenderBuilder
 		for (int i = 0, strLength = stringValue.length(); i < strLength;) {
 			int codePoint = stringValue.codePointAt(i);
-			byte[] originalCharBytes;
-			if (canRecoverOriginalCharBytes) {
-				originalCharBytes = new byte[charSize];
-				System.arraycopy(stringBytes, i * charSize, originalCharBytes, 0, charSize);
-			}
-			else {
-				// can't get original bytes, cheat and run the codePoint through the charset
-				// to get what should be the same as the original bytes.
-				String singleCharStr = new String(new int[] { codePoint }, 0, 1);
-				originalCharBytes = convertStringToBytes(singleCharStr, aci);
+
+			RENDER_ENUM currentCharRenderSetting = renderSetting;
+			if (codePoint == StringUtilities.UNICODE_REPLACEMENT && isByteToStringCharEquiv &&
+				!isReplacementCharAt(stringBytes, i * charSize + aci.byteStartOffset)) {
+				// if this is a true decode error and we can recover the original bytes,
+				// then force the render mode to byte seq.
+				currentCharRenderSetting = RENDER_ENUM.BYTE_SEQ;
 			}

 			if (StringUtilities.isControlCharacterOrBackslash(codePoint)) {
@ -685,41 +739,30 @@ public class StringDataInstance {
 			else if (StringUtilities.isDisplayable(codePoint)) {
 				strBuf.addCodePointChar(codePoint);
 			}
-			else if (StringUtilities.isUnicodeReplacementCodePoint(codePoint)) {
-				// if this is a true decode error and we can recover the original bytes
-				// render as byte seq.
-				// Otherwise, display the <?> symbol.
-				if (canRecoverOriginalCharBytes &&
-					isMismatchedCharBytes(originalCharBytes, codePoint)) {
-					strBuf.addByteSeq(originalCharBytes);
-				}
-				else {
-					strBuf.addCodePointChar(codePoint);
-				}
-			}
 			else {
 				// not simple ascii, decide how to handle:
 				// add the character to the string in a format depending on the
 				// render settings.  ISO control chars are forced to be
 				// escaped regardless of the render setting.
-				RENDER_ENUM thisCharRenderSetting = renderSetting;
-				if (thisCharRenderSetting == RENDER_ENUM.ALL) {
+				if (currentCharRenderSetting == RENDER_ENUM.ALL) {
 					if (codePoint <= 0x7f) {
 						// render non-displayable, non-control-char ascii-ish bytes as bytes instead
 						// of as escape sequences
-						thisCharRenderSetting = RENDER_ENUM.BYTE_SEQ;
+						currentCharRenderSetting = RENDER_ENUM.BYTE_SEQ;
 					}
-					else if (Character.isISOControl(codePoint) || !Character.isDefined(codePoint)) {
-						thisCharRenderSetting = RENDER_ENUM.ESC_SEQ;
+					else if (Character.isISOControl(codePoint) || !Character.isDefined(codePoint) ||
+						codePoint == StringUtilities.UNICODE_BE_BYTE_ORDER_MARK) {
+						currentCharRenderSetting = RENDER_ENUM.ESC_SEQ;
 					}
 				}

-				switch (thisCharRenderSetting) {
+				switch (currentCharRenderSetting) {
 					case ALL:
 						strBuf.addCodePointChar(codePoint);
 						break;
 					case BYTE_SEQ:
-						strBuf.addByteSeq(originalCharBytes);
+						strBuf.addByteSeq(getOriginalBytes(isByteToStringCharEquiv, i, codePoint,
+							stringBytes, aci));
 						break;
 					case ESC_SEQ:
 						strBuf.addEscapedCodePoint(codePoint);
@ -728,9 +771,8 @@ public class StringDataInstance {
 			}
 			i += Character.charCount(codePoint);
 		}
-		String result = strBuf.toString();
 		String prefix = "";
-		if (charsetName.startsWith("UTF") && result.startsWith("\"")) {
+		if (charsetName.startsWith("UTF") && strBuf.startsWithQuotedText()) {
 			switch (charSize) {
 				case 1:
 					prefix = "u8";
@ -743,7 +785,27 @@ public class StringDataInstance {
 					break;
 			}
 		}
-		return prefix + result;
+		return prefix + strBuf.toString();
+	}
+
+	private byte[] getOriginalBytes(boolean isByteToStringCharEquiv, int charOffset, int codePoint,
+			byte[] stringBytes, AdjustedCharsetInfo aci) {
+
+		if (isByteToStringCharEquiv) {
+			byte[] originalCharBytes = new byte[charSize];
+			System.arraycopy(stringBytes, charOffset * charSize + aci.byteStartOffset,
+				originalCharBytes, 0, charSize);
+			return originalCharBytes;
+		}
+
+		// can't get original bytes, cheat and run the codePoint through the charset
+		// to get what should be the same as the original bytes.
+		String singleCharStr = new String(new int[] { codePoint }, 0, 1);
+		Charset cs = Charset.isSupported(aci.charsetName) ? Charset.forName(aci.charsetName) : null;
+		if (cs == null || !cs.canEncode()) {
+			return null;
+		}
+		return singleCharStr.getBytes(cs);
 	}

 	/**
@ -782,105 +844,35 @@ public class StringDataInstance {
 	}

 	/**
-	 * Convert a char value in memory into its canonical unicode representation, using
+	 * Convert a char value (or sequence of char values) in memory into its canonical unicode representation, using
 	 * attached charset and encoding information.
 	 * <p>
-	 * This implementation treats the char value as a 1 element long string and reuses the string
-	 * logic to read it from memory using charset info.
 	 *
-	 * @return String containing the representation of the single char.
+	 * @return String containing the representation of the char.
 	 */
 	public String getCharRepresentation() {
 		if (length < charSize /* also covers case of isProbe() */ ) {
 			return UNKNOWN_DOT_DOT_DOT;
 		}

-		byte[] charBytes = convertPaddedToUnpadded(getStringBytes());
-		if (charBytes == null) {
-			return UNKNOWN_DOT_DOT_DOT;
-		}
+		// Intended: if charSize exceeds the available bytes, discard the charset and fall back to US-ASCII.
+		// NOTE(review): the guard above already returns when length < charSize, so this fallback is never taken.
+		String newCSName = (length < charSize) ? DEFAULT_CHARSET_NAME : charsetName;

-		AdjustedCharsetInfo aci = getAdjustedCharsetInfo(charBytes);
-		String stringValue = convertBytesToString(charBytes, aci);
-		if (stringValue == null) {
-			return UNKNOWN_DOT_DOT_DOT;
-		}
+		StringDataInstance charseqSDI =
+			new StringDataInstance(this, StringLayoutEnum.CHAR_SEQ, buf, length, newCSName);

-		if (stringValue.length() == 0) {
-			if (aci.byteStartOffset == 0) {
-				return UNKNOWN;
-			}
-
-			// If the byteStartOffset isn't zero it means the char was the unicode BOM.
-			// Asking the Charset to decode it returned nothing, so force it.
-			stringValue = BOM_RESULT_STR;
-		}
-
-		int codePoint = stringValue.codePointAt(0);
-		RENDER_ENUM tmpRenderSetting = renderSetting;
-
-		StringRenderBuilder strBuf =
-			new StringRenderBuilder(charSize, StringRenderBuilder.SINGLE_QUOTE);
-		if (StringUtilities.isControlCharacterOrBackslash(codePoint)) {
-			strBuf.addString(StringUtilities.convertCodePointToEscapeSequence(codePoint));
-		}
-		else if (codePoint == 0x0000 && renderSetting != RENDER_ENUM.BYTE_SEQ) {
-			strBuf.addEscapedChar('0');
-		}
-		else if (StringUtilities.isUnicodeReplacementCodePoint(codePoint) &&
-			renderSetting != RENDER_ENUM.BYTE_SEQ) {
-			strBuf.addCodePointChar(codePoint);
-		}
-		else if (StringUtilities.isDisplayable(codePoint)) {
-			strBuf.addCodePointChar(codePoint);
-		}
-		else {
-			// not simple ascii, decide how to handle:
-			// add the character to the string in a format depending on the
-			// render settings.  ISO control chars are forced to be
-			// escaped regardless of the render setting.
-			boolean alwaysNeedsEscaping = (renderSetting == RENDER_ENUM.ALL) &&
-				(Character.isISOControl(codePoint) || !Character.isDefined(codePoint) ||
-					codePoint == StringUtilities.UNICODE_BE_BYTE_ORDER_MARK);
-
-			tmpRenderSetting = alwaysNeedsEscaping ? RENDER_ENUM.ESC_SEQ : renderSetting;
-
-			switch (tmpRenderSetting) {
-				case ALL:
-					strBuf.addCodePointChar(codePoint);
-					break;
-				case ESC_SEQ:
-					strBuf.addEscapedCodePoint(codePoint);
-					break;
-				case BYTE_SEQ:
-					// BYTE_SEQ uses original bytes of char data, not the bytes produced by the charset
-					strBuf.addByteSeq(charBytes);
-					break;
-			}
-
-		}
-		String prefix = "";
-		if (charsetName.startsWith("UTF") && tmpRenderSetting != RENDER_ENUM.BYTE_SEQ) {
-			switch (charSize) {
-				case 1:
-					prefix = "u8";
-					break;
-				case 2:
-					prefix = "u";
-					break;
-				case 4:
-					prefix = "U";
-					break;
-			}
-		}
-
-		return prefix + strBuf.toString();
+		return charseqSDI.getStringRep(StringRenderBuilder.SINGLE_QUOTE,
+			StringRenderBuilder.DOUBLE_QUOTE);
 	}

-	private boolean isMismatchedCharBytes(byte[] originalCharBytes, int codePoint) {
-		long originalValue = DataConverter.getInstance(buf.isBigEndian()).getValue(
-			originalCharBytes, originalCharBytes.length);
-		return originalValue != codePoint;
+	private boolean isReplacementCharAt(byte[] stringBytes, int byteOffset) {
+		if (byteOffset + charSize > stringBytes.length) {
+			return false;
+		}
+		long origCodePointValue = DataConverter.getInstance(buf.isBigEndian()).getValue(stringBytes,
+			byteOffset, charSize);
+		return origCodePointValue == StringUtilities.UNICODE_REPLACEMENT;
 	}

 	private static String getTranslatedStringRepresentation(String translatedString) {
@ -952,7 +944,7 @@ public class StringDataInstance {
 		}
 		int newLength = Math.max(0, length - byteOffset);
 		StringDataInstance sub = new StringDataInstance(this, getOffcutLayout(),
-			new WrappedMemBuffer(buf, byteOffset), newLength);
+			new WrappedMemBuffer(buf, byteOffset), newLength, charsetName);

 		return sub;
 	}
--- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/StringLayoutEnum.java
+++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/StringLayoutEnum.java
@ -18,19 +18,39 @@ package ghidra.program.model.data;
 /**
 * Controls strings termination
 * <ul>
- * <li>{@link StringLayoutEnum#FIXED_LEN} (ie. fixed length, trailing nulls trimmed, interior nulls retained)
- * <li>{@link StringLayoutEnum#NULL_TERMINATED_UNBOUNDED} (ie. null terminated and ignores data instance length)
- * <li>{@link StringLayoutEnum#NULL_TERMINATED_BOUNDED} (ie. null-terminated and limited to data instance)
- * <li>{@link StringLayoutEnum#PASCAL_255} (ie. pascal string, using 1 byte for length field, max 255 char elements)
- * <li>{@link StringLayoutEnum#PASCAL_64k} (ie. pascal string, using 2 bytes for length field, max 64k char elements)
+ * <li>{@link StringLayoutEnum#FIXED_LEN}
+ * <li>{@link StringLayoutEnum#CHAR_SEQ}
+ * <li>{@link StringLayoutEnum#NULL_TERMINATED_UNBOUNDED}
+ * <li>{@link StringLayoutEnum#NULL_TERMINATED_BOUNDED}
+ * <li>{@link StringLayoutEnum#PASCAL_255}
+ * <li>{@link StringLayoutEnum#PASCAL_64k}
 * </ul>
 */
 public enum StringLayoutEnum {
+	/**
+	 * Fixed length string, trailing nulls trimmed, interior nulls retained.
+	 */
 	FIXED_LEN("fixed length"),
+	/**
+	 * Fixed length sequence of characters, all nulls retained.
+	 */
+	CHAR_SEQ("char sequence"),
+	/**
+	 * Null-terminated string that ignores its container's length when searching for terminating null character.
+	 */
 	NULL_TERMINATED_UNBOUNDED("null-terminated & unbounded"),
+	/**
+	 * Null-terminated string that is limited to its container's length.
+	 */
 	NULL_TERMINATED_BOUNDED("null-terminated & bounded"),
-	PASCAL_255("pascal255"), // prefixed with 1 byte length field which stores number of chars (not bytes) in string
-	PASCAL_64k("pascal64k");// prefixed with 2 byte length field which stores number of chars (not bytes) in string
+	/**
+	 * Pascal string, using 1 byte for length field, max 255 char elements.
+	 */
+	PASCAL_255("pascal255"),
+	/**
+	 * Pascal string, using 2 bytes for length field, max 64k char elements.
+	 */
+	PASCAL_64k("pascal64k");

 	private final String s;

@ -43,13 +63,42 @@ public enum StringLayoutEnum {
 		return s;
 	}

+	/**
+	 * Returns true if this layout is one of the pascal types.
+	 * 
+	 * @return boolean true if pascal
+	 */
 	public boolean isPascal() {
 		return this == PASCAL_255 || this == PASCAL_64k;
 	}

+	/**
+	 * Returns true if this layout is one of the null terminated types.
+	 * 
+	 * @return boolean true if null terminated string
+	 */
 	public boolean isNullTerminated() {
 		return this == NULL_TERMINATED_UNBOUNDED ||
-			this == StringLayoutEnum.NULL_TERMINATED_BOUNDED;
+			this == NULL_TERMINATED_BOUNDED;
+	}
+
+	/**
+	 * Returns true if this layout should have its trailing null characters trimmed.
+	 * 
+	 * @return boolean true if trailing nulls should be trimmed
+	 */
+	public boolean shouldTrimTrailingNulls() {
+		return this == NULL_TERMINATED_UNBOUNDED || this == NULL_TERMINATED_BOUNDED ||
+			this == FIXED_LEN;
+	}
+
+	/**
+	 * Returns true if this layout is one of the fixed-size types.
+	 * 
+	 * @return boolean true if fixed length
+	 */
+	public boolean isFixedLen() {
+		return this == FIXED_LEN || this == CHAR_SEQ;
 	}

 }
--- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/StringRenderBuilder.java
+++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/StringRenderBuilder.java
@ -30,6 +30,7 @@ import ghidra.util.StringUtilities;
 public class StringRenderBuilder {
 	public static final char DOUBLE_QUOTE = '"';
 	public static final char SINGLE_QUOTE = '\'';
+	private static final int MAX_ASCII = 0x80;

 	private StringBuilder sb = new StringBuilder();
 	private boolean byteMode = true;
@ -45,6 +46,18 @@ public class StringRenderBuilder {
 		this.quoteChar = quoteChar;
 	}

+	/**
+	 * Returns true if the current formatted string starts with a quoted text section,
+	 * instead of a byte value section.  Useful to indicate if
+	 * the string could have a prefix applied to it (e.g. u8"text")
+	 * <p>
+	 * 
+	 * @return boolean true if this string will start with a quoted text section
+	 */
+	public boolean startsWithQuotedText() {
+		return sb.length() > 0 && sb.charAt(0) == quoteChar;
+	}
+
 	/**
 	 * Append the characters in the specified string. The added characters will
 	 * be shown in a quoted text region.
@ -99,16 +112,21 @@ public class StringRenderBuilder {
 	 * <p>
 	 * {@literal { 0, 1, 2 } -> 00,01,02}
 	 *
-	 * @param bytes
+	 * @param bytes to convert to hex and append.  If null, append "???"
 	 */
 	public void addByteSeq(byte[] bytes) {
+		if (bytes == null) {
+			ensureByteMode();
+			sb.append("???");
+			return;
+		}
 		for (int i = 0; i < bytes.length; i++) {
 			ensureByteMode();
 			String valStr = Integer.toHexString(bytes[i] & 0xff).toUpperCase();
 			if (valStr.length() < 2) {
 				sb.append("0");
 			}
-			sb.append(valStr);
+			sb.append(valStr).append("h");
 		}
 	}

@ -124,10 +142,9 @@ public class StringRenderBuilder {
 	 */
 	public void addEscapedCodePoint(int codePoint) {
 		ensureTextMode();
-		char escapeChar = StringUtilities.isAsciiChar(codePoint) ? 'x'
-				: Character.isBmpCodePoint(codePoint) ? 'u' : 'U';
-		int cpDigits = StringUtilities.isAsciiChar(codePoint) ? 2
-				: Character.isBmpCodePoint(codePoint) ? 4 : 8;
+		char escapeChar =
+			(codePoint < MAX_ASCII) ? 'x' : Character.isBmpCodePoint(codePoint) ? 'u' : 'U';
+		int cpDigits = (codePoint < MAX_ASCII) ? 2 : Character.isBmpCodePoint(codePoint) ? 4 : 8;
 		String s = Integer.toHexString(codePoint).toUpperCase();
 		sb.append("\\").append(escapeChar);
 		sb.append(StringUtilities.pad(s, '0', cpDigits));
--- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/WideChar16DataType.java
+++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/WideChar16DataType.java
@ -77,7 +77,7 @@ public class WideChar16DataType extends BuiltIn implements ArrayStringable, Data
 	@Override
 	public Object getValue(MemBuffer buf, Settings settings, int length) {
 		try {
-			return new Character((char) buf.getUnsignedShort(0));
+			return Character.valueOf((char) buf.getUnsignedShort(0));
 		}
 		catch (MemoryAccessException e) {
 			// ignore
@ -94,7 +94,7 @@ public class WideChar16DataType extends BuiltIn implements ArrayStringable, Data
 	public String getDefaultLabelPrefix(MemBuffer buf, Settings settings, int length,
 			DataTypeDisplayOptions options) {

-		StringBuffer strBuf = new StringBuffer();
+		StringBuilder strBuf = new StringBuilder();
 		strBuf.append("WCHAR16_");
 		try {
 			int val = buf.getUnsignedShort(0);
@ -125,7 +125,7 @@ public class WideChar16DataType extends BuiltIn implements ArrayStringable, Data
 	@Override
 	public String getArrayDefaultLabelPrefix(MemBuffer buf, Settings settings, int len,
 			DataTypeDisplayOptions options) {
-		return new StringDataInstance(this, settings, buf, len).getLabel(
+		return new StringDataInstance(this, settings, buf, len, true).getLabel(
 			AbstractStringDataType.DEFAULT_UNICODE_ABBREV_PREFIX + "_",
 			AbstractStringDataType.DEFAULT_UNICODE_LABEL_PREFIX,
 			AbstractStringDataType.DEFAULT_UNICODE_LABEL, options);
@ -134,7 +134,7 @@ public class WideChar16DataType extends BuiltIn implements ArrayStringable, Data
 	@Override
 	public String getArrayDefaultOffcutLabelPrefix(MemBuffer buf, Settings settings, int len,
 			DataTypeDisplayOptions options, int offcutOffset) {
-		return new StringDataInstance(this, settings, buf, len).getOffcutLabelString(
+		return new StringDataInstance(this, settings, buf, len, true).getOffcutLabelString(
 			AbstractStringDataType.DEFAULT_UNICODE_ABBREV_PREFIX + "_",
 			AbstractStringDataType.DEFAULT_UNICODE_LABEL_PREFIX,
 			AbstractStringDataType.DEFAULT_UNICODE_LABEL, options, offcutOffset);
--- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/WideChar32DataType.java
+++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/WideChar32DataType.java
@ -96,7 +96,7 @@ public class WideChar32DataType extends BuiltIn implements ArrayStringable, Data
 	public String getDefaultLabelPrefix(MemBuffer buf, Settings settings, int length,
 			DataTypeDisplayOptions options) {

-		StringBuffer strBuf = new StringBuffer();
+		StringBuilder strBuf = new StringBuilder();
 		strBuf.append("WCHAR32_");
 		try {
 			int val = buf.getInt(0);
@ -127,7 +127,7 @@ public class WideChar32DataType extends BuiltIn implements ArrayStringable, Data
 	@Override
 	public String getArrayDefaultLabelPrefix(MemBuffer buf, Settings settings, int len,
 			DataTypeDisplayOptions options) {
-		return new StringDataInstance(this, settings, buf, len).getLabel(
+		return new StringDataInstance(this, settings, buf, len, true).getLabel(
 			AbstractStringDataType.DEFAULT_UNICODE_ABBREV_PREFIX + "_",
 			AbstractStringDataType.DEFAULT_UNICODE_LABEL_PREFIX,
 			AbstractStringDataType.DEFAULT_UNICODE_LABEL, options);
@ -136,7 +136,7 @@ public class WideChar32DataType extends BuiltIn implements ArrayStringable, Data
 	@Override
 	public String getArrayDefaultOffcutLabelPrefix(MemBuffer buf, Settings settings, int len,
 			DataTypeDisplayOptions options, int offcutOffset) {
-		return new StringDataInstance(this, settings, buf, len).getOffcutLabelString(
+		return new StringDataInstance(this, settings, buf, len, true).getOffcutLabelString(
 			AbstractStringDataType.DEFAULT_UNICODE_ABBREV_PREFIX + "_",
 			AbstractStringDataType.DEFAULT_UNICODE_LABEL_PREFIX,
 			AbstractStringDataType.DEFAULT_UNICODE_LABEL, options, offcutOffset);
--- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/WideCharDataType.java
+++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/WideCharDataType.java
@ -83,7 +83,7 @@ public class WideCharDataType extends BuiltIn implements ArrayStringable, DataTy
 		try {
 			switch (getLength()) {
 				case 2:
-					return new Character((char) buf.getShort(0));
+					return Character.valueOf((char) buf.getShort(0));
 				case 4:
 					return new Scalar(32, buf.getInt(0), true);
 			}
@ -114,7 +114,7 @@ public class WideCharDataType extends BuiltIn implements ArrayStringable, DataTy
 			return "WCHAR_??";
 		}

-		StringBuffer strBuf = new StringBuffer();
+		StringBuilder strBuf = new StringBuilder();
 		strBuf.append("WCHAR_");
 		try {
 			int val = (int) buf.getVarLengthUnsignedInt(0, length);
@ -151,7 +151,7 @@ public class WideCharDataType extends BuiltIn implements ArrayStringable, DataTy
 	@Override
 	public String getArrayDefaultLabelPrefix(MemBuffer buf, Settings settings, int len,
 			DataTypeDisplayOptions options) {
-		return new StringDataInstance(this, settings, buf, len).getLabel(
+		return new StringDataInstance(this, settings, buf, len, true).getLabel(
 			AbstractStringDataType.DEFAULT_UNICODE_ABBREV_PREFIX + "_",
 			AbstractStringDataType.DEFAULT_UNICODE_LABEL_PREFIX,
 			AbstractStringDataType.DEFAULT_UNICODE_LABEL, options);
@ -160,7 +160,7 @@ public class WideCharDataType extends BuiltIn implements ArrayStringable, DataTy
 	@Override
 	public String getArrayDefaultOffcutLabelPrefix(MemBuffer buf, Settings settings, int len,
 			DataTypeDisplayOptions options, int offcutOffset) {
-		return new StringDataInstance(this, settings, buf, len).getOffcutLabelString(
+		return new StringDataInstance(this, settings, buf, len, true).getOffcutLabelString(
 			AbstractStringDataType.DEFAULT_UNICODE_ABBREV_PREFIX + "_",
 			AbstractStringDataType.DEFAULT_UNICODE_LABEL_PREFIX,
 			AbstractStringDataType.DEFAULT_UNICODE_LABEL, options, offcutOffset);
--- a/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/program/model/data/CharDataTypesRenderTest.java
+++ b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/program/model/data/CharDataTypesRenderTest.java
@ -157,33 +157,33 @@ public class CharDataTypesRenderTest extends AbstractGTest {

 		// wchar32
 		String result = wchar32DT.getRepresentation(buf32, normset, wchar32DT.getLength());
-		assertEquals("U'\ufffd'", result);
+		assertEquals("AAh,AAh,AAh,AAh", result);

 		result = wchar32DT.getRepresentation(buf32, escseq, wchar32DT.getLength());
-		assertEquals("U'\ufffd'", result);
+		assertEquals("AAh,AAh,AAh,AAh", result);

 		result = wchar32DT.getRepresentation(buf32, byteseq, wchar32DT.getLength());
-		assertEquals("AA,AA,AA,AA", result);
+		assertEquals("AAh,AAh,AAh,AAh", result);

 		// wchar16
 		result = wchar16DT.getRepresentation(buf16_be, normset, wchar16DT.getLength());
-		assertEquals("u'\ufffd'", result);
+		assertEquals("D8h,00h", result);

 		result = wchar16DT.getRepresentation(buf16_be, escseq, wchar16DT.getLength());
-		assertEquals("u'\ufffd'", result);
+		assertEquals("D8h,00h", result);

 		result = wchar16DT.getRepresentation(buf16_be, byteseq, wchar16DT.getLength());
-		assertEquals("D8,00", result);
+		assertEquals("D8h,00h", result);

 		// charDT
 		result = charDT.getRepresentation(buf8, normset, charDT.getLength());
-		assertEquals("'\ufffd'", result);
+		assertEquals("85h", result);

 		result = charDT.getRepresentation(buf8, escseq, charDT.getLength());
-		assertEquals("'\ufffd'", result);
+		assertEquals("85h", result);

 		result = charDT.getRepresentation(buf8, byteseq, charDT.getLength());
-		assertEquals("85", result);
+		assertEquals("85h", result);
 	}

 	@Test
@ -196,10 +196,10 @@ public class CharDataTypesRenderTest extends AbstractGTest {

 		result = wchar16DT.getRepresentation(mb(false, 0xfd, 0xff),
 			newset().set(RENDER_ENUM.ESC_SEQ), wchar16DT.getLength());
-		assertEquals("u'\uFFFD'", result);
+		assertEquals("u'\\uFFFD'", result);

 		result = wchar16DT.getRepresentation(mb(false, 0xfd, 0xff),
 			newset().set(RENDER_ENUM.BYTE_SEQ), wchar16DT.getLength());
-		assertEquals("FD,FF", result);
+		assertEquals("FDh,FFh", result);
 	}
 }
--- a/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/program/model/data/StringDataTypeTest.java
+++ b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/program/model/data/StringDataTypeTest.java
@ -366,7 +366,7 @@ public class StringDataTypeTest extends AbstractGTest {
 	public void testProbeGetStringRep_LeadingBinaryBytes() {
 		ByteMemBufferImpl buf = mb(false, 1, 2, 'x');

-		assertEquals("01,02,\"x\"",
+		assertEquals("01h,02h,\"x\"",
 			fixedlenString.getRepresentation(buf, newset(), buf.getLength()));
 	}

@ -375,7 +375,7 @@ public class StringDataTypeTest extends AbstractGTest {
 		ByteMemBufferImpl buf = mb(false, 'h', 'e', 'l', 'l', 'o', 0, 'a', '\n', 'b', 255, 0);

 		// US-ASCII charset doesn't map 0x80-0xff, they result in error characters
-		assertEquals("\"hello\\0a\\nb\",FF",
+		assertEquals("\"hello\\0a\\nb\",FFh",
 			fixedlenString.getRepresentation(buf, newset(), buf.getLength()));
 	}

@ -420,19 +420,19 @@ public class StringDataTypeTest extends AbstractGTest {
 		//@formatter:off
 		String expected =
 			"\"\\0\"," +
-			"01,02,03,04,05,06,"+
-			"\"\\a\\b\\t\\n\\v\\f\\r\",0E,0F,10,11,12,13,14,15,16,17,18,19,1A,1B,1C,1D,1E,1F,\""+
+			"01h,02h,03h,04h,05h,06h,"+
+			"\"\\a\\b\\t\\n\\v\\f\\r\",0Eh,0Fh,10h,11h,12h,13h,14h,15h,16h,17h,18h,19h,1Ah,1Bh,1Ch,1Dh,1Eh,1Fh,\""+
 			" !\\\"#$%&'()*+,-./0123456789:;<=>?@"+
 			"ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`"+
-			"abcdefghijklmnopqrstuvwxyz{|}~\",7F,"+
-			"80,81,82,83,84,85,86,87,88,89,8A,8B,8C,8D,8E,8F,"+
-			"90,91,92,93,94,95,96,97,98,99,9A,9B,9C,9D,9E,9F,"+
-			"A0,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,"+
-			"B0,B1,B2,B3,B4,B5,B6,B7,B8,B9,BA,BB,BC,BD,BE,BF,"+
-			"C0,C1,C2,C3,C4,C5,C6,C7,C8,C9,CA,CB,CC,CD,CE,CF,"+
-			"D0,D1,D2,D3,D4,D5,D6,D7,D8,D9,DA,DB,DC,DD,DE,DF,"+
-			"E0,E1,E2,E3,E4,E5,E6,E7,E8,E9,EA,EB,EC,ED,EE,EF,"+
-			"F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,FA,FB,FC,FD,FE,FF";
+			"abcdefghijklmnopqrstuvwxyz{|}~\",7Fh,"+
+			"80h,81h,82h,83h,84h,85h,86h,87h,88h,89h,8Ah,8Bh,8Ch,8Dh,8Eh,8Fh,"+
+			"90h,91h,92h,93h,94h,95h,96h,97h,98h,99h,9Ah,9Bh,9Ch,9Dh,9Eh,9Fh,"+
+			"A0h,A1h,A2h,A3h,A4h,A5h,A6h,A7h,A8h,A9h,AAh,ABh,ACh,ADh,AEh,AFh,"+
+			"B0h,B1h,B2h,B3h,B4h,B5h,B6h,B7h,B8h,B9h,BAh,BBh,BCh,BDh,BEh,BFh,"+
+			"C0h,C1h,C2h,C3h,C4h,C5h,C6h,C7h,C8h,C9h,CAh,CBh,CCh,CDh,CEh,CFh,"+
+			"D0h,D1h,D2h,D3h,D4h,D5h,D6h,D7h,D8h,D9h,DAh,DBh,DCh,DDh,DEh,DFh,"+
+			"E0h,E1h,E2h,E3h,E4h,E5h,E6h,E7h,E8h,E9h,EAh,EBh,ECh,EDh,EEh,EFh,"+
+			"F0h,F1h,F2h,F3h,F4h,F5h,F6h,F7h,F8h,F9h,FAh,FBh,FCh,FDh,FEh,FFh";
 		//@formatter:on

 		assertEquals("String rep w/java US-ASCII charset mapping failed", expected, actual);
@ -465,8 +465,8 @@ public class StringDataTypeTest extends AbstractGTest {
 		assertEquals(e1, fixedUtf16String.getRepresentation(buf_be, newset(), buf_be.getLength()));
 		assertEquals(e1, fixedUtf16String.getRepresentation(buf_le, newset(), buf_le.getLength()));

-		String e2_be = "u\"ab\",CC,01,12,02";
-		String e2_le = "u\"ab\",01,CC,02,12";
+		String e2_be = "u\"ab\",CCh,01h,12h,02h";
+		String e2_le = "u\"ab\",01h,CCh,02h,12h";
 		assertEquals(e2_be, fixedUtf16String.getRepresentation(buf_be,
 			newset().set(RENDER_ENUM.BYTE_SEQ), buf_be.getLength()));
 		assertEquals(e2_le, fixedUtf16String.getRepresentation(buf_le,
@ -514,45 +514,46 @@ public class StringDataTypeTest extends AbstractGTest {
 	}

 	//-------------------------------------------------------------------------------------
-	// StringDataInstance.hasNullTerminator()
+	// StringDataInstance.isMissingNullTerminator()
 	//-------------------------------------------------------------------------------------

 	@Test
 	public void testHasNullTerm() {
 		ByteMemBufferImpl buf = mb(false, 'a', 'b', 0);

-		assertTrue(mkSDI(termString, buf, newset(), buf.getLength()).hasNullTerminator());
+		assertFalse(mkSDI(termString, buf, newset(), buf.getLength()).isMissingNullTerminator());
 	}

 	@Test
 	public void testHasNullTermEOF() {
 		ByteMemBufferImpl buf = mb(false, 'a', 'b');

-		assertFalse(mkSDI(termString, buf, newset(), buf.getLength()).hasNullTerminator());
+		assertTrue(mkSDI(termString, buf, newset(), buf.getLength()).isMissingNullTerminator());
 	}

 	@Test
 	public void testHasNullTermUTF16() {
 		ByteMemBufferImpl buf = mb(false, 'a', 0, 'b', 0, 0, 0);

-		assertTrue(mkSDI(termUtf16String, buf, newset(), buf.getLength()).hasNullTerminator());
+		assertFalse(
+			mkSDI(termUtf16String, buf, newset(), buf.getLength()).isMissingNullTerminator());
 	}

 	@Test
 	public void testHasNullTermFixed() {
 		ByteMemBufferImpl buf = mb(false, 'a', 'b', 'c', 0, 0, 0);

-		assertFalse(mkSDI(fixedlenString, buf, newset(), 2).hasNullTerminator());
-		assertFalse(mkSDI(fixedlenString, buf, newset(), 3).hasNullTerminator());
-		assertTrue(mkSDI(fixedlenString, buf, newset(), 4).hasNullTerminator());
+		assertTrue(mkSDI(fixedlenString, buf, newset(), 2).isMissingNullTerminator());
+		assertTrue(mkSDI(fixedlenString, buf, newset(), 3).isMissingNullTerminator());
+		assertFalse(mkSDI(fixedlenString, buf, newset(), 4).isMissingNullTerminator());
 	}

 	@Test
 	public void testHasNullTermFixedUTF16() {
 		ByteMemBufferImpl buf = mb(false, 'a', 0, 'b', 0, 'c', 0, 0, 0, 0, 0);

-		assertFalse(mkSDI(fixedUtf16String, buf, newset(), 4).hasNullTerminator());
-		assertFalse(mkSDI(fixedUtf16String, buf, newset(), 6).hasNullTerminator());
-		assertTrue(mkSDI(fixedUtf16String, buf, newset(), 8).hasNullTerminator());
+		assertTrue(mkSDI(fixedUtf16String, buf, newset(), 4).isMissingNullTerminator());
+		assertTrue(mkSDI(fixedUtf16String, buf, newset(), 6).isMissingNullTerminator());
+		assertFalse(mkSDI(fixedUtf16String, buf, newset(), 8).isMissingNullTerminator());
 	}
 }