Fix for invalid UTF, fix for multiple anonymous function definitions

This commit is contained in:
caheckman 2019-04-09 10:39:35 -04:00
parent d95a0a05a1
commit c87adb2115
3 changed files with 30 additions and 10 deletions

View File

@ -1250,7 +1250,7 @@ bool PrintC::printCharacterConstant(ostream &s,const Address &addr,int4 charsize
/// \brief Push a single character constant to the RPN stack
///
/// For C, a character constant is usually emitted as the character in single quotes.
/// Handle unicode, wide characters, etc.
/// Handle unicode, wide characters, etc. Characters come in with the compiler's raw encoding.
/// \param val is the constant value
/// \param ct is data-type attached to the value
/// \param vn is the Varnode holding the value
@ -1259,10 +1259,17 @@ void PrintC::pushCharConstant(uintb val,const TypeChar *ct,const Varnode *vn,con
{
ostringstream t;
if ((ct->getSize()==1)&&
((val<7)||(val>0x7e)||((val>13)&&(val<0x20)))) // not a good character constant
if ((ct->getSize()==1)&&(val >= 0x80)) {
// For byte characters, the encoding is assumed to be ASCII, UTF-8, or some other
// code-page that extends ASCII. At 0x80 and above, we cannot treat the value as a
// unicode code-point. It's either part of a multi-byte UTF-8 encoding or an unknown
// code-page value. In either case, we print it as an integer.
push_integer(val,1,true,vn,op);
}
else {
// From here on, we assume the constant value is a direct unicode code-point.
// The value could be an illegal code-point (surrogates or beyond the max code-point),
// but this will just be emitted as an escape sequence.
if (doEmitWideCharPrefix() && ct->getSize() > 1)
t << 'L'; // Print symbol indicating wide character
t << '\''; // char is surrounded with single quotes

View File

@ -446,6 +446,10 @@ bool PrintLanguage::unicodeNeedsEscape(int4 codepoint)
if (codepoint == 0x3000) {
return true; // ideographic space
}
if (codepoint >= 0xd7fc) { // D7FC - D7FF are currently unassigned.
// D800 - DFFF are high and low surrogates, technically illegal.
return true; // Treat as needing to be escaped
}
return false;
}
if (codepoint < 0xf900) {

View File

@ -56,8 +56,9 @@ public class ConstantPoolDex extends ConstantPool {
String classString =
DexUtil.convertTypeIndexToString(dexHeader, fieldIDItem.getClassIndex());
String[] pathArray = DexUtil.convertClassStringToPathArray("", classString);
if (pathArray != null)
if (pathArray != null) {
res.token = pathArray[pathArray.length - 1] + '.' + res.token;
}
}
DataType fieldDT = dexHeader.getDataType(program, fieldIDItem.getTypeIndex());
@ -72,13 +73,16 @@ public class ConstantPoolDex extends ConstantPool {
private String removeUniquifier(String name) {
int len = name.length();
if (len < 10 || name.charAt(len - 9) != '_')
if (len < 10 || name.charAt(len - 9) != '_') {
return name;
}
char matchChar = name.charAt(len - 8);
if (matchChar != '5' && matchChar != 'e')
if (matchChar != '5' && matchChar != 'e') {
return name;
if (name.charAt(len - 7) != '0')
}
if (name.charAt(len - 7) != '0') {
return name;
}
return name.substring(0, len - 9);
}
@ -103,14 +107,19 @@ public class ConstantPoolDex extends ConstantPool {
String classString =
DexUtil.convertTypeIndexToString(dexHeader, methodIDItem.getClassIndex());
String[] pathArray = DexUtil.convertClassStringToPathArray("", classString);
if (pathArray != null)
if (pathArray != null) {
namespaceString = pathArray[pathArray.length - 1];
}
}
if (namespaceString != null)
if (namespaceString != null) {
res.token = namespaceString + '.' + res.token;
}
}
res.tag = ConstantPool.POINTER_METHOD;
FunctionDefinitionDataType funcDef = new FunctionDefinitionDataType(res.token, dtManager);
// The FunctionDefinition is constructed on the fly, essentially as an anonymous type
// We use an internal naming scheme involving the methodID to avoid name collisions
String defName = res.token + '_' + Integer.toHexString(methodID);
FunctionDefinitionDataType funcDef = new FunctionDefinitionDataType(defName, dtManager);
res.type = new PointerDataType(funcDef);
res.hasThisPtr = !isStatic;