GP-2628 Add 'Search|For Encoded Strings'

This commit is contained in:
dev747368 2023-11-30 18:14:29 -05:00
parent 5fd01c739d
commit c91e9aac3f
41 changed files with 3716 additions and 72 deletions

View File

@ -63,12 +63,13 @@ public class SampleStringTranslationPlugin extends Plugin implements StringTrans
}
@Override
public void translate(Program program, List<ProgramLocation> dataLocations) {
public void translate(Program program, List<ProgramLocation> stringLocations,
TranslateOptions options) {
TaskLauncher.launchModal("Yeehaw-ify strings", monitor -> {
int id = program.startTransaction("Yeehaw-ify strings");
try {
for (ProgramLocation progLoc : dataLocations) {
for (ProgramLocation progLoc : stringLocations) {
Data data = DataUtilities.getDataAtLocation(progLoc);
StringDataInstance str = StringDataInstance.getStringDataInstance(data);
String s = str.getStringValue();

View File

@ -41,7 +41,7 @@ data/parserprofiles/linux_64.prf||GHIDRA||||END|
data/parserprofiles/objc_mac_carbon.prf||GHIDRA||||END|
data/parserprofiles/vs12Local.prf||GHIDRA||||END|
data/pcodetest/EmuTesting.gdt||GHIDRA||||END|
data/stringngrams/StringModel.sng||GHIDRA||reviewed||END|
data/stringngrams/StringModel.sng||GHIDRA||||END|
data/symbols/README.txt||GHIDRA||||END|
data/symbols/win16/commctrl.exports||GHIDRA||||END|
data/symbols/win16/commdlg.exports||GHIDRA||||END|
@ -510,6 +510,8 @@ src/main/help/help/topics/Search/Search_for_DirectReferences.htm||GHIDRA||||END|
src/main/help/help/topics/Search/Search_for_Strings.htm||GHIDRA||||END|
src/main/help/help/topics/Search/Searching.htm||GHIDRA||||END|
src/main/help/help/topics/Search/images/DirectReferences.png||GHIDRA||||END|
src/main/help/help/topics/Search/images/EncodedStringsDialog_advancedoptions.png||GHIDRA||||END|
src/main/help/help/topics/Search/images/EncodedStringsDialog_initial.png||GHIDRA||||END|
src/main/help/help/topics/Search/images/MultipleSelectionError.png||GHIDRA||||END|
src/main/help/help/topics/Search/images/QueryResultsSearch.png||GHIDRA||||END|
src/main/help/help/topics/Search/images/SearchForAddressTables.png||GHIDRA||||END|

View File

@ -10,6 +10,8 @@
# [$] denotes end of string
# [SP] denotes space
# [HT] denotes horizontal tab
# Thresholds: -2.71, -3.26, -3.52, -3.84, -4.23, -4.49, -4.55, -4.74, -4.88, -5.03, -5.06, -5.2, -5.24, -5.29, -5.29, -5.42, -5.51, -5.52, -5.53, -5.6, -5.6, -5.62, -5.7, -5.7, -5.78, -5.79, -5.81, -5.81, -5.84, -5.85, -5.86, -5.88, -5.92, -5.92, -5.93, -5.95, -5.99, -6.0, -6.0, -6.0, -6.02, -6.02, -6.02, -6.05, -6.06, -6.07, -6.08, -6.1, -6.12, -6.12, -6.13, -6.13, -6.13, -6.13, -6.13, -6.13, -6.13, -6.15, -6.15, -6.16, -6.16, -6.16, -6.17, -6.19, -6.19, -6.21, -6.21, -6.21, -6.21, -6.21, -6.21, -6.25, -6.25, -6.25, -6.25, -6.25, -6.25, -6.25, -6.26, -6.26, -6.26, -6.26, -6.26, -6.26, -6.26, -6.26, -6.26, -6.29, -6.29, -6.3
# Symbol Size: 128
[HT] [HT] [HT] 17
[HT] [HT] [SP] 8

View File

@ -228,6 +228,122 @@
</BLOCKQUOTE>
<P class="providedbyplugin">Provided By: <I>StringTablePlugin</I></P>
<H3><A name="Encoded_Strings_Dialog"></A>Search For Encoded Strings</H3>
<BLOCKQUOTE>
<P>The <b>Encoded Strings Dialog</b> is an alternate way to find and
create string instances in undefined data locations. It allows setting the character
set (charset) of the string to be created, as well as the ability to filter out byte
sequences from the selected locations that are not valid strings.</P>
<P>The <b>Encoded Strings Dialog</b> will initially allow the user to select the character set of the
string to create, and displays a preview of the strings found in the current selection:</P>
</BLOCKQUOTE>
<DIV align="center">
<CENTER>
<TABLE border="0" width="100%">
<TBODY>
<TR>
<TD align="center" width="100%"><IMG src="images/EncodedStringsDialog_initial.png"></TD>
</TR>
</TBODY>
</TABLE>
</CENTER>
</DIV>
<P><B>Advanced options</B></P>
<BLOCKQUOTE>
<P>Click the <b>Advanced...</b> and the <b>A-Z,&#x6211;&#x7684;...</B> (Filter by character scripts)
buttons to show additional options that will allow filtering the selected byte range for
strings containing specific scripts (alphabets) and also excluding strings that have
properties that are unwanted.</P>
</BLOCKQUOTE>
<DIV align="center">
<CENTER>
<TABLE border="0" width="100%">
<TBODY>
<TR>
<TD align="center" width="100%"><IMG src="images/EncodedStringsDialog_advancedoptions.png"></TD>
</TR>
</TBODY>
</TABLE>
</CENTER>
</DIV>
<P><B>Character Script filtering</B></P>
<BLOCKQUOTE>
<P>The <b>Script</b> drop-down list and the various <b>Allow Additional</b> toggle buttons
control how strings are filtered based on the script (Latin, Cyrillic, Arabic, etc) of each
of the characters found in the string.</P>
<P>The script chosen in the drop-down list will limit the included strings to strings that
include at least one character of the desired script.</P>
<P>If no <b>Allow Additional</b> toggle buttons are pressed, included strings will be limited
to strings that are solely comprised of characters from the chosen script (alphabet). This
would exclude strings that contain characters such as the space character or numeric characters
(labeled as the <b>Common</b> script), which typically is not desired. Select the <b>0-9,!?</b>
toggle to allow those characters.</P>
<P>The <b>A-Z</b> (Latin) toggle will allow Latin characters to be present in included
strings. This is redundant if the <b>Script</b> drop-down list is already set to Latin,
but becomes useful when another script is chosen, to allow including strings that are a
mixture of the selected script and Latin, which commonly occurs when strings have symbol
names, scientific units, etc.</P>
<P>The <b>Any</b> toggle will allow any additional script to be present in included strings.</P>
<P>More advanced filtering logic can be had by creating a column
filter using the <b>Create Column Filter</b> button in the lower right corner of the preview area
and filtering on the <b>Unicode Script</b> column.</P>
</BLOCKQUOTE>
<P><B>Exclude codec errors, non-standard control chars</B></P>
<BLOCKQUOTE>
<P>The <b>Exclude codec errors</b> check box excludes strings that contain the Unicode
REPLACEMENT character, which is placed into decoded strings when the charset codec logic
encounters a byte or byte sequence that is invalid. For example, the <b>US-ASCII</b>
charset will translate bytes greater than 0x7f into REPLACEMENT characters.</P>
<P>The <b>Exclude non-std ctrl chars</b> check box excludes strings that contain
characters that correspond to control characters in the range 1..31, but ignoring
common control characters such as tab, CR, LF.</P>
</BLOCKQUOTE>
<P><B>Exclude invalid strings</B></P>
<BLOCKQUOTE>
<P>The <b>Exclude invalid strings</b> option tests each candidate string against a pre-built
trigram frequency model and rejects strings that score lower than a cut-off value.</P>
<P>The built-in string model file was trained with mostly English strings, and will
probably mark valid words from other languages as invalid.</P>
</BLOCKQUOTE>
<P><B>Misc options</B></P>
<BLOCKQUOTE>
<P><b>Minimum Length</b> - excludes strings shorter than this (measured in characters, not
bytes)</P>
<P><b>Align start of string</b> - ensures strings start at a location that is evenly divisible
by the alignment requirements of the character size of the charset.</P>
<P><b>Truncate at ref</b> - ends strings early when there is an inbound reference to a
character inside the string.</P>
</BLOCKQUOTE>
<P><B>Tip</B></P>
<BLOCKQUOTE>
<P>When an option is responsible for excluding / filtering-out a string, that option will
have a red superscripted number next to it that contains the total count of strings
excluded by that option.</P>
</BLOCKQUOTE>
<P><B>Related</B></P>
<BLOCKQUOTE>
<P>See the <a href="../ViewStringsPlugin/ViewStringsPlugin.htm">Defined Strings</a> window to see
already created strings.</P>
</BLOCKQUOTE>
<P class="providedbyplugin">Provided By: <I>EncodedStringsPlugin</I></P>
<P class="relatedtopic">Related Topics:</P>

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

View File

@ -22,7 +22,8 @@
<P>&nbsp;</P>
<P>This plugin is <I><B>not</B></I> intended to be used to locate undefined strings. Please see
<A href="help/topics/Search/Search_for_Strings.htm">Search for Strings</A> for this
<A href="../Search/Search_for_Strings.htm">Search for Strings</A> or
<A href="../Search/Search_for_Strings.htm#Encoded_Strings_Dialog">Search for Encoded Strings</A> for this
feature.</P>
<h2>Defined Strings Table Columns</h2>
@ -43,8 +44,9 @@
<li>Has Encoding Error - boolean flag that indicates the string had byte(s) that could not be converted by the character set.
This is usually caused by having the wrong character set or if the string isn't really a string.</li>
<li>Charset - name of the character set that this string is encoded in.</li>
<li>Unicode Script - a list of the scripts (alphabets) used in the string.</li>
</ul>
<p>The <b>Is Ascii</b>, <b>Has Encoding Error</b>, and <b>Charset</b> columns are not visible by default. To display
<p>The <b>Is Ascii</b>, <b>Has Encoding Error</b>, <b>Unicode Script</b>, and <b>Charset</b> columns are not visible by default. To display
them in the table, right click on the column header row and select
<b>Add/Remove Columns...</b>.</p>
</blockquote>

View File

@ -100,7 +100,7 @@ public class FindPossibleReferencesPlugin extends Plugin {
private void createActions() {
action = new ActionBuilder(SEARCH_DIRECT_REFS_ACTION_NAME, getName())
.menuPath(ToolConstants.MENU_SEARCH, "For Direct References")
.menuGroup("search for")
.menuGroup("search for", "DirectReferences")
.helpLocation(new HelpLocation(HelpTopics.SEARCH, SEARCH_DIRECT_REFS_ACTION_NAME))
.description(getPluginDescription().getDescription())
.withContext(ListingActionContext.class, true)

View File

@ -151,7 +151,8 @@ public class AutoTableDisassemblerPlugin extends ProgramPlugin implements Domain
findTableAction.setHelpLocation(
new HelpLocation(HelpTopics.SEARCH, findTableAction.getName()));
findTableAction.setMenuBarData(new MenuData(
new String[] { ToolConstants.MENU_SEARCH, "For Address Tables" }, null, "search for"));
new String[] { ToolConstants.MENU_SEARCH, "For Address Tables" }, null, "search for",
-1, "AddressTables"));
findTableAction.setDescription(getPluginDescription().getDescription());
findTableAction.addToWindowWhen(NavigatableActionContext.class);
tool.addAction(findTableAction);

View File

@ -259,7 +259,7 @@ public class InstructionSearchPlugin extends ProgramPlugin {
searchAction.setHelpLocation(new HelpLocation("Search", "Instruction_Pattern_Search"));
searchAction.setMenuBarData(
new MenuData(new String[] { ToolConstants.MENU_SEARCH, "For Instruction Patterns" },
null, "search for"));
null, "search for", -1, "InstructionPatterns"));
searchAction.setDescription("Construct searches using selected instructions");
tool.addAction(searchAction);
}

View File

@ -148,7 +148,8 @@ public class ScalarSearchPlugin extends ProgramPlugin implements DomainObjectLis
searchAction.setHelpLocation(new HelpLocation(this.getName(), "Scalar_Search"));
searchAction.setMenuBarData(new MenuData(
new String[] { ToolConstants.MENU_SEARCH, "For Scalars..." }, null, "search for"));
new String[] { ToolConstants.MENU_SEARCH, "For Scalars..." }, null, "search for", -1,
"Scalars"));
searchAction.setDescription("Search program for scalars");
searchAction.addToWindowWhen(NavigatableActionContext.class);
tool.addAction(searchAction);

View File

@ -70,7 +70,8 @@ public class StringTablePlugin extends ProgramPlugin {
};
stringSearchAction.setHelpLocation(new HelpLocation(HelpTopics.SEARCH, SEARCH_ACTION_NAME));
stringSearchAction.setMenuBarData(new MenuData(
new String[] { ToolConstants.MENU_SEARCH, "For &Strings..." }, null, "search for"));
new String[] { ToolConstants.MENU_SEARCH, "For &Strings..." }, null, "search for", -1,
"Strings1"));
stringSearchAction.setDescription(getPluginDescription().getDescription());
stringSearchAction.addToWindowWhen(NavigatableActionContext.class);

View File

@ -15,7 +15,7 @@
*/
package ghidra.app.plugin.core.string.translate;
import static ghidra.program.model.data.TranslationSettingsDefinition.*;
import static ghidra.program.model.data.TranslationSettingsDefinition.TRANSLATION;
import java.util.List;
@ -50,7 +50,8 @@ public class ManualStringTranslationService implements StringTranslationService
}
@Override
public void translate(Program program, List<ProgramLocation> stringLocations) {
public void translate(Program program, List<ProgramLocation> stringLocations,
TranslateOptions options) {
TaskLauncher.launchModal("Manually translate strings", monitor -> {
int id = program.startTransaction("Translate strings");

View File

@ -19,6 +19,7 @@ import java.util.List;
import docking.action.MenuData;
import ghidra.app.services.StringTranslationService;
import ghidra.app.services.StringTranslationService.TranslateOptions;
import ghidra.program.model.listing.Program;
import ghidra.program.util.ProgramLocation;
import ghidra.util.HelpLocation;
@ -52,6 +53,6 @@ public class TranslateAction extends AbstractTranslateAction {
@Override
public void actionPerformed(Program program, List<ProgramLocation> dataLocations) {
service.translate(program, dataLocations);
service.translate(program, dataLocations, TranslateOptions.NONE);
}
}

View File

@ -15,7 +15,8 @@
*/
package ghidra.app.plugin.core.string.translate;
import java.util.*;
import java.util.ArrayList;
import java.util.List;
import docking.action.DockingAction;
import ghidra.app.CorePluginPackage;
@ -41,6 +42,7 @@ import ghidra.program.model.listing.Data;
//@formatter:on
public class TranslateStringsPlugin extends Plugin {
private List<DockingAction> translationActions = new ArrayList<>();
private List<StringTranslationService> translationServices = new ArrayList<>();
@ -51,7 +53,7 @@ public class TranslateStringsPlugin extends Plugin {
@Override
protected void init() {
createTranslateActions();
createTranslateActions(StringTranslationService.getCurrentStringTranslationServices(tool));
createTranslateMetaActions();
}
@ -67,25 +69,23 @@ public class TranslateStringsPlugin extends Plugin {
private void createTranslateActionsIfNeeded() {
List<StringTranslationService> newServices =
new ArrayList<>(Arrays.asList(tool.getServices(StringTranslationService.class)));
StringTranslationService.getCurrentStringTranslationServices(tool);
boolean isSame = newServices.containsAll(translationServices) &&
translationServices.containsAll(newServices);
if (!isSame) {
createTranslateActions();
createTranslateActions(newServices);
}
}
private void createTranslateActions() {
private void createTranslateActions(List<StringTranslationService> newServices) {
for (DockingAction prevAction : translationActions) {
tool.removeAction(prevAction);
}
translationActions.clear();
translationServices.clear();
translationServices.addAll(Arrays.asList(tool.getServices(StringTranslationService.class)));
Collections.sort(translationServices,
(s1, s2) -> s1.getTranslationServiceName().compareTo(s2.getTranslationServiceName()));
translationServices.addAll(newServices);
for (StringTranslationService service : translationServices) {
DockingAction action = new TranslateAction(getName(), service);

View File

@ -0,0 +1,85 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.core.strings;
import static java.lang.Character.UnicodeScript.*;
import java.awt.Font;
import java.lang.Character.UnicodeScript;
import java.util.*;
public class CharacterScriptUtils {
	/**
	 * Scripts that are not helpful to use when filtering strings
	 */
	public static final List<UnicodeScript> IGNORED_SCRIPTS = List.of(INHERITED, UNKNOWN);

	/**
	 * The {@link UnicodeScript} value that represents the "ANY" choice.  This is a bit of a hack
	 * and re-uses the INHERITED enum value for this purpose.
	 */
	public static final UnicodeScript ANY_SCRIPT_ALIAS = UnicodeScript.INHERITED;

	/**
	 * Premade examples of characters from each specified script, using info from
	 * https://omniglot.com/language/phrases/hovercraft.htm and
	 * google translate, similar to lorem ipsum placeholding text.
	 * <p>
	 * Encoded using escape sequences to avoid any mangling by ASCII processing.
	 * <p>
	 * Scripts not in this map will have an example created from the first couple of characters
	 * from their unicode block that are visible to the user with their current font.
	 * <p>
	 * The map is immutable (created by {@link Map#of}) and the reference is constant.
	 */
	static final Map<UnicodeScript, String> PREMADE_EXAMPLES = Map.of(
		COMMON, "0-9,!?",
		ARABIC,
		"\u062d\u064e\u0648\u0651\u0627\u0645\u062a\u064a \u0645\u064f\u0645\u0652\u062a\u0650\u0644\u0626\u0629 \u0628\u0650\u0623\u064e\u0646\u0652\u0642\u064e\u0644\u064e\u064a\u0652\u0633\u0648\u0646",
		CYRILLIC,
		"\u041c\u043e\u0451 \u0441\u0443\u0434\u043d\u043e \u043d\u0430 \u0432\u043e\u0437\u0434\u0443",
		HAN, "\u6211\u7684\u6c23\u588a\u8239\u88dd\u6eff\u4e86\u9c3b\u9b5a",
		HANGUL,
		"\uc81c \ud638\ubc84\ud06c\ub798\ud504\ud2b8\uac00 \uc7a5\uc5b4\ub85c \uac00\ub4dd\ud574\uc694",
		KATAKANA,
		"\u79c1\u306e\u30db\u30d0\u30fc\u30af\u30e9\u30d5\u30c8\u306f\u9c3b\u3067\u3044\u3063\u3071\u3044\u3067\u3059" // mix of han, hiragana, katakana
	);

	/**
	 * Builds a map of example character sequences for every current UnicodeScript, where the
	 * specified font can display the characters of that script.
	 * <p>
	 * Every non-control code point from 0 through {@link Character#MAX_CODE_POINT} (inclusive)
	 * is examined in ascending order.  A script only receives an entry once the font reports
	 * that it can display at least one of that script's code points.  If the script has a
	 * premade example, that example is used verbatim (its length is not limited by
	 * {@code maxExampleLen}); otherwise displayable code points are appended until the example
	 * reaches {@code maxExampleLen} (measured with {@link String#length()}).
	 *
	 * @param f {@link Font} used to test whether a code point is displayable
	 * @param maxExampleLen length of the character sequence to generate
	 * @return map of unicodescript-to-string; scripts with no displayable code point are absent
	 */
	public static Map<UnicodeScript, String> getDisplayableScriptExamples(Font f,
			int maxExampleLen) {
		Map<UnicodeScript, String> result = new HashMap<>();
		// inclusive upper bound so the last code point is not silently skipped
		for (int codePoint = 0; codePoint <= Character.MAX_CODE_POINT; codePoint++) {
			if (Character.isISOControl(codePoint)) {
				continue;
			}
			UnicodeScript script = UnicodeScript.of(codePoint);
			String example = result.getOrDefault(script, "");
			if (example.length() < maxExampleLen && f.canDisplay(codePoint)) {
				// Note: waiting until after f.canDisplay ensures we don't add PREMADEs if not displayable
				String premade = PREMADE_EXAMPLES.get(script);
				example = premade == null ? example + Character.toString(codePoint) : premade;
				result.put(script, example);
			}
		}
		return result;
	}
}

View File

@ -0,0 +1,67 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.core.strings;
import java.lang.Character.UnicodeScript;
import java.util.HashMap;
import java.util.Map;
/**
* Holds counts of reasons for filter rejection
*/
class EncodedStringsFilterStats {
int total;
int codecErrors;
int nonStdCtrlChars;
int failedStringModel;
int stringLength;
int requiredScripts;
int otherScripts;
int latinScript;
int commonScript;
Map<UnicodeScript, Integer> foundScriptCounts = new HashMap<>();
public EncodedStringsFilterStats() {
// empty
}
public EncodedStringsFilterStats(EncodedStringsFilterStats other) {
this.total = other.total;
this.codecErrors = other.codecErrors;
this.nonStdCtrlChars = other.nonStdCtrlChars;
this.failedStringModel = other.failedStringModel;
this.stringLength = other.stringLength;
this.requiredScripts = other.requiredScripts;
this.otherScripts = other.otherScripts;
this.latinScript = other.latinScript;
this.commonScript = other.commonScript;
this.foundScriptCounts.putAll(other.foundScriptCounts);
}
int getTotalForAdvancedOptions() {
return codecErrors + nonStdCtrlChars + failedStringModel + stringLength;
}
int getTotalOmitted() {
return codecErrors + nonStdCtrlChars + failedStringModel + stringLength + requiredScripts;
}
@Override
public EncodedStringsFilterStats clone() {
return new EncodedStringsFilterStats(this);
}
}

View File

@ -0,0 +1,67 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.core.strings;
import java.lang.Character.UnicodeScript;
import java.util.Set;
import ghidra.app.services.StringValidatorService;
import ghidra.docking.settings.Settings;
import ghidra.program.model.data.AbstractStringDataType;
import ghidra.util.SystemUtilities;
record EncodedStringsOptions(
		AbstractStringDataType stringDT,
		Settings settings,
		String charsetName,
		Set<UnicodeScript> requiredScripts,
		Set<UnicodeScript> allowedScripts,
		boolean excludeStringsWithErrors,
		boolean excludeNonStdCtrlChars,
		boolean alignStartOfString,
		int charSize,
		int minStringLength,
		boolean breakOnRef,
		StringValidatorService stringValidator,
		boolean requireValidString) {

	/**
	 * Tests whether another set of options agrees with this one on the subset of options
	 * that would change how strings are created / read from memory, or that produce values
	 * that are immutable in the table Row object.
	 * <p>
	 * Compared: string data type, settings values, charset name, alignment flag, char size,
	 * string validator (by identity) and break-on-reference flag.
	 *
	 * @param other options to compare against, may be null
	 * @return true if {@code other} is non-null and all compared options match
	 */
	boolean equivalentStringCreationOptions(EncodedStringsOptions other) {
		if (other == null) {
			return false;
		}
		if (!stringDT.equals(other.stringDT)) {
			return false;
		}
		if (!equalValues(settings, other.settings)) {
			return false;
		}
		if (!charsetName.equals(other.charsetName)) {
			return false;
		}
		if (alignStartOfString != other.alignStartOfString || charSize != other.charSize) {
			return false;
		}
		return stringValidator == other.stringValidator && breakOnRef == other.breakOnRef;
	}

	/**
	 * Compares two settings objects by content: both must expose the same set of names and
	 * an equal value for every name.
	 *
	 * @param s1 first settings
	 * @param s2 second settings
	 * @return true if names and values are equal
	 */
	private static boolean equalValues(Settings s1, Settings s2) {
		Set<String> firstNames = Set.of(s1.getNames());
		Set<String> secondNames = Set.of(s2.getNames());
		boolean sameNames = firstNames.equals(secondNames);
		if (sameNames) {
			for (String settingName : s1.getNames()) {
				if (!SystemUtilities.isEqual(s1.getValue(settingName),
					s2.getValue(settingName))) {
					return false;
				}
			}
		}
		return sameNames;
	}
}

View File

@ -0,0 +1,123 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.core.strings;
import docking.DockingWindowManager;
import docking.action.DockingAction;
import docking.action.builder.ActionBuilder;
import docking.tool.ToolConstants;
import ghidra.app.CorePluginPackage;
import ghidra.app.context.NavigatableActionContext;
import ghidra.app.plugin.PluginCategoryNames;
import ghidra.app.plugin.ProgramPlugin;
import ghidra.app.services.GoToService;
import ghidra.app.util.HelpTopics;
import ghidra.framework.options.ToolOptions;
import ghidra.framework.plugintool.PluginInfo;
import ghidra.framework.plugintool.PluginTool;
import ghidra.framework.plugintool.util.PluginStatus;
import ghidra.program.model.address.AddressSetView;
import ghidra.program.model.data.CharsetInfo;
import ghidra.program.model.listing.Program;
import ghidra.util.HelpLocation;
import ghidra.util.datastruct.WeakDataStructureFactory;
import ghidra.util.datastruct.WeakSet;
//@formatter:off
@PluginInfo(
	status = PluginStatus.RELEASED,
	packageName = CorePluginPackage.NAME,
	category = PluginCategoryNames.SEARCH,
	shortDescription = "Search For Encoded Strings",
	description = "Searches for strings using a specific character set and allows filtering " +
			"results using the Unicode scripts (alphabets) used and other criteria.  This feature " +
			"is being evaluated for its effectiveness.",
	servicesRequired = { GoToService.class }
)
//@formatter:on
/**
 * Plugin that adds the "Search | For Encoded Strings..." menu action, which opens an
 * {@link EncodedStringsDialog} for the current selection (or for all initialized memory when
 * there is no selection).  It also registers the tool options used as defaults by the dialog.
 */
public class EncodedStringsPlugin extends ProgramPlugin {
	private static final String ACTIONNAME = "Search For Encoded Strings";

	static final String STRINGS_OPTION_NAME = "Strings";
	static final String CHARSET_OPTIONNAME = "Default Charset";
	static final String CHARSET_DEFAULT_VALUE = CharsetInfo.USASCII;
	static final String TRANSLATE_SERVICE_OPTIONNAME = "Default Translation Service Name";
	static final String STRINGMODEL_FILENAME_OPTIONNAME = "Default String Model Filename";
	static final String STRINGMODEL_FILENAME_DEFAULT = "stringngrams/StringModel.sng";
	static final HelpLocation HELP_LOCATION =
		new HelpLocation(HelpTopics.SEARCH, "Encoded_Strings_Dialog");

	// dialogs that have been shown and not yet reported closed; used to forward
	// program-closed notifications
	private WeakSet<EncodedStringsDialog> openDialogs =
		WeakDataStructureFactory.createCopyOnWriteWeakSet();
	private DockingAction searchForEncodedStringsAction;

	/**
	 * Creates the plugin.
	 *
	 * @param tool the tool this plugin is being added to
	 */
	public EncodedStringsPlugin(PluginTool tool) {
		super(tool);
	}

	/**
	 * {@return the menu action created by this plugin, or null if init() has not run yet}
	 */
	public DockingAction getSearchForEncodedStringsAction() {
		return searchForEncodedStringsAction;
	}

	@Override
	protected void init() {
		super.init();
		registerOptions();
		createActions();
	}

	/**
	 * Registers the default charset, string model filename and translation service name
	 * options under the {@value #STRINGS_OPTION_NAME} tool options.
	 */
	private void registerOptions() {
		ToolOptions options = tool.getOptions(STRINGS_OPTION_NAME);
		options.registerOption(CHARSET_OPTIONNAME, CHARSET_DEFAULT_VALUE, null,
			"Name of default charset.");
		options.registerOption(STRINGMODEL_FILENAME_OPTIONNAME, STRINGMODEL_FILENAME_DEFAULT, null,
			"Name of default string model file.");
		options.registerOption(TRANSLATE_SERVICE_OPTIONNAME, "", null,
			"Name of default translation service.");
	}

	/**
	 * Forwards the program-closed notification to every dialog currently tracked as open.
	 */
	@Override
	protected void programClosed(Program program) {
		for (EncodedStringsDialog openDialog : openDialogs) {
			openDialog.programClosed(program);
		}
	}

	/**
	 * Stops tracking a dialog.
	 *
	 * @param dialog the dialog to remove from the set of open dialogs
	 */
	void dialogClosed(EncodedStringsDialog dialog) {
		openDialogs.remove(dialog);
	}

	/**
	 * Builds and installs the "For Encoded Strings..." search menu action.  The action is
	 * enabled only when the action context has a location.
	 */
	private void createActions() {
		searchForEncodedStringsAction =
			new ActionBuilder(ACTIONNAME, getName()) // menu
					.withContext(NavigatableActionContext.class, true)
					.onAction(this::showSearchForEncodedStrings)
					.enabledWhen(ac -> ac.getLocation() != null)
					.menuPath(ToolConstants.MENU_SEARCH, "For Encoded Strings...")
					.menuGroup("search for", "Strings2")
					.helpLocation(HELP_LOCATION)
					.buildAndInstall(tool);
	}

	/**
	 * Opens a new encoded strings dialog for the context's selection, or for all initialized
	 * memory of the context's program when nothing is selected.
	 *
	 * @param lac the action context the action was invoked with
	 */
	private void showSearchForEncodedStrings(NavigatableActionContext lac) {
		AddressSetView addrs = lac.hasSelection()
				? lac.getSelection()
				: lac.getProgram().getMemory().getAllInitializedAddressSet();

		EncodedStringsDialog dlg = new EncodedStringsDialog(this, lac.getProgram(), addrs);
		openDialogs.add(dlg);
		DockingWindowManager.showDialog(dlg);
	}
}

View File

@ -0,0 +1,82 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.core.strings;
import java.lang.Character.UnicodeScript;
import java.util.EnumSet;
import java.util.Set;
import ghidra.program.model.data.StringDataInstance;
record EncodedStringsRow(StringDataInstance sdi, StringInfo stringInfo, int refCount,
		int offcutCount, boolean validString) {

	/**
	 * Tests this row against the filter options, updating {@code stats} as a side effect.
	 * <p>
	 * The checks run in a fixed order and the first failing check increments its matching
	 * counter and rejects the row: minimum length, codec errors, non-standard control chars,
	 * required scripts, allowed scripts (other / Latin / Common), string model validity.
	 * {@code stats.total} is incremented for every call.  The per-script found counts are
	 * only updated for rows that pass the length, codec error and control char checks.
	 *
	 * @param options filter options to apply
	 * @param stats statistics accumulator that is updated by this call
	 * @return true if the row passes all enabled filters
	 */
	boolean matches(EncodedStringsOptions options, EncodedStringsFilterStats stats) {
		stats.total++;

		String str = stringInfo.stringValue();
		int minLength = options.minStringLength();
		if (minLength > 0 && str.length() < minLength) {
			stats.stringLength++;
			return false;
		}
		if (options.excludeStringsWithErrors() && stringInfo.hasCodecError()) {
			stats.codecErrors++;
			return false;
		}
		if (options.excludeNonStdCtrlChars() && stringInfo.hasNonStdCtrlChars()) {
			stats.nonStdCtrlChars++;
			return false;
		}

		stringInfo.scripts()
				.forEach(
					foundScript -> stats.foundScriptCounts.merge(foundScript, 1, Integer::sum));

		Set<UnicodeScript> required = options.requiredScripts();
		if (required != null && !required.isEmpty() &&
			!stringInfo.scripts().containsAll(required)) {
			stats.requiredScripts++;
			return false;
		}

		Set<UnicodeScript> allowed = options.allowedScripts();
		if (allowed != null) {
			// Build the working set with noneOf+addAll instead of EnumSet.copyOf, which throws
			// IllegalArgumentException when given an empty collection that is not an EnumSet.
			Set<UnicodeScript> scripts = EnumSet.noneOf(UnicodeScript.class);
			scripts.addAll(stringInfo.scripts());
			scripts.removeAll(CharacterScriptUtils.IGNORED_SCRIPTS);
			if (required != null) {
				// requiredScripts is treated as optional above, so it must be optional here too
				scripts.removeAll(required);
			}
			// Latin and Common are pulled out separately so they get their own counters
			boolean hadLatin = scripts.remove(UnicodeScript.LATIN);
			boolean hadCommon = scripts.remove(UnicodeScript.COMMON);
			scripts.removeAll(allowed);

			if (!scripts.isEmpty()) {
				stats.otherScripts++;
				return false;
			}
			if (hadLatin && !allowed.contains(UnicodeScript.LATIN)) {
				stats.latinScript++;
				return false;
			}
			if (hadCommon && !allowed.contains(UnicodeScript.COMMON)) {
				stats.commonScript++;
				return false;
			}
		}

		if (options.requireValidString() && options.stringValidator() != null && !validString) {
			stats.failedStringModel++;
			return false;
		}
		return true;
	}
}

View File

@ -0,0 +1,448 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.core.strings;
import java.lang.Character.UnicodeScript;
import java.util.*;
import java.util.stream.Collectors;
import javax.swing.JTable;
import javax.swing.table.TableModel;
import docking.widgets.table.TableColumnDescriptor;
import generic.theme.GThemeDefaults;
import ghidra.app.services.StringValidatorQuery;
import ghidra.app.services.StringValidityScore;
import ghidra.docking.settings.Settings;
import ghidra.framework.plugintool.ServiceProvider;
import ghidra.framework.plugintool.ServiceProviderStub;
import ghidra.program.model.address.*;
import ghidra.program.model.data.StringDataInstance;
import ghidra.program.model.listing.Listing;
import ghidra.program.model.listing.Program;
import ghidra.program.util.ProgramLocation;
import ghidra.program.util.ProgramSelection;
import ghidra.util.datastruct.Accumulator;
import ghidra.util.exception.CancelledException;
import ghidra.util.table.AddressBasedTableModel;
import ghidra.util.table.column.AbstractGColumnRenderer;
import ghidra.util.table.column.GColumnRenderer;
import ghidra.util.table.field.AbstractProgramLocationTableColumn;
import ghidra.util.table.field.AddressBasedLocation;
import ghidra.util.task.TaskMonitor;
class EncodedStringsTableModel extends AddressBasedTableModel<EncodedStringsRow> {
private UnicodeScriptColumn unicodeScriptColumn;
private ValidStringColumn validStringColumn;
private AddressSetView selectedAddresses;
private AddressSetView filteredAddresses;
private boolean singleStringMode;
private ModelState state;
EncodedStringsTableModel(Program program, AddressSetView selectedAddresses) {
super("Encoded Strings Table", new ServiceProviderStub(), program, null, true);
this.selectedAddresses = selectedAddresses;
this.singleStringMode = selectedAddresses.getNumAddresses() == 1;
this.state = new ModelState(null, null);
}
/**
 * Returns the filter statistics stored in the current model state.
 *
 * @return the {@link EncodedStringsFilterStats} of the current state
 */
public EncodedStringsFilterStats getStats() {
	ModelState current = this.state;
	return current.stats;
}
/**
 * Drops the current options and cached rows by installing an empty state, then disposes
 * the parent model.
 */
@Override
public void dispose() {
	this.state = new ModelState(null, null);
	super.dispose();
}
/**
 * Builds the column layout of the table. The order of the calls below is the order in which
 * the columns are registered; the location column is registered with sort ordinal 1,
 * ascending.
 *
 * @return descriptor listing the visible and hidden columns
 */
@Override
protected TableColumnDescriptor<EncodedStringsRow> createTableColumnDescriptor() {
	// these two columns are kept in fields as well as being registered below
	unicodeScriptColumn = new UnicodeScriptColumn();
	validStringColumn = new ValidStringColumn();

	TableColumnDescriptor<EncodedStringsRow> columns = new TableColumnDescriptor<>();
	columns.addVisibleColumn(new DataLocationColumn(), 1, true);
	columns.addVisibleColumn(new StringRepColumn());
	columns.addHiddenColumn(new RefCountColumn());
	columns.addHiddenColumn(new OffcutRefCountColumn());
	columns.addVisibleColumn(unicodeScriptColumn);
	columns.addVisibleColumn(validStringColumn);
	columns.addVisibleColumn(new LengthColumn());
	columns.addHiddenColumn(new ByteLengthColumn());
	return columns;
}
/**
 * Loads the table rows.
 * <p>
 * If the current state already holds the rows produced by an earlier search, those rows are
 * simply re-filtered against the current options. Otherwise the undefined portions of the
 * selected addresses are searched for strings, every found string is remembered in the state
 * for later re-filtering, and the strings that match the current options are added to the
 * accumulator.
 *
 * @param accumulator receives the rows that pass the current filter options
 * @param monitor task monitor used for progress and cancellation
 * @throws CancelledException if the user cancels the load
 */
@Override
protected void doLoad(Accumulator<EncodedStringsRow> accumulator, TaskMonitor monitor)
		throws CancelledException {
	// take local snapshots; the fields can be replaced while this load is running
	Program localProgram = program;
	ModelState state = this.state;
	if (state == null || localProgram == null || state.options == null) {
		return;
	}

	if (state.previousData != null) {
		// use cached strings and re-filter
		EncodedStringsFilterStats newStats = new EncodedStringsFilterStats();
		for (EncodedStringsRow row : state.previousData) {
			if (row.matches(state.options, newStats)) {
				accumulator.add(row);
			}
		}
		state.stats = newStats;
		return;
	}

	if (filteredAddresses == null) {
		// Only publish the result once it has been fully computed. If the search for
		// undefined ranges is cancelled, the field must stay null so that the next load
		// recomputes it instead of searching a half-filtered address set.
		filteredAddresses = findUndefinedSearchAddresses(localProgram, monitor);
	}

	int align = 1;
	if (state.options.alignStartOfString()) {
		align = localProgram.getDataTypeManager()
				.getDataOrganization()
				.getSizeAlignment(state.options.charSize());
	}

	List<EncodedStringsRow> allStrings = new ArrayList<>();
	EncodedStringsFilterStats newStats = new EncodedStringsFilterStats();

	UndefinedStringIterator usi = new UndefinedStringIterator(localProgram, filteredAddresses,
		state.options.charSize(), align, state.options.breakOnRef(), singleStringMode,
		state.options.stringDT(), state.options.settings(), monitor);

	for (StringDataInstance sdi : usi) {
		monitor.checkCancelled();

		StringInfo stringInfo = StringInfo.fromString(sdi.getStringValue());
		int refCount = localProgram.getReferenceManager().getReferenceCountTo(sdi.getAddress());
		int offcutRefCount = getOffcutRefCount(localProgram,
			new AddressRangeImpl(sdi.getAddress(), sdi.getEndAddress()));

		// without a validator every string is treated as valid
		boolean isValid = true;
		if (state.options.stringValidator() != null) {
			StringValidatorQuery svq =
				new StringValidatorQuery(stringInfo.stringValue(), stringInfo);
			StringValidityScore score =
				state.options.stringValidator().getStringValidityScore(svq);
			isValid = score.isScoreAboveThreshold();
		}

		EncodedStringsRow row =
			new EncodedStringsRow(sdi, stringInfo, refCount, offcutRefCount, isValid);
		// remember every string, even filtered-out ones, so a later options change can
		// re-filter without searching memory again
		allStrings.add(row);

		if (row.matches(state.options, newStats)) {
			accumulator.add(row);
		}

		if (singleStringMode) {
			break;
		}
	}
	state.stats = newStats;
	state.previousData = allStrings;
}

/**
 * Computes the set of addresses that will be searched for strings: the selection (or, in
 * single-string mode, the range produced by
 * {@link UndefinedStringIterator#getSingleStringEndAddrRange}), limited to initialized memory
 * and then to undefined ranges.
 *
 * @param localProgram program being searched
 * @param monitor task monitor; switched to indeterminate mode while the ranges are computed
 * @return undefined address ranges to search
 * @throws CancelledException if the undefined-range search is cancelled
 */
private AddressSetView findUndefinedSearchAddresses(Program localProgram, TaskMonitor monitor)
		throws CancelledException {
	AddressSetView candidates = singleStringMode
			? UndefinedStringIterator.getSingleStringEndAddrRange(localProgram,
				selectedAddresses)
			: new AddressSet(selectedAddresses);
	candidates = candidates.intersect(localProgram.getMemory().getAllInitializedAddressSet());

	monitor.setIndeterminate(true);
	monitor.initialize(0, "Finding undefined address ranges");
	try {
		// Note: this can be slow for large programs
		return localProgram.getListing().getUndefinedRanges(candidates, false, monitor);
	}
	finally {
		monitor.setIndeterminate(false);
	}
}
/**
 * Builds a program selection that covers the address range of the string in each of the
 * given rows.
 *
 * @param rows indexes into the filtered row data
 * @return selection made of the address ranges of the strings in those rows
 */
@Override
public ProgramSelection getProgramSelection(int[] rows) {
	AddressSet selected = new AddressSet();
	for (int i = 0; i < rows.length; i++) {
		EncodedStringsRow stringRow = filteredData.get(rows[i]);
		selected.add(stringRow.sdi().getAddressRange());
	}
	return new ProgramSelection(selected);
}
/**
 * Removes each of the given rows from the table, one at a time and in list order.
 *
 * @param rows rows to remove
 */
public void removeRows(List<EncodedStringsRow> rows) {
	Iterator<EncodedStringsRow> doomed = rows.iterator();
	while (doomed.hasNext()) {
		removeObject(doomed.next());
	}
}
/**
 * Installs new search/filter options and reloads the table.
 * <p>
 * The rows found by the previous search are carried over into the new state only when the
 * new options report that they are equivalent, for string-creation purposes, to the
 * options of the current state; otherwise the next load searches memory again.
 *
 * @param options new options
 */
public void setOptions(EncodedStringsOptions options) {
	Collection<EncodedStringsRow> reusableRows = null;
	if (options.equivalentStringCreationOptions(state.options)) {
		reusableRows = state.previousData;
	}
	state = new ModelState(options, reusableRows);
	clearData();
	reload();
}
/**
 * Returns the address of the string shown in the given row.
 *
 * @param row row index
 * @return address of that row's string
 */
@Override
public Address getAddress(int row) {
	EncodedStringsRow rowObject = getRowObject(row);
	return rowObject.sdi().getAddress();
}
/**
 * Counts the reference destination addresses inside the given range, not counting the
 * first address of the range and not counting an address that equals the one seen
 * immediately before it.
 *
 * @param localProgram program that supplies the reference manager
 * @param range address range of a string
 * @return number of counted destination addresses
 */
private int getOffcutRefCount(Program localProgram, AddressRange range) {
	// seeding with the range start makes the loop skip a reference to the start itself
	Address lastSeen = range.getMinAddress();
	int count = 0;
	for (Address destination : localProgram.getReferenceManager()
			.getReferenceDestinationIterator(new AddressSet(range), true)) {
		if (destination.equals(lastSeen)) {
			continue;
		}
		count++;
		lastSeen = destination;
	}
	return count;
}
//==================================================================================================
// Inner Classes
//==================================================================================================
/**
 * Column named "Location" that shows the address of each row's string.
 */
private static class DataLocationColumn
		extends AbstractProgramLocationTableColumn<EncodedStringsRow, AddressBasedLocation> {

	@Override
	public String getColumnName() {
		return "Location";
	}

	@Override
	public ProgramLocation getProgramLocation(EncodedStringsRow rowObject, Settings settings,
			Program program, ServiceProvider serviceProvider) {
		Address stringAddress = rowObject.sdi().getAddress();
		return new ProgramLocation(program, stringAddress);
	}

	@Override
	public AddressBasedLocation getValue(EncodedStringsRow rowObject, Settings settings,
			Program program, ServiceProvider serviceProvider) throws IllegalArgumentException {
		Address stringAddress = rowObject.sdi().getAddress();
		return new AddressBasedLocation(program, stringAddress);
	}
}
/**
 * Column named "String". The cell value is the row itself; the renderer turns it into the
 * string's representation and colors strings that have a codec error.
 */
private static class StringRepColumn
		extends AbstractProgramLocationTableColumn<EncodedStringsRow, EncodedStringsRow> {

	// the renderer keeps no per-column state, so one instance is created up front
	private final StringRepCellRenderer renderer = new StringRepCellRenderer();

	@Override
	public String getColumnName() {
		return "String";
	}

	@Override
	public EncodedStringsRow getValue(EncodedStringsRow rowObject, Settings settings,
			Program program, ServiceProvider serviceProvider) throws IllegalArgumentException {
		return rowObject;
	}

	@Override
	public ProgramLocation getProgramLocation(EncodedStringsRow rowObject, Settings settings,
			Program program, ServiceProvider serviceProvider) {
		return new ProgramLocation(program, rowObject.sdi().getAddress());
	}

	@Override
	public GColumnRenderer<EncodedStringsRow> getColumnRenderer() {
		return renderer;
	}

	/**
	 * Renders a row as its string representation. Declared static because it uses nothing
	 * from the enclosing column instance.
	 */
	private static class StringRepCellRenderer
			extends AbstractGColumnRenderer<EncodedStringsRow> {

		@Override
		protected String getText(Object value) {
			// anything that is not a row (including null) is shown as an empty cell
			return value instanceof EncodedStringsRow rowValue
					? rowValue.sdi().getStringRepresentation()
					: "";
		}

		@Override
		public String getFilterString(EncodedStringsRow t, Settings settings) {
			return getText(t);
		}

		@Override
		protected void setForegroundColor(JTable table, TableModel model, Object value) {
			if (value instanceof EncodedStringsRow rowValue &&
				rowValue.stringInfo().hasCodecError()) {
				// strings with a codec error always use the error color
				setForeground(GThemeDefaults.Colors.Tables.ERROR_UNSELECTED);
			}
			else {
				super.setForegroundColor(table, model, value);
			}
		}
	}
}
/**
 * Column named "Unicode Script" that shows the names of the scripts found in each row's
 * string, separated by commas.
 */
private static class UnicodeScriptColumn
		extends AbstractProgramLocationTableColumn<EncodedStringsRow, String> {

	@Override
	public String getColumnName() {
		return "Unicode Script";
	}

	@Override
	public ProgramLocation getProgramLocation(EncodedStringsRow rowObject, Settings settings,
			Program program, ServiceProvider serviceProvider) {
		Address stringAddress = rowObject.sdi().getAddress();
		return new ProgramLocation(program, stringAddress);
	}

	@Override
	public String getValue(EncodedStringsRow rowObject, Settings settings, Program program,
			ServiceProvider serviceProvider) throws IllegalArgumentException {
		// names appear in the iteration order of the row's script set
		StringJoiner scriptNames = new StringJoiner(",");
		for (UnicodeScript script : rowObject.stringInfo().scripts()) {
			scriptNames.add(script.name());
		}
		return scriptNames.toString();
	}
}
/**
 * Column named "Reference Count" that shows the reference count stored in each row.
 */
private static class RefCountColumn
		extends AbstractProgramLocationTableColumn<EncodedStringsRow, Integer> {

	@Override
	public String getColumnName() {
		return "Reference Count";
	}

	@Override
	public ProgramLocation getProgramLocation(EncodedStringsRow rowObject, Settings settings,
			Program program, ServiceProvider serviceProvider) {
		Address stringAddress = rowObject.sdi().getAddress();
		return new ProgramLocation(program, stringAddress);
	}

	@Override
	public Integer getValue(EncodedStringsRow rowObject, Settings settings, Program program,
			ServiceProvider serviceProvider) throws IllegalArgumentException {
		return rowObject.refCount();
	}
}
/**
 * Column named "Offcut Reference Count" that shows the offcut count stored in each row.
 */
private static class OffcutRefCountColumn
		extends AbstractProgramLocationTableColumn<EncodedStringsRow, Integer> {

	@Override
	public String getColumnName() {
		return "Offcut Reference Count";
	}

	@Override
	public ProgramLocation getProgramLocation(EncodedStringsRow rowObject, Settings settings,
			Program program, ServiceProvider serviceProvider) {
		Address stringAddress = rowObject.sdi().getAddress();
		return new ProgramLocation(program, stringAddress);
	}

	@Override
	public Integer getValue(EncodedStringsRow rowObject, Settings settings, Program program,
			ServiceProvider serviceProvider) throws IllegalArgumentException {
		return rowObject.offcutCount();
	}
}
/**
 * Column named "Is Valid String" that shows the validity flag stored in each row.
 */
private static class ValidStringColumn
		extends AbstractProgramLocationTableColumn<EncodedStringsRow, Boolean> {

	@Override
	public String getColumnName() {
		return "Is Valid String";
	}

	@Override
	public ProgramLocation getProgramLocation(EncodedStringsRow rowObject, Settings settings,
			Program program, ServiceProvider serviceProvider) {
		Address stringAddress = rowObject.sdi().getAddress();
		return new ProgramLocation(program, stringAddress);
	}

	@Override
	public Boolean getValue(EncodedStringsRow rowObject, Settings settings, Program program,
			ServiceProvider serviceProvider) throws IllegalArgumentException {
		return rowObject.validString();
	}
}
/**
 * Column named "Length" that shows {@link String#length()} of each row's decoded string
 * value (a count of Java chars, not of bytes).
 */
private static class LengthColumn
		extends AbstractProgramLocationTableColumn<EncodedStringsRow, Integer> {

	@Override
	public String getColumnName() {
		return "Length";
	}

	@Override
	public ProgramLocation getProgramLocation(EncodedStringsRow rowObject, Settings settings,
			Program program, ServiceProvider serviceProvider) {
		Address stringAddress = rowObject.sdi().getAddress();
		return new ProgramLocation(program, stringAddress);
	}

	@Override
	public Integer getValue(EncodedStringsRow rowObject, Settings settings, Program program,
			ServiceProvider serviceProvider) throws IllegalArgumentException {
		String decoded = rowObject.stringInfo().stringValue();
		return decoded.length();
	}
}
/**
 * Column named "Byte Length" that shows the data length reported by each row's
 * {@link StringDataInstance}.
 */
private static class ByteLengthColumn
		extends AbstractProgramLocationTableColumn<EncodedStringsRow, Integer> {

	@Override
	public String getColumnName() {
		return "Byte Length";
	}

	@Override
	public ProgramLocation getProgramLocation(EncodedStringsRow rowObject, Settings settings,
			Program program, ServiceProvider serviceProvider) {
		Address stringAddress = rowObject.sdi().getAddress();
		return new ProgramLocation(program, stringAddress);
	}

	@Override
	public Integer getValue(EncodedStringsRow rowObject, Settings settings, Program program,
			ServiceProvider serviceProvider) throws IllegalArgumentException {
		StringDataInstance sdi = rowObject.sdi();
		return sdi.getDataLength();
	}
}
/**
 * Bundle of everything that belongs to one set of options: the options themselves, the
 * rows found by the last full search (used for re-filtering), and the statistics of the
 * last filter pass.
 */
private static class ModelState {
	/** options this state was created for; may be null */
	final EncodedStringsOptions options;

	/** rows found by the last full search, or null if no search results are available */
	Collection<EncodedStringsRow> previousData;

	/** statistics of the most recent filter pass; starts out as a fresh instance */
	EncodedStringsFilterStats stats;

	ModelState(EncodedStringsOptions options, Collection<EncodedStringsRow> previousData) {
		this.stats = new EncodedStringsFilterStats();
		this.previousData = previousData;
		this.options = options;
	}
}
}

View File

@ -0,0 +1,64 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.core.strings;
import java.awt.BorderLayout;
import java.awt.Component;
import docking.widgets.table.threaded.ThreadedTableModel;
import ghidra.util.table.GhidraThreadedTablePanel;
/**
* A Ghidra table panel that can show a custom overlay instead of an empty table.
*
* @param <T> table row type
*/
class EncodedStringsThreadedTablePanel<T> extends GhidraThreadedTablePanel<T> {

	/** component shown in place of the table while the overlay is active */
	Component emptyTableOverlayComponent;

	/** the component that occupied the center before the overlay was first shown */
	Component previousCenterComponent;

	/**
	 * Creates the panel.
	 *
	 * @param model table model
	 * @param minUpdateDelay passed through to the parent panel
	 * @param emptyTableOverlayComponent component to show instead of the table when
	 * {@link #showEmptyTableOverlay(boolean)} is called with {@code true}
	 */
	public EncodedStringsThreadedTablePanel(ThreadedTableModel<T, ?> model, int minUpdateDelay,
			Component emptyTableOverlayComponent) {
		super(model, minUpdateDelay);
		this.emptyTableOverlayComponent = emptyTableOverlayComponent;
	}

	/**
	 * Swaps the center component of this panel between the original center component and
	 * the overlay component.
	 *
	 * @param b true to show the overlay, false to show the original center component
	 */
	public void showEmptyTableOverlay(boolean b) {
		BorderLayout layout = (BorderLayout) getLayout();
		Component currentCenterComponent = layout.getLayoutComponent(BorderLayout.CENTER);
		if (previousCenterComponent == null) {
			// first call: remember what was in the center so it can be restored later
			previousCenterComponent = currentCenterComponent;
		}

		if (b) {
			if (currentCenterComponent != emptyTableOverlayComponent) {
				remove(previousCenterComponent);
				add(emptyTableOverlayComponent, BorderLayout.CENTER);
			}
		}
		else {
			if (currentCenterComponent != previousCenterComponent) {
				remove(emptyTableOverlayComponent);
				add(previousCenterComponent, BorderLayout.CENTER);
			}
		}

		// invalidate() alone only marks the layout as dirty; revalidate() also schedules the
		// re-layout that is needed for a newly added child to get its bounds
		revalidate();
		repaint();
	}
}

View File

@ -0,0 +1,76 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.core.strings;
import java.lang.Character.UnicodeScript;
import java.util.*;
import ghidra.util.StringUtilities;
/**
* Information about a string.
*
* @param stringValue string itself
* @param scripts set of scripts (alphabets) that the string is made of
* @param stringFeatures set of informational flags about various conditions found in the string
*/
public record StringInfo(
		String stringValue,
		Set<UnicodeScript> scripts,
		Set<StringInfoFeature> stringFeatures) {

	/** control characters below 32 that are not reported as non-standard */
	private static final Set<Character> STD_CTRL_CHARS = Set.of('\n', '\t', '\r');

	/**
	 * Creates a {@link StringInfo} instance by examining every code point of the string.
	 *
	 * @param s string, null is treated as the empty string
	 * @return new {@link StringInfo} instance
	 */
	public static StringInfo fromString(String s) {
		String value = (s != null) ? s : "";
		EnumSet<UnicodeScript> foundScripts = EnumSet.noneOf(UnicodeScript.class);
		EnumSet<StringInfoFeature> foundFeatures = EnumSet.noneOf(StringInfoFeature.class);

		int offset = 0;
		while (offset < value.length()) {
			int codePoint = value.codePointAt(offset);
			offset += Character.charCount(codePoint);
			try {
				foundScripts.add(Character.UnicodeScript.of(codePoint));

				if (codePoint == StringUtilities.UNICODE_REPLACEMENT) {
					foundFeatures.add(StringInfoFeature.CODEC_ERROR);
				}

				boolean unusualControlChar =
					codePoint < 32 && !STD_CTRL_CHARS.contains((char) codePoint);
				if (unusualControlChar || !Character.isDefined(codePoint)) {
					foundFeatures.add(StringInfoFeature.NON_STD_CTRL_CHARS);
				}
			}
			catch (IllegalArgumentException e) {
				// ignore this codepoint
			}
		}

		return new StringInfo(value, foundScripts, foundFeatures);
	}

	/**
	 * @return true if the string contained the unicode replacement character
	 */
	public boolean hasCodecError() {
		return stringFeatures.contains(StringInfoFeature.CODEC_ERROR);
	}

	/**
	 * @return true if the string contained a control character other than newline, tab or
	 * carriage return, or an undefined code point
	 */
	public boolean hasNonStdCtrlChars() {
		return stringFeatures.contains(StringInfoFeature.NON_STD_CTRL_CHARS);
	}
}

View File

@ -0,0 +1,21 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.core.strings;
/**
 * Informational flags about conditions found in a string; see
 * {@link StringInfo#fromString(String)} for where they are set.
 */
public enum StringInfoFeature {
// set when the string contains StringUtilities.UNICODE_REPLACEMENT
CODEC_ERROR,
// set when the string contains a code point below 32 other than \n, \t, \r, or a code
// point that Character.isDefined() rejects
NON_STD_CTRL_CHARS
}

View File

@ -0,0 +1,53 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.core.strings;
import java.util.Iterator;
/**
* Splits a string into trigrams
*/
public class StringTrigramIterator implements Iterator<Trigram> {
	// null when the string is too short to produce any trigram
	private final String s;
	// char index of the next code point to read; one past length() once the end-of-string
	// trigram has been produced
	private int index = 0;
	// the two code points that precede the next one; start out as \0 (start-of-string)
	private final int[] prevCodePoints = new int[2];

	/**
	 * Creates an iterator over the trigrams of the given string. Strings with fewer than 3
	 * code points (and null) produce no trigrams at all.
	 * <p>
	 * The first trigram returned is (\0, first, second) and the last is
	 * (second-to-last, last, \0).
	 *
	 * @param s string to split, may be null
	 */
	public StringTrigramIterator(String s) {
		// throw away string if length is less than 3
		this.s = s != null && s.codePointCount(0, s.length()) >= 3 ? s : null;
		if (hasNext()) {
			next(); // throw away first value which will be "\0, \0, first char"
		}
	}

	@Override
	public boolean hasNext() {
		// "<=" rather than "<": one extra step past the end emits the end-of-string trigram
		return s != null && index <= s.length();
	}

	/**
	 * Returns the next trigram.
	 *
	 * @return next trigram
	 * @throws java.util.NoSuchElementException if there are no more trigrams
	 */
	@Override
	public Trigram next() {
		if (!hasNext()) {
			throw new java.util.NoSuchElementException("No more trigrams");
		}
		// reading past the end yields \0, the end-of-string marker
		int codePoint = index >= s.length() ? '\0' : s.codePointAt(index);
		index += Character.charCount(codePoint);

		Trigram result =
			new Trigram(new int[] { prevCodePoints[0], prevCodePoints[1], codePoint });

		prevCodePoints[0] = prevCodePoints[1];
		prevCodePoints[1] = codePoint;
		return result;
	}
}

View File

@ -0,0 +1,174 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.core.strings;
import java.io.IOException;
import java.util.*;
/**
* Three (3) adjacent characters, with \0 being reserved for start and end of string magic values.
*
* @param codePoints 3 characters (as int32 code points)
*/
public record Trigram(int[] codePoints) implements Comparable<Trigram> {

	/**
	 * Validates the array and stores a private copy of it, so that a trigram can not be
	 * changed after construction (trigrams are used as hash map keys).
	 *
	 * @throws NullPointerException if codePoints is null
	 * @throws IllegalArgumentException if codePoints does not hold exactly 3 values
	 */
	public Trigram {
		Objects.requireNonNull(codePoints, "codePoints");
		if (codePoints.length != 3) {
			throw new IllegalArgumentException(
				"Trigram requires exactly 3 code points, got " + codePoints.length);
		}
		codePoints = codePoints.clone();
	}

	/**
	 * Returns a copy of the 3 code points; modifying the returned array does not affect
	 * this trigram.
	 *
	 * @return new array holding the 3 code points
	 */
	@Override
	public int[] codePoints() {
		return codePoints.clone();
	}

	/**
	 * Creates a trigram from 3 code points.
	 *
	 * @param cp1 first code point
	 * @param cp2 second code point
	 * @param cp3 third code point
	 * @return new trigram
	 */
	public static Trigram of(int cp1, int cp2, int cp3) {
		return new Trigram(new int[] { cp1, cp2, cp3 });
	}

	/**
	 * Creates a trigram from the textual symbols used in a model file.
	 *
	 * @param s1 symbol of the first character
	 * @param s2 symbol of the second character
	 * @param s3 symbol of the third character
	 * @return new trigram
	 * @throws NumberFormatException if a {@code \\uXXXX} or {@code \\UXXXXXXXX} symbol has bad
	 * hex digits
	 * @throws IOException if a symbol is null, empty, or an unknown {@code [xx]} code
	 */
	public static Trigram fromStringRep(String s1, String s2, String s3)
			throws NumberFormatException, IOException {
		return Trigram.of(decodeCodePoint(s1), decodeCodePoint(s2), decodeCodePoint(s3));
	}

	/**
	 * Returns an iterator over the trigrams of a string.
	 *
	 * @param s string to split
	 * @return new {@link StringTrigramIterator}
	 */
	public static StringTrigramIterator iterate(String s) {
		return new StringTrigramIterator(s);
	}

	/**
	 * Returns the concatenated textual representations of the 3 code points.
	 *
	 * @return string built from {@link #getCodePointRepresentation(int)} of each code point
	 */
	public String toCharSeq() {
		return getCodePointRepresentation(codePoints[0]) +
			getCodePointRepresentation(codePoints[1]) +
			getCodePointRepresentation(codePoints[2]);
	}

	@Override
	public String toString() {
		return toCharSeq();
	}

	// equals/hashCode are overridden because the record default would compare the array by
	// identity instead of by contents

	@Override
	public int hashCode() {
		final int prime = 31;
		int result = 1;
		result = prime * result + Arrays.hashCode(codePoints);
		return result;
	}

	@Override
	public boolean equals(Object obj) {
		if (this == obj) {
			return true;
		}
		return obj instanceof Trigram other && Arrays.equals(codePoints, other.codePoints);
	}

	@Override
	public int compareTo(Trigram o) {
		// order by first, then second, then third code point
		int result = Integer.compare(codePoints[0], o.codePoints[0]);
		result = result == 0 ? Integer.compare(codePoints[1], o.codePoints[1]) : result;
		result = result == 0 ? Integer.compare(codePoints[2], o.codePoints[2]) : result;
		return result;
	}

	//--------------------------------------------------------------------------------------------
	private static final String START_OF_STRING = "[^]";
	private static final String END_OF_STRING = "[$]";
	private static final Set<String> META_CHARS = Set.of(START_OF_STRING, END_OF_STRING);

	// two-way mapping between "[xx]" symbols and the code points they stand for; filled once
	// by the static initializer below and only read afterwards
	private static final Map<String, Integer> descriptionToCodePoint = new HashMap<>();
	private static final Map<Integer, String> codePointToDescription = new HashMap<>();

	private static void mapCP(String desc, int codePoint) {
		descriptionToCodePoint.put(desc, codePoint);
		codePointToDescription.put(codePoint, desc);
	}

	static {
		mapCP("[NUL]", 0);
		mapCP("[SOH]", 1);
		mapCP("[STX]", 2);
		mapCP("[ETX]", 3);
		mapCP("[EOT]", 4);
		mapCP("[ENQ]", 5);
		mapCP("[ACK]", 6);
		mapCP("[BEL]", 7);
		mapCP("[BS]", 8);
		mapCP("[HT]", 9);
		mapCP("[LF]", 10);
		mapCP("[VT]", 11);
		mapCP("[FF]", 12);
		mapCP("[CR]", 13);
		mapCP("[SO]", 14);
		mapCP("[SI]", 15);
		mapCP("[DLE]", 16);
		mapCP("[DC1]", 17);
		mapCP("[DC2]", 18);
		mapCP("[DC3]", 19);
		mapCP("[DC4]", 20);
		mapCP("[NAK]", 21);
		mapCP("[SYN]", 22);
		mapCP("[ETB]", 23);
		mapCP("[CAN]", 24);
		mapCP("[EM]", 25);
		mapCP("[SUB]", 26);
		mapCP("[ESC]", 27);
		mapCP("[FS]", 28);
		mapCP("[GS]", 29);
		mapCP("[RS]", 30);
		mapCP("[US]", 31);
		mapCP("[SP]", 32);
		mapCP("[DEL]", 127);
	}

	/**
	 * Returns the textual representation of a code point: the character itself for
	 * 33..126, a "[xx]" symbol for the mapped control characters, space and DEL, and a
	 * {@code \\uXXXX} or {@code \\UXXXXXXXX} escape for everything else.
	 *
	 * @param codePoint code point
	 * @return textual representation
	 */
	static String getCodePointRepresentation(int codePoint) {
		if (codePoint >= 33 && codePoint <= 126) {
			return Character.toString(codePoint);
		}
		String result = codePointToDescription.get(codePoint);
		if (result != null) {
			return result;
		}
		return codePoint > 0 && codePoint <= 0xFFFF
				? "\\u%04X".formatted(codePoint)
				: "\\U%08X".formatted(codePoint);
	}

	/**
	 * Converts a model file symbol back into a code point.
	 *
	 * @param rep symbol from the model file
	 * @return code point
	 * @throws IOException if rep is null, empty, or an unknown "[xx]" code
	 * @throws NumberFormatException if the hex digits of an escape can not be parsed
	 */
	private static int decodeCodePoint(String rep) throws IOException, NumberFormatException {
		if (rep == null || rep.isEmpty()) {
			throw new IOException("Invalid character symbol in model file");
		}
		if (rep.codePointCount(0, rep.length()) == 1) {
			return rep.codePointAt(0);
		}
		if (rep.length() == 3 && META_CHARS.contains(rep)) {
			// convert "[^]" (start-of-string) and "[$]" (end-of-string) to null char
			return '\0';
		}
		if (rep.length() == 6 && rep.startsWith("\\u")) {
			// "\uFFFF"
			return Integer.parseUnsignedInt(rep, 2, 6, 16);
		}
		if (rep.length() == 10 && rep.startsWith("\\U")) {
			// "\UFFFFFFFF"
			return Integer.parseUnsignedInt(rep, 2, 10, 16);
		}
		if (rep.startsWith("[")) {
			// one of the "[xx]" codes
			Integer codePoint = descriptionToCodePoint.get(rep);
			if (codePoint == null) {
				throw new IOException("Can not parse character " + rep + " in model file");
			}
			return codePoint;
		}
		// anything else: fall back to the first code point of the symbol
		return rep.codePointAt(0);
	}
}

View File

@ -0,0 +1,250 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.core.strings;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.Map.Entry;
import java.util.function.Function;
import generic.jar.ResourceFile;
import ghidra.app.services.*;
/**
* A {@link StringValidatorService} that uses precomputed trigram frequencies from
* a ".sng" model file to score strings.
*/
public class TrigramStringValidator implements StringValidatorService {
/**
* Remove this flag when the trigram model thresholds have been recalculated
*/
@Deprecated(forRemoval = true, since = "10.3")
private static final boolean PRESERVE_BUG_SKIP_TRIGRAM = true;
// "Bad" log to be used as default score when we know the string is bad
private static final double DEFAULT_LOG_VALUE = -20d;
private static final double INVALID_THRESHOLD = 10.0;
/**
 * Reads a trigram model file and creates a validator from it.
 *
 * @param f model file
 * @return new {@link TrigramStringValidator}
 * @throws IOException if the file can not be read or parsed
 */
public static TrigramStringValidator read(ResourceFile f) throws IOException {
	TrigramStringValidator validator = readModel(f);
	return validator;
}
private ResourceFile sourceFile;
private Map<Trigram, Double> trigramLogs;
private long totalNumTrigrams;
private Function<String, String> modelValueTransformer;
private double[] thresholds; // for string lengths [4..nn]
/**
 * Creates a validator from already-computed model data.
 *
 * @param trigramLogs log10 probability of each known trigram
 * @param totalNumTrigrams total trigram count of the model
 * @param modelValueTransformer function applied to a string before it is scored
 * @param thresholds score thresholds, indexed by (length - 4)
 * @param sourceFile file the model was read from
 */
public TrigramStringValidator(Map<Trigram, Double> trigramLogs, long totalNumTrigrams,
		Function<String, String> modelValueTransformer, double[] thresholds,
		ResourceFile sourceFile) {
	this.sourceFile = sourceFile;
	this.thresholds = thresholds;
	this.modelValueTransformer = modelValueTransformer;
	this.totalNumTrigrams = totalNumTrigrams;
	this.trigramLogs = trigramLogs;
}
/**
 * @return the file that was given to the constructor as the model's source
 */
public ResourceFile getSourceFile() {
	return this.sourceFile;
}
// Name reported for this validator service; always the fixed string "ngram".
@Override
public String getValidatorServiceName() {
return "ngram";
}
/**
 * Scores a string by averaging the log10 probabilities of its trigrams.
 * <p>
 * The string is first passed through the model's transformer. If it yields no trigrams the
 * score is {@code DEFAULT_LOG_VALUE}. Trigrams that are not in the model contribute
 * log10(1 / totalNumTrigrams). The threshold in the result is looked up using the number
 * of trigrams.
 *
 * @param query holds the string to score
 * @return score of the string together with the threshold that applies to it
 */
@Override
public StringValidityScore getStringValidityScore(StringValidatorQuery query) {
	String transformedString = modelValueTransformer.apply(query.stringValue());
	StringTrigramIterator trigrams = Trigram.iterate(transformedString);

	int trigramCount = 0;
	double score;
	if (!trigrams.hasNext()) {
		score = DEFAULT_LOG_VALUE;
	}
	else {
		final double missingTrigramScore = Math.log10(1d / totalNumTrigrams);
		double logSum = 0;
		while (trigrams.hasNext()) {
			Trigram trigram = trigrams.next();
			trigramCount++;
			if (PRESERVE_BUG_SKIP_TRIGRAM && trigramCount == 2) {
				// compatibility hack to replicate trigram bug in old code: the second
				// trigram is counted but its probability is not added
				continue;
			}
			Double knownLogProb = trigramLogs.get(trigram);
			logSum += (knownLogProb != null) ? knownLogProb : missingTrigramScore;
		}
		score = logSum / trigramCount;
	}

	double threshold = getThresholdForStringOfLength(trigramCount);
	return new StringValidityScore(query.stringValue(), transformedString, score, threshold);
}
/**
 * @return the total trigram count that was given to the constructor
 */
public long getTotalNumTrigrams() {
	return this.totalNumTrigrams;
}
/**
 * Returns the model as text lines of the form {@code trigram=logProbability}, sorted by
 * trigram.
 *
 * @return iterator over the lines
 */
public Iterator<String> dumpModel() {
	return trigramLogs.entrySet()
			.stream()
			.sorted(Entry.comparingByKey())
			.map(entry -> "%s=%s".formatted(entry.getKey().toCharSeq(), entry.getValue()))
			.iterator();
}
/**
 * Looks up the score threshold for a string of the given length.
 * <p>
 * The threshold table starts at length 4; lengths beyond the end of the table use the last
 * entry. Lengths below 4, and models that have no threshold values at all, get
 * {@code INVALID_THRESHOLD}.
 *
 * @param len length used to select the threshold
 * @return threshold value
 */
private double getThresholdForStringOfLength(int len) {
	int index = len - 4;
	// a model file without a "Thresholds" header leaves thresholds null; an empty list
	// would otherwise produce index -1 below
	if (index < 0 || thresholds == null || thresholds.length == 0) {
		return INVALID_THRESHOLD;
	}
	return thresholds[Math.min(index, thresholds.length - 1)];
}
//---------------------------------------------------------------------------------------------
/**
 * Parses a trigram model file.
 * <p>
 * The file starts with optional "# name: value" header lines ("Model Type", "Thresholds",
 * "Symbol Size"), followed by tab-separated data lines made of three character symbols and
 * a count. Blank lines are skipped.
 *
 * @param sourceFile model file
 * @return new validator built from the file
 * @throws IOException if the file can not be read, a data line does not have 4 fields, or
 * a number or character symbol can not be parsed
 */
private static TrigramStringValidator readModel(ResourceFile sourceFile) throws IOException {
	Map<Trigram, Integer> counts = new HashMap<>();
	long totalTrigrams = 0;
	String modelType = null;
	double[] thresholds = null;
	int symbolSize = 128; // default
	int lineNum = 0;
	boolean inFileHeaderSection = true;

	// declared outside the try so the catch block can report the offending line
	String currString = "";
	try (BufferedReader br = new BufferedReader(
		new InputStreamReader(sourceFile.getInputStream(), StandardCharsets.UTF_8))) {
		while ((currString = br.readLine()) != null) {
			lineNum++;
			if (currString.isBlank()) {
				continue;
			}

			if (inFileHeaderSection && currString.startsWith("#")) {
				String[] headerFields = parseHeaderLine(currString.substring(1).trim());
				if (headerFields != null) {
					switch (headerFields[0]) {
						case "Model Type":
							modelType = headerFields[1];
							break;
						case "Thresholds":
							thresholds = parseThresholds(headerFields[1]);
							break;
						case "Symbol Size":
							symbolSize = Integer.parseInt(headerFields[1]);
							break;
					}
				}
				continue;
			}
			// the first non-header line ends the header; later "#" lines are data
			inFileHeaderSection = false;

			String[] lineParts = currString.split("\\t");
			if (lineParts.length != 4) {
				throw new IOException("Invalid field count in ngram %s:%d: %s"
						.formatted(sourceFile.getName(), lineNum, currString));
			}

			Trigram trigram = Trigram.fromStringRep(lineParts[0], lineParts[1], lineParts[2]);
			int currCount = Integer.parseInt(lineParts[3]);

			int[] codePoints = trigram.codePoints();
			if (codePoints[1] == 0 || (codePoints[0] == 0 && codePoints[2] == 0)) {
				// skip invalid combinations of start-of-string, end-of-string markers
				continue;
			}

			counts.merge(trigram, currCount, Integer::sum);
			totalTrigrams += currCount;
		}

		// fixup missing trigram elements
		int trigramEntryCount = counts.size();

		// fully populated trigram mappings would be symbolsize^3, but due to quirk of old
		// code, we also have the special start-of-string and end-of-string doublets to count.
		// Computed as long so that a large "Symbol Size" can not overflow.
		long symbols = symbolSize;
		long expectedEntryCount = // symbolSize^3 + (symbolSize^2)*2
			(symbols * symbols * symbols) + (symbols * symbols * 2);
		totalTrigrams += (expectedEntryCount - trigramEntryCount);

		Map<Trigram, Double> logProb = calculateLogProbs(counts, totalTrigrams);

		modelType = Objects.requireNonNullElse(modelType, "");
		Function<String, String> transformer = getStringTransformer(modelType);

		// normalize whitespace (in addition to whatever the transformer does)
		transformer = transformer
				.andThen(s -> s.trim().replaceAll(" {2,}", " ").replaceAll("\t{2,}", "\t"));

		return new TrigramStringValidator(logProb, totalTrigrams, transformer, thresholds,
			sourceFile);
	}
	catch (NumberFormatException nfe) {
		// keep the original exception as the cause so the bad value is not lost
		throw new IOException(
			"Error parsing string ngram %s:%d: %s".formatted(sourceFile.getName(), lineNum,
				currString),
			nfe);
	}
}
/**
 * Returns the string transformation that belongs to a model type.
 *
 * @param modelTypeName value of the "Model Type" header; "lowercase" selects lower-casing,
 * anything else selects the identity function
 * @return transformer function
 */
private static Function<String, String> getStringTransformer(String modelTypeName) {
	return switch (modelTypeName) {
		// Locale.ROOT keeps the result independent of the user's default locale (for
		// example, under a Turkish locale "I" would otherwise become a dotless i)
		case "lowercase" -> s -> s.toLowerCase(Locale.ROOT);
		default -> Function.identity();
	};
}
/**
 * Splits a "name: value" header line at its first colon.
 *
 * @param s header line (without the leading "#")
 * @return two-element array holding the trimmed name and the trimmed value, or null if the
 * line has no colon or starts with one
 */
private static String[] parseHeaderLine(String s) {
	int colon = s.indexOf(':');
	if (colon <= 0) {
		return null;
	}
	String name = s.substring(0, colon).trim();
	String value = s.substring(colon + 1).trim();
	return new String[] { name, value };
}
/**
 * Parses a comma-separated list of threshold values.
 *
 * @param s comma-separated decimal numbers; whitespace around each number is ignored
 * @return parsed values, in the order in which they appear
 * @throws NumberFormatException if an element is not a valid number
 */
private static double[] parseThresholds(String s) {
	return Arrays.stream(s.split(","))
			.map(String::trim)
			.mapToDouble(Double::parseDouble)
			.toArray();
}
/**
 * Converts trigram counts into log10 probabilities.
 *
 * @param counts number of occurrences of each trigram
 * @param totalTrigrams divisor used to turn a count into a probability
 * @return new map holding log10(count / totalTrigrams) for each trigram
 */
private static Map<Trigram, Double> calculateLogProbs(Map<Trigram, Integer> counts,
		long totalTrigrams) {
	// convert once so the division below is done in floating point
	final double total = totalTrigrams;
	Map<Trigram, Double> logProbs = new HashMap<>();
	counts.forEach((trigram, count) -> logProbs.put(trigram, Math.log10(count / total)));
	return logProbs;
}
}

View File

@ -0,0 +1,260 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.core.strings;
import java.util.Iterator;
import ghidra.docking.settings.Settings;
import ghidra.program.model.address.*;
import ghidra.program.model.data.AbstractStringDataType;
import ghidra.program.model.data.StringDataInstance;
import ghidra.program.model.listing.*;
import ghidra.program.model.mem.*;
import ghidra.util.task.TaskMonitor;
/**
* Iterator that searches for locations that could be strings and returns
* {@link StringDataInstance}s representing those locations.
*/
public class UndefinedStringIterator
implements Iterator<StringDataInstance>, Iterable<StringDataInstance> {
private static final int MAX_SANE_STRING_LENGTH = 1024 * 1024; // 1mb
/**
 * Returns the address range that is searched when looking for a single string: it starts
 * at the minimum address of the given set and runs to the end of the memory block that
 * contains that address, but extends no further than {@code MAX_SANE_STRING_LENGTH} past
 * the start. If no memory block contains the start, the range is just the start address.
 *
 * @param program program that supplies the memory blocks
 * @param addrs address set whose minimum address is the start of the string
 * @return address set holding the single range
 */
static AddressSet getSingleStringEndAddrRange(Program program, AddressSetView addrs) {
	Address start = addrs.getMinAddress();
	MemoryBlock containingBlock = program.getMemory().getBlock(start);

	Address end = start;
	if (containingBlock != null) {
		end = containingBlock.getEnd();
	}

	boolean tooLong = end.subtract(start) > MAX_SANE_STRING_LENGTH;
	return new AddressSet(start, tooLong ? start.add(MAX_SANE_STRING_LENGTH) : end);
}
private final TaskMonitor monitor;
private final Listing listing;
private final Program program;
private final Memory memory;
private final AddressSet addrs;
private final int charSize;
private final int charAlignment;
private final boolean breakOnRef;
private final Address singleStringStart;
private final AbstractStringDataType stringDataType;
private final Settings stringSettings;
private final long origAddrCount;
private final byte[] buffer = new byte[64];
private StringDataInstance currentItem;
/**
* Creates a new UndefinedStringIterator instance.
*
* @param program {@link Program}
* @param addrs set of {@link Address}es to search.
* @param charSize size of the characters (and the null-terminator) that make up the string
* @param charAlignment alignment requirements for the start of the string
* @param breakOnRef boolean flag, if true strings will be terminated early at locations that
* have an in-bound memory reference
* @param singleStringMode boolean flag, if true only one string will be returned, and it must
* be located at the start of the specified address set (after alignment tweaks)
* @param stringDataType a string data type that corresponds to the type of string being
* searched for
* @param stringSettings {@link Settings} for the string data type
* @param monitor {@link TaskMonitor}
*/
public UndefinedStringIterator(Program program, AddressSetView addrs, int charSize,
int charAlignment, boolean breakOnRef, boolean singleStringMode,
AbstractStringDataType stringDataType, Settings stringSettings, TaskMonitor monitor) {
this.program = program;
this.listing = program.getListing();
this.memory = program.getMemory();
this.addrs = new AddressSet(addrs);
this.charSize = charSize;
this.charAlignment = charAlignment;
this.breakOnRef = breakOnRef;
this.singleStringStart = singleStringMode ? addrs.getMinAddress() : null;
this.stringDataType = stringDataType;
this.stringSettings = stringSettings;
this.monitor = monitor;
this.origAddrCount = addrs.getNumAddresses();
monitor.initialize(origAddrCount);
}
@Override
public Iterator<StringDataInstance> iterator() {
return this;
}
@Override
public boolean hasNext() {
if (currentItem == null) {
currentItem = findNext();
}
return currentItem != null;
}
@Override
public StringDataInstance next() {
StringDataInstance result = currentItem;
currentItem = null;
return result;
}
private StringDataInstance findNext() {
forceAlignment();
while (!addrs.isEmpty()) {
if (monitor.isCancelled()) {
return null;
}
if (!findStartOfString()) {
break;
}
monitor.setProgress(origAddrCount - addrs.getNumAddresses());
Address addr = addrs.getMinAddress();
Data undefData = listing.getDataAt(addr);
if (undefData == null) {
break;
}
Address eos = findEndOfString();
if (monitor.isCancelled()) {
return null;
}
addrs.deleteFromMin(eos);
long length = eos.subtract(addr) + 1;
if (length < charSize || length > MAX_SANE_STRING_LENGTH) {
// throw away, try next string
continue;
}
StringDataInstance sdi =
stringDataType.getStringDataInstance(undefData, stringSettings, (int) length);
return sdi;
}
return null;
}
private void forceAlignment() {
while (!addrs.isEmpty() && addrs.getMinAddress().getOffset() % charAlignment != 0) {
addrs.deleteFromMin(addrs.getMinAddress());
}
}
private boolean findStartOfString() {
return consumeNullTerms() && !addrs.isEmpty();
}
private Address findEndOfString() {
// search for an end-of-string location
// 1) null terminator
// 2) inbound ref
// 3) end-of-memory-block
Address max = addrs.getFirstRange().getMaxAddress();
Address bufStart = addrs.getFirstRange().getMinAddress();
try {
do {
Address refdAddr = breakOnRef ? getNextRefdAddr(bufStart, max) : null;
if (refdAddr != null) {
max = refdAddr;
}
int bytesToRead = (int) Math.min(buffer.length, max.subtract(bufStart) + 1);
int bytesRead = memory.getBytes(bufStart, buffer, 0, bytesToRead);
if (bytesRead <= 0) {
break;
}
for (int nullIndex = 0; nullIndex <= bytesRead - charSize; nullIndex += charSize) {
if (isNullChar(nullIndex)) {
// found a null term char, return it (inclusive)
return bufStart.addNoWrap(nullIndex + charSize - 1);
}
}
if (refdAddr != null) {
// always terminate if there was a inbound ref
return refdAddr.previous();
}
// loop and read next chunk and try again
bufStart = bufStart.addNoWrap(bytesRead);
}
while (bufStart.compareTo(max) <= 0);
}
catch (MemoryAccessException | AddressOverflowException e) {
// terminate loop/method
}
return max;
}
private boolean isNullChar(int index) {
for (int i = 0; i < charSize; i++) {
if (buffer[index + i] != 0) {
return false;
}
}
return true;
}
private Address getNextRefdAddr(Address start, Address end) {
AddressIterator it = program.getReferenceManager()
.getReferenceDestinationIterator(new AddressSet(start, end), true);
Address refdAddr = null;
if (it.hasNext()) {
refdAddr = it.next();
if (start.equals(refdAddr)) {
refdAddr = it.hasNext() ? it.next() : null;
}
}
return refdAddr;
}
private boolean consumeNullTerms() {
try {
if (memory.getByte(addrs.getMinAddress()) == 0) {
int bytesRead;
while (!addrs.isEmpty() && !monitor.isCancelled() &&
(bytesRead = memory.getBytes(addrs.getMinAddress(), buffer, 0,
(int) Math.min(buffer.length, addrs.getFirstRange().getLength()))) > 0) {
int nonNullIndex;
for (nonNullIndex = 0; nonNullIndex < bytesRead; nonNullIndex++) {
if (buffer[nonNullIndex] != 0) {
nonNullIndex -= nonNullIndex % charSize;
break;
}
}
if (nonNullIndex > 0) {
addrs.deleteFromMin(addrs.getMinAddress().add(nonNullIndex - 1));
}
if (nonNullIndex < bytesRead) {
break;
}
}
}
}
catch (MemoryAccessException e) {
// terminate loop/method
}
if (singleStringStart != null &&
Math.abs(singleStringStart.subtract(addrs.getMinAddress())) >= charAlignment) {
return false;
}
return true;
}
}

View File

@ -113,56 +113,57 @@ public class ViewStringsPlugin extends ProgramPlugin implements DomainObjectList
DockingAction editDataSettingsAction =
new DockingAction("Data Settings", getName(), KeyBindingType.SHARED) {
@Override
public void actionPerformed(ActionContext context) {
try {
DataSettingsDialog dialog = provider.getSelectedRowCount() == 1
? new DataSettingsDialog(provider.getSelectedData())
: new DataSettingsDialog(currentProgram, provider.getProgramSelection());
@Override
public void actionPerformed(ActionContext context) {
try {
DataSettingsDialog dialog = provider.getSelectedRowCount() == 1
? new DataSettingsDialog(provider.getSelectedData())
: new DataSettingsDialog(currentProgram,
provider.getProgramSelection());
tool.showDialog(dialog);
dialog.dispose();
tool.showDialog(dialog);
dialog.dispose();
}
catch (CancelledException e) {
// do nothing
}
}
catch (CancelledException e) {
// do nothing
}
}
};
};
editDataSettingsAction.setPopupMenuData(new MenuData(new String[] { "Settings..." }, "R"));
editDataSettingsAction.setHelpLocation(new HelpLocation("DataPlugin", "Data_Settings"));
DockingAction editDefaultSettingsAction =
new DockingAction("Default Settings", getName(), KeyBindingType.SHARED) {
@Override
public void actionPerformed(ActionContext context) {
DataType dt = getSelectedDataType();
if (dt == null) {
return;
@Override
public void actionPerformed(ActionContext context) {
DataType dt = getSelectedDataType();
if (dt == null) {
return;
}
DataTypeSettingsDialog dataSettingsDialog =
new DataTypeSettingsDialog(dt, dt.getSettingsDefinitions());
tool.showDialog(dataSettingsDialog);
dataSettingsDialog.dispose();
}
DataTypeSettingsDialog dataSettingsDialog =
new DataTypeSettingsDialog(dt, dt.getSettingsDefinitions());
tool.showDialog(dataSettingsDialog);
dataSettingsDialog.dispose();
}
@Override
public boolean isEnabledForContext(ActionContext context) {
if (provider.getSelectedRowCount() != 1) {
return false;
@Override
public boolean isEnabledForContext(ActionContext context) {
if (provider.getSelectedRowCount() != 1) {
return false;
}
DataType dt = getSelectedDataType();
if (dt == null) {
return false;
}
return dt.getSettingsDefinitions().length != 0;
}
DataType dt = getSelectedDataType();
if (dt == null) {
return false;
}
return dt.getSettingsDefinitions().length != 0;
}
private DataType getSelectedDataType() {
Data data = provider.getSelectedData();
return data != null ? data.getDataType() : null;
}
};
private DataType getSelectedDataType() {
Data data = provider.getSelectedData();
return data != null ? data.getDataType() : null;
}
};
editDefaultSettingsAction.setPopupMenuData(
new MenuData(new String[] { "Default Settings..." }, "R"));
editDefaultSettingsAction.setHelpLocation(

View File

@ -15,8 +15,9 @@
*/
package ghidra.app.plugin.core.strings;
import java.util.HashMap;
import java.util.Map;
import java.lang.Character.UnicodeScript;
import java.util.*;
import java.util.stream.Collectors;
import docking.widgets.table.DynamicTableColumn;
import docking.widgets.table.TableColumnDescriptor;
@ -60,7 +61,8 @@ class ViewStringsTableModel extends AddressBasedTableModel<ProgramLocation> {
DATA_TYPE_COL,
IS_ASCII_COL,
CHARSET_COL,
HAS_ENCODING_ERROR
HAS_ENCODING_ERROR,
UNICODE_SCRIPT
}
ViewStringsTableModel(PluginTool tool) {
@ -95,6 +97,7 @@ class ViewStringsTableModel extends AddressBasedTableModel<ProgramLocation> {
descriptor.addHiddenColumn(new IsAsciiColumn());
descriptor.addHiddenColumn(new CharsetColumn());
descriptor.addHiddenColumn(new HasEncodingErrorColumn());
descriptor.addHiddenColumn(new UnicodeScriptColumn());
return descriptor;
}
@ -362,8 +365,9 @@ class ViewStringsTableModel extends AddressBasedTableModel<ProgramLocation> {
Data data = DataUtilities.getDataAtLocation(rowObject);
String s = StringDataInstance.getStringDataInstance(data).getStringValue();
return (s != null) && s.chars().anyMatch(
codePoint -> codePoint == StringUtilities.UNICODE_REPLACEMENT);
return (s != null) && s.codePoints()
.anyMatch(
codePoint -> codePoint == StringUtilities.UNICODE_REPLACEMENT);
}
@Override
@ -398,4 +402,35 @@ class ViewStringsTableModel extends AddressBasedTableModel<ProgramLocation> {
}
/**
 * Table column that lists the names of the {@link UnicodeScript}s found in a string,
 * leaving out the scripts in {@code CharacterScriptUtils.IGNORED_SCRIPTS}.
 */
private static class UnicodeScriptColumn
        extends AbstractProgramLocationTableColumn<ProgramLocation, String> {

    @Override
    public String getColumnName() {
        return "Unicode Script";
    }

    /**
     * Returns a comma separated list of the script names used by the string at the row's
     * location.  A string without a value is treated as an empty string.
     */
    @Override
    public String getValue(ProgramLocation rowObject, Settings settings, Program program,
            ServiceProvider serviceProvider) throws IllegalArgumentException {
        Data stringData = DataUtilities.getDataAtLocation(rowObject);
        String value = Objects.requireNonNullElse(
            StringDataInstance.getStringDataInstance(stringData).getStringValue(), "");

        Set<UnicodeScript> foundScripts = StringInfo.fromString(value).scripts();
        foundScripts.removeAll(CharacterScriptUtils.IGNORED_SCRIPTS);

        return foundScripts.stream()
                .map(UnicodeScript::name)
                .collect(Collectors.joining(","));
    }

    @Override
    public ProgramLocation getProgramLocation(ProgramLocation rowObject, Settings settings,
            Program program, ServiceProvider serviceProvider) {
        return rowObject;
    }
}
}

View File

@ -15,9 +15,10 @@
*/
package ghidra.app.services;
import java.util.List;
import java.util.*;
import ghidra.framework.plugintool.Plugin;
import ghidra.framework.plugintool.PluginTool;
import ghidra.framework.plugintool.util.PluginDescription;
import ghidra.program.model.listing.Program;
import ghidra.program.util.ProgramLocation;
@ -30,6 +31,21 @@ import ghidra.util.HelpLocation;
* and then registered via {@link Plugin}'s registerServiceProvided().
*/
public interface StringTranslationService {
/**
 * Collects the StringTranslationService providers that the tool currently reports and
 * orders them by their translation service name.
 *
 * @param tool {@link PluginTool}
 * @return new list of the current StringTranslationServices, sorted by service name
 */
public static List<StringTranslationService> getCurrentStringTranslationServices(
        PluginTool tool) {
    List<StringTranslationService> sortedServices =
        new ArrayList<>(Arrays.asList(tool.getServices(StringTranslationService.class)));
    sortedServices.sort(
        Comparator.comparing(StringTranslationService::getTranslationServiceName));
    return sortedServices;
}
/**
* Returns the name of this translation service. Used when building menus to allow
* the user to pick a translation service.
@ -56,7 +72,12 @@ public interface StringTranslationService {
* @param program the program containing the data instances.
* @param stringLocations {@link List} of string locations.
*/
public void translate(Program program, List<ProgramLocation> stringLocations);
public void translate(Program program, List<ProgramLocation> stringLocations,
TranslateOptions options);
/**
 * Options that accompany a translate request.
 *
 * @param autoTranslate boolean flag carried with the request; it is false in {@link #NONE}
 */
public record TranslateOptions(boolean autoTranslate) {
    /**
     * Shared default options, with {@code autoTranslate} set to false.  Declared final so the
     * shared instance can not be swapped out by other code.
     */
    public static final TranslateOptions NONE = new TranslateOptions(false);
}
/**
* Helper that creates a {@link HelpLocation} based on the plugin and sts.

View File

@ -0,0 +1,32 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.services;
import ghidra.app.plugin.core.strings.StringInfo;
public record StringValidatorQuery(
String stringValue,
StringInfo stringCharInfo) {
public StringValidatorQuery(String stringValue) {
this(stringValue, StringInfo.fromString(stringValue));
}
public StringValidatorQuery(String stringValue, StringInfo stringCharInfo) {
this.stringValue = stringValue;
this.stringCharInfo = stringCharInfo;
}
}

View File

@ -0,0 +1,73 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.services;
import java.util.*;
import ghidra.framework.plugintool.PluginTool;
/**
* A service that judges the validity of a string
*/
public interface StringValidatorService {

    /**
     * Returns the string validator services that the tool currently reports, ordered by
     * their validator service name.
     *
     * @param tool {@link PluginTool}
     * @return new list of services, sorted by service name
     */
    static List<StringValidatorService> getCurrentStringValidatorServices(PluginTool tool) {
        List<StringValidatorService> sortedServices =
            new ArrayList<>(List.of(tool.getServices(StringValidatorService.class)));
        sortedServices.sort(
            Comparator.comparing(StringValidatorService::getValidatorServiceName));
        return sortedServices;
    }

    /**
     * Stand-in validator whose scores come from {@link StringValidityScore#makeDummyFor(String)}.
     */
    StringValidatorService DUMMY = new DummyStringValidator();

    /**
     * Returns the name of the service
     *
     * @return name of this validator service
     */
    String getValidatorServiceName();

    /**
     * Judges a string (specified in the query instance).
     *
     * @param query {@link StringValidatorQuery}
     * @return {@link StringValidityScore}
     */
    StringValidityScore getStringValidityScore(StringValidatorQuery query);

    /**
     * Validator that performs no analysis; every string receives the dummy score.
     */
    static class DummyStringValidator implements StringValidatorService {

        @Override
        public String getValidatorServiceName() {
            return "Dummy";
        }

        @Override
        public StringValidityScore getStringValidityScore(StringValidatorQuery query) {
            String value = query.stringValue();
            return StringValidityScore.makeDummyFor(value);
        }
    }
}

View File

@ -0,0 +1,40 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.services;
/**
* Result of a {@link StringValidatorService}'s judgment about a string.
*
* @param originalString string being scored
* @param transformedString original string, after being tweaked
* @param score string's validity score, larger values are more valid
* @param threshold score that this string would need to exceed to be considered valid
*/
public record StringValidityScore(
        String originalString,
        String transformedString,
        double score,
        double threshold) {

    /**
     * Creates a placeholder result for a string that was not actually scored.  The string is
     * used as both the original and the transformed string, with a score of 0 and a threshold
     * of 100, so the result is never above its threshold.
     *
     * @param s the string
     * @return new {@link StringValidityScore}
     */
    public static StringValidityScore makeDummyFor(String s) {
        final double dummyScore = 0;
        final double dummyThreshold = 100;
        return new StringValidityScore(s, s, dummyScore, dummyThreshold);
    }

    /**
     * Returns true if the score is strictly greater than the threshold.
     *
     * @return boolean true if score exceeds threshold
     */
    public boolean isScoreAboveThreshold() {
        return threshold < score;
    }
}

View File

@ -0,0 +1,65 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.core.strings;
import static org.junit.Assert.*;
import java.io.File;
import java.io.IOException;
import java.util.List;
import org.junit.Before;
import generic.jar.ResourceFile;
import ghidra.app.plugin.core.string.NGramUtils;
import ghidra.app.plugin.core.string.StringAndScores;
import ghidra.app.services.StringValidatorQuery;
import ghidra.app.services.StringValidityScore;
import ghidra.framework.Application;
import ghidra.test.AbstractGhidraHeadlessIntegrationTest;
import utilities.util.FileUtilities;
/**
 * Compares the results of {@link TrigramStringValidator} against the scoring done by
 * {@link NGramUtils}, using the same StringModel.sng model file.
 */
public class TrigramStringValidatorTest extends AbstractGhidraHeadlessIntegrationTest {

    TrigramStringValidator ngramValidator;

    @Before
    public void setup() throws IOException {
        ResourceFile modelFile =
            Application.findDataFileInAnyModule("stringngrams/StringModel.sng");
        NGramUtils.startNewSession("StringModel.sng", true);
        ngramValidator = TrigramStringValidator.read(modelFile);
    }

    /**
     * Scores the string with both implementations and asserts that the thresholds match and
     * that both agree on whether the score is above the threshold.
     */
    private void assertSameStringScore(String s) {
        StringValidatorQuery query = new StringValidatorQuery(s);
        StringValidityScore newScore = ngramValidator.getStringValidityScore(query);

        StringAndScores oldScore = new StringAndScores(s, true);
        NGramUtils.scoreString(oldScore);

        assertEquals(oldScore.getScoreThreshold(), newScore.threshold(), 0.0);
        assertEquals(oldScore.isScoreAboveThreshold(), newScore.isScoreAboveThreshold());
    }

    // Not enabled as a test: it reads "lotsofstrings.txt" via a relative path.
    //@Test
    public void testCompareOldAndNewScoring() throws IOException {
        FileUtilities.getLines(new File("lotsofstrings.txt"))
                .forEach(this::assertSameStringScore);
    }
}

View File

@ -15,11 +15,12 @@
*/
package docking.widgets.spinner;
import java.util.ArrayList;
import java.util.List;
import java.awt.event.KeyAdapter;
import java.awt.event.KeyEvent;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.List;
import javax.swing.JSpinner;
import javax.swing.SpinnerNumberModel;
@ -43,10 +44,20 @@ public class IntegerSpinner {
* @param spinnerModel the spinner model to use in the JSpinner.
*/
public IntegerSpinner(SpinnerNumberModel spinnerModel) {
this(spinnerModel, 10);
}
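/**
 * Creates a new IntegerSpinner using the given spinner model and text field width.
 *
 * @param spinnerModel the spinner model to use in the JSpinner.
 * @param columns the number of columns passed to the spinner's IntegerTextField.
 */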
public IntegerSpinner(SpinnerNumberModel spinnerModel, int columns) {
spinner = new JSpinner(spinnerModel);
integerTextField = new IntegerTextField(10, ((Number) spinnerModel.getValue()).longValue());
integerTextField =
new IntegerTextField(columns, ((Number) spinnerModel.getValue()).longValue());
integerTextField.getComponent().setName("integer.spinner.editor");
Number maximum = (Number) spinnerModel.getMaximum();
integerTextField.setMaxValue(

View File

@ -142,7 +142,7 @@ public abstract class GDynamicColumnTableModel<ROW_TYPE, DATA_SOURCE>
}
}
private TableColumnDescriptor<ROW_TYPE> getTableColumnDescriptor() {
protected TableColumnDescriptor<ROW_TYPE> getTableColumnDescriptor() {
if (columnDescriptor == null) {
columnDescriptor = createTableColumnDescriptor();
}

View File

@ -26,8 +26,7 @@ import java.util.*;
import generic.stl.Pair;
import ghidra.docking.settings.*;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressOutOfBoundsException;
import ghidra.program.model.address.*;
import ghidra.program.model.data.RenderUnicodeSettingsDefinition.RENDER_ENUM;
import ghidra.program.model.data.StringRenderParser.StringParseException;
import ghidra.program.model.lang.Endian;
@ -380,6 +379,19 @@ public class StringDataInstance {
return buf.getAddress();
}
/**
 * Returns the address that is {@code length - 1} bytes past the start address of this
 * string instance.
 * <p>
 * The start address is returned instead when the length is not positive, or when the end
 * address can not be computed without overflowing.
 *
 * @return end {@link Address} (inclusive) of this instance
 */
public Address getEndAddress() {
    Address start = buf.getAddress();
    if (length <= 0) {
        return start;
    }
    try {
        return start.addNoWrap(length - 1);
    }
    catch (AddressOverflowException e) {
        // fall back to a single-address result
        return start;
    }
}
/**
 * Returns the range that spans from {@link #getAddress()} to {@link #getEndAddress()},
 * inclusive.
 *
 * @return new {@link AddressRange}
 */
public AddressRange getAddressRange() {
    Address start = getAddress();
    Address end = getEndAddress();
    return new AddressRangeImpl(start, end);
}
private boolean isBadCharSize() {
return (paddedCharSize < 1 || paddedCharSize > 8) ||
!(charSize == 1 || charSize == 2 || charSize == 4) || (paddedCharSize < charSize);

View File

@ -0,0 +1,114 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package help.screenshot;
import java.nio.charset.StandardCharsets;
import org.junit.Before;
import org.junit.Test;
import ghidra.app.plugin.core.strings.EncodedStringsDialog;
import ghidra.app.plugin.core.strings.EncodedStringsPlugin;
import ghidra.app.services.ProgramManager;
import ghidra.app.util.HelpTopics;
import ghidra.test.ToyProgramBuilder;
import ghidra.util.Swing;
/**
 * Generates the help screen shots of the {@link EncodedStringsDialog}.
 */
public class EncodedStringsDialogScreenShots extends GhidraScreenShotGenerator {

    // bounds of the listing selection that the dialog is opened on
    private static final int SELECTION_START = 0x50;
    private static final int SELECTION_END = 0x500;

    private EncodedStringsPlugin plugin;

    public EncodedStringsDialogScreenShots() {
        super();
    }

    @Override
    @Before
    public void setUp() throws Exception {
        super.setUp();
        plugin = env.addPlugin(EncodedStringsPlugin.class);
    }

    /**
     * Builds and opens a toy program that has a handful of strings in several charsets.
     */
    @Override
    public void loadProgram() throws Exception {
        ToyProgramBuilder builder = new ToyProgramBuilder("String Examples", false);
        builder.createMemory("RAM", "0x0", 0x2000);

        builder.createString("0x100", "Hello World!\n", StandardCharsets.US_ASCII, true, null);
        builder.createString("0x150", bytes(0, 1, 2, 3, 4, 0x80, 0x81, 0x82, 0x83),
            StandardCharsets.US_ASCII, null);
        builder.createString("0x200", "\u6211\u96bb\u6c23\u588a\u8239\u88dd\u6eff\u6652\u9c54",
            StandardCharsets.UTF_16, true, null);
        builder.createString("0x250", "Exception %s\n\tline: %d\n", StandardCharsets.US_ASCII,
            true, null);
        builder.createString("0x330",
            "A: \u6211\u96bb\u6c23\u588a\u8239\u88dd\u6eff\u6652\u9c54", StandardCharsets.UTF_8,
            true, null);
        builder.createString("0x450",
            "Roses are \u001b[0;31mred\u001b[0m, violets are \u001b[0;34mblue. Hope you enjoy terminal hue",
            StandardCharsets.US_ASCII, true, null);

        program = builder.getProgram();

        runSwing(() -> {
            ProgramManager programManager = tool.getService(ProgramManager.class);
            programManager.openProgram(program.getDomainFile());
        });
    }

    @Override
    protected String getHelpTopicName() {
        return HelpTopics.SEARCH;
    }

    @Test
    public void testEncodedStringsDialog_initial() {
        EncodedStringsDialog dialog = openDialogOnSelection();

        waitForTableModel(dialog.getStringModel());

        captureDialog(600, 300);
    }

    @Test
    public void testEncodedStringsDialog_advancedoptions() {
        EncodedStringsDialog dialog = openDialogOnSelection();

        Swing.runNow(() -> {
            dialog.setShowAdvancedOptions(true);
            dialog.setShowScriptOptions(true);
            dialog.setAllowAnyScriptOption(true);
            dialog.setRequireValidStringOption(false);
            dialog.setSelectedCharset("UTF-8");
        });

        waitForTableModel(dialog.getStringModel());

        captureDialog(600, 450);
    }

    /**
     * Selects the region of the listing that holds the example strings, invokes the plugin's
     * search action and waits for the dialog to appear.
     */
    private EncodedStringsDialog openDialogOnSelection() {
        positionListingTop(SELECTION_START);
        makeSelection(SELECTION_START, SELECTION_END);
        performAction(plugin.getSearchForEncodedStringsAction());
        return waitForDialogComponent(EncodedStringsDialog.class);
    }
}

View File

@ -0,0 +1,183 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.core.strings;
import static org.junit.Assert.*;
import java.lang.Character.UnicodeScript;
import java.nio.charset.StandardCharsets;
import org.junit.*;
import docking.action.DockingActionIf;
import ghidra.framework.plugintool.PluginTool;
import ghidra.program.database.ProgramDB;
import ghidra.program.model.address.*;
import ghidra.program.model.data.AbstractStringDataType;
import ghidra.program.model.listing.Data;
import ghidra.program.model.mem.MemoryBlock;
import ghidra.test.*;
import ghidra.util.Swing;
/**
 * Headed integration tests for the {@link EncodedStringsDialog}: each test opens the dialog
 * on a selection of a small toy program and checks the rows of the dialog's table model.
 */
public class EncodedStringsDialogTest extends AbstractGhidraHeadedIntegrationTest {
private TestEnv env;
private ProgramDB program;
private PluginTool tool;
private MemoryBlock ram;
private DockingActionIf encodedStringsAction;
// dialog and its table model for the current test; both are null while no dialog is showing
private EncodedStringsDialog dialog;
private EncodedStringsTableModel tableModel;
private EncodedStringsPlugin plugin;
/**
 * Builds the toy program, launches the default tool on it and adds the plugin under test.
 */
@Before
public void setUp() throws Exception {
env = new TestEnv();
program = buildProgram();
ram = program.getMemory().getBlock("RAM");
tool = env.launchDefaultTool(program);
plugin = env.addPlugin(EncodedStringsPlugin.class);
encodedStringsAction = plugin.getSearchForEncodedStringsAction();
}
/**
 * Creates a program with a 0x500 byte "RAM" block that holds example strings in the
 * US-ASCII, UTF-16 and UTF-8 charsets, plus one byte sequence with control and high-bit bytes.
 */
private ProgramDB buildProgram() throws Exception {
ToyProgramBuilder builder = new ToyProgramBuilder("String Examples", false);
builder.createMemory("RAM", "0x0", 0x500);
builder.createString("0x100", "Hello World!\n", StandardCharsets.US_ASCII, true, null);
builder.createString("0x10e", "Next string", StandardCharsets.US_ASCII, true, null);
builder.createString("0x150", bytes(0, 1, 2, 3, 4, 0x80, 0x81, 0x82, 0x83),
StandardCharsets.US_ASCII, null);
builder.createString("0x200", "\u6211\u96bb\u6c23\u588a\u8239\u88dd\u6eff\u6652\u9c54",
StandardCharsets.UTF_16, true, null);
builder.createString("0x250", "Exception %s\n\tline: %d\n", StandardCharsets.US_ASCII, true,
null);
builder.createString("0x330", "A: \u6211\u96bb\u6c23\u588a\u8239\u88dd\u6eff\u6652\u9c54",
StandardCharsets.UTF_8, true, null);
builder.createString("0x450",
"Roses are \u001b[0;31mred\u001b[0m, violets are \u001b[0;34mblue. Hope you enjoy terminal hue",
StandardCharsets.US_ASCII, true, null);
return builder.getProgram();
}
/**
 * Returns the address at the given offset, in the same address space as the RAM block.
 */
private Address addr(long offset) {
return ram.getStart().getNewAddress(offset);
}
@After
public void tearDown() throws Exception {
closeDialog();
env.dispose();
}
/**
 * Closes the dialog (if one is showing) and clears the references to it and its model.
 */
private void closeDialog() {
if (dialog != null) {
close(dialog);
dialog = null;
tableModel = null;
}
}
/**
 * Selects the address range, invokes the search action, and waits for the dialog and its
 * table model to finish loading.  Any dialog left over from an earlier call is closed first.
 */
private void showDialog(AddressRange range) {
closeDialog();
makeSelection(tool, program, range.getMinAddress(), range.getMaxAddress());
// second argument is false: presumably "don't wait for the action to finish", since the
// action shows a dialog -- TODO confirm against performAction()
performAction(encodedStringsAction, false);
dialog = waitForDialogComponent(EncodedStringsDialog.class);
tableModel = dialog.getStringModel();
waitForTableModel(tableModel);
}
/**
 * With the dialog's initial settings, searching the entire RAM block yields 3 rows.
 */
@Test
public void testDefaultUSASCII() {
showDialog(ram.getAddressRange());
assertEquals(3, tableModel.getRowCount());
}
/**
 * Selecting just the single address 0x100 yields exactly one row, holding the whole string
 * that starts at that address.
 */
@Test
public void testSingleString() {
showDialog(new AddressRangeImpl(addr(0x100), addr(0x100)));
assertEquals(1, tableModel.getRowCount());
EncodedStringsRow row0 = tableModel.getRowObject(0);
assertEquals("Hello World!\n", row0.stringInfo().stringValue());
}
/**
 * Switching the charset to UTF-8 yields 4 rows for the entire RAM block.
 */
@Test
public void testUTF8() {
showDialog(ram.getAddressRange());
Swing.runNow(() -> {
dialog.setSelectedCharset("UTF-8");
});
waitForTableModel(tableModel);
assertEquals(4, tableModel.getRowCount());
}
/**
 * With UTF-8 selected and the "exclude non-standard control chars" option turned off,
 * 5 rows are found.
 */
@Test
public void testUTF8_Nonstdctrlchars() {
showDialog(ram.getAddressRange());
Swing.runNow(() -> {
dialog.setShowAdvancedOptions(true);
dialog.setExcludeNonStdCtrlChars(false);
dialog.setSelectedCharset("UTF-8");
});
waitForTableModel(tableModel);
assertEquals(5, tableModel.getRowCount());
}
/**
 * With UTF-8 selected and the valid-string requirement turned off, 4 rows are found; then
 * requiring the HAN script (while still allowing Latin and Common) narrows that to 1 row.
 */
@Test
public void testUTF8_HanScript() {
showDialog(ram.getAddressRange());
Swing.runNow(() -> {
dialog.setShowAdvancedOptions(true);
dialog.setRequireValidStringOption(false);
dialog.setSelectedCharset("UTF-8");
});
waitForTableModel(tableModel);
assertEquals(4, tableModel.getRowCount());
Swing.runNow(() -> {
dialog.setShowScriptOptions(true);
dialog.setShowAdvancedOptions(true);
dialog.setRequireValidStringOption(false);
dialog.setAllowAnyScriptOption(false);
dialog.setAllowLatinScriptOption(true);
dialog.setAllowCommonScriptOption(true);
dialog.setRequiredScript(UnicodeScript.HAN);
});
waitForTableModel(tableModel);
assertEquals(1, tableModel.getRowCount());
}
/**
 * Clicking the dialog's create button turns the undefined data at 0x100 into data that
 * has a string data type.
 */
@Test
public void testCreateString() {
// precondition: nothing is defined at the location of the first string
Data data = program.getListing().getDataAt(addr(0x100));
assertFalse(data.isDefined());
showDialog(ram.getAddressRange());
assertEquals(3, tableModel.getRowCount());
dialog.getCreateButton().doClick();
waitForSwing();
data = program.getListing().getDataAt(addr(0x100));
assertNotNull(data);
assertTrue(data.getDataType() instanceof AbstractStringDataType);
}
}
}