mirror of
https://github.com/NationalSecurityAgency/ghidra.git
synced 2024-11-22 04:05:39 +00:00
GT-2824 - Comments - Upgrade to LCS for diffing
This commit is contained in:
parent
fefb0f0208
commit
e5aaaa9a19
@ -1,150 +0,0 @@
|
||||
/* ###
|
||||
* IP: GHIDRA
|
||||
* REVIEWED: YES
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package generic.algorithms;
|
||||
|
||||
import ghidra.util.exception.CancelledException;
|
||||
import ghidra.util.task.TaskMonitor;
|
||||
import ghidra.util.task.TaskMonitorAdapter;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Abstract class for finding the LCS between two sequences of Matchable
|
||||
* objects.
|
||||
*
|
||||
*
|
||||
*
|
||||
* @param <T> the type of the objects being compared.
|
||||
*/
|
||||
public abstract class LCS<T> {
|
||||
private int[][] c;
|
||||
|
||||
/**
|
||||
* Convenient constructor for initializing elements in subclasses
|
||||
*/
|
||||
protected LCS() {
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the length of the X sequence.
|
||||
*/
|
||||
protected abstract int lengthOfX();
|
||||
|
||||
/**
|
||||
* @return the length of the Y sequence.
|
||||
*/
|
||||
protected abstract int lengthOfY();
|
||||
|
||||
/**
|
||||
* @param index the position of interest in the X sequence.
|
||||
* @return the value in the X sequence at <code>index</code>.
|
||||
* Assumes 1-indexing.
|
||||
*/
|
||||
protected abstract T valueOfX(int index);
|
||||
|
||||
/**
|
||||
* @param index the position of interest in the Y sequence.
|
||||
* @return the value in the Y sequence at <code>index</code>.
|
||||
* Assumes 1-indexing.
|
||||
*/
|
||||
protected abstract T valueOfY(int index);
|
||||
|
||||
/**
|
||||
* @param x the X-sequence element of interest
|
||||
* @param y the Y-sequence element of interest
|
||||
* @return true if <code>x</code> matches <code>y</code>; false otherwise.
|
||||
*/
|
||||
protected abstract boolean matches(T x, T y);
|
||||
|
||||
/**
|
||||
* Compute the LCS
|
||||
* @param monitor
|
||||
*/
|
||||
private void calculateLCS(TaskMonitor monitor) throws CancelledException {
|
||||
if (c != null) {
|
||||
return;
|
||||
}
|
||||
|
||||
int[][] tempC = new int[lengthOfX() + 1][];
|
||||
|
||||
monitor.setMessage("Calculating LCS...");
|
||||
monitor.initialize(tempC.length);
|
||||
|
||||
for (int i = 0; i < tempC.length; i++) {
|
||||
// Java int arrays are automatically initialized to 0
|
||||
tempC[i] = new int[lengthOfY() + 1];
|
||||
}
|
||||
|
||||
for (int i = 1; i < tempC.length; i++) {
|
||||
monitor.checkCanceled();
|
||||
for (int j = 1; j < tempC[i].length; j++) {
|
||||
if (matches(valueOfX(i), valueOfY(j))) {
|
||||
tempC[i][j] = tempC[i - 1][j - 1] + 1;
|
||||
}
|
||||
else {
|
||||
tempC[i][j] = Math.max(tempC[i][j - 1], tempC[i - 1][j]);
|
||||
}
|
||||
}
|
||||
monitor.incrementProgress(1);
|
||||
}
|
||||
|
||||
c = tempC;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return a <code>List<T></code> of elements in the LCS.
|
||||
*/
|
||||
public List<T> getLCS() {
|
||||
try {
|
||||
return getLCS(TaskMonitorAdapter.DUMMY_MONITOR);
|
||||
}
|
||||
catch (CancelledException e) {
|
||||
// can't happen with a dummy monitor
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public List<T> getLCS(TaskMonitor monitor) throws CancelledException {
|
||||
calculateLCS(monitor);
|
||||
return getLCSHelperIterative(lengthOfX(), lengthOfY());
|
||||
}
|
||||
|
||||
/**
|
||||
* Iterative helper function for getLCS().
|
||||
* @param i the current row index
|
||||
* @param j the current column index
|
||||
* @return the LCS after analyzing element c[i, j].
|
||||
*/
|
||||
private List<T> getLCSHelperIterative(int i, int j) {
|
||||
ArrayList<T> result = new ArrayList<T>();
|
||||
while (i > 0 && j > 0) {
|
||||
if (c[i][j] == c[i - 1][j - 1] + 1 && matches(valueOfX(i), valueOfY(j))) {
|
||||
result.add(0, valueOfX(i));
|
||||
--i;
|
||||
--j;
|
||||
}
|
||||
else if (c[i][j] == c[i - 1][j]) {
|
||||
--i;
|
||||
}
|
||||
else {
|
||||
--j;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
@ -0,0 +1,214 @@
|
||||
/* ###
|
||||
* IP: GHIDRA
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package generic.algorithms;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import ghidra.util.exception.CancelledException;
|
||||
import ghidra.util.task.TaskMonitor;
|
||||
|
||||
/**
|
||||
* Abstract class for finding the Longest Common Subsequence (LCS) between two
|
||||
* sequences of Matchable objects, <code>x</code> and <code>y</code>.
|
||||
*
|
||||
* <p>The performance of this algorithm is O(n^2). Thus, large inputs can cause much processor
|
||||
* and memory usage. This class has an upper limit (see {@link #getSizeLimit()}) to prevent
|
||||
* accidental system failure.
|
||||
*
|
||||
* @param <T> the type of the objects being compared
|
||||
*/
|
||||
public abstract class Lcs<T> {
|
||||
|
||||
/**
|
||||
* Somewhat arbitrary upper-bound restriction. 1M is 1000 * 1000
|
||||
*/
|
||||
private static int DEFAULT_SIZE_LIMIT = 1_000_000;
|
||||
private int sizeLimit = DEFAULT_SIZE_LIMIT;
|
||||
|
||||
private int[][] c;
|
||||
|
||||
/**
|
||||
* Returns the length of the x sequence
|
||||
* @return the length of the x sequence
|
||||
*/
|
||||
protected abstract int lengthOfX();
|
||||
|
||||
/**
|
||||
* Returns the length of the y sequence
|
||||
* @return the length of the y sequence
|
||||
*/
|
||||
protected abstract int lengthOfY();
|
||||
|
||||
/**
|
||||
* Gets the value of the x sequence at the given index, where index is 1-based
|
||||
*
|
||||
* @param index the 1-based position of interest in the x sequence
|
||||
* @return the value in the x sequence at <code>index</code>
|
||||
*/
|
||||
protected abstract T valueOfX(int index);
|
||||
|
||||
/**
|
||||
* Gets the value of the y sequence at the given index, where index is 1-based
|
||||
*
|
||||
* @param index the 1-based position of interest in the Y sequence
|
||||
* @return the value in the y sequence at <code>index</code>
|
||||
*/
|
||||
protected abstract T valueOfY(int index);
|
||||
|
||||
/**
|
||||
* Returns true if the value of x and y match
|
||||
*
|
||||
* @param x the x-sequence element of interest
|
||||
* @param y the y-sequence element of interest
|
||||
* @return true if <code>x</code> matches <code>y</code>; false otherwise
|
||||
*/
|
||||
protected abstract boolean matches(T x, T y);
|
||||
|
||||
/**
|
||||
* Compute the LCS
|
||||
* @param monitor the task monitor
|
||||
*/
|
||||
private void calculateLCS(TaskMonitor monitor) throws CancelledException {
|
||||
if (c != null) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (tooBig()) {
|
||||
c = new int[0][0];
|
||||
return;
|
||||
}
|
||||
|
||||
int[][] tempC = new int[lengthOfX() + 1][];
|
||||
|
||||
monitor.setMessage("Calculating LCS...");
|
||||
monitor.initialize(tempC.length);
|
||||
|
||||
// create the zero-initialized matrix
|
||||
for (int i = 0; i < tempC.length; i++) {
|
||||
tempC[i] = new int[lengthOfY() + 1];
|
||||
}
|
||||
|
||||
for (int i = 1; i < tempC.length; i++) {
|
||||
monitor.checkCanceled();
|
||||
for (int j = 1; j < tempC[i].length; j++) {
|
||||
if (matches(valueOfX(i), valueOfY(j))) {
|
||||
tempC[i][j] = tempC[i - 1][j - 1] + 1;
|
||||
}
|
||||
else {
|
||||
tempC[i][j] = Math.max(tempC[i][j - 1], tempC[i - 1][j]);
|
||||
}
|
||||
}
|
||||
monitor.incrementProgress(1);
|
||||
}
|
||||
|
||||
c = tempC;
|
||||
}
|
||||
|
||||
/**
|
||||
* Defines an limit in the overall size of the inputs that above which no processing will
|
||||
* take place. Any value over the limit will produce an empty LCS.
|
||||
*
|
||||
* @return true if too big
|
||||
*/
|
||||
private boolean tooBig() {
|
||||
return lengthOfX() * lengthOfY() > sizeLimit;
|
||||
}
|
||||
|
||||
/**
|
||||
* Changes the size limit of this LCS, past which no calculations will be performed
|
||||
*
|
||||
* @param newLimit the new limit
|
||||
*/
|
||||
public void setSizeLimit(int newLimit) {
|
||||
this.sizeLimit = newLimit;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the current size limit, past which no calculations will be performed
|
||||
*
|
||||
* @return the size limit
|
||||
* @see #setSizeLimit(int)
|
||||
*/
|
||||
public int getSizeLimit() {
|
||||
return sizeLimit;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a list of the longest common subsequence. This result will be empty if the
|
||||
* {@link #getSizeLimit()} has been reached.
|
||||
*
|
||||
* @return the list
|
||||
*/
|
||||
public List<T> getLcs() {
|
||||
try {
|
||||
return getLcs(TaskMonitor.DUMMY);
|
||||
}
|
||||
catch (CancelledException e) {
|
||||
// can't happen with a dummy monitor
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a list of the longest common subsequence. This result will be empty if the
|
||||
* {@link #getSizeLimit()} has been reached.
|
||||
*
|
||||
* @param monitor the task monitor
|
||||
* @return the LCS list
|
||||
* @throws CancelledException if the monitor is cancelled
|
||||
*/
|
||||
public List<T> getLcs(TaskMonitor monitor) throws CancelledException {
|
||||
calculateLCS(monitor);
|
||||
return doGetLcs(monitor);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the actual LCS based upon the already created matrix
|
||||
*
|
||||
* @param monitor the task monitor
|
||||
* @return the LCS list
|
||||
* @throws CancelledException if the monitor is cancelled
|
||||
*/
|
||||
protected List<T> doGetLcs(TaskMonitor monitor) throws CancelledException {
|
||||
|
||||
int x = 0;
|
||||
int y = 0;
|
||||
|
||||
if (c.length > 0) {
|
||||
x = lengthOfX();
|
||||
y = lengthOfY();
|
||||
}
|
||||
|
||||
List<T> result = new ArrayList<>();
|
||||
while (x > 0 && y > 0) {
|
||||
monitor.checkCanceled();
|
||||
|
||||
if (c[x][y] == c[x - 1][y - 1] + 1 && matches(valueOfX(x), valueOfY(y))) {
|
||||
result.add(0, valueOfX(x));
|
||||
--x;
|
||||
--y;
|
||||
}
|
||||
else if (c[x][y] == c[x - 1][y]) {
|
||||
--x;
|
||||
}
|
||||
else {
|
||||
--y;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
@ -0,0 +1,201 @@
|
||||
/* ###
|
||||
* IP: GHIDRA
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package generic.algorithms;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import ghidra.util.exception.CancelledException;
|
||||
import ghidra.util.task.TaskMonitor;
|
||||
|
||||
/**
|
||||
* Calculates the longest common subsequence (LCS) between two sequences of Matchable
|
||||
* objects, <code>x</code> and <code>y</code>.
|
||||
*
|
||||
* <p>This is an optimizing version of the {@link Lcs} that will pre-calculate all similar
|
||||
* items from the beginning and end of the two given sequences. Doing this will reduce
|
||||
* the size of the matrix created by the parent class, greatly so in the case that the
|
||||
* two inputs are mostly the same in the beginning and end. (Imagine an edit of a source
|
||||
* code file, where the typical change is somewhere in the middle of the file. In this example,
|
||||
* the optimization performed here can greatly decrease the amount of work to be performed when
|
||||
* calculating the LCS.)
|
||||
*
|
||||
* <p>Note: the parent LCS algorithm is bound by {@link #getSizeLimit()}. However, this class
|
||||
* allows clients to work around this restriction when the data has a similar beginning and ending,
|
||||
* as the similar parts will not be counted against the size limit.
|
||||
*
|
||||
* @param <I> The input sequence type
|
||||
* @param <T> the individual element type of the input sequence
|
||||
*/
|
||||
public abstract class ReducingLcs<I, T> extends Lcs<T> {
|
||||
|
||||
private I xSource; // full input x
|
||||
private I ySource; // full input y
|
||||
|
||||
private I x; // the reduced input x
|
||||
private I y; // the reduced input y
|
||||
|
||||
private int startn; // number of beginning same entries
|
||||
private int endn; // number of trailing same entries
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
*
|
||||
* @param ix the input sequence <code>x</code>
|
||||
* @param iy the input sequence <code>y</code>
|
||||
*/
|
||||
public ReducingLcs(I ix, I iy) {
|
||||
this.xSource = ix;
|
||||
this.ySource = iy;
|
||||
|
||||
startn = getMatchCountFromStart();
|
||||
endn = getMatchCountFromEnd();
|
||||
int endx = getEnd(xSource);
|
||||
int endy = getEnd(ySource);
|
||||
this.x = reduce(ix, startn, endx);
|
||||
this.y = reduce(iy, startn, endy);
|
||||
}
|
||||
|
||||
private int getEnd(I i) {
|
||||
int end = lengthOf(i) - endn;
|
||||
if (end <= startn) {
|
||||
// boundary condition when the change is only a delete or insert
|
||||
end = startn;
|
||||
}
|
||||
return end;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a subsequence from the given input sequence.
|
||||
*
|
||||
* @param i the input sequence; 0-based (x or y)
|
||||
* @param start the start index; 0-based (inclusive)
|
||||
* @param end the end index (exclusive)
|
||||
* @return the subsequence
|
||||
*/
|
||||
protected abstract I reduce(I i, int start, int end);
|
||||
|
||||
/**
|
||||
* Return the length of the given sequence
|
||||
*
|
||||
* @param i the input sequence (x or y)
|
||||
* @return the length
|
||||
*/
|
||||
protected abstract int lengthOf(I i);
|
||||
|
||||
/**
|
||||
* Return the value at the given 0-based offset
|
||||
*
|
||||
* @param i the input sequence (x or y)
|
||||
* @param offset the offset
|
||||
* @return the value
|
||||
*/
|
||||
protected abstract T valueOf(I i, int offset);
|
||||
|
||||
@Override
|
||||
protected List<T> doGetLcs(TaskMonitor monitor) throws CancelledException {
|
||||
|
||||
List<T> reducedLcs = super.doGetLcs(monitor);
|
||||
int size = reducedLcs.size() + lengthOf(x) + lengthOf(y);
|
||||
List<T> lcs = new ArrayList<>(size);
|
||||
|
||||
// add the shared beginning
|
||||
for (int i = 0; i < startn; i++) {
|
||||
monitor.checkCanceled();
|
||||
lcs.add(valueOf(xSource, i));
|
||||
}
|
||||
|
||||
// add the calculated LCS
|
||||
lcs.addAll(reducedLcs);
|
||||
|
||||
// add the shared end
|
||||
int length = lengthOf(xSource);
|
||||
int endx = getEnd(xSource);
|
||||
for (int i = endx; i < length; i++) {
|
||||
monitor.checkCanceled();
|
||||
lcs.add(valueOf(xSource, i));
|
||||
}
|
||||
|
||||
return lcs;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int lengthOfX() {
|
||||
return lengthOf(x);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int lengthOfY() {
|
||||
return lengthOf(y);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected T valueOfX(int index) {
|
||||
return valueOf(x, index - 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected T valueOfY(int index) {
|
||||
return valueOf(y, index - 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean matches(T tx, T ty) {
|
||||
return tx.equals(ty);
|
||||
}
|
||||
|
||||
//==================================================================================================
|
||||
// Private Methods
|
||||
//==================================================================================================
|
||||
|
||||
private int getMatchCountFromStart() {
|
||||
|
||||
// scan past the beginning of all equal items
|
||||
int n = 0;
|
||||
int xl = lengthOf(xSource);
|
||||
int yl = lengthOf(ySource);
|
||||
while (n < xl && n < yl) {
|
||||
T xt = valueOf(xSource, n);
|
||||
T yt = valueOf(ySource, n);
|
||||
if (!matches(xt, yt)) {
|
||||
return n;
|
||||
}
|
||||
n++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
private int getMatchCountFromEnd() {
|
||||
|
||||
// scan past the trailing equal items
|
||||
int xi = lengthOf(xSource) - 1;
|
||||
int yi = lengthOf(ySource) - 1;
|
||||
|
||||
int n = 0;
|
||||
for (; xi >= 0 && yi >= 0; xi--, yi--) {
|
||||
T xt = valueOf(xSource, xi);
|
||||
T yt = valueOf(ySource, yi);
|
||||
if (!matches(xt, yt)) {
|
||||
return n == 0 ? 0 : n - 1;
|
||||
}
|
||||
n++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,51 @@
|
||||
/* ###
|
||||
* IP: GHIDRA
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package generic.algorithms;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* An implementation of the {@link ReducingLcs} that takes as its input a list of <T> items, where
|
||||
* the list is the 'sequence' being checked for the Longest Common Subsequence.
|
||||
*
|
||||
* @param <T> the type of the item in the sequence of items
|
||||
*/
|
||||
public class ReducingListBasedLcs<T> extends ReducingLcs<List<T>, T> {
|
||||
|
||||
public ReducingListBasedLcs(List<T> x, List<T> y) {
|
||||
super(x, y);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean matches(T x, T y) {
|
||||
return x.equals(y);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<T> reduce(List<T> i, int start, int end) {
|
||||
return i.subList(start, end);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int lengthOf(List<T> i) {
|
||||
return i.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected T valueOf(List<T> i, int offset) {
|
||||
return i.get(offset);
|
||||
}
|
||||
}
|
@ -16,21 +16,18 @@
|
||||
package generic.algorithms;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.junit.Test;
|
||||
|
||||
import generic.test.AbstractGenericTest;
|
||||
|
||||
public class LCSTest extends AbstractGenericTest {
|
||||
|
||||
public LCSTest() {
|
||||
super();
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIdentical() {
|
||||
compareStrings("DEADBEEF", "DEADBEEF", "DEADBEEF");
|
||||
@ -46,36 +43,53 @@ public class LCSTest extends AbstractGenericTest {
|
||||
"Hooray for really loooooong strings that span multiple lines in java!",
|
||||
"Some really long string that might complicate things." +
|
||||
"Hooray for really long strings that span multiple lines!");
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDifferent() {
|
||||
|
||||
compareStrings("DEAD", "CANND", "AD");
|
||||
compareStrings("DEADBEEFISGOOD", "CANNDBEEFISBAD", "ADBEEFISD");
|
||||
compareStrings("this here is one string", "here a different string is", "here in string");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSizeLimit() {
|
||||
|
||||
String input = "This is more than 5 characters";
|
||||
StringLcs slcs = new StringLcs(input, input);
|
||||
List<Character> lcs = slcs.getLcs();
|
||||
String result = StringUtils.join(lcs, "");
|
||||
assertEquals(input, result);
|
||||
|
||||
slcs = new StringLcs(input, input);
|
||||
slcs.setSizeLimit(10);
|
||||
List<Character> actual = slcs.getLcs();
|
||||
assertTrue(actual.isEmpty());
|
||||
}
|
||||
|
||||
private void compareStrings(String x, String y, String expected) {
|
||||
StringLCS slcs = new StringLCS(x, y);
|
||||
List<Character> actual = slcs.getLCS();
|
||||
|
||||
StringLcs slcs = new StringLcs(x, y);
|
||||
List<Character> actual = slcs.getLcs();
|
||||
|
||||
assertEquals(convertString(expected), actual);
|
||||
}
|
||||
|
||||
private List<Character> convertString(String s) {
|
||||
List<Character> charList = new ArrayList<Character>();
|
||||
for (char c : s.toCharArray())
|
||||
List<Character> charList = new ArrayList<>();
|
||||
for (char c : s.toCharArray()) {
|
||||
charList.add(c);
|
||||
}
|
||||
return charList;
|
||||
}
|
||||
|
||||
private class StringLCS extends LCS<Character> {
|
||||
private class StringLcs extends Lcs<Character> {
|
||||
|
||||
private String x;
|
||||
private String y;
|
||||
|
||||
public StringLCS(String x, String y) {
|
||||
public StringLcs(String x, String y) {
|
||||
super();
|
||||
this.x = x;
|
||||
this.y = y;
|
||||
@ -105,6 +119,5 @@ public class LCSTest extends AbstractGenericTest {
|
||||
protected Character valueOfY(int index) {
|
||||
return y.charAt(index - 1);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,124 @@
|
||||
/* ###
|
||||
* IP: GHIDRA
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package generic.algorithms;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.junit.Test;
|
||||
|
||||
public class ReducingLCSTest {
|
||||
|
||||
@Test
|
||||
public void testIdentical() {
|
||||
compareStrings("DEADBEEF", "DEADBEEF", "DEADBEEF");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSimilar() {
|
||||
compareStrings("DEADBEEF", "DEEDBEAD", "DEDBE");
|
||||
compareStrings(
|
||||
"Some really long string that might complicate things." +
|
||||
"Hooray for really long strings that span multiple lines!",
|
||||
"Some other really long string that might complicate things." +
|
||||
"Hooray for really loooooong strings that span multiple lines in java!",
|
||||
"Some really long string that might complicate things." +
|
||||
"Hooray for really long strings that span multiple lines!");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDifferent() {
|
||||
|
||||
compareStrings("DEAD", "CANND", "AD");
|
||||
compareStrings("DEADBEEFISGOOD", "CANNDBEEFISBAD", "ADBEEFISD");
|
||||
compareStrings("this here is one string", "here a different string is", "here in string");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testInsertOnly() {
|
||||
|
||||
String x = "Line not modified";
|
||||
String y = "Line not not modified";
|
||||
compareStrings(x, y, x);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRemovalOnly() {
|
||||
|
||||
String x = "Line not modified";
|
||||
String y = "Line modified";
|
||||
compareStrings(x, y, y);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSizeLimit() {
|
||||
|
||||
String x = "This is a line that has not been modified";
|
||||
String y = "This is a line that has been modified";
|
||||
|
||||
StringLcs slcs = new StringLcs(x, y);
|
||||
slcs.setSizeLimit(10);
|
||||
List<Character> lcs = slcs.getLcs();
|
||||
String result = StringUtils.join(lcs, "");
|
||||
assertEquals(y, result); // 'y' is common, since it is 'x', with only a delete
|
||||
|
||||
String z = "Start Mod " + x + " End Mod"; // same as 'x', but with different start/end
|
||||
slcs = new StringLcs(x, z);
|
||||
slcs.setSizeLimit(10);
|
||||
List<Character> actual = slcs.getLcs();
|
||||
assertTrue(actual.isEmpty());
|
||||
}
|
||||
|
||||
private void compareStrings(String x, String y, String expected) {
|
||||
StringLcs slcs = new StringLcs(x, y);
|
||||
List<Character> actual = slcs.getLcs();
|
||||
assertEquals(convertString(expected), actual);
|
||||
}
|
||||
|
||||
private List<Character> convertString(String s) {
|
||||
List<Character> charList = new ArrayList<>();
|
||||
for (char c : s.toCharArray()) {
|
||||
charList.add(c);
|
||||
}
|
||||
return charList;
|
||||
}
|
||||
|
||||
private class StringLcs extends ReducingLcs<String, Character> {
|
||||
|
||||
public StringLcs(String x, String y) {
|
||||
super(x, y);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String reduce(String input, int start, int end) {
|
||||
return input.substring(start, end);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int lengthOf(String s) {
|
||||
return s.length();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Character valueOf(String s, int offset) {
|
||||
return s.charAt(offset);
|
||||
}
|
||||
}
|
||||
}
|
@ -3366,7 +3366,7 @@ public class CodeManager implements ErrorHandler, ManagerDB {
|
||||
newComment = "";
|
||||
}
|
||||
|
||||
StringDiff[] diffs = StringDiffer.getLineDiffs(newComment, oldComment);
|
||||
StringDiff[] diffs = StringDiffUtils.getLineDiffs(newComment, oldComment);
|
||||
|
||||
long date = System.currentTimeMillis();
|
||||
long addr = addrMap.getKey(address, true);
|
||||
@ -3402,11 +3402,11 @@ public class CodeManager implements ErrorHandler, ManagerDB {
|
||||
Record rec = allRecords.get(allRecords.size() - 1);
|
||||
long date = rec.getLongValue(CommentHistoryAdapter.HISTORY_DATE_COL);
|
||||
List<Record> records = subListByDate(allRecords, date);
|
||||
|
||||
List<StringDiff> diffs = new ArrayList<>(records.size());
|
||||
|
||||
String user = null;
|
||||
for (int i = 0; i < records.size(); i++) {
|
||||
Record r = records.get(i);
|
||||
for (Record r : records) {
|
||||
user = r.getString(CommentHistoryAdapter.HISTORY_USER_COL);
|
||||
int pos1 = r.getIntValue(CommentHistoryAdapter.HISTORY_POS1_COL);
|
||||
int pos2 = r.getIntValue(CommentHistoryAdapter.HISTORY_POS2_COL);
|
||||
@ -3415,7 +3415,7 @@ public class CodeManager implements ErrorHandler, ManagerDB {
|
||||
}
|
||||
|
||||
results.add(new CommentHistory(addr, commentType, user, comment, new Date(date)));
|
||||
comment = StringDiffer.applyDiffs(comment, diffs);
|
||||
comment = StringDiffUtils.applyDiffs(comment, diffs);
|
||||
|
||||
records.clear(); // remove the subList elements from the list
|
||||
}
|
||||
|
@ -0,0 +1,351 @@
|
||||
/* ###
|
||||
* IP: GHIDRA
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package ghidra.program.database.code;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import generic.algorithms.ReducingListBasedLcs;
|
||||
|
||||
class StringDiffUtils {
|
||||
|
||||
/**
|
||||
* Minimum size used to determine whether a new StringDiff object will be
|
||||
* created just using a string (no positions)
|
||||
* in the <code>getLineDiffs(String, String)</code> method.
|
||||
* @see #getLineDiffs(String, String)
|
||||
*/
|
||||
private static int MINIMUM_DIFF_SIZE = 100;
|
||||
|
||||
/**
|
||||
* Returns the list of StringDiff objects that if applied to s1 would result in s2; The
|
||||
* given text will look only for whole lines using '\n'.
|
||||
*
|
||||
* @param s1 the original string
|
||||
* @param s2 the result string
|
||||
* this value, then a completely different string will be returned
|
||||
* @return an array of StringDiff objects that change s1 into s2;
|
||||
*/
|
||||
static StringDiff[] getLineDiffs(String s1, String s2) {
|
||||
return getLineDiffs(s1, s2, MINIMUM_DIFF_SIZE);
|
||||
}
|
||||
|
||||
static StringDiff[] getLineDiffs(String s1, String s2, int minimumDiffSize) {
|
||||
if (s2.length() < minimumDiffSize) {
|
||||
return new StringDiff[] { StringDiff.allTextReplaced(s2) };
|
||||
}
|
||||
|
||||
List<Line> aList = split(s1);
|
||||
List<Line> bList = split(s2);
|
||||
LineLcs lcs = new LineLcs(aList, bList);
|
||||
List<Line> commons = lcs.getLcs();
|
||||
if (commons.isEmpty()) {
|
||||
// no common text--complete replacement
|
||||
return new StringDiff[] { StringDiff.allTextReplaced(s2) };
|
||||
}
|
||||
|
||||
int aIndex = 0;
|
||||
int bIndex = 0;
|
||||
int aLastIndex = 0;
|
||||
int bLastIndex = 0;
|
||||
List<StringDiff> results = new LinkedList<>();
|
||||
for (Line common : commons) {
|
||||
|
||||
aIndex = indexOf(aList, common, aLastIndex);
|
||||
bIndex = indexOf(bList, common, bLastIndex);
|
||||
|
||||
int aDelta = aIndex - aLastIndex;
|
||||
int bDelta = bIndex - bLastIndex;
|
||||
|
||||
int aEnd = aIndex;
|
||||
int aStart = aEnd - aDelta;
|
||||
List<Line> aPrevious = aList.subList(aStart, aEnd);
|
||||
StringDiff delete = createDelete(aPrevious);
|
||||
if (delete != null) {
|
||||
results.add(delete);
|
||||
}
|
||||
|
||||
int bEnd = bIndex;
|
||||
int bStart = bEnd - bDelta;
|
||||
List<Line> bPrevious = bList.subList(bStart, bEnd);
|
||||
StringDiff insert = createInsert(bPrevious, charOffset(aList, aIndex));
|
||||
if (insert != null) {
|
||||
results.add(insert);
|
||||
}
|
||||
|
||||
// note: nothing is needed for the 'common' string, since we don't track unchanged text
|
||||
|
||||
aLastIndex = aIndex + 1;
|
||||
bLastIndex = bIndex + 1;
|
||||
}
|
||||
|
||||
// grab remainder
|
||||
StringDiff trailingDeleted = createDeleteAtEnd(aList, aLastIndex, aList.size());
|
||||
if (trailingDeleted != null) {
|
||||
results.add(trailingDeleted);
|
||||
}
|
||||
|
||||
StringDiff trailingInserted =
|
||||
createInsertAtEnd(bList, bLastIndex, bList.size(), s1.length());
|
||||
if (trailingInserted != null) {
|
||||
results.add(trailingInserted);
|
||||
}
|
||||
|
||||
return results.toArray(new StringDiff[results.size()]);
|
||||
}
|
||||
|
||||
private static int charOffset(List<Line> list, int index) {
|
||||
Line line = list.get(index);
|
||||
return line.start;
|
||||
}
|
||||
|
||||
private static StringDiff createInsertAtEnd(List<Line> list, int start, int end,
|
||||
int insertIndex) {
|
||||
if (start - 1 == end) {
|
||||
return null;
|
||||
}
|
||||
|
||||
List<Line> toDo = list.subList(start, end);
|
||||
boolean newlineNeeded = true; // we are at the end--need a newline
|
||||
StringDiff insert = createInsert(toDo, insertIndex, newlineNeeded);
|
||||
return insert;
|
||||
}
|
||||
|
||||
private static StringDiff createInsert(List<Line> lines, int insertIndex) {
|
||||
return createInsert(lines, insertIndex, false);
|
||||
}
|
||||
|
||||
private static StringDiff createInsert(List<Line> lines, int insertIndex, boolean isAtEnd) {
|
||||
if (lines.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
StringBuilder buffy = new StringBuilder();
|
||||
|
||||
// special case: if this insert is for the end of the line, then we want to add
|
||||
// a newline before the remaining text is added since the original text
|
||||
// did not have this newline
|
||||
if (isAtEnd) {
|
||||
buffy.append('\n');
|
||||
}
|
||||
|
||||
for (Line line : lines) {
|
||||
buffy.append(line.getText());
|
||||
}
|
||||
|
||||
return StringDiff.textInserted(buffy.toString(), insertIndex);
|
||||
}
|
||||
|
||||
private static StringDiff createDeleteAtEnd(List<Line> list, int start, int end) {
|
||||
|
||||
if (start - 1 == end) {
|
||||
return null;
|
||||
}
|
||||
|
||||
List<Line> toDo = list.subList(start, end);
|
||||
boolean includeLastNewline = false; // we are at the end--do not include artificial newline
|
||||
StringDiff delete = createDelete(toDo, includeLastNewline);
|
||||
return delete;
|
||||
}
|
||||
|
||||
private static StringDiff createDelete(List<Line> lines) {
|
||||
return createDelete(lines, true);
|
||||
}
|
||||
|
||||
private static StringDiff createDelete(List<Line> lines, boolean includeLastNewline) {
|
||||
if (lines.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
int start = 0;
|
||||
int end = 0;
|
||||
for (Line line : lines) {
|
||||
start = line.start;
|
||||
end = line.start + line.text.length();
|
||||
}
|
||||
|
||||
// special case: if this delete is for the last line, then we want to remove the remaining
|
||||
// trailing newline
|
||||
Line last = lines.get(lines.size() - 1);
|
||||
if (!includeLastNewline && last.isLastLine) {
|
||||
start -= 1; // remove previous newline
|
||||
}
|
||||
|
||||
return StringDiff.textDeleted(start, end);
|
||||
}
|
||||
|
||||
private static int indexOf(List<Line> list, Line line, int from) {
|
||||
for (int i = from; i < list.size(); i++) {
|
||||
if (list.get(i).textMatches(line)) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return list.size(); // should not get here since 's' is known to be in list
|
||||
}
|
||||
|
||||
private static List<Line> split(String s) {
|
||||
|
||||
LinkedList<Line> result = new LinkedList<>();
|
||||
List<String> lines = Arrays.asList(StringUtils.splitPreserveAllTokens(s, '\n'));
|
||||
int start = 0;
|
||||
for (String line : lines) {
|
||||
Line l = new Line(line + '\n', start);
|
||||
result.add(l);
|
||||
start += l.text.length();
|
||||
}
|
||||
|
||||
// strip off the trailing newline that we added above
|
||||
Line last = result.peekLast();
|
||||
last.markAsLast();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Applies the array of StringObjects to the string s to produce a new string. Warning - the
|
||||
* diff objects cannot be applied to an arbitrary string, the Strings must be the original
|
||||
* String used to compute the diffs.
|
||||
* @param s the original string
|
||||
* @param diffs the array of StringDiff object to apply
|
||||
* @return a new String resulting from applying the diffs to s.
|
||||
*/
|
||||
static String applyDiffs(String s, List<StringDiff> diffs) {
|
||||
|
||||
if (diffs.isEmpty()) {
|
||||
return s;
|
||||
}
|
||||
|
||||
if (diffs.get(0).start < 0) {
|
||||
// all replaced or all deleted
|
||||
String data = diffs.get(0).text;
|
||||
return data == null ? "" : data;
|
||||
}
|
||||
|
||||
int pos = 0;
|
||||
StringBuilder buf = new StringBuilder(s.length());
|
||||
for (StringDiff element : diffs) {
|
||||
if (element.start > pos) {
|
||||
buf.append(s.substring(pos, element.start));
|
||||
pos = element.start;
|
||||
}
|
||||
|
||||
String data = element.text;
|
||||
if (data != null) {
|
||||
buf.append(data);
|
||||
}
|
||||
else {
|
||||
// null data is a delete; move to the end of the delete
|
||||
pos = element.end;
|
||||
}
|
||||
}
|
||||
|
||||
if (pos < s.length()) {
|
||||
buf.append(s.substring(pos));
|
||||
}
|
||||
return buf.toString();
|
||||
}
|
||||
|
||||
//==================================================================================================
|
||||
// Inner Classes
|
||||
//==================================================================================================
|
||||
|
||||
private static class Line {
|
||||
|
||||
private String text;
|
||||
private int start;
|
||||
private boolean isLastLine;
|
||||
|
||||
public Line(String line, int start) {
|
||||
this.text = line;
|
||||
this.start = start;
|
||||
}
|
||||
|
||||
String getText() {
|
||||
if (isLastLine) {
|
||||
return textWithoutNewline(); // last line and do not include the newline
|
||||
}
|
||||
return text;
|
||||
}
|
||||
|
||||
void markAsLast() {
|
||||
isLastLine = true;
|
||||
}
|
||||
|
||||
private String textWithoutNewline() {
|
||||
if (text.charAt(text.length() - 1) == '\n') {
|
||||
return text.substring(0, text.length() - 1);
|
||||
}
|
||||
return text;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return textWithoutNewline() + " @ " + start;
|
||||
}
|
||||
|
||||
boolean textMatches(Line other) {
|
||||
return Objects.equals(text, other.text);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = 1;
|
||||
result = prime * result + start;
|
||||
result = prime * result + ((text == null) ? 0 : text.hashCode());
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (this == obj) {
|
||||
return true;
|
||||
}
|
||||
if (obj == null) {
|
||||
return false;
|
||||
}
|
||||
if (getClass() != obj.getClass()) {
|
||||
return false;
|
||||
}
|
||||
Line other = (Line) obj;
|
||||
if (start != other.start) {
|
||||
return false;
|
||||
}
|
||||
if (text == null) {
|
||||
if (other.text != null) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else if (!text.equals(other.text)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
private static class LineLcs extends ReducingListBasedLcs<Line> {
|
||||
|
||||
LineLcs(List<Line> x, List<Line> y) {
|
||||
super(x, y);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean matches(Line x, Line y) {
|
||||
return x.text.equals(y.text);
|
||||
}
|
||||
}
|
||||
}
|
@ -1,261 +0,0 @@
|
||||
/* ###
|
||||
* IP: GHIDRA
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package ghidra.program.database.code;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
class StringDiffer {
|
||||
|
||||
/**
|
||||
* Returns the list of StringDiff objects that if applied to s1 would result in s2; The
|
||||
* given text will look only for whole lines using '\n'.
|
||||
*
|
||||
* @param s1 the original string
|
||||
* @param s2 the result string
|
||||
* this value, then a completely different string will be returned
|
||||
* @return an array of StringDiff objects that change s1 into s2;
|
||||
*/
|
||||
static StringDiff[] getLineDiffs(String s1, String s2) {
|
||||
|
||||
/**
|
||||
* Minimum size used to determine whether a new StringDiff object will be
|
||||
* created just using a string (no positions)
|
||||
* in the <code>getDiffs(String, String)</code> method.
|
||||
* @see #getLineDiffs(String, String)
|
||||
*/
|
||||
int MINIMUM_DIFF_SIZE = 100;
|
||||
return StringDiffer.getLineDiffs(s1, s2, MINIMUM_DIFF_SIZE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the list of StringDiff objects that if applied to s1 would result in s2; The
|
||||
* given text will look only for whole lines using '\n'.
|
||||
*
|
||||
* @param s1 the original string
|
||||
* @param s2 the result string
|
||||
* @param minimumDiffSize the minimum length of s2 required for a diff; if s2 is less than
|
||||
* this value, then a completely different string will be returned
|
||||
* @return an array of StringDiff objects that change s1 into s2;
|
||||
*/
|
||||
static StringDiff[] getLineDiffs(String s1, String s2, int minimumDiffSize) {
|
||||
if (s2.length() < minimumDiffSize) {
|
||||
return new StringDiff[] { StringDiff.allTextReplaced(s2) };
|
||||
}
|
||||
|
||||
List<StringDiff> results = new LinkedList<>();
|
||||
int cursor1 = 0;
|
||||
int cursor2 = 0;
|
||||
int len1 = s1.length();
|
||||
int len2 = s2.length();
|
||||
|
||||
/*
|
||||
-look at each line in 'line' chunks using '\n'
|
||||
*/
|
||||
|
||||
// walk each string until the end...
|
||||
while (cursor1 < len1 || cursor2 < len2) {
|
||||
String line1 = getLine(s1, cursor1);
|
||||
String line2 = getLine(s2, cursor2);
|
||||
if (line1.equals(line2)) {
|
||||
cursor1 += line1.length();
|
||||
cursor2 += line2.length();
|
||||
continue;
|
||||
}
|
||||
|
||||
// look for line1 in s2...
|
||||
int line1PosInOther = findLine(s2, cursor2, line1);
|
||||
int mark = cursor1;
|
||||
while (line1PosInOther < 0) {
|
||||
|
||||
// line1 is not in s2; scan for the next line
|
||||
cursor1 += line1.length();
|
||||
line1 = getLine(s1, cursor1);
|
||||
line1PosInOther = findLine(s2, cursor2, line1);
|
||||
}
|
||||
if (cursor1 > mark) {
|
||||
// the original line1 was not in s2; add all that was different up to current cursor1
|
||||
results.add(StringDiff.textDeleted(mark, cursor1));
|
||||
}
|
||||
|
||||
// now look for line2 in s1
|
||||
int line2PosInOther = findLine(s1, cursor1, line2);
|
||||
mark = cursor2;
|
||||
while (line2PosInOther < 0) {
|
||||
|
||||
// line2 is not in s1; scan for the next line
|
||||
cursor2 += line2.length();
|
||||
line2 = getLine(s2, cursor2);
|
||||
line2PosInOther = findLine(s1, cursor1, line2);
|
||||
}
|
||||
if (cursor2 > mark) {
|
||||
// the original line2 was not in s1; add all that was different up to current cursor2
|
||||
results.add(StringDiff.textInserted(s2.substring(mark, cursor2), cursor1));
|
||||
continue;
|
||||
}
|
||||
|
||||
// move both searches forward
|
||||
int delta1 = line2PosInOther - cursor1;
|
||||
int delta2 = line1PosInOther - cursor2;
|
||||
if (delta1 > delta2) {
|
||||
|
||||
// this can happen when two lines have been rearranged *and* the line length
|
||||
// of the moved line is *longer* than the new line at the replaced position
|
||||
results.add(
|
||||
StringDiff.textInserted(s2.substring(cursor2, line1PosInOther), cursor1));
|
||||
cursor2 = line1PosInOther;
|
||||
}
|
||||
else if (delta2 > delta1) {
|
||||
|
||||
// this can happen when two lines have been rearranged *and* the line length
|
||||
// of the moved line is *shorter* than the new line at the replaced position
|
||||
results.add(StringDiff.textDeleted(cursor1, line2PosInOther));
|
||||
cursor1 = line2PosInOther;
|
||||
}
|
||||
else { // delta1 == delta2
|
||||
|
||||
if (cursor1 != line2PosInOther) {
|
||||
results.add(StringDiff.textDeleted(cursor1, line2PosInOther));
|
||||
cursor1 = line2PosInOther;
|
||||
}
|
||||
|
||||
if (cursor2 != line1PosInOther) {
|
||||
results.add(
|
||||
StringDiff.textInserted(s2.substring(cursor2, line1PosInOther), cursor1));
|
||||
cursor2 = line1PosInOther;
|
||||
}
|
||||
}
|
||||
}
|
||||
return results.toArray(new StringDiff[results.size()]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds a position in s that contains the string line. The matching string in
|
||||
* s must be a "complete" line, in other words if pos > 0 then s.charAt(index-1) must be
|
||||
* a newLine character and s.charAt(index+line.length()) must be a newLine or the end of
|
||||
* the string.
|
||||
* @param s the string to scan
|
||||
* @param pos the position to begin the scan.
|
||||
* @param line the line to scan for
|
||||
* @return the position in s containing the line string.
|
||||
*/
|
||||
static int findLine(String s, int pos, String line) {
|
||||
|
||||
if (line.length() == 0) {
|
||||
// this is used as a marker: -1 means not found; non-negative number signals to keep going
|
||||
return pos; // TODO this is odd; why is this a match??
|
||||
}
|
||||
|
||||
int n = s.length();
|
||||
while (pos < n) {
|
||||
int index = s.indexOf(line, pos);
|
||||
if (index < 0) {
|
||||
return index;
|
||||
}
|
||||
|
||||
if (index > 0 && s.charAt(index - 1) != '\n') {
|
||||
pos = index + line.length(); // line matched, but not a newline in 's'
|
||||
continue;
|
||||
}
|
||||
|
||||
//
|
||||
// Have a match with at start/0 or have a preceding newline
|
||||
//
|
||||
|
||||
if (line.endsWith("\n")) {
|
||||
return index; // the match ends with a newline; found line
|
||||
}
|
||||
|
||||
// no newline for the current match in 's'
|
||||
if (index + line.length() == n) {
|
||||
return index; // at the end exactly; found line
|
||||
}
|
||||
|
||||
// no newline; not at end; keep going
|
||||
pos = index + line.length();
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a substring of s beginning at start and ending at either the end of the string or
|
||||
* the first newLine at or after start
|
||||
*
|
||||
* @param s the string to scan
|
||||
* @param start the starting position for the scan
|
||||
* @return a string that represents a line within s
|
||||
*/
|
||||
private static String getLine(String s, int start) {
|
||||
int n = s.length();
|
||||
if (start >= n) {
|
||||
return "";
|
||||
}
|
||||
int pos = start;
|
||||
while (pos < n && s.charAt(pos) != '\n') {
|
||||
pos++;
|
||||
}
|
||||
|
||||
if (pos < n) {
|
||||
pos++; // not at the end; found newline; include the newline
|
||||
}
|
||||
return s.substring(start, pos);
|
||||
}
|
||||
|
||||
/**
|
||||
* Applies the array of StringObjects to the string s to produce a new string. Warning - the
|
||||
* diff objects cannot be applied to an arbitrary string, the Strings must be the original
|
||||
* String used to compute the diffs.
|
||||
* @param s the original string
|
||||
* @param diffs the array of StringDiff object to apply
|
||||
* @return a new String resulting from applying the diffs to s.
|
||||
*/
|
||||
static String applyDiffs(String s, List<StringDiff> diffs) {
|
||||
|
||||
if (diffs.isEmpty()) {
|
||||
return s;
|
||||
}
|
||||
|
||||
if (diffs.get(0).start < 0) {
|
||||
// all replaced or all deleted
|
||||
String data = diffs.get(0).text;
|
||||
return data == null ? "" : data;
|
||||
}
|
||||
|
||||
int pos = 0;
|
||||
StringBuilder buf = new StringBuilder(s.length());
|
||||
for (StringDiff element : diffs) {
|
||||
if (element.start > pos) {
|
||||
buf.append(s.substring(pos, element.start));
|
||||
pos = element.start;
|
||||
}
|
||||
|
||||
String data = element.text;
|
||||
if (data != null) {
|
||||
buf.append(data);
|
||||
}
|
||||
else {
|
||||
// null data is a delete; move to the end of the delete
|
||||
pos = element.end;
|
||||
}
|
||||
}
|
||||
|
||||
if (pos < s.length()) {
|
||||
buf.append(s.substring(pos));
|
||||
}
|
||||
return buf.toString();
|
||||
}
|
||||
}
|
@ -1,6 +1,5 @@
|
||||
/* ###
|
||||
* IP: GHIDRA
|
||||
* REVIEWED: YES
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -16,11 +15,11 @@
|
||||
*/
|
||||
package ghidra.program.util;
|
||||
|
||||
import generic.algorithms.LCS;
|
||||
import generic.algorithms.Lcs;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class CodeUnitLCS extends LCS<CodeUnitContainer> {
|
||||
public class CodeUnitLCS extends Lcs<CodeUnitContainer> {
|
||||
|
||||
private List<CodeUnitContainer> xList;
|
||||
private List<CodeUnitContainer> yList;
|
||||
|
@ -17,107 +17,15 @@ package ghidra.program.database.code;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.*;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.junit.Test;
|
||||
|
||||
import generic.test.AbstractGTest;
|
||||
|
||||
public class StringDiffTest {
|
||||
|
||||
/*
|
||||
A line matches when the given line is contained in the source string and:
|
||||
|
||||
1) a) matches in the source string with a '\n' char at the index before the match OR
|
||||
b) is at the beginning *and* the match contains a newline
|
||||
2) is at the exact end of the source string
|
||||
|
||||
*The empty string matches at the current position
|
||||
|
||||
Source String: "abcd\nefghi\n"
|
||||
Line to Match:
|
||||
*/
|
||||
|
||||
@Test
|
||||
public void testFindLine_FromStart_EmptyLine() {
|
||||
|
||||
String source = "this is a really\nlone line with\n newlines";
|
||||
String line = "";
|
||||
int result = StringDiffer.findLine(source, 0, line);
|
||||
assertEquals(0, result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFindLine_FromStart_NoMatch() {
|
||||
|
||||
String source = "this is a really\nlone line with\n newlines";
|
||||
String line = "coconuts";
|
||||
int result = StringDiffer.findLine(source, 0, line);
|
||||
assertEquals(-1, result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFindLine_FromMiddle_NoMatch() {
|
||||
|
||||
String source = "this is a really\nlone line with\n newlines";
|
||||
String line = "coconuts";
|
||||
int result = StringDiffer.findLine(source, 15, line);
|
||||
assertEquals(-1, result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFindLine_FromEnd_NoMatch() {
|
||||
|
||||
String source = "this is a really\nlone line with\n newlines";
|
||||
String line = "coconuts";
|
||||
int result = StringDiffer.findLine(source, source.length(), line);
|
||||
assertEquals(-1, result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFindLine_FromStart_MatchWithNewline_AtStart() {
|
||||
|
||||
String source = "this is a really\nlone line with\n newlines";
|
||||
String line = "this is a really\n";
|
||||
int result = StringDiffer.findLine(source, 0, line);
|
||||
assertEquals(0, result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFindLine_FromStart_MatchWithNewline_AtMiddle() {
|
||||
|
||||
String source = "this is a really\nlone line with\n newlines";
|
||||
String line = "lone line with\n";
|
||||
int result = StringDiffer.findLine(source, 0, line);
|
||||
assertEquals(17, result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFindLine_FromStart_MatchWithNewline_AtEnd_FailWithoutPrecedingNewline() {
|
||||
|
||||
String source = "this is a really\nlone line with\n newlines\n";
|
||||
String line = "lines\n";
|
||||
int result = StringDiffer.findLine(source, 0, line);
|
||||
assertEquals(-1, result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFindLine_FromStart_MatchWithNewline_AtEnd_PassWithPrecedingNewline() {
|
||||
|
||||
String source = "this is a really\nlone line with\n new\nlines\n";
|
||||
String line = "lines\n";
|
||||
int result = StringDiffer.findLine(source, 0, line);
|
||||
assertEquals(37, result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFindLine_FromStart_MatchWithoutNewline_AtStart() {
|
||||
|
||||
String source = "this is a really\nlone line with\n newlines";
|
||||
String line = "this is a really";
|
||||
int result = StringDiffer.findLine(source, 0, line);
|
||||
assertEquals(-1, result); // match at start must contain a newline
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetDiffLines_Insert_AtFront() {
|
||||
|
||||
@ -126,8 +34,8 @@ public class StringDiffTest {
|
||||
String v1 = StringUtils.join(a1, '\n');
|
||||
String v2 = StringUtils.join(a2, '\n');
|
||||
|
||||
StringDiff[] diffs = StringDiffer.getLineDiffs(v1, v2, 1);
|
||||
String restoredV2 = StringDiffer.applyDiffs(v1, Arrays.asList(diffs));
|
||||
StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1);
|
||||
String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs));
|
||||
assertEquals(v2, restoredV2);
|
||||
}
|
||||
|
||||
@ -139,8 +47,8 @@ public class StringDiffTest {
|
||||
String v1 = StringUtils.join(a1, '\n');
|
||||
String v2 = StringUtils.join(a2, '\n');
|
||||
|
||||
StringDiff[] diffs = StringDiffer.getLineDiffs(v1, v2, 1);
|
||||
String restoredV2 = StringDiffer.applyDiffs(v1, Arrays.asList(diffs));
|
||||
StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1);
|
||||
String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs));
|
||||
assertEquals(v2, restoredV2);
|
||||
}
|
||||
|
||||
@ -152,8 +60,8 @@ public class StringDiffTest {
|
||||
String v1 = StringUtils.join(a1, '\n');
|
||||
String v2 = StringUtils.join(a2, '\n');
|
||||
|
||||
StringDiff[] diffs = StringDiffer.getLineDiffs(v1, v2, 1);
|
||||
String restoredV2 = StringDiffer.applyDiffs(v1, Arrays.asList(diffs));
|
||||
StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1);
|
||||
String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs));
|
||||
assertEquals(v2, restoredV2);
|
||||
}
|
||||
|
||||
@ -165,8 +73,8 @@ public class StringDiffTest {
|
||||
String v1 = StringUtils.join(a1, '\n');
|
||||
String v2 = StringUtils.join(a2, '\n');
|
||||
|
||||
StringDiff[] diffs = StringDiffer.getLineDiffs(v1, v2, 1);
|
||||
String restoredV2 = StringDiffer.applyDiffs(v1, Arrays.asList(diffs));
|
||||
StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1);
|
||||
String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs));
|
||||
assertEquals(v2, restoredV2);
|
||||
}
|
||||
|
||||
@ -178,8 +86,8 @@ public class StringDiffTest {
|
||||
String v1 = StringUtils.join(a1, '\n');
|
||||
String v2 = StringUtils.join(a2, '\n');
|
||||
|
||||
StringDiff[] diffs = StringDiffer.getLineDiffs(v1, v2, 1);
|
||||
String restoredV2 = StringDiffer.applyDiffs(v1, Arrays.asList(diffs));
|
||||
StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1);
|
||||
String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs));
|
||||
assertEquals(v2, restoredV2);
|
||||
}
|
||||
|
||||
@ -191,8 +99,8 @@ public class StringDiffTest {
|
||||
String v1 = StringUtils.join(a1, '\n');
|
||||
String v2 = StringUtils.join(a2, '\n');
|
||||
|
||||
StringDiff[] diffs = StringDiffer.getLineDiffs(v1, v2, 1);
|
||||
String restoredV2 = StringDiffer.applyDiffs(v1, Arrays.asList(diffs));
|
||||
StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1);
|
||||
String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs));
|
||||
assertEquals(v2, restoredV2);
|
||||
}
|
||||
|
||||
@ -204,8 +112,8 @@ public class StringDiffTest {
|
||||
String v1 = StringUtils.join(a1, '\n');
|
||||
String v2 = StringUtils.join(a2, '\n');
|
||||
|
||||
StringDiff[] diffs = StringDiffer.getLineDiffs(v1, v2, 1);
|
||||
String restoredV2 = StringDiffer.applyDiffs(v1, Arrays.asList(diffs));
|
||||
StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1);
|
||||
String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs));
|
||||
assertEquals(v2, restoredV2);
|
||||
}
|
||||
|
||||
@ -220,8 +128,8 @@ public class StringDiffTest {
|
||||
String v1 = StringUtils.join(a1, '\n');
|
||||
String v2 = StringUtils.join(a2, '\n');
|
||||
|
||||
StringDiff[] diffs = StringDiffer.getLineDiffs(v1, v2, 1);
|
||||
String restoredV2 = StringDiffer.applyDiffs(v1, Arrays.asList(diffs));
|
||||
StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1);
|
||||
String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs));
|
||||
assertEquals(v2, restoredV2);
|
||||
}
|
||||
|
||||
@ -233,8 +141,8 @@ public class StringDiffTest {
|
||||
String v1 = StringUtils.join(a1, '\n');
|
||||
String v2 = StringUtils.join(a2, '\n');
|
||||
|
||||
StringDiff[] diffs = StringDiffer.getLineDiffs(v1, v2, 1);
|
||||
String restoredV2 = StringDiffer.applyDiffs(v1, Arrays.asList(diffs));
|
||||
StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1);
|
||||
String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs));
|
||||
assertEquals(v2, restoredV2);
|
||||
}
|
||||
|
||||
@ -246,9 +154,50 @@ public class StringDiffTest {
|
||||
String v1 = StringUtils.join(a1, '\n');
|
||||
String v2 = StringUtils.join(a2, '\n');
|
||||
|
||||
StringDiff[] diffs = StringDiffer.getLineDiffs(v1, v2, 1);
|
||||
String restoredV2 = StringDiffer.applyDiffs(v1, Arrays.asList(diffs));
|
||||
StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1);
|
||||
String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs));
|
||||
assertEquals(v2, restoredV2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReplace() {
|
||||
String[] a1 = new String[] { "In", "the", "beginning" };
|
||||
String[] a2 = new String[] { "There", "was", "vastness" };
|
||||
String v1 = StringUtils.join(a1, '\n');
|
||||
String v2 = StringUtils.join(a2, '\n');
|
||||
|
||||
StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1);
|
||||
String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs));
|
||||
assertEquals(v2, restoredV2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTheBiggness_NoOptimization() throws Exception {
|
||||
|
||||
List<String> bigLines = generateLines(1200);
|
||||
List<String> bigLines2 = new ArrayList<>(bigLines);
|
||||
|
||||
bigLines2.set(0, "a new line at 0");
|
||||
bigLines2.set(bigLines2.size() - 1, "a new line at length");
|
||||
|
||||
String v1 = StringUtils.join(bigLines, '\n');
|
||||
String v2 = StringUtils.join(bigLines2, '\n');
|
||||
|
||||
StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1);
|
||||
assertEquals(1, diffs.length); // 1 diff--completely different, due to size restriction on Lcs
|
||||
String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs));
|
||||
assertEquals(v2, restoredV2);
|
||||
}
|
||||
|
||||
private List<String> generateLines(int size) {
|
||||
|
||||
List<String> results = new ArrayList<>();
|
||||
for (int i = 0; i < size; i++) {
|
||||
String random = AbstractGTest.getRandomString(0, 50);
|
||||
random = random.replaceAll("\n", "");
|
||||
results.add("Line " + (i + 1) + ": " + random);
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user