GT-2824 - Comments - Upgrade to LCS for diffing

This commit is contained in:
dragonmacher 2019-05-01 14:58:37 -04:00
parent fefb0f0208
commit e5aaaa9a19
11 changed files with 1037 additions and 546 deletions

View File

@ -1,150 +0,0 @@
/* ###
* IP: GHIDRA
* REVIEWED: YES
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package generic.algorithms;
import ghidra.util.exception.CancelledException;
import ghidra.util.task.TaskMonitor;
import ghidra.util.task.TaskMonitorAdapter;
import java.util.ArrayList;
import java.util.List;
/**
 * Base class for computing the Longest Common Subsequence (LCS) of two sequences,
 * referred to as X and Y.  Subclasses supply the sequences and the element comparison.
 *
 * @param <T> the type of the objects being compared.
 */
public abstract class LCS<T> {

	/** The dynamic-programming table; null until the LCS has been calculated */
	private int[][] c;

	/**
	 * Convenient constructor for initializing elements in subclasses
	 */
	protected LCS() {
	}

	/**
	 * @return the length of the X sequence.
	 */
	protected abstract int lengthOfX();

	/**
	 * @return the length of the Y sequence.
	 */
	protected abstract int lengthOfY();

	/**
	 * @param index the position of interest in the X sequence.
	 * @return the value in the X sequence at <code>index</code>.
	 * Assumes 1-indexing.
	 */
	protected abstract T valueOfX(int index);

	/**
	 * @param index the position of interest in the Y sequence.
	 * @return the value in the Y sequence at <code>index</code>.
	 * Assumes 1-indexing.
	 */
	protected abstract T valueOfY(int index);

	/**
	 * @param x the X-sequence element of interest
	 * @param y the Y-sequence element of interest
	 * @return true if <code>x</code> matches <code>y</code>; false otherwise.
	 */
	protected abstract boolean matches(T x, T y);

	/**
	 * Fills in the LCS length table.  The table is built only once; later calls return
	 * immediately.
	 *
	 * @param monitor the monitor that reports progress and is checked for cancellation
	 * @throws CancelledException if the monitor is cancelled while the table is being built
	 */
	private void calculateLCS(TaskMonitor monitor) throws CancelledException {
		if (c != null) {
			return; // already calculated
		}

		int rowCount = lengthOfX() + 1;
		int[][] table = new int[rowCount][];

		monitor.setMessage("Calculating LCS...");
		monitor.initialize(rowCount);

		// each row is zero-filled by the JVM, which provides the row-0/column-0 base cases
		for (int row = 0; row < rowCount; row++) {
			table[row] = new int[lengthOfY() + 1];
		}

		for (int row = 1; row < rowCount; row++) {
			monitor.checkCanceled();

			int[] current = table[row];
			int[] previous = table[row - 1];
			for (int col = 1; col < current.length; col++) {
				current[col] = matches(valueOfX(row), valueOfY(col))
						? previous[col - 1] + 1
						: Math.max(current[col - 1], previous[col]);
			}
			monitor.incrementProgress(1);
		}

		c = table;
	}

	/**
	 * @return a <code>List&lt;T&gt;</code> of elements in the LCS.
	 */
	public List<T> getLCS() {
		List<T> lcs = null;
		try {
			lcs = getLCS(TaskMonitorAdapter.DUMMY_MONITOR);
		}
		catch (CancelledException e) {
			// can't happen with a dummy monitor
		}
		return lcs;
	}

	/**
	 * Calculates the LCS (if not already calculated) and returns its elements.
	 *
	 * @param monitor the monitor that reports progress and is checked for cancellation
	 * @return a <code>List&lt;T&gt;</code> of elements in the LCS.
	 * @throws CancelledException if the monitor is cancelled
	 */
	public List<T> getLCS(TaskMonitor monitor) throws CancelledException {
		calculateLCS(monitor);
		int lastRow = lengthOfX();
		int lastColumn = lengthOfY();
		return getLCSHelperIterative(lastRow, lastColumn);
	}

	/**
	 * Iterative helper function for getLCS(); walks the table backwards from the given cell.
	 *
	 * @param i the row index at which to start
	 * @param j the column index at which to start
	 * @return the LCS elements found while walking back from c[i, j].
	 */
	private List<T> getLCSHelperIterative(int i, int j) {
		ArrayList<T> lcs = new ArrayList<T>();
		int row = i;
		int col = j;
		while (row > 0 && col > 0) {
			int length = c[row][col];
			if (length == c[row - 1][col - 1] + 1 && matches(valueOfX(row), valueOfY(col))) {
				// this pair is part of the LCS; the walk is backwards, so insert at the front
				lcs.add(0, valueOfX(row));
				row--;
				col--;
				continue;
			}

			if (length == c[row - 1][col]) {
				row--;
			}
			else {
				col--;
			}
		}
		return lcs;
	}
}

View File

@ -0,0 +1,214 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package generic.algorithms;
import java.util.ArrayList;
import java.util.List;
import ghidra.util.exception.CancelledException;
import ghidra.util.task.TaskMonitor;
/**
 * Abstract class for finding the Longest Common Subsequence (LCS) between two
 * sequences of Matchable objects, <code>x</code> and <code>y</code>.
 *
 * <p>The performance of this algorithm is O(n^2).  Thus, large inputs can cause much processor
 * and memory usage.   This class has an upper limit (see {@link #getSizeLimit()}) to prevent
 * accidental system failure.
 *
 * @param <T> the type of the objects being compared
 */
public abstract class Lcs<T> {

	/**
	 * Somewhat arbitrary upper-bound restriction.  1M is 1000 * 1000
	 */
	private static final int DEFAULT_SIZE_LIMIT = 1_000_000;

	private int sizeLimit = DEFAULT_SIZE_LIMIT;

	/** The LCS length matrix; null until calculated, zero-length when the inputs were too big */
	private int[][] c;

	/**
	 * Returns the length of the x sequence
	 * @return the length of the x sequence
	 */
	protected abstract int lengthOfX();

	/**
	 * Returns the length of the y sequence
	 * @return the length of the y sequence
	 */
	protected abstract int lengthOfY();

	/**
	 * Gets the value of the x sequence at the given index, where index is 1-based
	 *
	 * @param index the 1-based position of interest in the x sequence
	 * @return the value in the x sequence at <code>index</code>
	 */
	protected abstract T valueOfX(int index);

	/**
	 * Gets the value of the y sequence at the given index, where index is 1-based
	 *
	 * @param index the 1-based position of interest in the Y sequence
	 * @return the value in the y sequence at <code>index</code>
	 */
	protected abstract T valueOfY(int index);

	/**
	 * Returns true if the value of x and y match
	 *
	 * @param x the x-sequence element of interest
	 * @param y the y-sequence element of interest
	 * @return true if <code>x</code> matches <code>y</code>; false otherwise
	 */
	protected abstract boolean matches(T x, T y);

	/**
	 * Compute the LCS matrix.  The matrix is computed at most once; the outcome (including the
	 * 'too big' outcome) is cached, so changing the size limit afterwards has no effect.
	 *
	 * @param monitor the task monitor
	 * @throws CancelledException if the monitor is cancelled
	 */
	private void calculateLCS(TaskMonitor monitor) throws CancelledException {
		if (c != null) {
			return;
		}

		if (tooBig()) {
			// a zero-length matrix signals doGetLcs() that there is nothing to walk
			c = new int[0][0];
			return;
		}

		int[][] tempC = new int[lengthOfX() + 1][];

		monitor.setMessage("Calculating LCS...");
		monitor.initialize(tempC.length);

		// create the zero-initialized matrix
		for (int i = 0; i < tempC.length; i++) {
			tempC[i] = new int[lengthOfY() + 1];
		}

		for (int i = 1; i < tempC.length; i++) {
			monitor.checkCanceled();
			for (int j = 1; j < tempC[i].length; j++) {
				if (matches(valueOfX(i), valueOfY(j))) {
					tempC[i][j] = tempC[i - 1][j - 1] + 1;
				}
				else {
					tempC[i][j] = Math.max(tempC[i][j - 1], tempC[i - 1][j]);
				}
			}
			monitor.incrementProgress(1);
		}

		// publish only the completed matrix so a cancelled run leaves no partial state behind
		c = tempC;
	}

	/**
	 * Defines a limit on the overall size of the inputs, above which no processing will
	 * take place.  Any value over the limit will produce an empty LCS.
	 *
	 * @return true if too big
	 */
	private boolean tooBig() {
		// widen before multiplying: the product of two int lengths can overflow an int and
		// wrap to a small or negative value, which would wrongly pass the limit check
		long cellCount = (long) lengthOfX() * lengthOfY();
		return cellCount > sizeLimit;
	}

	/**
	 * Changes the size limit of this LCS, past which no calculations will be performed
	 *
	 * @param newLimit the new limit
	 */
	public void setSizeLimit(int newLimit) {
		this.sizeLimit = newLimit;
	}

	/**
	 * Returns the current size limit, past which no calculations will be performed
	 *
	 * @return the size limit
	 * @see #setSizeLimit(int)
	 */
	public int getSizeLimit() {
		return sizeLimit;
	}

	/**
	 * Returns a list of the longest common subsequence.  This result will be empty if the
	 * {@link #getSizeLimit()} has been reached.
	 *
	 * @return the list; never null
	 */
	public List<T> getLcs() {
		try {
			return getLcs(TaskMonitor.DUMMY);
		}
		catch (CancelledException e) {
			// can't happen with a dummy monitor; fall through to an empty result rather than
			// null so that callers never have to null-check
		}
		return new ArrayList<>();
	}

	/**
	 * Returns a list of the longest common subsequence.  This result will be empty if the
	 * {@link #getSizeLimit()} has been reached.
	 *
	 * @param monitor the task monitor
	 * @return the LCS list
	 * @throws CancelledException if the monitor is cancelled
	 */
	public List<T> getLcs(TaskMonitor monitor) throws CancelledException {
		calculateLCS(monitor);
		return doGetLcs(monitor);
	}

	/**
	 * Get the actual LCS based upon the already created matrix
	 *
	 * @param monitor the task monitor
	 * @return the LCS list
	 * @throws CancelledException if the monitor is cancelled
	 */
	protected List<T> doGetLcs(TaskMonitor monitor) throws CancelledException {

		int x = 0;
		int y = 0;
		if (c.length > 0) {
			// start at the bottom-right cell; an empty matrix (inputs too big) skips the walk
			x = lengthOfX();
			y = lengthOfY();
		}

		List<T> result = new ArrayList<>();
		while (x > 0 && y > 0) {
			monitor.checkCanceled();

			if (c[x][y] == c[x - 1][y - 1] + 1 && matches(valueOfX(x), valueOfY(y))) {
				// the walk is from the end towards the start, so each match goes to the front
				result.add(0, valueOfX(x));
				--x;
				--y;
			}
			else if (c[x][y] == c[x - 1][y]) {
				--x;
			}
			else {
				--y;
			}
		}
		return result;
	}
}

View File

@ -0,0 +1,201 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package generic.algorithms;
import java.util.ArrayList;
import java.util.List;
import ghidra.util.exception.CancelledException;
import ghidra.util.task.TaskMonitor;
/**
 * Calculates the longest common subsequence (LCS) between two sequences of Matchable
 * objects, <code>x</code> and <code>y</code>.
 *
 * <p>This is an optimizing version of the {@link Lcs} that will pre-calculate all similar
 * items from the beginning and end of the two given sequences.  Doing this will reduce
 * the size of the matrix created by the parent class, greatly so in the case that the
 * two inputs are mostly the same in the beginning and end.  (Imagine an edit of a source
 * code file, where the typical change is somewhere in the middle of the file.  In this example,
 * the optimization performed here can greatly decrease the amount of work to be performed when
 * calculating the LCS.)
 *
 * <p>Note: the parent LCS algorithm is bound by {@link #getSizeLimit()}.  However, this class
 * allows clients to work around this restriction when the data has a similar beginning and ending,
 * as the similar parts will not be counted against the size limit.
 *
 * <p>Note: the constructor calls {@link #lengthOf(Object)}, {@link #valueOf(Object, int)},
 * {@link #matches(Object, Object)} and {@link #reduce(Object, int, int)}; implementations of
 * those methods must not depend on subclass fields, as they run before the subclass
 * constructor body.
 *
 * @param <I> The input sequence type
 * @param <T> the individual element type of the input sequence
 */
public abstract class ReducingLcs<I, T> extends Lcs<T> {

	private final I xSource; // full input x
	private final I ySource; // full input y

	private final I x; // the reduced input x
	private final I y; // the reduced input y

	private final int startn; // number of beginning same entries
	private final int endn; // number of trailing same entries

	/**
	 * Constructor
	 *
	 * @param ix the input sequence <code>x</code>
	 * @param iy the input sequence <code>y</code>
	 */
	public ReducingLcs(I ix, I iy) {
		this.xSource = ix;
		this.ySource = iy;

		// order matters: the trailing count is bounded by what the leading count left over
		startn = getMatchCountFromStart();
		endn = getMatchCountFromEnd();

		int endx = getEnd(xSource);
		int endy = getEnd(ySource);
		this.x = reduce(ix, startn, endx);
		this.y = reduce(iy, startn, endy);
	}

	/**
	 * Returns the 0-based exclusive end index of the part of the given full input that is left
	 * after the shared trailing entries are removed.
	 *
	 * @param i the full input sequence (x or y)
	 * @return the end index; never less than the number of shared leading entries
	 */
	private int getEnd(I i) {
		// the trailing count is bounded so that it cannot overlap the shared beginning; the
		// max() is kept as a defensive boundary for the delete-only/insert-only case
		return Math.max(startn, lengthOf(i) - endn);
	}

	/**
	 * Create a subsequence from the given input sequence.
	 *
	 * @param i the input sequence; 0-based (x or y)
	 * @param start the start index; 0-based (inclusive)
	 * @param end the end index (exclusive)
	 * @return the subsequence
	 */
	protected abstract I reduce(I i, int start, int end);

	/**
	 * Return the length of the given sequence
	 *
	 * @param i the input sequence (x or y)
	 * @return the length
	 */
	protected abstract int lengthOf(I i);

	/**
	 * Return the value at the given 0-based offset
	 *
	 * @param i the input sequence (x or y)
	 * @param offset the offset
	 * @return the value
	 */
	protected abstract T valueOf(I i, int offset);

	/**
	 * Builds the full LCS: the shared beginning, followed by the LCS of the reduced inputs as
	 * calculated by the parent class, followed by the shared end.
	 *
	 * @param monitor the task monitor
	 * @return the LCS list
	 * @throws CancelledException if the monitor is cancelled
	 */
	@Override
	protected List<T> doGetLcs(TaskMonitor monitor) throws CancelledException {

		List<T> reducedLcs = super.doGetLcs(monitor);

		int length = lengthOf(xSource);
		int endx = getEnd(xSource);
		int size = startn + reducedLcs.size() + (length - endx);
		List<T> lcs = new ArrayList<>(size);

		// add the shared beginning
		for (int i = 0; i < startn; i++) {
			monitor.checkCanceled();
			lcs.add(valueOf(xSource, i));
		}

		// add the calculated LCS
		lcs.addAll(reducedLcs);

		// add the shared end
		for (int i = endx; i < length; i++) {
			monitor.checkCanceled();
			lcs.add(valueOf(xSource, i));
		}

		return lcs;
	}

	@Override
	protected int lengthOfX() {
		return lengthOf(x);
	}

	@Override
	protected int lengthOfY() {
		return lengthOf(y);
	}

	@Override
	protected T valueOfX(int index) {
		// the parent class is 1-based; the sequences of this class are 0-based
		return valueOf(x, index - 1);
	}

	@Override
	protected T valueOfY(int index) {
		// the parent class is 1-based; the sequences of this class are 0-based
		return valueOf(y, index - 1);
	}

	/**
	 * Returns true if the two elements are equal; two null elements are considered a match.
	 *
	 * @param tx the x-sequence element of interest
	 * @param ty the y-sequence element of interest
	 * @return true if the elements match
	 */
	@Override
	protected boolean matches(T tx, T ty) {
		return tx == null ? ty == null : tx.equals(ty);
	}

//==================================================================================================
// Private Methods
//==================================================================================================

	/**
	 * Counts the leading entries shared by both full inputs.
	 *
	 * @return the number of matching entries, which is the full length of the shorter input
	 *         when that input is a prefix of (or is equal to) the other
	 */
	private int getMatchCountFromStart() {

		// scan past the beginning of all equal items
		int n = 0;
		int xl = lengthOf(xSource);
		int yl = lengthOf(ySource);
		while (n < xl && n < yl) {
			T xt = valueOf(xSource, n);
			T yt = valueOf(ySource, n);
			if (!matches(xt, yt)) {
				return n;
			}
			n++;
		}

		// one input ran out without a mismatch: everything scanned is shared
		return n;
	}

	/**
	 * Counts the trailing entries shared by both full inputs that will be removed from the
	 * reduced inputs.  This must be called after the leading count has been assigned.
	 *
	 * @return the number of trailing entries to treat as shared; never more than the number of
	 *         entries of the shorter input not already claimed by the shared beginning
	 */
	private int getMatchCountFromEnd() {

		int xl = lengthOf(xSource);
		int yl = lengthOf(ySource);

		// entries already counted as the shared beginning must not be counted again as the
		// shared end, otherwise the result could contain more entries than the shorter input
		int available = Math.min(xl, yl) - startn;

		// scan past the trailing equal items
		int n = 0;
		for (int xi = xl - 1, yi = yl - 1; xi >= 0 && yi >= 0; xi--, yi--) {
			T xt = valueOf(xSource, xi);
			T yt = valueOf(ySource, yi);
			if (!matches(xt, yt)) {
				// NOTE(review): the count is deliberately one less than the number of matched
				// entries; this conservative under-count is retained so that existing results
				// (including those affected by the size limit) do not change
				int count = n == 0 ? 0 : n - 1;
				return Math.min(count, available);
			}
			n++;
		}

		// one input is entirely a suffix of the other; no trailing reduction is performed
		return 0;
	}
}

View File

@ -0,0 +1,51 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package generic.algorithms;
import java.util.List;
/**
 * A {@link ReducingLcs} whose input sequences are {@link List}s of {@code T} items; each list
 * is the 'sequence' being checked for the Longest Common Subsequence.
 *
 * @param <T> the type of the item in the sequence of items
 */
public class ReducingListBasedLcs<T> extends ReducingLcs<List<T>, T> {

	/**
	 * Constructor
	 *
	 * @param x the first list to compare
	 * @param y the second list to compare
	 */
	public ReducingListBasedLcs(List<T> x, List<T> y) {
		super(x, y);
	}

	/**
	 * Two items match when the first reports itself equal to the second.
	 */
	@Override
	protected boolean matches(T x, T y) {
		boolean same = x.equals(y);
		return same;
	}

	/**
	 * The reduced sequence is the sub-list between the given indices.
	 */
	@Override
	protected List<T> reduce(List<T> list, int start, int end) {
		List<T> reduced = list.subList(start, end);
		return reduced;
	}

	/**
	 * The length of a sequence is the size of the list.
	 */
	@Override
	protected int lengthOf(List<T> list) {
		return list.size();
	}

	/**
	 * The value at an offset is the list element at that index.
	 */
	@Override
	protected T valueOf(List<T> list, int offset) {
		return list.get(offset);
	}
}

View File

@ -16,21 +16,18 @@
package generic.algorithms; package generic.algorithms;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.junit.Test; import org.junit.Test;
import generic.test.AbstractGenericTest; import generic.test.AbstractGenericTest;
public class LCSTest extends AbstractGenericTest { public class LCSTest extends AbstractGenericTest {
public LCSTest() {
super();
}
@Test @Test
public void testIdentical() { public void testIdentical() {
compareStrings("DEADBEEF", "DEADBEEF", "DEADBEEF"); compareStrings("DEADBEEF", "DEADBEEF", "DEADBEEF");
@ -46,36 +43,53 @@ public class LCSTest extends AbstractGenericTest {
"Hooray for really loooooong strings that span multiple lines in java!", "Hooray for really loooooong strings that span multiple lines in java!",
"Some really long string that might complicate things." + "Some really long string that might complicate things." +
"Hooray for really long strings that span multiple lines!"); "Hooray for really long strings that span multiple lines!");
} }
@Test @Test
public void testDifferent() { public void testDifferent() {
compareStrings("DEAD", "CANND", "AD"); compareStrings("DEAD", "CANND", "AD");
compareStrings("DEADBEEFISGOOD", "CANNDBEEFISBAD", "ADBEEFISD"); compareStrings("DEADBEEFISGOOD", "CANNDBEEFISBAD", "ADBEEFISD");
compareStrings("this here is one string", "here a different string is", "here in string"); compareStrings("this here is one string", "here a different string is", "here in string");
} }
@Test
public void testSizeLimit() {
String input = "This is more than 5 characters";
StringLcs slcs = new StringLcs(input, input);
List<Character> lcs = slcs.getLcs();
String result = StringUtils.join(lcs, "");
assertEquals(input, result);
slcs = new StringLcs(input, input);
slcs.setSizeLimit(10);
List<Character> actual = slcs.getLcs();
assertTrue(actual.isEmpty());
}
private void compareStrings(String x, String y, String expected) { private void compareStrings(String x, String y, String expected) {
StringLCS slcs = new StringLCS(x, y);
List<Character> actual = slcs.getLCS(); StringLcs slcs = new StringLcs(x, y);
List<Character> actual = slcs.getLcs();
assertEquals(convertString(expected), actual); assertEquals(convertString(expected), actual);
} }
private List<Character> convertString(String s) { private List<Character> convertString(String s) {
List<Character> charList = new ArrayList<Character>(); List<Character> charList = new ArrayList<>();
for (char c : s.toCharArray()) for (char c : s.toCharArray()) {
charList.add(c); charList.add(c);
}
return charList; return charList;
} }
private class StringLCS extends LCS<Character> { private class StringLcs extends Lcs<Character> {
private String x; private String x;
private String y; private String y;
public StringLCS(String x, String y) { public StringLcs(String x, String y) {
super(); super();
this.x = x; this.x = x;
this.y = y; this.y = y;
@ -105,6 +119,5 @@ public class LCSTest extends AbstractGenericTest {
protected Character valueOfY(int index) { protected Character valueOfY(int index) {
return y.charAt(index - 1); return y.charAt(index - 1);
} }
} }
} }

View File

@ -0,0 +1,124 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package generic.algorithms;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.junit.Test;
public class ReducingLCSTest {

	/** Identical inputs have themselves as the LCS */
	@Test
	public void testIdentical() {
		compareStrings("DEADBEEF", "DEADBEEF", "DEADBEEF");
	}

	/** Inputs that share most of their content */
	@Test
	public void testSimilar() {
		compareStrings("DEADBEEF", "DEEDBEAD", "DEDBE");

		String first = "Some really long string that might complicate things." +
			"Hooray for really long strings that span multiple lines!";
		String second = "Some other really long string that might complicate things." +
			"Hooray for really loooooong strings that span multiple lines in java!";
		String expected = "Some really long string that might complicate things." +
			"Hooray for really long strings that span multiple lines!";
		compareStrings(first, second, expected);
	}

	/** Inputs that share little of their content */
	@Test
	public void testDifferent() {
		compareStrings("DEAD", "CANND", "AD");
		compareStrings("DEADBEEFISGOOD", "CANNDBEEFISBAD", "ADBEEFISD");
		compareStrings("this here is one string", "here a different string is", "here in string");
	}

	/** The second input is the first input with text inserted */
	@Test
	public void testInsertOnly() {
		String original = "Line not modified";
		String withInsert = "Line not not modified";
		compareStrings(original, withInsert, original);
	}

	/** The second input is the first input with text removed */
	@Test
	public void testRemovalOnly() {
		String original = "Line not modified";
		String withRemoval = "Line modified";
		compareStrings(original, withRemoval, withRemoval);
	}

	/** The size limit applies only to the parts left after the shared start/end are removed */
	@Test
	public void testSizeLimit() {

		String original = "This is a line that has not been modified";
		String shortened = "This is a line that has been modified";
		StringLcs withinLimit = new StringLcs(original, shortened);
		withinLimit.setSizeLimit(10);
		String joined = StringUtils.join(withinLimit.getLcs(), "");
		assertEquals(shortened, joined); // 'shortened' is common; it is 'original' with a delete

		// same as 'original', but with different start/end
		String wrapped = "Start Mod " + original + " End Mod";
		StringLcs overLimit = new StringLcs(original, wrapped);
		overLimit.setSizeLimit(10);
		List<Character> nothing = overLimit.getLcs();
		assertTrue(nothing.isEmpty());
	}

	/**
	 * Asserts that the LCS of the two given strings is the expected string.
	 */
	private void compareStrings(String x, String y, String expected) {
		List<Character> wanted = convertString(expected);
		List<Character> found = new StringLcs(x, y).getLcs();
		assertEquals(wanted, found);
	}

	/**
	 * Converts the given string into a list of its characters.
	 */
	private List<Character> convertString(String s) {
		List<Character> characters = new ArrayList<>();
		for (int i = 0; i < s.length(); i++) {
			characters.add(s.charAt(i));
		}
		return characters;
	}

	/** A reducing LCS that treats a String as a sequence of Characters */
	private class StringLcs extends ReducingLcs<String, Character> {

		public StringLcs(String x, String y) {
			super(x, y);
		}

		@Override
		protected String reduce(String text, int start, int end) {
			return text.substring(start, end);
		}

		@Override
		protected int lengthOf(String text) {
			return text.length();
		}

		@Override
		protected Character valueOf(String text, int offset) {
			return text.charAt(offset);
		}
	}
}

View File

@ -3366,7 +3366,7 @@ public class CodeManager implements ErrorHandler, ManagerDB {
newComment = ""; newComment = "";
} }
StringDiff[] diffs = StringDiffer.getLineDiffs(newComment, oldComment); StringDiff[] diffs = StringDiffUtils.getLineDiffs(newComment, oldComment);
long date = System.currentTimeMillis(); long date = System.currentTimeMillis();
long addr = addrMap.getKey(address, true); long addr = addrMap.getKey(address, true);
@ -3402,11 +3402,11 @@ public class CodeManager implements ErrorHandler, ManagerDB {
Record rec = allRecords.get(allRecords.size() - 1); Record rec = allRecords.get(allRecords.size() - 1);
long date = rec.getLongValue(CommentHistoryAdapter.HISTORY_DATE_COL); long date = rec.getLongValue(CommentHistoryAdapter.HISTORY_DATE_COL);
List<Record> records = subListByDate(allRecords, date); List<Record> records = subListByDate(allRecords, date);
List<StringDiff> diffs = new ArrayList<>(records.size()); List<StringDiff> diffs = new ArrayList<>(records.size());
String user = null; String user = null;
for (int i = 0; i < records.size(); i++) { for (Record r : records) {
Record r = records.get(i);
user = r.getString(CommentHistoryAdapter.HISTORY_USER_COL); user = r.getString(CommentHistoryAdapter.HISTORY_USER_COL);
int pos1 = r.getIntValue(CommentHistoryAdapter.HISTORY_POS1_COL); int pos1 = r.getIntValue(CommentHistoryAdapter.HISTORY_POS1_COL);
int pos2 = r.getIntValue(CommentHistoryAdapter.HISTORY_POS2_COL); int pos2 = r.getIntValue(CommentHistoryAdapter.HISTORY_POS2_COL);
@ -3415,7 +3415,7 @@ public class CodeManager implements ErrorHandler, ManagerDB {
} }
results.add(new CommentHistory(addr, commentType, user, comment, new Date(date))); results.add(new CommentHistory(addr, commentType, user, comment, new Date(date)));
comment = StringDiffer.applyDiffs(comment, diffs); comment = StringDiffUtils.applyDiffs(comment, diffs);
records.clear(); // remove the subList elements from the list records.clear(); // remove the subList elements from the list
} }

View File

@ -0,0 +1,351 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.program.database.code;
import java.util.*;
import org.apache.commons.lang3.StringUtils;
import generic.algorithms.ReducingListBasedLcs;
class StringDiffUtils {
/**
* Minimum size used to determine whether a new StringDiff object will be
* created just using a string (no positions)
* in the <code>getDiffs(String, String)</code> method.
* @see #getLineDiffs(String, String)
*/
private static int MINIMUM_DIFF_SIZE = 100;
/**
* Returns the list of StringDiff objects that if applied to s1 would result in s2; The
* given text will look only for whole lines using '\n'.
*
* @param s1 the original string
* @param s2 the result string
* this value, then a completely different string will be returned
* @return an array of StringDiff objects that change s1 into s2;
*/
static StringDiff[] getLineDiffs(String s1, String s2) {
return getLineDiffs(s1, s2, MINIMUM_DIFF_SIZE);
}
static StringDiff[] getLineDiffs(String s1, String s2, int minimumDiffSize) {
if (s2.length() < minimumDiffSize) {
return new StringDiff[] { StringDiff.allTextReplaced(s2) };
}
List<Line> aList = split(s1);
List<Line> bList = split(s2);
LineLcs lcs = new LineLcs(aList, bList);
List<Line> commons = lcs.getLcs();
if (commons.isEmpty()) {
// no common text--complete replacement
return new StringDiff[] { StringDiff.allTextReplaced(s2) };
}
int aIndex = 0;
int bIndex = 0;
int aLastIndex = 0;
int bLastIndex = 0;
List<StringDiff> results = new LinkedList<>();
for (Line common : commons) {
aIndex = indexOf(aList, common, aLastIndex);
bIndex = indexOf(bList, common, bLastIndex);
int aDelta = aIndex - aLastIndex;
int bDelta = bIndex - bLastIndex;
int aEnd = aIndex;
int aStart = aEnd - aDelta;
List<Line> aPrevious = aList.subList(aStart, aEnd);
StringDiff delete = createDelete(aPrevious);
if (delete != null) {
results.add(delete);
}
int bEnd = bIndex;
int bStart = bEnd - bDelta;
List<Line> bPrevious = bList.subList(bStart, bEnd);
StringDiff insert = createInsert(bPrevious, charOffset(aList, aIndex));
if (insert != null) {
results.add(insert);
}
// note: nothing is needed for the 'common' string, since we don't track unchanged text
aLastIndex = aIndex + 1;
bLastIndex = bIndex + 1;
}
// grab remainder
StringDiff trailingDeleted = createDeleteAtEnd(aList, aLastIndex, aList.size());
if (trailingDeleted != null) {
results.add(trailingDeleted);
}
StringDiff trailingInserted =
createInsertAtEnd(bList, bLastIndex, bList.size(), s1.length());
if (trailingInserted != null) {
results.add(trailingInserted);
}
return results.toArray(new StringDiff[results.size()]);
}
private static int charOffset(List<Line> list, int index) {
Line line = list.get(index);
return line.start;
}
private static StringDiff createInsertAtEnd(List<Line> list, int start, int end,
int insertIndex) {
if (start - 1 == end) {
return null;
}
List<Line> toDo = list.subList(start, end);
boolean newlineNeeded = true; // we are at the end--need a newline
StringDiff insert = createInsert(toDo, insertIndex, newlineNeeded);
return insert;
}
private static StringDiff createInsert(List<Line> lines, int insertIndex) {
return createInsert(lines, insertIndex, false);
}
private static StringDiff createInsert(List<Line> lines, int insertIndex, boolean isAtEnd) {
if (lines.isEmpty()) {
return null;
}
StringBuilder buffy = new StringBuilder();
// special case: if this insert is for the end of the line, then we want to add
// a newline before the remaining text is added since the original text
// did not have this newline
if (isAtEnd) {
buffy.append('\n');
}
for (Line line : lines) {
buffy.append(line.getText());
}
return StringDiff.textInserted(buffy.toString(), insertIndex);
}
private static StringDiff createDeleteAtEnd(List<Line> list, int start, int end) {
if (start - 1 == end) {
return null;
}
List<Line> toDo = list.subList(start, end);
boolean includeLastNewline = false; // we are at the end--do not include artificial newline
StringDiff delete = createDelete(toDo, includeLastNewline);
return delete;
}
private static StringDiff createDelete(List<Line> lines) {
return createDelete(lines, true);
}
private static StringDiff createDelete(List<Line> lines, boolean includeLastNewline) {
if (lines.isEmpty()) {
return null;
}
int start = 0;
int end = 0;
for (Line line : lines) {
start = line.start;
end = line.start + line.text.length();
}
// special case: if this delete is for the last line, then we want to remove the remaining
// trailing newline
Line last = lines.get(lines.size() - 1);
if (!includeLastNewline && last.isLastLine) {
start -= 1; // remove previous newline
}
return StringDiff.textDeleted(start, end);
}
private static int indexOf(List<Line> list, Line line, int from) {
for (int i = from; i < list.size(); i++) {
if (list.get(i).textMatches(line)) {
return i;
}
}
return list.size(); // should not get here since 's' is known to be in list
}
private static List<Line> split(String s) {
LinkedList<Line> result = new LinkedList<>();
List<String> lines = Arrays.asList(StringUtils.splitPreserveAllTokens(s, '\n'));
int start = 0;
for (String line : lines) {
Line l = new Line(line + '\n', start);
result.add(l);
start += l.text.length();
}
// strip off the trailing newline that we added above
Line last = result.peekLast();
last.markAsLast();
return result;
}
/**
* Applies the array of StringObjects to the string s to produce a new string. Warning - the
* diff objects cannot be applied to an arbitrary string, the Strings must be the original
* String used to compute the diffs.
* @param s the original string
* @param diffs the array of StringDiff object to apply
* @return a new String resulting from applying the diffs to s.
*/
static String applyDiffs(String s, List<StringDiff> diffs) {
if (diffs.isEmpty()) {
return s;
}
if (diffs.get(0).start < 0) {
// all replaced or all deleted
String data = diffs.get(0).text;
return data == null ? "" : data;
}
int pos = 0;
StringBuilder buf = new StringBuilder(s.length());
for (StringDiff element : diffs) {
if (element.start > pos) {
buf.append(s.substring(pos, element.start));
pos = element.start;
}
String data = element.text;
if (data != null) {
buf.append(data);
}
else {
// null data is a delete; move to the end of the delete
pos = element.end;
}
}
if (pos < s.length()) {
buf.append(s.substring(pos));
}
return buf.toString();
}
//==================================================================================================
// Inner Classes
//==================================================================================================
private static class Line {
private String text;
private int start;
private boolean isLastLine;
public Line(String line, int start) {
this.text = line;
this.start = start;
}
String getText() {
if (isLastLine) {
return textWithoutNewline(); // last line and do not include the newline
}
return text;
}
void markAsLast() {
isLastLine = true;
}
private String textWithoutNewline() {
if (text.charAt(text.length() - 1) == '\n') {
return text.substring(0, text.length() - 1);
}
return text;
}
@Override
public String toString() {
return textWithoutNewline() + " @ " + start;
}
boolean textMatches(Line other) {
return Objects.equals(text, other.text);
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + start;
result = prime * result + ((text == null) ? 0 : text.hashCode());
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
if (obj == null) {
return false;
}
if (getClass() != obj.getClass()) {
return false;
}
Line other = (Line) obj;
if (start != other.start) {
return false;
}
if (text == null) {
if (other.text != null) {
return false;
}
}
else if (!text.equals(other.text)) {
return false;
}
return true;
}
}
/**
 * An Lcs over {@link Line}s in which two lines match when their text is equal; the start
 * values of the lines play no part in the comparison.
 */
private static class LineLcs extends ReducingListBasedLcs<Line> {

    LineLcs(List<Line> x, List<Line> y) {
        super(x, y);
    }

    @Override
    protected boolean matches(Line x, Line y) {
        // delegate to Line so the text comparison is defined in one place (and is null-safe)
        return x.textMatches(y);
    }
}
}

View File

@ -1,261 +0,0 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.program.database.code;
import java.util.LinkedList;
import java.util.List;
/**
 * Line-oriented string differencing: computes the {@link StringDiff} objects that turn one
 * string into another and applies such diffs back onto the original string.
 */
class StringDiffer {

    /**
     * Default minimum length of the result string for which
     * {@link #getLineDiffs(String, String)} computes positional diffs; a shorter result string
     * is recorded as one whole-text replacement (no positions).
     */
    private static final int MINIMUM_DIFF_SIZE = 100;

    /**
     * Returns the list of StringDiff objects that if applied to s1 would result in s2;  The
     * given text will look only for whole lines using '\n'.  The default minimum diff size
     * is used.
     *
     * @param s1 the original string
     * @param s2 the result string
     * @return an array of StringDiff objects that change s1 into s2;
     * @see #getLineDiffs(String, String, int)
     */
    static StringDiff[] getLineDiffs(String s1, String s2) {
        return getLineDiffs(s1, s2, MINIMUM_DIFF_SIZE);
    }

    /**
     * Returns the list of StringDiff objects that if applied to s1 would result in s2;  The
     * given text will look only for whole lines using '\n'.
     *
     * @param s1 the original string
     * @param s2 the result string
     * @param minimumDiffSize the minimum length of s2 required for a diff; if s2 is less than
     * 		  this value, then a completely different string will be returned
     * @return an array of StringDiff objects that change s1 into s2;
     */
    static StringDiff[] getLineDiffs(String s1, String s2, int minimumDiffSize) {

        if (s2.length() < minimumDiffSize) {
            return new StringDiff[] { StringDiff.allTextReplaced(s2) };
        }

        List<StringDiff> results = new LinkedList<>();
        int cursor1 = 0;
        int cursor2 = 0;
        int len1 = s1.length();
        int len2 = s2.length();

        /*
         	-look at each line in 'line' chunks using '\n'
         */

        // walk each string until the end...
        while (cursor1 < len1 || cursor2 < len2) {
            String line1 = getLine(s1, cursor1);
            String line2 = getLine(s2, cursor2);
            if (line1.equals(line2)) {
                // same line at both cursors; nothing to record
                cursor1 += line1.length();
                cursor2 += line2.length();
                continue;
            }

            // look for line1 in s2...
            int line1PosInOther = findLine(s2, cursor2, line1);
            int mark = cursor1;
            while (line1PosInOther < 0) {
                // line1 is not in s2; scan for the next line.  This ends at the latest when
                // s1 is exhausted, as the empty line is always reported as found.
                cursor1 += line1.length();
                line1 = getLine(s1, cursor1);
                line1PosInOther = findLine(s2, cursor2, line1);
            }

            if (cursor1 > mark) {
                // the original line1 was not in s2; add all that was different up to current cursor1
                results.add(StringDiff.textDeleted(mark, cursor1));
            }

            // now look for line2 in s1
            int line2PosInOther = findLine(s1, cursor1, line2);
            mark = cursor2;
            while (line2PosInOther < 0) {
                // line2 is not in s1; scan for the next line
                cursor2 += line2.length();
                line2 = getLine(s2, cursor2);
                line2PosInOther = findLine(s1, cursor1, line2);
            }

            if (cursor2 > mark) {
                // the original line2 was not in s1; add all that was different up to current cursor2
                results.add(StringDiff.textInserted(s2.substring(mark, cursor2), cursor1));
                continue;
            }

            // move both searches forward
            int delta1 = line2PosInOther - cursor1;
            int delta2 = line1PosInOther - cursor2;
            if (delta1 > delta2) {
                // this can happen when two lines have been rearranged *and* the line length
                // of the moved line is *longer* than the new line at the replaced position
                results.add(
                    StringDiff.textInserted(s2.substring(cursor2, line1PosInOther), cursor1));
                cursor2 = line1PosInOther;
            }
            else if (delta2 > delta1) {
                // this can happen when two lines have been rearranged *and* the line length
                // of the moved line is *shorter* than the new line at the replaced position
                results.add(StringDiff.textDeleted(cursor1, line2PosInOther));
                cursor1 = line2PosInOther;
            }
            else { // delta1 == delta2
                if (cursor1 != line2PosInOther) {
                    results.add(StringDiff.textDeleted(cursor1, line2PosInOther));
                    cursor1 = line2PosInOther;
                }
                if (cursor2 != line1PosInOther) {
                    results.add(
                        StringDiff.textInserted(s2.substring(cursor2, line1PosInOther), cursor1));
                    cursor2 = line1PosInOther;
                }
            }
        }

        return results.toArray(new StringDiff[results.size()]);
    }

    /**
     * Finds a position in s, at or after pos, that contains the string line.  The matching
     * string in s must be a "complete" line: the match must start at index 0 or directly after
     * a newline character, and the match must either end with a newline itself or end exactly
     * at the end of s.
     * <p>
     * The empty line is always reported as found at {@code pos}.
     *
     * @param s the string to scan
     * @param pos the position to begin the scan.
     * @param line the line to scan for
     * @return the position in s containing the line string; -1 if there is no such position
     */
    static int findLine(String s, int pos, String line) {

        if (line.isEmpty()) {
            // getLine() returns the empty string once its cursor has passed the end of the
            // text; answering with a non-negative value here is what stops the scan loops in
            // getLineDiffs(), which keep advancing for as long as they get a negative result
            return pos;
        }

        int n = s.length();
        while (pos < n) {
            int index = s.indexOf(line, pos);
            if (index < 0) {
                return index;
            }

            if (index > 0 && s.charAt(index - 1) != '\n') {
                pos = index + line.length(); // line matched, but not a newline in 's'
                continue;
            }

            //
            // Have a match with at start/0 or have a preceding newline
            //
            if (line.endsWith("\n")) {
                return index; // the match ends with a newline; found line
            }

            // no newline for the current match in 's'
            if (index + line.length() == n) {
                return index; // at the end exactly; found line
            }

            // no newline; not at end; keep going
            pos = index + line.length();
        }
        return -1;
    }

    /**
     * Returns a substring of s beginning at start and ending at either the end of the string or
     * the first newLine at or after start (the newline is included in the result)
     *
     * @param s the string to scan
     * @param start the starting position for the scan
     * @return a string that represents a line within s; the empty string if start is at or
     *         beyond the end of s
     */
    private static String getLine(String s, int start) {
        int n = s.length();
        if (start >= n) {
            return "";
        }

        int newline = s.indexOf('\n', start);
        int end = (newline < 0) ? n : newline + 1; // found newline; include the newline
        return s.substring(start, end);
    }

    /**
     * Applies the list of StringDiff objects to the string s to produce a new string.  Warning -
     * the diff objects cannot be applied to an arbitrary string, the String must be the original
     * String used to compute the diffs.
     *
     * @param s the original string
     * @param diffs the list of StringDiff objects to apply
     * @return a new String resulting from applying the diffs to s.
     */
    static String applyDiffs(String s, List<StringDiff> diffs) {

        if (diffs.isEmpty()) {
            return s;
        }

        if (diffs.get(0).start < 0) {
            // all replaced or all deleted
            String data = diffs.get(0).text;
            return data == null ? "" : data;
        }

        int pos = 0;
        StringBuilder buf = new StringBuilder(s.length());
        for (StringDiff element : diffs) {
            if (element.start > pos) {
                // copy the unchanged text that precedes this diff
                buf.append(s, pos, element.start);
                pos = element.start;
            }

            String data = element.text;
            if (data != null) {
                buf.append(data);
            }
            else {
                // null data is a delete; move to the end of the delete
                pos = element.end;
            }
        }

        if (pos < s.length()) {
            // copy whatever follows the last diff
            buf.append(s, pos, s.length());
        }
        return buf.toString();
    }
}

View File

@ -1,6 +1,5 @@
/* ### /* ###
* IP: GHIDRA * IP: GHIDRA
* REVIEWED: YES
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
@ -16,11 +15,11 @@
*/ */
package ghidra.program.util; package ghidra.program.util;
import generic.algorithms.LCS; import generic.algorithms.Lcs;
import java.util.List; import java.util.List;
public class CodeUnitLCS extends LCS<CodeUnitContainer> { public class CodeUnitLCS extends Lcs<CodeUnitContainer> {
private List<CodeUnitContainer> xList; private List<CodeUnitContainer> xList;
private List<CodeUnitContainer> yList; private List<CodeUnitContainer> yList;

View File

@ -17,107 +17,15 @@ package ghidra.program.database.code;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import java.util.Arrays; import java.util.*;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.junit.Test; import org.junit.Test;
import generic.test.AbstractGTest;
public class StringDiffTest { public class StringDiffTest {
/*
A line match is if the given line to match is contained in the source string and:
1) a) matches in the source string with a '\n' char at the index before the match OR
b) is at the beginning *and* the match contains a newline
2) is at the exact end of the source string
*The empty string matches at the current position
Source String: "abcd\nefghi\n"
Line to Match:
*/
/** Searching for the empty line from position 0 reports position 0. */
@Test
public void testFindLine_FromStart_EmptyLine() {
    String text = "this is a really\nlone line with\n newlines";
    String emptyLine = "";

    assertEquals(0, StringDiffer.findLine(text, 0, emptyLine));
}
/** Text that does not occur in the source is not found when scanning from the start. */
@Test
public void testFindLine_FromStart_NoMatch() {
    String text = "this is a really\nlone line with\n newlines";
    String missing = "coconuts";

    assertEquals(-1, StringDiffer.findLine(text, 0, missing));
}
/** Text that does not occur in the source is not found when scanning from position 15. */
@Test
public void testFindLine_FromMiddle_NoMatch() {
    String text = "this is a really\nlone line with\n newlines";
    String missing = "coconuts";

    assertEquals(-1, StringDiffer.findLine(text, 15, missing));
}
/** Nothing is found when the scan begins at the very end of the source. */
@Test
public void testFindLine_FromEnd_NoMatch() {
    String text = "this is a really\nlone line with\n newlines";
    String missing = "coconuts";
    int endOfText = text.length();

    assertEquals(-1, StringDiffer.findLine(text, endOfText, missing));
}
/** A newline-terminated line at the start of the source is found at position 0. */
@Test
public void testFindLine_FromStart_MatchWithNewline_AtStart() {
    String text = "this is a really\nlone line with\n newlines";
    String firstLine = "this is a really\n";

    assertEquals(0, StringDiffer.findLine(text, 0, firstLine));
}
/** A newline-terminated line that follows a newline is found where it begins (17). */
@Test
public void testFindLine_FromStart_MatchWithNewline_AtMiddle() {
    String text = "this is a really\nlone line with\n newlines";
    String secondLine = "lone line with\n";

    assertEquals(17, StringDiffer.findLine(text, 0, secondLine));
}
/** Text at the end of the source is not a line match when no newline precedes it. */
@Test
public void testFindLine_FromStart_MatchWithNewline_AtEnd_FailWithoutPrecedingNewline() {
    String text = "this is a really\nlone line with\n newlines\n";
    String partialLine = "lines\n";

    assertEquals(-1, StringDiffer.findLine(text, 0, partialLine));
}
/** Text at the end of the source is a line match when a newline precedes it (found at 37). */
@Test
public void testFindLine_FromStart_MatchWithNewline_AtEnd_PassWithPrecedingNewline() {
    String text = "this is a really\nlone line with\n new\nlines\n";
    String lastLine = "lines\n";

    assertEquals(37, StringDiffer.findLine(text, 0, lastLine));
}
/** The first line's text without its newline is not reported as a line match. */
@Test
public void testFindLine_FromStart_MatchWithoutNewline_AtStart() {
    String text = "this is a really\nlone line with\n newlines";
    String unterminated = "this is a really";

    // match at start must contain a newline
    assertEquals(-1, StringDiffer.findLine(text, 0, unterminated));
}
@Test @Test
public void testGetDiffLines_Insert_AtFront() { public void testGetDiffLines_Insert_AtFront() {
@ -126,8 +34,8 @@ public class StringDiffTest {
String v1 = StringUtils.join(a1, '\n'); String v1 = StringUtils.join(a1, '\n');
String v2 = StringUtils.join(a2, '\n'); String v2 = StringUtils.join(a2, '\n');
StringDiff[] diffs = StringDiffer.getLineDiffs(v1, v2, 1); StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1);
String restoredV2 = StringDiffer.applyDiffs(v1, Arrays.asList(diffs)); String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs));
assertEquals(v2, restoredV2); assertEquals(v2, restoredV2);
} }
@ -139,8 +47,8 @@ public class StringDiffTest {
String v1 = StringUtils.join(a1, '\n'); String v1 = StringUtils.join(a1, '\n');
String v2 = StringUtils.join(a2, '\n'); String v2 = StringUtils.join(a2, '\n');
StringDiff[] diffs = StringDiffer.getLineDiffs(v1, v2, 1); StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1);
String restoredV2 = StringDiffer.applyDiffs(v1, Arrays.asList(diffs)); String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs));
assertEquals(v2, restoredV2); assertEquals(v2, restoredV2);
} }
@ -152,8 +60,8 @@ public class StringDiffTest {
String v1 = StringUtils.join(a1, '\n'); String v1 = StringUtils.join(a1, '\n');
String v2 = StringUtils.join(a2, '\n'); String v2 = StringUtils.join(a2, '\n');
StringDiff[] diffs = StringDiffer.getLineDiffs(v1, v2, 1); StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1);
String restoredV2 = StringDiffer.applyDiffs(v1, Arrays.asList(diffs)); String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs));
assertEquals(v2, restoredV2); assertEquals(v2, restoredV2);
} }
@ -165,8 +73,8 @@ public class StringDiffTest {
String v1 = StringUtils.join(a1, '\n'); String v1 = StringUtils.join(a1, '\n');
String v2 = StringUtils.join(a2, '\n'); String v2 = StringUtils.join(a2, '\n');
StringDiff[] diffs = StringDiffer.getLineDiffs(v1, v2, 1); StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1);
String restoredV2 = StringDiffer.applyDiffs(v1, Arrays.asList(diffs)); String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs));
assertEquals(v2, restoredV2); assertEquals(v2, restoredV2);
} }
@ -178,8 +86,8 @@ public class StringDiffTest {
String v1 = StringUtils.join(a1, '\n'); String v1 = StringUtils.join(a1, '\n');
String v2 = StringUtils.join(a2, '\n'); String v2 = StringUtils.join(a2, '\n');
StringDiff[] diffs = StringDiffer.getLineDiffs(v1, v2, 1); StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1);
String restoredV2 = StringDiffer.applyDiffs(v1, Arrays.asList(diffs)); String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs));
assertEquals(v2, restoredV2); assertEquals(v2, restoredV2);
} }
@ -191,8 +99,8 @@ public class StringDiffTest {
String v1 = StringUtils.join(a1, '\n'); String v1 = StringUtils.join(a1, '\n');
String v2 = StringUtils.join(a2, '\n'); String v2 = StringUtils.join(a2, '\n');
StringDiff[] diffs = StringDiffer.getLineDiffs(v1, v2, 1); StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1);
String restoredV2 = StringDiffer.applyDiffs(v1, Arrays.asList(diffs)); String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs));
assertEquals(v2, restoredV2); assertEquals(v2, restoredV2);
} }
@ -204,8 +112,8 @@ public class StringDiffTest {
String v1 = StringUtils.join(a1, '\n'); String v1 = StringUtils.join(a1, '\n');
String v2 = StringUtils.join(a2, '\n'); String v2 = StringUtils.join(a2, '\n');
StringDiff[] diffs = StringDiffer.getLineDiffs(v1, v2, 1); StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1);
String restoredV2 = StringDiffer.applyDiffs(v1, Arrays.asList(diffs)); String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs));
assertEquals(v2, restoredV2); assertEquals(v2, restoredV2);
} }
@ -220,8 +128,8 @@ public class StringDiffTest {
String v1 = StringUtils.join(a1, '\n'); String v1 = StringUtils.join(a1, '\n');
String v2 = StringUtils.join(a2, '\n'); String v2 = StringUtils.join(a2, '\n');
StringDiff[] diffs = StringDiffer.getLineDiffs(v1, v2, 1); StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1);
String restoredV2 = StringDiffer.applyDiffs(v1, Arrays.asList(diffs)); String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs));
assertEquals(v2, restoredV2); assertEquals(v2, restoredV2);
} }
@ -233,8 +141,8 @@ public class StringDiffTest {
String v1 = StringUtils.join(a1, '\n'); String v1 = StringUtils.join(a1, '\n');
String v2 = StringUtils.join(a2, '\n'); String v2 = StringUtils.join(a2, '\n');
StringDiff[] diffs = StringDiffer.getLineDiffs(v1, v2, 1); StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1);
String restoredV2 = StringDiffer.applyDiffs(v1, Arrays.asList(diffs)); String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs));
assertEquals(v2, restoredV2); assertEquals(v2, restoredV2);
} }
@ -246,9 +154,50 @@ public class StringDiffTest {
String v1 = StringUtils.join(a1, '\n'); String v1 = StringUtils.join(a1, '\n');
String v2 = StringUtils.join(a2, '\n'); String v2 = StringUtils.join(a2, '\n');
StringDiff[] diffs = StringDiffer.getLineDiffs(v1, v2, 1); StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1);
String restoredV2 = StringDiffer.applyDiffs(v1, Arrays.asList(diffs)); String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs));
assertEquals(v2, restoredV2); assertEquals(v2, restoredV2);
} }
/** Diffs between two texts with no line in common must still rebuild the second text. */
@Test
public void testReplace() {
    String original = StringUtils.join(new String[] { "In", "the", "beginning" }, '\n');
    String expected = StringUtils.join(new String[] { "There", "was", "vastness" }, '\n');

    StringDiff[] lineDiffs = StringDiffUtils.getLineDiffs(original, expected, 1);
    String rebuilt = StringDiffUtils.applyDiffs(original, Arrays.asList(lineDiffs));

    assertEquals(expected, rebuilt);
}
/**
 * Diffs two 1200-line texts that differ only in their first and last lines; a single diff is
 * expected, and applying it must rebuild the second text.
 */
@Test
public void testTheBiggness_NoOptimization() throws Exception {
    List<String> originalLines = generateLines(1200);

    List<String> changedLines = new ArrayList<>(originalLines);
    int lastIndex = changedLines.size() - 1;
    changedLines.set(0, "a new line at 0");
    changedLines.set(lastIndex, "a new line at length");

    String original = StringUtils.join(originalLines, '\n');
    String expected = StringUtils.join(changedLines, '\n');

    StringDiff[] lineDiffs = StringDiffUtils.getLineDiffs(original, expected, 1);

    // 1 diff--completely different, due to size restriction on Lcs
    assertEquals(1, lineDiffs.length);

    String rebuilt = StringDiffUtils.applyDiffs(original, Arrays.asList(lineDiffs));
    assertEquals(expected, rebuilt);
}
/**
 * Builds {@code size} lines of the form "Line N: " followed by random text, numbered from 1;
 * any newline in the random text is removed so that each entry stays a single line.
 */
private List<String> generateLines(int size) {
    List<String> lines = new ArrayList<>();
    for (int lineNumber = 1; lineNumber <= size; lineNumber++) {
        String filler = AbstractGTest.getRandomString(0, 50).replace("\n", "");
        lines.add("Line " + lineNumber + ": " + filler);
    }
    return lines;
}
} }