diff --git a/Ghidra/Framework/Generic/src/main/java/generic/algorithms/LCS.java b/Ghidra/Framework/Generic/src/main/java/generic/algorithms/LCS.java deleted file mode 100644 index e9921fede2..0000000000 --- a/Ghidra/Framework/Generic/src/main/java/generic/algorithms/LCS.java +++ /dev/null @@ -1,150 +0,0 @@ -/* ### - * IP: GHIDRA - * REVIEWED: YES - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package generic.algorithms; - -import ghidra.util.exception.CancelledException; -import ghidra.util.task.TaskMonitor; -import ghidra.util.task.TaskMonitorAdapter; - -import java.util.ArrayList; -import java.util.List; - -/** - * Abstract class for finding the LCS between two sequences of Matchable - * objects. - * - * - * - * @param the type of the objects being compared. - */ -public abstract class LCS { - private int[][] c; - - /** - * Convenient constructor for initializing elements in subclasses - */ - protected LCS() { - } - - /** - * @return the length of the X sequence. - */ - protected abstract int lengthOfX(); - - /** - * @return the length of the Y sequence. - */ - protected abstract int lengthOfY(); - - /** - * @param index the position of interest in the X sequence. - * @return the value in the X sequence at index. - * Assumes 1-indexing. - */ - protected abstract T valueOfX(int index); - - /** - * @param index the position of interest in the Y sequence. - * @return the value in the Y sequence at index. - * Assumes 1-indexing. 
- */ - protected abstract T valueOfY(int index); - - /** - * @param x the X-sequence element of interest - * @param y the Y-sequence element of interest - * @return true if x matches y; false otherwise. - */ - protected abstract boolean matches(T x, T y); - - /** - * Compute the LCS - * @param monitor - */ - private void calculateLCS(TaskMonitor monitor) throws CancelledException { - if (c != null) { - return; - } - - int[][] tempC = new int[lengthOfX() + 1][]; - - monitor.setMessage("Calculating LCS..."); - monitor.initialize(tempC.length); - - for (int i = 0; i < tempC.length; i++) { - // Java int arrays are automatically initialized to 0 - tempC[i] = new int[lengthOfY() + 1]; - } - - for (int i = 1; i < tempC.length; i++) { - monitor.checkCanceled(); - for (int j = 1; j < tempC[i].length; j++) { - if (matches(valueOfX(i), valueOfY(j))) { - tempC[i][j] = tempC[i - 1][j - 1] + 1; - } - else { - tempC[i][j] = Math.max(tempC[i][j - 1], tempC[i - 1][j]); - } - } - monitor.incrementProgress(1); - } - - c = tempC; - } - - /** - * @return a List<T> of elements in the LCS. - */ - public List getLCS() { - try { - return getLCS(TaskMonitorAdapter.DUMMY_MONITOR); - } - catch (CancelledException e) { - // can't happen with a dummy monitor - } - return null; - } - - public List getLCS(TaskMonitor monitor) throws CancelledException { - calculateLCS(monitor); - return getLCSHelperIterative(lengthOfX(), lengthOfY()); - } - - /** - * Iterative helper function for getLCS(). - * @param i the current row index - * @param j the current column index - * @return the LCS after analyzing element c[i, j]. 
- */ - private List getLCSHelperIterative(int i, int j) { - ArrayList result = new ArrayList(); - while (i > 0 && j > 0) { - if (c[i][j] == c[i - 1][j - 1] + 1 && matches(valueOfX(i), valueOfY(j))) { - result.add(0, valueOfX(i)); - --i; - --j; - } - else if (c[i][j] == c[i - 1][j]) { - --i; - } - else { - --j; - } - } - return result; - } -} diff --git a/Ghidra/Framework/Generic/src/main/java/generic/algorithms/Lcs.java b/Ghidra/Framework/Generic/src/main/java/generic/algorithms/Lcs.java new file mode 100644 index 0000000000..9001c475f8 --- /dev/null +++ b/Ghidra/Framework/Generic/src/main/java/generic/algorithms/Lcs.java @@ -0,0 +1,214 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package generic.algorithms; + +import java.util.ArrayList; +import java.util.List; + +import ghidra.util.exception.CancelledException; +import ghidra.util.task.TaskMonitor; + +/** + * Abstract class for finding the Longest Common Subsequence (LCS) between two + * sequences of Matchable objects, x and y. + * + *

The performance of this algorithm is O(n^2). Thus, large inputs can cause much processor + * and memory usage. This class has an upper limit (see {@link #getSizeLimit()}) to prevent + * accidental system failure. + * + * @param the type of the objects being compared + */ +public abstract class Lcs { + + /** + * Somewhat arbitrary upper-bound restriction. 1M is 1000 * 1000 + */ + private static int DEFAULT_SIZE_LIMIT = 1_000_000; + private int sizeLimit = DEFAULT_SIZE_LIMIT; + + private int[][] c; + + /** + * Returns the length of the x sequence + * @return the length of the x sequence + */ + protected abstract int lengthOfX(); + + /** + * Returns the length of the y sequence + * @return the length of the y sequence + */ + protected abstract int lengthOfY(); + + /** + * Gets the value of the x sequence at the given index, where index is 1-based + * + * @param index the 1-based position of interest in the x sequence + * @return the value in the x sequence at index + */ + protected abstract T valueOfX(int index); + + /** + * Gets the value of the y sequence at the given index, where index is 1-based + * + * @param index the 1-based position of interest in the Y sequence + * @return the value in the y sequence at index + */ + protected abstract T valueOfY(int index); + + /** + * Returns true if the value of x and y match + * + * @param x the x-sequence element of interest + * @param y the y-sequence element of interest + * @return true if x matches y; false otherwise + */ + protected abstract boolean matches(T x, T y); + + /** + * Compute the LCS + * @param monitor the task monitor + */ + private void calculateLCS(TaskMonitor monitor) throws CancelledException { + if (c != null) { + return; + } + + if (tooBig()) { + c = new int[0][0]; + return; + } + + int[][] tempC = new int[lengthOfX() + 1][]; + + monitor.setMessage("Calculating LCS..."); + monitor.initialize(tempC.length); + + // create the zero-initialized matrix + for (int i = 0; i < tempC.length; i++) { + 
tempC[i] = new int[lengthOfY() + 1]; + } + + for (int i = 1; i < tempC.length; i++) { + monitor.checkCanceled(); + for (int j = 1; j < tempC[i].length; j++) { + if (matches(valueOfX(i), valueOfY(j))) { + tempC[i][j] = tempC[i - 1][j - 1] + 1; + } + else { + tempC[i][j] = Math.max(tempC[i][j - 1], tempC[i - 1][j]); + } + } + monitor.incrementProgress(1); + } + + c = tempC; + } + + /** + * Defines a limit on the overall size of the inputs, above which no processing will + * take place. Any value over the limit will produce an empty LCS. + * + * @return true if the product of the two lengths, computed as a long, exceeds the limit + */ + private boolean tooBig() { + return (long) lengthOfX() * lengthOfY() > sizeLimit; + } + + /** + * Changes the size limit of this LCS, past which no calculations will be performed + * + * @param newLimit the new limit + */ + public void setSizeLimit(int newLimit) { + this.sizeLimit = newLimit; + } + + /** + * Returns the current size limit, past which no calculations will be performed + * + * @return the size limit + * @see #setSizeLimit(int) + */ + public int getSizeLimit() { + return sizeLimit; + } + + /** + * Returns a list of the longest common subsequence. This result will be empty if the + * {@link #getSizeLimit()} has been reached. + * + * @return the list + */ + public List getLcs() { + try { + return getLcs(TaskMonitor.DUMMY); + } + catch (CancelledException e) { + // can't happen with a dummy monitor + } + return null; + } + + /** + * Returns a list of the longest common subsequence. This result will be empty if the + * {@link #getSizeLimit()} has been reached. 
+ * + * @param monitor the task monitor + * @return the LCS list + * @throws CancelledException if the monitor is cancelled + */ + public List getLcs(TaskMonitor monitor) throws CancelledException { + calculateLCS(monitor); + return doGetLcs(monitor); + } + + /** + * Get the actual LCS based upon the already created matrix + * + * @param monitor the task monitor + * @return the LCS list + * @throws CancelledException if the monitor is cancelled + */ + protected List doGetLcs(TaskMonitor monitor) throws CancelledException { + + int x = 0; + int y = 0; + + if (c.length > 0) { + x = lengthOfX(); + y = lengthOfY(); + } + + List result = new ArrayList<>(); + while (x > 0 && y > 0) { + monitor.checkCanceled(); + + if (c[x][y] == c[x - 1][y - 1] + 1 && matches(valueOfX(x), valueOfY(y))) { + result.add(0, valueOfX(x)); + --x; + --y; + } + else if (c[x][y] == c[x - 1][y]) { + --x; + } + else { + --y; + } + } + return result; + } +} diff --git a/Ghidra/Framework/Generic/src/main/java/generic/algorithms/ReducingLcs.java b/Ghidra/Framework/Generic/src/main/java/generic/algorithms/ReducingLcs.java new file mode 100644 index 0000000000..74c1fe7b7d --- /dev/null +++ b/Ghidra/Framework/Generic/src/main/java/generic/algorithms/ReducingLcs.java @@ -0,0 +1,201 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package generic.algorithms; + +import java.util.ArrayList; +import java.util.List; + +import ghidra.util.exception.CancelledException; +import ghidra.util.task.TaskMonitor; + +/** + * Calculates the longest common subsequence (LCS) between two sequences of Matchable + * objects, x and y. + * + *

This is an optimizing version of the {@link Lcs} that will pre-calculate all similar + * items from the beginning and end of the two given sequences. Doing this will reduce + * the size of the matrix created by the parent class, greatly so in the case that the + * two inputs are mostly the same in the beginning and end. (Imagine an edit of a source + * code file, where the typical change is somewhere in the middle of the file. In this example, + * the optimization performed here can greatly decrease the amount of work to be performed when + * calculating the LCS.) + * + *

Note: the parent LCS algorithm is bound by {@link #getSizeLimit()}. However, this class + * allows clients to work around this restriction when the data has a similar beginning and ending, + * as the similar parts will not be counted against the size limit. + * + * @param The input sequence type + * @param the individual element type of the input sequence + */ +public abstract class ReducingLcs extends Lcs { + + private I xSource; // full input x + private I ySource; // full input y + + private I x; // the reduced input x + private I y; // the reduced input y + + private int startn; // number of beginning same entries + private int endn; // number of trailing same entries + + /** + * Constructor + * + * @param ix the input sequence x + * @param iy the input sequence y + */ + public ReducingLcs(I ix, I iy) { + this.xSource = ix; + this.ySource = iy; + + startn = getMatchCountFromStart(); + endn = getMatchCountFromEnd(); + int endx = getEnd(xSource); + int endy = getEnd(ySource); + this.x = reduce(ix, startn, endx); + this.y = reduce(iy, startn, endy); + } + + private int getEnd(I i) { + int end = lengthOf(i) - endn; + if (end <= startn) { + // boundary condition when the change is only a delete or insert + end = startn; + } + return end; + } + + /** + * Create a subsequence from the given input sequence. 
+ * + * @param i the input sequence; 0-based (x or y) + * @param start the start index; 0-based (inclusive) + * @param end the end index (exclusive) + * @return the subsequence + */ + protected abstract I reduce(I i, int start, int end); + + /** + * Return the length of the given sequence + * + * @param i the input sequence (x or y) + * @return the length + */ + protected abstract int lengthOf(I i); + + /** + * Return the value at the given 0-based offset + * + * @param i the input sequence (x or y) + * @param offset the offset + * @return the value + */ + protected abstract T valueOf(I i, int offset); + + @Override + protected List doGetLcs(TaskMonitor monitor) throws CancelledException { + + List reducedLcs = super.doGetLcs(monitor); + int size = reducedLcs.size() + lengthOf(x) + lengthOf(y); + List lcs = new ArrayList<>(size); + + // add the shared beginning + for (int i = 0; i < startn; i++) { + monitor.checkCanceled(); + lcs.add(valueOf(xSource, i)); + } + + // add the calculated LCS + lcs.addAll(reducedLcs); + + // add the shared end + int length = lengthOf(xSource); + int endx = getEnd(xSource); + for (int i = endx; i < length; i++) { + monitor.checkCanceled(); + lcs.add(valueOf(xSource, i)); + } + + return lcs; + } + + @Override + protected int lengthOfX() { + return lengthOf(x); + } + + @Override + protected int lengthOfY() { + return lengthOf(y); + } + + @Override + protected T valueOfX(int index) { + return valueOf(x, index - 1); + } + + @Override + protected T valueOfY(int index) { + return valueOf(y, index - 1); + } + + @Override + protected boolean matches(T tx, T ty) { + return tx.equals(ty); + } + +//================================================================================================== +// Private Methods +//================================================================================================== + + private int getMatchCountFromStart() { + + // scan past the beginning of all equal items + int n = 0; + int xl = 
lengthOf(xSource); + int yl = lengthOf(ySource); + while (n < xl && n < yl) { + T xt = valueOf(xSource, n); + T yt = valueOf(ySource, n); + if (!matches(xt, yt)) { + return n; + } + n++; + } + // one input is a prefix of the other: every item scanned so far matched + return n; + } + + private int getMatchCountFromEnd() { + + // scan past the trailing equal items, stopping before the items already counted in startn + int xi = lengthOf(xSource) - 1; + int yi = lengthOf(ySource) - 1; + + int n = 0; + for (; xi >= startn && yi >= startn; xi--, yi--) { + T xt = valueOf(xSource, xi); + T yt = valueOf(ySource, yi); + if (!matches(xt, yt)) { + return n; + } + n++; + } + + return n; + } + +} diff --git a/Ghidra/Framework/Generic/src/main/java/generic/algorithms/ReducingListBasedLcs.java b/Ghidra/Framework/Generic/src/main/java/generic/algorithms/ReducingListBasedLcs.java new file mode 100644 index 0000000000..57cac9b43c --- /dev/null +++ b/Ghidra/Framework/Generic/src/main/java/generic/algorithms/ReducingListBasedLcs.java @@ -0,0 +1,51 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package generic.algorithms; + +import java.util.List; + +/** + * An implementation of the {@link ReducingLcs} that takes as its input a list of items, where + * the list is the 'sequence' being checked for the Longest Common Subsequence. 
+ * + * @param the type of the item in the sequence of items + */ +public class ReducingListBasedLcs extends ReducingLcs, T> { + + public ReducingListBasedLcs(List x, List y) { + super(x, y); + } + + @Override + protected boolean matches(T x, T y) { + return x.equals(y); + } + + @Override + protected List reduce(List i, int start, int end) { + return i.subList(start, end); + } + + @Override + protected int lengthOf(List i) { + return i.size(); + } + + @Override + protected T valueOf(List i, int offset) { + return i.get(offset); + } +} diff --git a/Ghidra/Framework/Generic/src/test/java/generic/algorithms/LCSTest.java b/Ghidra/Framework/Generic/src/test/java/generic/algorithms/LCSTest.java index c0ad8f1708..320561df9f 100644 --- a/Ghidra/Framework/Generic/src/test/java/generic/algorithms/LCSTest.java +++ b/Ghidra/Framework/Generic/src/test/java/generic/algorithms/LCSTest.java @@ -16,21 +16,18 @@ package generic.algorithms; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import java.util.ArrayList; import java.util.List; +import org.apache.commons.lang3.StringUtils; import org.junit.Test; import generic.test.AbstractGenericTest; public class LCSTest extends AbstractGenericTest { - public LCSTest() { - super(); - - } - @Test public void testIdentical() { compareStrings("DEADBEEF", "DEADBEEF", "DEADBEEF"); @@ -46,36 +43,53 @@ public class LCSTest extends AbstractGenericTest { "Hooray for really loooooong strings that span multiple lines in java!", "Some really long string that might complicate things." 
+ "Hooray for really long strings that span multiple lines!"); - } @Test public void testDifferent() { + compareStrings("DEAD", "CANND", "AD"); compareStrings("DEADBEEFISGOOD", "CANNDBEEFISBAD", "ADBEEFISD"); compareStrings("this here is one string", "here a different string is", "here in string"); } + @Test + public void testSizeLimit() { + + String input = "This is more than 5 characters"; + StringLcs slcs = new StringLcs(input, input); + List lcs = slcs.getLcs(); + String result = StringUtils.join(lcs, ""); + assertEquals(input, result); + + slcs = new StringLcs(input, input); + slcs.setSizeLimit(10); + List actual = slcs.getLcs(); + assertTrue(actual.isEmpty()); + } + private void compareStrings(String x, String y, String expected) { - StringLCS slcs = new StringLCS(x, y); - List actual = slcs.getLCS(); + + StringLcs slcs = new StringLcs(x, y); + List actual = slcs.getLcs(); assertEquals(convertString(expected), actual); } private List convertString(String s) { - List charList = new ArrayList(); - for (char c : s.toCharArray()) + List charList = new ArrayList<>(); + for (char c : s.toCharArray()) { charList.add(c); + } return charList; } - private class StringLCS extends LCS { + private class StringLcs extends Lcs { private String x; private String y; - public StringLCS(String x, String y) { + public StringLcs(String x, String y) { super(); this.x = x; this.y = y; @@ -105,6 +119,5 @@ public class LCSTest extends AbstractGenericTest { protected Character valueOfY(int index) { return y.charAt(index - 1); } - } } diff --git a/Ghidra/Framework/Generic/src/test/java/generic/algorithms/ReducingLCSTest.java b/Ghidra/Framework/Generic/src/test/java/generic/algorithms/ReducingLCSTest.java new file mode 100644 index 0000000000..92fb225240 --- /dev/null +++ b/Ghidra/Framework/Generic/src/test/java/generic/algorithms/ReducingLCSTest.java @@ -0,0 +1,124 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this 
file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package generic.algorithms; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.lang3.StringUtils; +import org.junit.Test; + +public class ReducingLCSTest { + + @Test + public void testIdentical() { + compareStrings("DEADBEEF", "DEADBEEF", "DEADBEEF"); + } + + @Test + public void testSimilar() { + compareStrings("DEADBEEF", "DEEDBEAD", "DEDBE"); + compareStrings( + "Some really long string that might complicate things." + + "Hooray for really long strings that span multiple lines!", + "Some other really long string that might complicate things." + + "Hooray for really loooooong strings that span multiple lines in java!", + "Some really long string that might complicate things." 
+ + "Hooray for really long strings that span multiple lines!"); + } + + @Test + public void testDifferent() { + + compareStrings("DEAD", "CANND", "AD"); + compareStrings("DEADBEEFISGOOD", "CANNDBEEFISBAD", "ADBEEFISD"); + compareStrings("this here is one string", "here a different string is", "here in string"); + } + + @Test + public void testInsertOnly() { + + String x = "Line not modified"; + String y = "Line not not modified"; + compareStrings(x, y, x); + } + + @Test + public void testRemovalOnly() { + + String x = "Line not modified"; + String y = "Line modified"; + compareStrings(x, y, y); + } + + @Test + public void testSizeLimit() { + + String x = "This is a line that has not been modified"; + String y = "This is a line that has been modified"; + + StringLcs slcs = new StringLcs(x, y); + slcs.setSizeLimit(10); + List lcs = slcs.getLcs(); + String result = StringUtils.join(lcs, ""); + assertEquals(y, result); // 'y' is common, since it is 'x', with only a delete + + String z = "Start Mod " + x + " End Mod"; // same as 'x', but with different start/end + slcs = new StringLcs(x, z); + slcs.setSizeLimit(10); + List actual = slcs.getLcs(); + assertTrue(actual.isEmpty()); + } + + private void compareStrings(String x, String y, String expected) { + StringLcs slcs = new StringLcs(x, y); + List actual = slcs.getLcs(); + assertEquals(convertString(expected), actual); + } + + private List convertString(String s) { + List charList = new ArrayList<>(); + for (char c : s.toCharArray()) { + charList.add(c); + } + return charList; + } + + private class StringLcs extends ReducingLcs { + + public StringLcs(String x, String y) { + super(x, y); + } + + @Override + protected String reduce(String input, int start, int end) { + return input.substring(start, end); + } + + @Override + protected int lengthOf(String s) { + return s.length(); + } + + @Override + protected Character valueOf(String s, int offset) { + return s.charAt(offset); + } + } +} diff --git 
a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/database/code/CodeManager.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/database/code/CodeManager.java index d10130d379..ae9a526158 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/database/code/CodeManager.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/database/code/CodeManager.java @@ -3366,7 +3366,7 @@ public class CodeManager implements ErrorHandler, ManagerDB { newComment = ""; } - StringDiff[] diffs = StringDiffer.getLineDiffs(newComment, oldComment); + StringDiff[] diffs = StringDiffUtils.getLineDiffs(newComment, oldComment); long date = System.currentTimeMillis(); long addr = addrMap.getKey(address, true); @@ -3402,11 +3402,11 @@ public class CodeManager implements ErrorHandler, ManagerDB { Record rec = allRecords.get(allRecords.size() - 1); long date = rec.getLongValue(CommentHistoryAdapter.HISTORY_DATE_COL); List records = subListByDate(allRecords, date); + List diffs = new ArrayList<>(records.size()); String user = null; - for (int i = 0; i < records.size(); i++) { - Record r = records.get(i); + for (Record r : records) { user = r.getString(CommentHistoryAdapter.HISTORY_USER_COL); int pos1 = r.getIntValue(CommentHistoryAdapter.HISTORY_POS1_COL); int pos2 = r.getIntValue(CommentHistoryAdapter.HISTORY_POS2_COL); @@ -3415,7 +3415,7 @@ public class CodeManager implements ErrorHandler, ManagerDB { } results.add(new CommentHistory(addr, commentType, user, comment, new Date(date))); - comment = StringDiffer.applyDiffs(comment, diffs); + comment = StringDiffUtils.applyDiffs(comment, diffs); records.clear(); // remove the subList elements from the list } diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/database/code/StringDiffUtils.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/database/code/StringDiffUtils.java new file mode 100644 index 0000000000..e09ddc90ec --- 
/dev/null +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/database/code/StringDiffUtils.java @@ -0,0 +1,351 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package ghidra.program.database.code; + +import java.util.*; + +import org.apache.commons.lang3.StringUtils; + +import generic.algorithms.ReducingListBasedLcs; + +class StringDiffUtils { + + /** + * Minimum size used to determine whether a new StringDiff object will be + * created just using a string (no positions) + * in the getDiffs(String, String) method. + * @see #getLineDiffs(String, String) + */ + private static int MINIMUM_DIFF_SIZE = 100; + + /** + * Returns the list of StringDiff objects that if applied to s1 would result in s2; The + * given text will look only for whole lines using '\n'. 
+ * + * @param s1 the original string + * @param s2 the result string + * this value, then a completely different string will be returned + * @return an array of StringDiff objects that change s1 into s2; + */ + static StringDiff[] getLineDiffs(String s1, String s2) { + return getLineDiffs(s1, s2, MINIMUM_DIFF_SIZE); + } + + static StringDiff[] getLineDiffs(String s1, String s2, int minimumDiffSize) { + if (s2.length() < minimumDiffSize) { + return new StringDiff[] { StringDiff.allTextReplaced(s2) }; + } + + List aList = split(s1); + List bList = split(s2); + LineLcs lcs = new LineLcs(aList, bList); + List commons = lcs.getLcs(); + if (commons.isEmpty()) { + // no common text--complete replacement + return new StringDiff[] { StringDiff.allTextReplaced(s2) }; + } + + int aIndex = 0; + int bIndex = 0; + int aLastIndex = 0; + int bLastIndex = 0; + List results = new LinkedList<>(); + for (Line common : commons) { + + aIndex = indexOf(aList, common, aLastIndex); + bIndex = indexOf(bList, common, bLastIndex); + + int aDelta = aIndex - aLastIndex; + int bDelta = bIndex - bLastIndex; + + int aEnd = aIndex; + int aStart = aEnd - aDelta; + List aPrevious = aList.subList(aStart, aEnd); + StringDiff delete = createDelete(aPrevious); + if (delete != null) { + results.add(delete); + } + + int bEnd = bIndex; + int bStart = bEnd - bDelta; + List bPrevious = bList.subList(bStart, bEnd); + StringDiff insert = createInsert(bPrevious, charOffset(aList, aIndex)); + if (insert != null) { + results.add(insert); + } + + // note: nothing is needed for the 'common' string, since we don't track unchanged text + + aLastIndex = aIndex + 1; + bLastIndex = bIndex + 1; + } + + // grab remainder + StringDiff trailingDeleted = createDeleteAtEnd(aList, aLastIndex, aList.size()); + if (trailingDeleted != null) { + results.add(trailingDeleted); + } + + StringDiff trailingInserted = + createInsertAtEnd(bList, bLastIndex, bList.size(), s1.length()); + if (trailingInserted != null) { + 
results.add(trailingInserted); + } + + return results.toArray(new StringDiff[results.size()]); + } + + private static int charOffset(List list, int index) { + Line line = list.get(index); + return line.start; + } + + private static StringDiff createInsertAtEnd(List list, int start, int end, + int insertIndex) { + if (start - 1 == end) { + return null; + } + + List toDo = list.subList(start, end); + boolean newlineNeeded = true; // we are at the end--need a newline + StringDiff insert = createInsert(toDo, insertIndex, newlineNeeded); + return insert; + } + + private static StringDiff createInsert(List lines, int insertIndex) { + return createInsert(lines, insertIndex, false); + } + + private static StringDiff createInsert(List lines, int insertIndex, boolean isAtEnd) { + if (lines.isEmpty()) { + return null; + } + + StringBuilder buffy = new StringBuilder(); + + // special case: if this insert is for the end of the line, then we want to add + // a newline before the remaining text is added since the original text + // did not have this newline + if (isAtEnd) { + buffy.append('\n'); + } + + for (Line line : lines) { + buffy.append(line.getText()); + } + + return StringDiff.textInserted(buffy.toString(), insertIndex); + } + + private static StringDiff createDeleteAtEnd(List list, int start, int end) { + + if (start - 1 == end) { + return null; + } + + List toDo = list.subList(start, end); + boolean includeLastNewline = false; // we are at the end--do not include artificial newline + StringDiff delete = createDelete(toDo, includeLastNewline); + return delete; + } + + private static StringDiff createDelete(List lines) { + return createDelete(lines, true); + } + + private static StringDiff createDelete(List lines, boolean includeLastNewline) { + if (lines.isEmpty()) { + return null; + } + + int start = lines.get(0).start; // a multi-line delete begins at its first line + int end = 0; + for (Line line : lines) { + // only the end advances; it finishes just past the text of the final line + end = line.start + line.text.length(); + } + + // special case: if this delete is for the last line, 
then we want to remove the remaining + // trailing newline + Line last = lines.get(lines.size() - 1); + if (!includeLastNewline && last.isLastLine) { + start -= 1; // remove previous newline + } + + return StringDiff.textDeleted(start, end); + } + + private static int indexOf(List list, Line line, int from) { + for (int i = from; i < list.size(); i++) { + if (list.get(i).textMatches(line)) { + return i; + } + } + return list.size(); // should not get here since 's' is known to be in list + } + + private static List split(String s) { + + LinkedList result = new LinkedList<>(); + List lines = Arrays.asList(StringUtils.splitPreserveAllTokens(s, '\n')); + int start = 0; + for (String line : lines) { + Line l = new Line(line + '\n', start); + result.add(l); + start += l.text.length(); + } + + if (!result.isEmpty()) { // an empty input string yields no lines at all + result.getLast().markAsLast(); // the newline added above is artificial on the final line + } + + return result; + } + + /** + * Applies the array of StringObjects to the string s to produce a new string. Warning - the + * diff objects cannot be applied to an arbitrary string, the Strings must be the original + * String used to compute the diffs. + * @param s the original string + * @param diffs the array of StringDiff object to apply + * @return a new String resulting from applying the diffs to s. + */ + static String applyDiffs(String s, List diffs) { + + if (diffs.isEmpty()) { + return s; + } + + if (diffs.get(0).start < 0) { + // all replaced or all deleted + String data = diffs.get(0).text; + return data == null ? 
"" : data; + } + + int pos = 0; + StringBuilder buf = new StringBuilder(s.length()); + for (StringDiff element : diffs) { + if (element.start > pos) { + buf.append(s.substring(pos, element.start)); + pos = element.start; + } + + String data = element.text; + if (data != null) { + buf.append(data); + } + else { + // null data is a delete; move to the end of the delete + pos = element.end; + } + } + + if (pos < s.length()) { + buf.append(s.substring(pos)); + } + return buf.toString(); + } + +//================================================================================================== +// Inner Classes +//================================================================================================== + + private static class Line { + + private String text; + private int start; + private boolean isLastLine; + + public Line(String line, int start) { + this.text = line; + this.start = start; + } + + String getText() { + if (isLastLine) { + return textWithoutNewline(); // last line and do not include the newline + } + return text; + } + + void markAsLast() { + isLastLine = true; + } + + private String textWithoutNewline() { + if (text.charAt(text.length() - 1) == '\n') { + return text.substring(0, text.length() - 1); + } + return text; + } + + @Override + public String toString() { + return textWithoutNewline() + " @ " + start; + } + + boolean textMatches(Line other) { + return Objects.equals(text, other.text); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + start; + result = prime * result + ((text == null) ? 
0 : text.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + Line other = (Line) obj; + if (start != other.start) { + return false; + } + if (text == null) { + if (other.text != null) { + return false; + } + } + else if (!text.equals(other.text)) { + return false; + } + return true; + } + } + + private static class LineLcs extends ReducingListBasedLcs { + + LineLcs(List x, List y) { + super(x, y); + } + + @Override + protected boolean matches(Line x, Line y) { + return x.text.equals(y.text); + } + } +} diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/database/code/StringDiffer.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/database/code/StringDiffer.java deleted file mode 100644 index f00dda650f..0000000000 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/database/code/StringDiffer.java +++ /dev/null @@ -1,261 +0,0 @@ -/* ### - * IP: GHIDRA - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ghidra.program.database.code; - -import java.util.LinkedList; -import java.util.List; - -class StringDiffer { - - /** - * Returns the list of StringDiff objects that if applied to s1 would result in s2; The - * given text will look only for whole lines using '\n'. 
- * - * @param s1 the original string - * @param s2 the result string - * this value, then a completely different string will be returned - * @return an array of StringDiff objects that change s1 into s2; - */ - static StringDiff[] getLineDiffs(String s1, String s2) { - - /** - * Minimum size used to determine whether a new StringDiff object will be - * created just using a string (no positions) - * in the getDiffs(String, String) method. - * @see #getLineDiffs(String, String) - */ - int MINIMUM_DIFF_SIZE = 100; - return StringDiffer.getLineDiffs(s1, s2, MINIMUM_DIFF_SIZE); - } - - /** - * Returns the list of StringDiff objects that if applied to s1 would result in s2; The - * given text will look only for whole lines using '\n'. - * - * @param s1 the original string - * @param s2 the result string - * @param minimumDiffSize the minimum length of s2 required for a diff; if s2 is less than - * this value, then a completely different string will be returned - * @return an array of StringDiff objects that change s1 into s2; - */ - static StringDiff[] getLineDiffs(String s1, String s2, int minimumDiffSize) { - if (s2.length() < minimumDiffSize) { - return new StringDiff[] { StringDiff.allTextReplaced(s2) }; - } - - List results = new LinkedList<>(); - int cursor1 = 0; - int cursor2 = 0; - int len1 = s1.length(); - int len2 = s2.length(); - - /* - -look at each line in 'line' chunks using '\n' - */ - - // walk each string until the end... - while (cursor1 < len1 || cursor2 < len2) { - String line1 = getLine(s1, cursor1); - String line2 = getLine(s2, cursor2); - if (line1.equals(line2)) { - cursor1 += line1.length(); - cursor2 += line2.length(); - continue; - } - - // look for line1 in s2... 
- int line1PosInOther = findLine(s2, cursor2, line1); - int mark = cursor1; - while (line1PosInOther < 0) { - - // line1 is not in s2; scan for the next line - cursor1 += line1.length(); - line1 = getLine(s1, cursor1); - line1PosInOther = findLine(s2, cursor2, line1); - } - if (cursor1 > mark) { - // the original line1 was not in s2; add all that was different up to current cursor1 - results.add(StringDiff.textDeleted(mark, cursor1)); - } - - // now look for line2 in s1 - int line2PosInOther = findLine(s1, cursor1, line2); - mark = cursor2; - while (line2PosInOther < 0) { - - // line2 is not in s1; scan for the next line - cursor2 += line2.length(); - line2 = getLine(s2, cursor2); - line2PosInOther = findLine(s1, cursor1, line2); - } - if (cursor2 > mark) { - // the original line2 was not in s1; add all that was different up to current cursor2 - results.add(StringDiff.textInserted(s2.substring(mark, cursor2), cursor1)); - continue; - } - - // move both searches forward - int delta1 = line2PosInOther - cursor1; - int delta2 = line1PosInOther - cursor2; - if (delta1 > delta2) { - - // this can happen when two lines have been rearranged *and* the line length - // of the moved line is *longer* than the new line at the replaced position - results.add( - StringDiff.textInserted(s2.substring(cursor2, line1PosInOther), cursor1)); - cursor2 = line1PosInOther; - } - else if (delta2 > delta1) { - - // this can happen when two lines have been rearranged *and* the line length - // of the moved line is *shorter* than the new line at the replaced position - results.add(StringDiff.textDeleted(cursor1, line2PosInOther)); - cursor1 = line2PosInOther; - } - else { // delta1 == delta2 - - if (cursor1 != line2PosInOther) { - results.add(StringDiff.textDeleted(cursor1, line2PosInOther)); - cursor1 = line2PosInOther; - } - - if (cursor2 != line1PosInOther) { - results.add( - StringDiff.textInserted(s2.substring(cursor2, line1PosInOther), cursor1)); - cursor2 = line1PosInOther; - } - } - 
} - return results.toArray(new StringDiff[results.size()]); - } - - /** - * Finds a position in s that contains the string line. The matching string in - * s must be a "complete" line, in other words if pos > 0 then s.charAt(index-1) must be - * a newLine character and s.charAt(index+line.length()) must be a newLine or the end of - * the string. - * @param s the string to scan - * @param pos the position to begin the scan. - * @param line the line to scan for - * @return the position in s containing the line string. - */ - static int findLine(String s, int pos, String line) { - - if (line.length() == 0) { - // this is used as a marker: -1 means not found; non-negative number signals to keep going - return pos; // TODO this is odd; why is this a match?? - } - - int n = s.length(); - while (pos < n) { - int index = s.indexOf(line, pos); - if (index < 0) { - return index; - } - - if (index > 0 && s.charAt(index - 1) != '\n') { - pos = index + line.length(); // line matched, but not a newline in 's' - continue; - } - - // - // Have a match with at start/0 or have a preceding newline - // - - if (line.endsWith("\n")) { - return index; // the match ends with a newline; found line - } - - // no newline for the current match in 's' - if (index + line.length() == n) { - return index; // at the end exactly; found line - } - - // no newline; not at end; keep going - pos = index + line.length(); - } - - return -1; - } - - /** - * Returns a substring of s beginning at start and ending at either the end of the string or - * the first newLine at or after start - * - * @param s the string to scan - * @param start the starting position for the scan - * @return a string that represents a line within s - */ - private static String getLine(String s, int start) { - int n = s.length(); - if (start >= n) { - return ""; - } - int pos = start; - while (pos < n && s.charAt(pos) != '\n') { - pos++; - } - - if (pos < n) { - pos++; // not at the end; found newline; include the newline - } - 
return s.substring(start, pos); - } - - /** - * Applies the array of StringObjects to the string s to produce a new string. Warning - the - * diff objects cannot be applied to an arbitrary string, the Strings must be the original - * String used to compute the diffs. - * @param s the original string - * @param diffs the array of StringDiff object to apply - * @return a new String resulting from applying the diffs to s. - */ - static String applyDiffs(String s, List diffs) { - - if (diffs.isEmpty()) { - return s; - } - - if (diffs.get(0).start < 0) { - // all replaced or all deleted - String data = diffs.get(0).text; - return data == null ? "" : data; - } - - int pos = 0; - StringBuilder buf = new StringBuilder(s.length()); - for (StringDiff element : diffs) { - if (element.start > pos) { - buf.append(s.substring(pos, element.start)); - pos = element.start; - } - - String data = element.text; - if (data != null) { - buf.append(data); - } - else { - // null data is a delete; move to the end of the delete - pos = element.end; - } - } - - if (pos < s.length()) { - buf.append(s.substring(pos)); - } - return buf.toString(); - } -} diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/util/CodeUnitLCS.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/util/CodeUnitLCS.java index 1a941b232d..ccf3dc1bdf 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/util/CodeUnitLCS.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/util/CodeUnitLCS.java @@ -1,6 +1,5 @@ /* ### * IP: GHIDRA - * REVIEWED: YES * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,11 +15,11 @@ */ package ghidra.program.util; -import generic.algorithms.LCS; +import generic.algorithms.Lcs; import java.util.List; -public class CodeUnitLCS extends LCS { +public class CodeUnitLCS extends Lcs { private List xList; private List yList; diff --git a/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/program/database/code/StringDiffTest.java b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/program/database/code/StringDiffTest.java index 097155c7ad..2f3a46a3df 100644 --- a/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/program/database/code/StringDiffTest.java +++ b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/program/database/code/StringDiffTest.java @@ -17,107 +17,15 @@ package ghidra.program.database.code; import static org.junit.Assert.assertEquals; -import java.util.Arrays; +import java.util.*; import org.apache.commons.lang3.StringUtils; import org.junit.Test; +import generic.test.AbstractGTest; + public class StringDiffTest { - /* - A line match is if the given line to match is contained in the source string and: - - 1) a) matches in the source string with a '\n' char at the index before the match OR - b) is at the beginning *and* the match contains a newline - 2) is at the exact end of the source string - - *The empty string matches at the current position - - Source String: "abcd\nefghi\n" - Line to Match: - */ - - @Test - public void testFindLine_FromStart_EmptyLine() { - - String source = "this is a really\nlone line with\n newlines"; - String line = ""; - int result = StringDiffer.findLine(source, 0, line); - assertEquals(0, result); - } - - @Test - public void testFindLine_FromStart_NoMatch() { - - String source = "this is a really\nlone line with\n newlines"; - String line = "coconuts"; - int result = StringDiffer.findLine(source, 0, line); - assertEquals(-1, result); - } - - @Test - public void testFindLine_FromMiddle_NoMatch() { - - String source = "this is a really\nlone line with\n newlines"; - 
String line = "coconuts"; - int result = StringDiffer.findLine(source, 15, line); - assertEquals(-1, result); - } - - @Test - public void testFindLine_FromEnd_NoMatch() { - - String source = "this is a really\nlone line with\n newlines"; - String line = "coconuts"; - int result = StringDiffer.findLine(source, source.length(), line); - assertEquals(-1, result); - } - - @Test - public void testFindLine_FromStart_MatchWithNewline_AtStart() { - - String source = "this is a really\nlone line with\n newlines"; - String line = "this is a really\n"; - int result = StringDiffer.findLine(source, 0, line); - assertEquals(0, result); - } - - @Test - public void testFindLine_FromStart_MatchWithNewline_AtMiddle() { - - String source = "this is a really\nlone line with\n newlines"; - String line = "lone line with\n"; - int result = StringDiffer.findLine(source, 0, line); - assertEquals(17, result); - } - - @Test - public void testFindLine_FromStart_MatchWithNewline_AtEnd_FailWithoutPrecedingNewline() { - - String source = "this is a really\nlone line with\n newlines\n"; - String line = "lines\n"; - int result = StringDiffer.findLine(source, 0, line); - assertEquals(-1, result); - } - - @Test - public void testFindLine_FromStart_MatchWithNewline_AtEnd_PassWithPrecedingNewline() { - - String source = "this is a really\nlone line with\n new\nlines\n"; - String line = "lines\n"; - int result = StringDiffer.findLine(source, 0, line); - assertEquals(37, result); - } - - @Test - public void testFindLine_FromStart_MatchWithoutNewline_AtStart() { - - String source = "this is a really\nlone line with\n newlines"; - String line = "this is a really"; - int result = StringDiffer.findLine(source, 0, line); - assertEquals(-1, result); // match at start must contain a newline - } - @Test public void testGetDiffLines_Insert_AtFront() { @@ -126,8 +34,8 @@ public class StringDiffTest { String v1 = StringUtils.join(a1, '\n'); String v2 = StringUtils.join(a2, '\n'); - StringDiff[] diffs = 
StringDiffer.getLineDiffs(v1, v2, 1); - String restoredV2 = StringDiffer.applyDiffs(v1, Arrays.asList(diffs)); + StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1); + String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs)); assertEquals(v2, restoredV2); } @@ -139,8 +47,8 @@ public class StringDiffTest { String v1 = StringUtils.join(a1, '\n'); String v2 = StringUtils.join(a2, '\n'); - StringDiff[] diffs = StringDiffer.getLineDiffs(v1, v2, 1); - String restoredV2 = StringDiffer.applyDiffs(v1, Arrays.asList(diffs)); + StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1); + String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs)); assertEquals(v2, restoredV2); } @@ -152,8 +60,8 @@ public class StringDiffTest { String v1 = StringUtils.join(a1, '\n'); String v2 = StringUtils.join(a2, '\n'); - StringDiff[] diffs = StringDiffer.getLineDiffs(v1, v2, 1); - String restoredV2 = StringDiffer.applyDiffs(v1, Arrays.asList(diffs)); + StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1); + String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs)); assertEquals(v2, restoredV2); } @@ -165,8 +73,8 @@ public class StringDiffTest { String v1 = StringUtils.join(a1, '\n'); String v2 = StringUtils.join(a2, '\n'); - StringDiff[] diffs = StringDiffer.getLineDiffs(v1, v2, 1); - String restoredV2 = StringDiffer.applyDiffs(v1, Arrays.asList(diffs)); + StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1); + String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs)); assertEquals(v2, restoredV2); } @@ -178,8 +86,8 @@ public class StringDiffTest { String v1 = StringUtils.join(a1, '\n'); String v2 = StringUtils.join(a2, '\n'); - StringDiff[] diffs = StringDiffer.getLineDiffs(v1, v2, 1); - String restoredV2 = StringDiffer.applyDiffs(v1, Arrays.asList(diffs)); + StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1); + String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs)); 
assertEquals(v2, restoredV2); } @@ -191,8 +99,8 @@ public class StringDiffTest { String v1 = StringUtils.join(a1, '\n'); String v2 = StringUtils.join(a2, '\n'); - StringDiff[] diffs = StringDiffer.getLineDiffs(v1, v2, 1); - String restoredV2 = StringDiffer.applyDiffs(v1, Arrays.asList(diffs)); + StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1); + String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs)); assertEquals(v2, restoredV2); } @@ -204,8 +112,8 @@ public class StringDiffTest { String v1 = StringUtils.join(a1, '\n'); String v2 = StringUtils.join(a2, '\n'); - StringDiff[] diffs = StringDiffer.getLineDiffs(v1, v2, 1); - String restoredV2 = StringDiffer.applyDiffs(v1, Arrays.asList(diffs)); + StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1); + String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs)); assertEquals(v2, restoredV2); } @@ -220,8 +128,8 @@ public class StringDiffTest { String v1 = StringUtils.join(a1, '\n'); String v2 = StringUtils.join(a2, '\n'); - StringDiff[] diffs = StringDiffer.getLineDiffs(v1, v2, 1); - String restoredV2 = StringDiffer.applyDiffs(v1, Arrays.asList(diffs)); + StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1); + String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs)); assertEquals(v2, restoredV2); } @@ -233,8 +141,8 @@ public class StringDiffTest { String v1 = StringUtils.join(a1, '\n'); String v2 = StringUtils.join(a2, '\n'); - StringDiff[] diffs = StringDiffer.getLineDiffs(v1, v2, 1); - String restoredV2 = StringDiffer.applyDiffs(v1, Arrays.asList(diffs)); + StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1); + String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs)); assertEquals(v2, restoredV2); } @@ -246,9 +154,50 @@ public class StringDiffTest { String v1 = StringUtils.join(a1, '\n'); String v2 = StringUtils.join(a2, '\n'); - StringDiff[] diffs = StringDiffer.getLineDiffs(v1, v2, 1); - String restoredV2 = 
StringDiffer.applyDiffs(v1, Arrays.asList(diffs));
+		StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1);
+		String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs));
 		assertEquals(v2, restoredV2);
 	}
 
+	/**
+	 * Diffs two texts that have no lines in common and verifies that applying the diffs to the
+	 * first text produces the second.
+	 */
+	@Test
+	public void testReplace() {
+		String[] a1 = new String[] { "In", "the", "beginning" };
+		String[] a2 = new String[] { "There", "was", "vastness" };
+		String v1 = StringUtils.join(a1, '\n');
+		String v2 = StringUtils.join(a2, '\n');
+
+		StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1);
+		String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs));
+		assertEquals(v2, restoredV2);
+	}
+
+	/**
+	 * Diffs two 1200-line texts that differ only in their first and last lines.  A single diff
+	 * is expected, and applying it to the first text must produce the second.
+	 */
+	@Test
+	public void testTheBiggness_NoOptimization() throws Exception {
+
+		List<String> bigLines = generateLines(1200);
+		List<String> bigLines2 = new ArrayList<>(bigLines);
+
+		bigLines2.set(0, "a new line at 0");
+		bigLines2.set(bigLines2.size() - 1, "a new line at length");
+
+		String v1 = StringUtils.join(bigLines, '\n');
+		String v2 = StringUtils.join(bigLines2, '\n');
+
+		StringDiff[] diffs = StringDiffUtils.getLineDiffs(v1, v2, 1);
+		assertEquals(1, diffs.length); // 1 diff--completely different, due to size restriction on Lcs
+		String restoredV2 = StringDiffUtils.applyDiffs(v1, Arrays.asList(diffs));
+		assertEquals(v2, restoredV2);
+	}
+
+	/**
+	 * Creates the given number of lines, each of the form "Line N: " followed by random text
+	 * that has had any newlines removed.
+	 *
+	 * @param size the number of lines to create
+	 * @return the lines
+	 */
+	private List<String> generateLines(int size) {
+
+		List<String> results = new ArrayList<>(size);
+		for (int i = 0; i < size; i++) {
+			String random = AbstractGTest.getRandomString(0, 50);
+			random = random.replace("\n", ""); // literal replacement; no regex needed
+			results.add("Line " + (i + 1) + ": " + random);
+		}
+
+		return results;
+	}
 }