Working LCS/SMS :)
This commit is contained in:
parent
7cc7d3bb76
commit
93d460ae5d
205
src/diff.rs
205
src/diff.rs
|
@ -3,28 +3,71 @@
|
||||||
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
/// One contiguous run of changes between text A and text B.
///
/// Positions are 0-based entry indices (the producer counts from `0usize`);
/// an item may describe a deletion from A, an insertion into B, or both.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DiffItem {
    /// Index in A at which the change starts.
    pub start_a: usize,
    /// Index in B at which the change starts.
    pub start_b: usize,
    /// Number of consecutive entries removed from A, starting at `start_a`.
    pub deleted_a: usize,
    /// Number of consecutive entries added to B, starting at `start_b`.
    pub inserted_b: usize,
}
|
||||||
|
|
||||||
struct DiffData {
|
/// Working data for one of the two texts being compared.
struct DiffData {
|
||||||
length: usize,
|
/// Number of entries; used as the upper bound when indexing `codes` and `modified`.
length: usize,
|
||||||
codes: Vec<usize>,
|
/// One numeric code per entry; entries of the two texts are compared by code.
/// Presumably equal lines receive equal codes via the shared hash map — confirm in `diff_data`.
codes: Vec<usize>,
|
||||||
modified: Vec<bool>,
|
/// Per-entry flag: `true` marks the entry as deleted (text A) or inserted (text B).
modified: Vec<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn diff(a: &str, b: &str) {
|
/// Split point returned by `sms` and consumed by `lcs` to divide the problem in two.
struct SmsData {
|
||||||
|
/// Position in text A (used by `lcs` as a bound into `data_a`).
x: usize,
|
||||||
|
/// Position in text B (used by `lcs` as a bound into `data_b`).
y: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A vector of `usize` values that can be addressed with either `usize` or
/// signed `i32` indices.
///
/// The signed accessors exist so callers can do their index arithmetic in
/// `i32` and still get a clear failure if the result ever goes negative.
struct DiffVec {
    data: Vec<usize>,
}

impl DiffVec {
    /// Converts a signed index to `usize`.
    ///
    /// # Panics
    ///
    /// Panics with a message starting with `Got index < 0` when `index` is negative.
    fn checked_index(index: i32) -> usize {
        assert!(index >= 0, "Got index < 0 (index = {})", index);
        // Non-negative `i32` always fits in `usize` on 32-bit and wider targets.
        index as usize
    }

    /// Returns the value stored at `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is out of bounds.
    fn get(&self, index: usize) -> usize {
        self.data[index]
    }

    /// Returns the value stored at the signed `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is negative or out of bounds.
    fn get_i32(&self, index: i32) -> usize {
        self.get(Self::checked_index(index))
    }

    /// Stores `val` at `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is out of bounds.
    fn set(&mut self, index: usize, val: usize) {
        self.data[index] = val;
    }

    /// Stores `val` at the signed `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is negative or out of bounds.
    fn set_i32(&mut self, index: i32, val: usize) {
        self.set(Self::checked_index(index), val);
    }
}
|
||||||
|
|
||||||
|
/// Compares text `a` against text `b` and returns the list of changes.
///
/// Both inputs are converted to `DiffData`, `lcs` marks the modified entries,
/// `optimize` post-processes the marks, and `create_diffs` turns them into
/// `DiffItem`s.
pub fn diff(a: &str, b: &str) -> Vec<DiffItem> {
|
||||||
let mut existing_hashes: HashMap<&str, usize> = HashMap::new();
|
// The same map is passed to both `diff_data` calls.
// Presumably this gives identical lines identical codes — confirm in `diff_data`.
let mut existing_hashes: HashMap<&str, usize> = HashMap::new();
|
||||||
let mut data_a = diff_data(a, &mut existing_hashes);
|
let mut data_a = diff_data(a, &mut existing_hashes);
|
||||||
let mut data_b = diff_data(b, &mut existing_hashes);
|
let mut data_b = diff_data(b, &mut existing_hashes);
|
||||||
|
|
||||||
let max = data_a.length + data_b.length;
|
let max = data_a.length + data_b.length;
|
||||||
let mut down_vector = vec![0usize; 2 * max + 2];
|
// Scratch buffers handed down to `lcs`, each holding 2 * max + 2 zeroed slots.
let mut down_vector = DiffVec { data: vec![0usize; 2 * max + 2] };
|
||||||
let mut up_vector = vec![0usize; 2 * max + 2];
|
let mut up_vector = DiffVec { data: vec![0usize; 2 * max + 2] };
|
||||||
|
|
||||||
let upper_a = data_a.length;
|
let upper_a = data_a.length;
|
||||||
let upper_b = data_b.length;
|
let upper_b = data_b.length;
|
||||||
|
|
||||||
lcs(&mut data_a, 0, upper_a, &mut data_b, 0, upper_b, &mut down_vector, &mut up_vector);
|
// Compare the full ranges [0, upper_a) and [0, upper_b).
lcs(&mut data_a, 0, upper_a, &mut data_b, 0, upper_b, &mut down_vector, &mut up_vector);
|
||||||
|
|
||||||
optimize(&data_a);
|
// `optimize` rewrites the `modified` flags in place, hence the mutable borrow.
optimize(&mut data_a);
|
||||||
optimize(&data_b);
|
optimize(&mut data_b);
|
||||||
|
|
||||||
create_diffs(&data_a, &data_b)
|
create_diffs(&data_a, &data_b)
|
||||||
}
|
}
|
||||||
|
@ -62,7 +105,7 @@ fn diff_codes<'a>(text: &'a str, existing_hashes: &mut HashMap<&'a str, usize>)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Longest Common-Subsequence
|
// Longest Common-Subsequence
|
||||||
fn lcs(data_a: &mut DiffData, mut lower_a: usize, mut upper_a: usize, data_b: &mut DiffData, mut lower_b: usize, mut upper_b: usize, down_vector: &mut Vec<usize>, up_vector: &mut Vec<usize>) {
|
fn lcs(data_a: &mut DiffData, mut lower_a: usize, mut upper_a: usize, data_b: &mut DiffData, mut lower_b: usize, mut upper_b: usize, down_vector: &mut DiffVec, up_vector: &mut DiffVec) {
|
||||||
while lower_a < upper_a && lower_b < upper_b && data_a.codes[lower_a] == data_b.codes[lower_b] {
|
while lower_a < upper_a && lower_b < upper_b && data_a.codes[lower_a] == data_b.codes[lower_b] {
|
||||||
lower_a += 1;
|
lower_a += 1;
|
||||||
lower_b += 1;
|
lower_b += 1;
|
||||||
|
@ -76,37 +119,40 @@ fn lcs(data_a: &mut DiffData, mut lower_a: usize, mut upper_a: usize, data_b: &m
|
||||||
if lower_a == upper_a {
|
if lower_a == upper_a {
|
||||||
// Inserted lines
|
// Inserted lines
|
||||||
while lower_b < upper_b {
|
while lower_b < upper_b {
|
||||||
lower_b += 1;
|
|
||||||
data_b.modified[lower_b] = true;
|
data_b.modified[lower_b] = true;
|
||||||
|
lower_b += 1;
|
||||||
}
|
}
|
||||||
} else if lower_b == upper_b {
|
} else if lower_b == upper_b {
|
||||||
// Deleted lines
|
// Deleted lines
|
||||||
while lower_a < upper_a {
|
while lower_a < upper_a {
|
||||||
lower_a += 1;
|
|
||||||
data_a.modified[lower_a] = true;
|
data_a.modified[lower_a] = true;
|
||||||
|
lower_a += 1;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Find the middle snake and length of an optimal path for A and B
|
// Find the middle snake and length of an optimal path for A and B
|
||||||
let sms = sms(&data_a, &data_b, down_vector, up_vector);
|
let sms = sms(&data_a, lower_a, upper_a, &data_b, lower_b, upper_b, down_vector, up_vector);
|
||||||
|
|
||||||
// The path is from lower_x to (x, y) and (x, y) to upper_x
|
// The path is from lower_x to (x, y) and (x, y) to upper_x
|
||||||
lcs(data_a, lower_a, sms.0, data_b, lower_b, sms.1, down_vector, up_vector);
|
lcs(data_a, lower_a, sms.x, data_b, lower_b, sms.y, down_vector, up_vector);
|
||||||
lcs(data_a, sms.1, upper_a, data_b, sms.1, upper_b, down_vector, up_vector);
|
lcs(data_a, sms.x, upper_a, data_b, sms.y, upper_b, down_vector, up_vector);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// https://stackoverflow.com/questions/54035728/how-to-add-a-negative-i32-number-to-an-usize-variable
|
||||||
|
/// Adds the signed `offset` to the unsigned `index` and returns the result.
// NOTE(review): if a negative `offset` has a larger magnitude than `index`, the
// subtraction underflows (a panic when overflow checks are enabled) — confirm callers never do this.
fn add_i32(index: usize, offset: i32) -> usize {
|
||||||
|
if offset.is_negative() {
|
||||||
|
// `wrapping_abs` followed by `as u32` yields the magnitude even for `i32::MIN`.
index - offset.wrapping_abs() as u32 as usize
|
||||||
|
} else {
|
||||||
|
index + offset as usize
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Shortest Middle Snake
|
// Shortest Middle Snake
|
||||||
fn sms(data_a: &DiffData, data_b: &DiffData, down_vector: &mut Vec<usize>, up_vector: &mut Vec<usize>) -> (usize, usize) {
|
fn sms(data_a: &DiffData, lower_a: usize, upper_a: usize, data_b: &DiffData, lower_b: usize, upper_b: usize, down_vector: &mut DiffVec, up_vector: &mut DiffVec) -> SmsData {
|
||||||
let lower_a = 0usize;
|
let max = upper_a as i32 + upper_b as i32 + 1;
|
||||||
let upper_a = data_a.length;
|
|
||||||
let lower_b = 0usize;
|
|
||||||
let upper_b = data_b.length;
|
|
||||||
|
|
||||||
let mut ret = (0usize, 0usize);
|
let down_k = lower_a as i32 - lower_b as i32;
|
||||||
let max = data_a.length - data_b.length - 1;
|
let up_k = upper_a as i32 - upper_b as i32;
|
||||||
|
|
||||||
let down_k = lower_a - lower_b;
|
|
||||||
let up_k = upper_a - upper_b;
|
|
||||||
|
|
||||||
let delta = (upper_a - lower_a) - (upper_b - lower_b);
|
let delta = (upper_a - lower_a) - (upper_b - lower_b);
|
||||||
let odd_delta = (delta & 1) != 0;
|
let odd_delta = (delta & 1) != 0;
|
||||||
|
@ -114,28 +160,28 @@ fn sms(data_a: &DiffData, data_b: &DiffData, down_vector: &mut Vec<usize>, up_ve
|
||||||
let down_offset = max - down_k;
|
let down_offset = max - down_k;
|
||||||
let up_offset = max - up_k;
|
let up_offset = max - up_k;
|
||||||
|
|
||||||
let max_d = ((upper_a - lower_a + upper_b - lower_b) / 2) + 1;
|
let max_d = ((upper_a - lower_a + upper_b - lower_b) / 2) as i32 + 1;
|
||||||
|
|
||||||
down_vector[down_offset + down_k + 1] = lower_a;
|
down_vector.set_i32(down_offset + down_k + 1, lower_a);
|
||||||
up_vector[up_offset + up_k - 1] = upper_a;
|
up_vector.set_i32(up_offset + up_k - 1, upper_a);
|
||||||
|
|
||||||
for d in 0..=max_d {
|
for d in 0..=max_d {
|
||||||
// Extend the forward path
|
// Extend the forward path
|
||||||
for k in ((down_k - d)..=(down_k + d)).step_by(2) {
|
for k in ((down_k - d)..=(down_k + d)).step_by(2) {
|
||||||
let mut x = 0;
|
let mut x;
|
||||||
let mut y = 0;
|
let mut y;
|
||||||
if k == down_k - d {
|
if k == down_k - d {
|
||||||
// Down
|
// Down
|
||||||
x = down_vector[down_offset + k + 1];
|
x = down_vector.get_i32(down_offset + k + 1);
|
||||||
} else {
|
} else {
|
||||||
// Right
|
// Right
|
||||||
x = down_vector[down_offset + k - 1];
|
x = down_vector.get_i32(down_offset + k - 1) + 1;
|
||||||
if k < down_k + d && down_vector[down_offset + k + 1] >= 1 {
|
if k < down_k + d && down_vector.get_i32(down_offset + k + 1) >= x {
|
||||||
// Down
|
// Down
|
||||||
x = down_vector[down_offset + k + 1];
|
x = down_vector.get_i32(down_offset + k + 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
y = x - k;
|
y = add_i32(x, -k);
|
||||||
|
|
||||||
// Find the end of the furthest reaching forward D-path in diagonal k.
|
// Find the end of the furthest reaching forward D-path in diagonal k.
|
||||||
while x < upper_a && y < upper_b && data_a.codes[x] == data_b.codes[y] {
|
while x < upper_a && y < upper_b && data_a.codes[x] == data_b.codes[y] {
|
||||||
|
@ -143,46 +189,46 @@ fn sms(data_a: &DiffData, data_b: &DiffData, down_vector: &mut Vec<usize>, up_ve
|
||||||
y += 1;
|
y += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
down_vector[down_offset + k] = x;
|
down_vector.set_i32(down_offset + k, x);
|
||||||
|
|
||||||
// Overlap ?
|
// Overlap ?
|
||||||
if odd_delta && up_k - d < k && k < up_k + d && up_vector[up_offset + k] <= down_vector[down_offset + k] {
|
if odd_delta && up_k - d < k && k < up_k + d && up_vector.get_i32(up_offset + k) <= down_vector.get_i32(down_offset + k) {
|
||||||
ret.0 = down_vector[down_offset + k];
|
let x = down_vector.get_i32(down_offset + k);
|
||||||
ret.1 = down_vector[down_offset + k] - k;
|
let y = add_i32(down_vector.get_i32(down_offset + k), -k);
|
||||||
return ret;
|
return SmsData { x, y };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Extend the reverse path
|
// Extend the reverse path
|
||||||
for k in ((up_k - d)..=(up_k + d)).step_by(2) {
|
for k in ((up_k - d)..=(up_k + d)).step_by(2) {
|
||||||
let mut x = 0;
|
let mut x;
|
||||||
let mut y = 0;
|
let mut y;
|
||||||
|
|
||||||
if k == up_k + d {
|
if k == up_k + d {
|
||||||
// Up
|
// Up
|
||||||
x = up_vector[up_offset + k - 1];
|
x = up_vector.get_i32(up_offset + k - 1);
|
||||||
} else {
|
} else {
|
||||||
// Left
|
// Left
|
||||||
x = up_vector[up_offset + k + 1] - 1;
|
x = up_vector.get_i32(up_offset + k + 1) - 1;
|
||||||
if k > up_k - d && up_vector[up_offset + k - 1] < x {
|
if k > up_k - d && up_vector.get_i32(up_offset + k - 1) < x {
|
||||||
// Up
|
// Up
|
||||||
x = up_vector[up_offset + k - 1];
|
x = up_vector.get_i32(up_offset + k - 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
y = x - k;
|
y = add_i32(x, -k);
|
||||||
|
|
||||||
while x > lower_a && y > lower_b && data_a.codes[x - 1] == data_b.codes[y - 1] {
|
while x > lower_a && y > lower_b && data_a.codes[x - 1] == data_b.codes[y - 1] {
|
||||||
x -= 1;
|
x -= 1;
|
||||||
y -= 1;
|
y -= 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
up_vector[up_offset + k] = x;
|
up_vector.set_i32(up_offset + k, x);
|
||||||
|
|
||||||
// Overlap ?
|
// Overlap ?
|
||||||
if !odd_delta && down_k - d <= k && k <= down_k + d && up_vector[up_offset + k] <= down_vector[down_offset + k] {
|
if !odd_delta && down_k - d <= k && k <= down_k + d && up_vector.get_i32(up_offset + k) <= down_vector.get_i32(down_offset + k) {
|
||||||
ret.0 = down_vector[down_offset + k];
|
let x = down_vector.get_i32(down_offset + k);
|
||||||
ret.1 = down_vector[down_offset + k] - k;
|
let y = add_i32(down_vector.get_i32(down_offset + k), -k);
|
||||||
return ret;
|
return SmsData { x, y };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -190,6 +236,65 @@ fn sms(data_a: &DiffData, data_b: &DiffData, down_vector: &mut Vec<usize>, up_ve
|
||||||
panic!("This should not be possible :(");
|
panic!("This should not be possible :(");
|
||||||
}
|
}
|
||||||
|
|
||||||
fn optimize(data: &DiffData) {}
|
/// Adjusts the `modified` flags of `data` in place.
///
/// For every run of modified entries, while the first entry of the run has the
/// same code as the entry directly after the run, the run is shifted forward
/// by one position (first entry unmarked, following entry marked).
fn optimize(data: &mut DiffData) {
|
||||||
|
let mut start_pos = 0usize;
|
||||||
|
let mut end_pos;
|
||||||
|
|
||||||
fn create_diffs(data_a: &DiffData, data_b: &DiffData) {}
|
while start_pos < data.length {
|
||||||
|
// Skip unmodified entries to find the start of the next modified run.
while start_pos < data.length && !data.modified[start_pos] {
|
||||||
|
start_pos += 1;
|
||||||
|
}
|
||||||
|
end_pos = start_pos;
|
||||||
|
// Advance to the first entry after the modified run.
while end_pos < data.length && data.modified[end_pos] {
|
||||||
|
end_pos += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if end_pos < data.length && data.codes[start_pos] == data.codes[end_pos] {
|
||||||
|
// Same code at both ends: slide the run forward by one entry.
// `start_pos` is left unchanged; the next iteration skips past it because it is now unmodified.
data.modified[start_pos] = false;
|
||||||
|
data.modified[end_pos] = true;
|
||||||
|
} else {
|
||||||
|
// Run cannot move any further; continue after it.
start_pos = end_pos;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Walks the `modified` flags of both texts in parallel and collects every
/// run of changes into a `DiffItem`.
///
/// Returns the items in the order the changes occur.
fn create_diffs(data_a: &DiffData, data_b: &DiffData) -> Vec<DiffItem> {
|
||||||
|
let mut result = Vec::new();
|
||||||
|
|
||||||
|
let mut start_a;
|
||||||
|
let mut start_b;
|
||||||
|
// Current positions in A and B.
let mut line_a = 0usize;
|
||||||
|
let mut line_b = 0usize;
|
||||||
|
|
||||||
|
while line_a < data_a.length || line_b < data_b.length {
|
||||||
|
if line_a < data_a.length && !data_a.modified[line_a] &&
|
||||||
|
line_b < data_b.length && !data_b.modified[line_b] {
|
||||||
|
// Equal line
|
||||||
|
line_a += 1;
|
||||||
|
||||||
|
line_b += 1;
|
||||||
|
} else {
|
||||||
|
// Remember where the changed run begins in each text.
start_a = line_a;
|
||||||
|
start_b = line_b;
|
||||||
|
|
||||||
|
// Consume deleted entries of A (or everything left in A once B is exhausted).
while line_a < data_a.length && (line_b >= data_b.length || data_a.modified[line_a]) {
|
||||||
|
line_a += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Consume inserted entries of B (or everything left in B once A is exhausted).
while line_b < data_b.length && (line_a >= data_a.length || data_b.modified[line_b]) {
|
||||||
|
line_b += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only record an item if at least one position advanced.
if start_a < line_a || start_b < line_b {
|
||||||
|
let item = DiffItem {
|
||||||
|
start_a,
|
||||||
|
start_b,
|
||||||
|
deleted_a: line_a - start_a,
|
||||||
|
inserted_b: line_b - start_b,
|
||||||
|
};
|
||||||
|
result.push(item);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
59
src/main.rs
59
src/main.rs
|
@ -6,61 +6,8 @@ mod diff;
|
||||||
|
|
||||||
fn main() {
|
/// Runs `diff` on two hard-coded sample texts and prints the result with `dbg!`.
fn main() {
|
||||||
let a = "abcabba\nlkajsdfasdf\nasdfasdfasdf\nlasjkdf";
|
let a = "abcabba\nlkajsdfasdf\nasdfasdfasdf\nlasjkdf";
|
||||||
let b = "abcabba\ncbabasdfasdf\nlasjkdf";
|
// Compared with `a`: the two middle lines are replaced by one and a trailing line is added.
let b = "abcabba\ncbabasdfasdf\nlasjkdf\nope";
|
||||||
|
|
||||||
diff(a, b);
|
let result = diff(a, b);
|
||||||
// lcs(a, b);
|
// Debug output only; `DiffItem` derives `Debug`.
dbg!(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
// fn lcs(a: &str, b: &str) {
|
|
||||||
// let n = a.len() as i32;
|
|
||||||
// let m = b.len() as i32;
|
|
||||||
// let max = n + m;
|
|
||||||
// let mut endpoints = vec![0i32; max as usize * 2];
|
|
||||||
//
|
|
||||||
// for script_length in 0..max {
|
|
||||||
// let mut k = -script_length;
|
|
||||||
// while k <= script_length * 2 {
|
|
||||||
// let index = (k + max) as usize + 1;
|
|
||||||
// let previous_endpoint = endpoints[index - 1];
|
|
||||||
// let next_endpoint = endpoints[index + 1];
|
|
||||||
//
|
|
||||||
// let mut x = if k == -script_length || k != script_length && previous_endpoint < next_endpoint {
|
|
||||||
// next_endpoint
|
|
||||||
// } else {
|
|
||||||
// previous_endpoint + 1
|
|
||||||
// };
|
|
||||||
//
|
|
||||||
// let mut y = if k < x {
|
|
||||||
// x - k
|
|
||||||
// } else {
|
|
||||||
// 0
|
|
||||||
// };
|
|
||||||
//
|
|
||||||
// // Increase x and y as long as we are in a common sequence between a and b
|
|
||||||
// while x < n && y < m {
|
|
||||||
// let ac = a.chars().nth(x as usize).unwrap();
|
|
||||||
// let bc = b.chars().nth(y as usize).unwrap();
|
|
||||||
//
|
|
||||||
// if ac != bc {
|
|
||||||
// break;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// x += 1;
|
|
||||||
// y += 1;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// endpoints[index] = x;
|
|
||||||
//
|
|
||||||
// // We have traveled through both strings, the length of the shortest edit script (SES) has been found.
|
|
||||||
// if x >= n && y >= m {
|
|
||||||
// println!("Length of a SES is D ({d})");
|
|
||||||
// return;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// k += 2;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// println!("Length of a SES is greater than MAX ({max})");
|
|
||||||
// }
|
|
||||||
|
|
Loading…
Reference in New Issue