Reworked diff algorithm to work with streams
This commit is contained in:
parent
93d460ae5d
commit
d44616bd72
201
src/diff.rs
201
src/diff.rs
|
@ -1,7 +1,8 @@
|
|||
// Based on https://github.com/mathertel/Diff
|
||||
// "An O(ND) Difference Algorithm and its Variations" by Eugene Myers Algorithmica Vol. 1 No. 2, 1986, p 251.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::io::{BufRead};
|
||||
use crate::diff::IndexDirection::{None, LeftDown, RightUp};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct DiffItem {
|
||||
|
@ -22,44 +23,84 @@ struct SmsData {
|
|||
y: usize,
|
||||
}
|
||||
|
||||
struct DiffVec {
|
||||
data: Vec<usize>,
|
||||
/// Precomputed search window and bookkeeping for one shortest-middle-snake search.
///
/// All derived values are filled in by `SmsBounds::from` from the four range
/// limits, so that the search itself only has to read them.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct SmsBounds {
    /// Lower limit of the range searched in sequence A.
    lower_a: usize,
    /// Lower limit of the range searched in sequence B.
    lower_b: usize,
    /// Upper limit of the range searched in sequence A.
    upper_a: usize,
    /// Upper limit of the range searched in sequence B.
    upper_b: usize,
    /// Largest `d` the search iterates to: half of the combined range length, plus one.
    max_d: usize,
    /// Diagonal on which the forward search starts (`lower_a - lower_b`).
    down_k: i32,
    /// Diagonal on which the reverse search starts (`upper_a - upper_b`).
    up_k: i32,
    /// Base added to a diagonal `k` to index the forward vector
    /// (`upper_a + upper_b + 1 - down_k`).
    down_offset: usize,
    /// Base added to a diagonal `k` to index the reverse vector
    /// (`upper_a + upper_b + 1 - up_k`).
    up_offset: usize,
    /// Whether the sum of the four range limits is odd; selects which of the
    /// two overlap checks in the search may report the middle snake.
    is_delta_odd: bool,
}
|
||||
|
||||
impl DiffVec {
|
||||
fn get(&self, index: usize) -> usize {
|
||||
self.data[index]
|
||||
}
|
||||
impl SmsBounds {
|
||||
fn from(lower_a: usize, lower_b: usize, upper_a: usize, upper_b: usize) -> Self {
|
||||
let max = upper_a + upper_b + 1;
|
||||
let max_d = (upper_a - lower_a + upper_b - lower_b) / 2 + 1;
|
||||
|
||||
fn get_i32(&self, index: i32) -> usize {
|
||||
if index < 0 {
|
||||
panic!("Got index < 0");
|
||||
}
|
||||
let down_k = lower_a as i32 - lower_b as i32;
|
||||
let up_k = upper_a as i32 - upper_b as i32;
|
||||
|
||||
self.get(index as usize)
|
||||
}
|
||||
let down_offset = (max as i32 - down_k) as usize;
|
||||
let up_offset = (max as i32 - up_k) as usize;
|
||||
|
||||
fn set(&mut self, index: usize, val: usize) {
|
||||
self.data[index] = val;
|
||||
}
|
||||
let is_delta_odd = ((upper_a + lower_a + upper_b + lower_b) & 1) != 0;
|
||||
|
||||
fn set_i32(&mut self, index: i32, val: usize) {
|
||||
if index < 0 {
|
||||
panic!("Got index < 0");
|
||||
}
|
||||
|
||||
self.set(index as usize, val);
|
||||
SmsBounds {
|
||||
lower_a,
|
||||
lower_b,
|
||||
upper_a,
|
||||
upper_b,
|
||||
max_d,
|
||||
down_k,
|
||||
up_k,
|
||||
down_offset,
|
||||
up_offset,
|
||||
is_delta_odd,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn diff(a: &str, b: &str) -> Vec<DiffItem> {
|
||||
let mut existing_hashes: HashMap<&str, usize> = HashMap::new();
|
||||
/// Position in the forward ("down") vector for diagonal `k`, shifted by `dir`.
fn get_down_index(&self, k: i32, dir: IndexDirection) -> usize {
    let base = self.down_offset;
    self.get_index(base, k, dir)
}
|
||||
|
||||
/// Position in the reverse ("up") vector for diagonal `k`, shifted by `dir`.
fn get_up_index(&self, k: i32, dir: IndexDirection) -> usize {
    let base = self.up_offset;
    self.get_index(base, k, dir)
}
|
||||
|
||||
/// Combines a vector base `offset`, a diagonal `k` and the signed step of `dir`
/// into a vector position.
///
/// The sum is formed in `i32` because `k` and the step may be negative; the
/// result is cast back to `usize` without a range check.
fn get_index(&self, offset: usize, k: i32, dir: IndexDirection) -> usize {
    let step = dir as i32;
    let on_diagonal = offset as i32 + k;
    (on_diagonal + step) as usize
}
|
||||
}
|
||||
|
||||
/// Selects which neighbouring diagonal slot to address in a search vector.
///
/// The discriminant is the signed step that the index helpers add to the
/// diagonal, so the values below must not be changed.
///
/// NOTE: the variants are imported by name at the top of this module, so a
/// bare `None` there refers to `IndexDirection::None`, not `Option::None`.
#[repr(i32)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum IndexDirection {
    /// The diagonal itself (no step).
    None = 0,
    /// One slot above the diagonal.
    LeftDown = 1, // Down: Down, Up: Left
    /// One slot below the diagonal.
    RightUp = -1, // Down: Up, Up: Right
}
|
||||
|
||||
// https://stackoverflow.com/questions/54035728/how-to-add-a-negative-i32-number-to-an-usize-variable
/// Adds the signed `offset` to the unsigned `index`.
///
/// A negative `offset` is applied by subtracting its magnitude, so `index`
/// never has to be converted to a signed type.
///
/// # Panics
///
/// When overflow checks are enabled, panics if the result does not fit in
/// `usize` (for example when the magnitude of a negative `offset` exceeds
/// `index`).
fn add_i32(index: usize, offset: i32) -> usize {
    // `unsigned_abs` returns the magnitude as `u32` and is well defined even
    // for `i32::MIN`, whose absolute value does not fit in `i32`.
    let magnitude = offset.unsigned_abs() as usize;
    if offset.is_negative() {
        index - magnitude
    } else {
        index + magnitude
    }
}
|
||||
|
||||
pub fn diff<T>(a: &mut T, b: &mut T) -> Vec<DiffItem>
|
||||
where T: BufRead {
|
||||
let mut existing_hashes: HashMap<String, usize> = HashMap::new();
|
||||
let mut data_a = diff_data(a, &mut existing_hashes);
|
||||
let mut data_b = diff_data(b, &mut existing_hashes);
|
||||
|
||||
let max = data_a.length + data_b.length;
|
||||
let mut down_vector = DiffVec { data: vec![0usize; 2 * max + 2] };
|
||||
let mut up_vector = DiffVec { data: vec![0usize; 2 * max + 2] };
|
||||
let mut down_vector = vec![0usize; 2 * max + 2];
|
||||
let mut up_vector = vec![0usize; 2 * max + 2];
|
||||
|
||||
let upper_a = data_a.length;
|
||||
let upper_b = data_b.length;
|
||||
|
@ -72,8 +113,9 @@ pub fn diff(a: &str, b: &str) -> Vec<DiffItem> {
|
|||
create_diffs(&data_a, &data_b)
|
||||
}
|
||||
|
||||
fn diff_data<'a>(text: &'a str, existing_hashes: &mut HashMap<&'a str, usize>) -> DiffData {
|
||||
let codes = diff_codes(text, existing_hashes);
|
||||
fn diff_data<T>(reader: &mut T, existing_hashes: &mut HashMap<String, usize>) -> DiffData
|
||||
where T: BufRead {
|
||||
let codes = diff_codes(reader, existing_hashes);
|
||||
let length = codes.len();
|
||||
|
||||
DiffData {
|
||||
|
@ -83,21 +125,24 @@ fn diff_data<'a>(text: &'a str, existing_hashes: &mut HashMap<&'a str, usize>) -
|
|||
}
|
||||
}
|
||||
|
||||
fn diff_codes<'a>(text: &'a str, existing_hashes: &mut HashMap<&'a str, usize>) -> Vec<usize> {
|
||||
let lines: Vec<&str> = text.split('\n').collect();
|
||||
|
||||
let mut codes = vec![0usize; lines.len()];
|
||||
fn diff_codes<T>(reader: &mut T, existing_hashes: &mut HashMap<String, usize>) -> Vec<usize>
|
||||
where T: BufRead {
|
||||
let mut codes = Vec::new();
|
||||
let mut next_code = existing_hashes.len() + 1;
|
||||
|
||||
for i in 0..lines.len() {
|
||||
let line = lines[i];
|
||||
loop {
|
||||
let mut line = String::new();
|
||||
let read_res = reader.read_line(&mut line).expect("Failed to read BufRead");
|
||||
if read_res == 0 {
|
||||
break;
|
||||
}
|
||||
|
||||
if !existing_hashes.contains_key(line) {
|
||||
if !existing_hashes.contains_key(&line) {
|
||||
existing_hashes.insert(line, next_code);
|
||||
codes[i] = next_code;
|
||||
codes.push(next_code);
|
||||
next_code += 1;
|
||||
} else {
|
||||
codes[i] = existing_hashes[line];
|
||||
codes.push(existing_hashes[&line]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -105,7 +150,7 @@ fn diff_codes<'a>(text: &'a str, existing_hashes: &mut HashMap<&'a str, usize>)
|
|||
}
|
||||
|
||||
// Longest Common-Subsequence
|
||||
fn lcs(data_a: &mut DiffData, mut lower_a: usize, mut upper_a: usize, data_b: &mut DiffData, mut lower_b: usize, mut upper_b: usize, down_vector: &mut DiffVec, up_vector: &mut DiffVec) {
|
||||
fn lcs(data_a: &mut DiffData, mut lower_a: usize, mut upper_a: usize, data_b: &mut DiffData, mut lower_b: usize, mut upper_b: usize, down_vector: &mut Vec<usize>, up_vector: &mut Vec<usize>) {
|
||||
while lower_a < upper_a && lower_b < upper_b && data_a.codes[lower_a] == data_b.codes[lower_b] {
|
||||
lower_a += 1;
|
||||
lower_b += 1;
|
||||
|
@ -130,7 +175,8 @@ fn lcs(data_a: &mut DiffData, mut lower_a: usize, mut upper_a: usize, data_b: &m
|
|||
}
|
||||
} else {
|
||||
// Find the middle snake and length of an optimal path for A and B
|
||||
let sms = sms(&data_a, lower_a, upper_a, &data_b, lower_b, upper_b, down_vector, up_vector);
|
||||
let sms_bounds = SmsBounds::from(lower_a, lower_b, upper_a, upper_b);
|
||||
let sms = sms(&data_a, &data_b, &sms_bounds, down_vector, up_vector);
|
||||
|
||||
// The path is from lower_x to (x, y) and (x, y) to upper_x
|
||||
lcs(data_a, lower_a, sms.x, data_b, lower_b, sms.y, down_vector, up_vector);
|
||||
|
@ -138,96 +184,77 @@ fn lcs(data_a: &mut DiffData, mut lower_a: usize, mut upper_a: usize, data_b: &m
|
|||
}
|
||||
}
|
||||
|
||||
// https://stackoverflow.com/questions/54035728/how-to-add-a-negative-i32-number-to-an-usize-variable
|
||||
fn add_i32(index: usize, offset: i32) -> usize {
|
||||
if offset.is_negative() {
|
||||
index - offset.wrapping_abs() as u32 as usize
|
||||
} else {
|
||||
index + offset as usize
|
||||
}
|
||||
}
|
||||
|
||||
// Shortest Middle Snake
|
||||
fn sms(data_a: &DiffData, lower_a: usize, upper_a: usize, data_b: &DiffData, lower_b: usize, upper_b: usize, down_vector: &mut DiffVec, up_vector: &mut DiffVec) -> SmsData {
|
||||
let max = upper_a as i32 + upper_b as i32 + 1;
|
||||
fn sms(data_a: &DiffData, data_b: &DiffData, bounds: &SmsBounds, down_vector: &mut Vec<usize>, up_vector: &mut Vec<usize>) -> SmsData {
|
||||
down_vector[bounds.get_down_index(bounds.down_k, LeftDown)] = bounds.lower_a;
|
||||
up_vector[bounds.get_up_index(bounds.up_k, RightUp)] = bounds.upper_a;
|
||||
|
||||
let down_k = lower_a as i32 - lower_b as i32;
|
||||
let up_k = upper_a as i32 - upper_b as i32;
|
||||
|
||||
let delta = (upper_a - lower_a) - (upper_b - lower_b);
|
||||
let odd_delta = (delta & 1) != 0;
|
||||
|
||||
let down_offset = max - down_k;
|
||||
let up_offset = max - up_k;
|
||||
|
||||
let max_d = ((upper_a - lower_a + upper_b - lower_b) / 2) as i32 + 1;
|
||||
|
||||
down_vector.set_i32(down_offset + down_k + 1, lower_a);
|
||||
up_vector.set_i32(up_offset + up_k - 1, upper_a);
|
||||
|
||||
for d in 0..=max_d {
|
||||
for d in 0..=bounds.max_d as i32 {
|
||||
// Extend the forward path
|
||||
for k in ((down_k - d)..=(down_k + d)).step_by(2) {
|
||||
for k in ((bounds.down_k - d)..=(bounds.down_k + d)).step_by(2) {
|
||||
let mut x;
|
||||
let mut y;
|
||||
if k == down_k - d {
|
||||
|
||||
if k == bounds.down_k - d {
|
||||
// Down
|
||||
x = down_vector.get_i32(down_offset + k + 1);
|
||||
x = down_vector[bounds.get_down_index(k, LeftDown)];
|
||||
} else {
|
||||
// Right
|
||||
x = down_vector.get_i32(down_offset + k - 1) + 1;
|
||||
if k < down_k + d && down_vector.get_i32(down_offset + k + 1) >= x {
|
||||
x = down_vector[bounds.get_down_index(k, RightUp)] + 1;
|
||||
if k < bounds.down_k + d && down_vector[bounds.get_down_index(k, LeftDown)] >= x {
|
||||
// Down
|
||||
x = down_vector.get_i32(down_offset + k + 1);
|
||||
x = down_vector[bounds.get_down_index(k, LeftDown)];
|
||||
}
|
||||
}
|
||||
y = add_i32(x, -k);
|
||||
|
||||
// Find the end of the furthest reaching forward D-path in diagonal k.
|
||||
while x < upper_a && y < upper_b && data_a.codes[x] == data_b.codes[y] {
|
||||
while x < bounds.upper_a && y < bounds.upper_b && data_a.codes[x] == data_b.codes[y] {
|
||||
x += 1;
|
||||
y += 1;
|
||||
}
|
||||
|
||||
down_vector.set_i32(down_offset + k, x);
|
||||
down_vector[bounds.get_down_index(k, None)] = x;
|
||||
|
||||
// Overlap ?
|
||||
if odd_delta && up_k - d < k && k < up_k + d && up_vector.get_i32(up_offset + k) <= down_vector.get_i32(down_offset + k) {
|
||||
let x = down_vector.get_i32(down_offset + k);
|
||||
let y = add_i32(down_vector.get_i32(down_offset + k), -k);
|
||||
if bounds.is_delta_odd && bounds.up_k - d < k && k < bounds.up_k + d &&
|
||||
up_vector[bounds.get_up_index(k, None)] <= down_vector[bounds.get_down_index(k, None)] {
|
||||
let x = down_vector[bounds.get_down_index(k, None)];
|
||||
let y = add_i32(down_vector[bounds.get_down_index(k, None)], -k);
|
||||
return SmsData { x, y };
|
||||
}
|
||||
}
|
||||
|
||||
// Extend the reverse path
|
||||
for k in ((up_k - d)..=(up_k + d)).step_by(2) {
|
||||
for k in ((bounds.up_k - d)..=(bounds.up_k + d)).step_by(2) {
|
||||
let mut x;
|
||||
let mut y;
|
||||
|
||||
if k == up_k + d {
|
||||
if k == bounds.up_k + d {
|
||||
// Up
|
||||
x = up_vector.get_i32(up_offset + k - 1);
|
||||
x = up_vector[bounds.get_up_index(k, RightUp)];
|
||||
} else {
|
||||
// Left
|
||||
x = up_vector.get_i32(up_offset + k + 1) - 1;
|
||||
if k > up_k - d && up_vector.get_i32(up_offset + k - 1) < x {
|
||||
x = up_vector[bounds.get_up_index(k, LeftDown)] - 1;
|
||||
if k > bounds.up_k - d && up_vector[bounds.get_up_index(k, RightUp)] < x {
|
||||
// Up
|
||||
x = up_vector.get_i32(up_offset + k - 1);
|
||||
x = up_vector[bounds.get_up_index(k, RightUp)];
|
||||
}
|
||||
}
|
||||
y = add_i32(x, -k);
|
||||
|
||||
while x > lower_a && y > lower_b && data_a.codes[x - 1] == data_b.codes[y - 1] {
|
||||
while x > bounds.lower_a && y > bounds.lower_b && data_a.codes[x - 1] == data_b.codes[y - 1] {
|
||||
x -= 1;
|
||||
y -= 1;
|
||||
}
|
||||
|
||||
up_vector.set_i32(up_offset + k, x);
|
||||
up_vector[bounds.get_up_index(k, None)] = x;
|
||||
|
||||
// Overlap ?
|
||||
if !odd_delta && down_k - d <= k && k <= down_k + d && up_vector.get_i32(up_offset + k) <= down_vector.get_i32(down_offset + k) {
|
||||
let x = down_vector.get_i32(down_offset + k);
|
||||
let y = add_i32(down_vector.get_i32(down_offset + k), -k);
|
||||
if !bounds.is_delta_odd && bounds.down_k - d <= k && k <= bounds.down_k + d &&
|
||||
up_vector[bounds.get_up_index(k, None)] <= down_vector[bounds.get_down_index(k, None)] {
|
||||
let x = down_vector[bounds.get_down_index(k, None)];
|
||||
let y = add_i32(down_vector[bounds.get_down_index(k, None)], -k);
|
||||
return SmsData { x, y };
|
||||
}
|
||||
}
|
||||
|
|
16
src/main.rs
16
src/main.rs
|
@ -1,13 +1,21 @@
|
|||
use std::fs::File;
|
||||
use std::io;
|
||||
use std::io::{BufRead, BufReader};
|
||||
use crate::diff::diff;
|
||||
|
||||
mod matrix;
|
||||
mod lcs;
|
||||
mod diff;
|
||||
|
||||
fn main() {
|
||||
let a = "abcabba\nlkajsdfasdf\nasdfasdfasdf\nlasjkdf";
|
||||
let b = "abcabba\ncbabasdfasdf\nlasjkdf\nope";
|
||||
fn main() -> io::Result<()> {
|
||||
let file = File::open("test-data/config.jsonc")?;
|
||||
let mut reader_a = BufReader::new(file);
|
||||
|
||||
let result = diff(a, b);
|
||||
let file = File::open("test-data/config-old.jsonc")?;
|
||||
let mut reader_b = BufReader::new(file);
|
||||
|
||||
let result = diff(&mut reader_a, &mut reader_b);
|
||||
dbg!(result);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
{
|
||||
"position": "top",
|
||||
"modules-left": ["sway/workspaces"],
|
||||
"modules-right": ["network", "pulseaudio", "clock"],
|
||||
// Modules configuration
|
||||
"sway/workspaces": {
|
||||
"disable-scroll": true,
|
||||
"all-outputs": true,
|
||||
"format": "{icon}",
|
||||
"persistent_workspaces": {
|
||||
"1": [],
|
||||
"2": [],
|
||||
"3": [],
|
||||
"4": [],
|
||||
"5": [],
|
||||
"6": [],
|
||||
"7": [],
|
||||
"8": [],
|
||||
"9": [],
|
||||
"10": []
|
||||
},
|
||||
"format-icons": {
|
||||
"default": "",
|
||||
"urgent": "",
|
||||
"focused": ""
|
||||
}
|
||||
},
|
||||
"custom/spotify": {
|
||||
"format": "<span foreground='#abc123'> </span><span font='FireCodeMono Nerd Font Mono weight=325 Italic'>{}</span>",
|
||||
"interval": 1,
|
||||
"exec-if": "pgrep spotify",
|
||||
"on-click": "playerctl -p spotify play-pause",
|
||||
"on-scroll-up": "playerctl -p spotify previous",
|
||||
"on-scroll-down": "playerctl -p spotify next",
|
||||
"tooltip": false,
|
||||
"escape": true,
|
||||
"MAX-LENGTH": 60,
|
||||
"exec": "/home/loki/bin/spotify.sh"
|
||||
},
|
||||
"clock": {
|
||||
"format": "{:%a %d %H:%M} <span foreground='#123abc'></span>",
|
||||
"tooltip-format": "<big>{:%Y %B}</big>\n<tt><small>{calendar}</small></tt>"
|
||||
},
|
||||
"network": {
|
||||
"forletmat-disconnected": "Disconnected <span class='#abc123'></span>",
|
||||
"format-ethernet": "{ipaddr} <span foreground='#123abc'></span>"
|
||||
},
|
||||
"pulseaudio": {
|
||||
"format": "{volume}% <span foreground='#123abc'>{icon}</span>",
|
||||
"format-bluetooth": "{volume}% <span foreground='#abc123'>{icon}</span>",
|
||||
"format-muted": "",
|
||||
"format-icons": {
|
||||
"headphone": "",
|
||||
"hands-free": "",
|
||||
"headset": "",
|
||||
"phone": "",
|
||||
"portable": "",
|
||||
"car": "",
|
||||
"default": ["", ""]
|
||||
},
|
||||
"scroll-step": 1,
|
||||
"on-click": "pavucontrol",
|
||||
"ignored-sinks": ["Easy Effects Sink"]
|
||||
}
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
{
|
||||
"position": "top",
|
||||
"modules-left": ["sway/workspaces"],
|
||||
"modules-right": ["network", "pulseaudio", "clock"],
|
||||
// Modules configuration
|
||||
"sway/workspaces": {
|
||||
"disable-scroll": true,
|
||||
"all-outputs": true,
|
||||
"format": "{icon}",
|
||||
"persistent_workspaces": {
|
||||
"1": [],
|
||||
"2": [],
|
||||
"3": [],
|
||||
"4": [],
|
||||
"5": [],
|
||||
"6": [],
|
||||
"7": [],
|
||||
"8": [],
|
||||
"9": [],
|
||||
"10": []
|
||||
},
|
||||
"format-icons": {
|
||||
"default": "",
|
||||
"urgent": "",
|
||||
"focused": ""
|
||||
}
|
||||
},
|
||||
"custom/spotify": {
|
||||
"format": "<span foreground='#a4b9ef'> </span><span font='FireCodeMono Nerd Font Mono weight=325 Italic'>{}</span>",
|
||||
"interval": 1,
|
||||
"exec-if": "pgrep spotify",
|
||||
"on-click": "playerctl -p spotify play-pause",
|
||||
"on-scroll-up": "playerctl -p spotify previous",
|
||||
"on-scroll-down": "playerctl -p spotify next",
|
||||
"tooltip": false,
|
||||
"escape": true,
|
||||
"MAX-LENGTH": 60,
|
||||
"exec": "/home/loki/bin/spotify.sh"
|
||||
},
|
||||
"clock": {
|
||||
"format": "{:%a %d %H:%M} <span foreground='#a4b9ef'></span>",
|
||||
"tooltip-format": "<big>{:%Y %B}</big>\n<tt><small>{calendar}</small></tt>"
|
||||
},
|
||||
"network": {
|
||||
"format-disconnected": "Disconnected <span class='#f9c096'></span>",
|
||||
"format-ethernet": "{ipaddr} <span foreground='#a4b9ef'></span>"
|
||||
},
|
||||
"pulseaudio": {
|
||||
"format": "{volume}% <span foreground='#a4b9ef'>{icon}</span>",
|
||||
"format-bluetooth": "{volume}% <span foreground='#a4b9ef'>{icon}</span>",
|
||||
"format-muted": "",
|
||||
"format-icons": {
|
||||
"headphone": "",
|
||||
"hands-free": "",
|
||||
"headset": "",
|
||||
"phone": "",
|
||||
"portable": "",
|
||||
"car": "",
|
||||
"default": ["", ""]
|
||||
},
|
||||
"scroll-step": 1,
|
||||
"on-click": "pavucontrol",
|
||||
"ignored-sinks": ["Easy Effects Sink"]
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue