1056-edit-distance — Edit Distance (Levenshtein)
Tutorial
The Problem
Edit distance (Levenshtein distance) measures how many single-character edits — insertions, deletions, and substitutions — are needed to transform one string into another. It is the core metric in spell checkers, fuzzy search, DNA sequence alignment, and natural language processing for measuring string similarity.
Vladimir Levenshtein introduced the metric in 1966. The DP solution fills a 2D table in O(m×n) time, making it tractable for strings up to a few thousand characters.
🎯 Learning Outcomes
strsim crate for production useCode Example
#![allow(clippy::all)]
// 1056: Edit Distance (Levenshtein) — 2D DP Table
use std::collections::HashMap;
// Approach 1: 2D DP table
fn edit_distance(s1: &str, s2: &str) -> usize {
let a: Vec<char> = s1.chars().collect();
let b: Vec<char> = s2.chars().collect();
let (m, n) = (a.len(), b.len());
let mut dp = vec![vec![0; n + 1]; m + 1];
for i in 0..=m {
dp[i][0] = i;
}
for j in 0..=n {
dp[0][j] = j;
}
for i in 1..=m {
for j in 1..=n {
dp[i][j] = if a[i - 1] == b[j - 1] {
dp[i - 1][j - 1]
} else {
1 + dp[i - 1][j].min(dp[i][j - 1]).min(dp[i - 1][j - 1])
};
}
}
dp[m][n]
}
// Approach 2: Space-optimized with two rows
fn edit_distance_opt(s1: &str, s2: &str) -> usize {
let a: Vec<char> = s1.chars().collect();
let b: Vec<char> = s2.chars().collect();
let (m, n) = (a.len(), b.len());
let mut prev: Vec<usize> = (0..=n).collect();
let mut curr = vec![0; n + 1];
for i in 1..=m {
curr[0] = i;
for j in 1..=n {
curr[j] = if a[i - 1] == b[j - 1] {
prev[j - 1]
} else {
1 + prev[j].min(curr[j - 1]).min(prev[j - 1])
};
}
std::mem::swap(&mut prev, &mut curr);
}
prev[n]
}
// Approach 3: Recursive with memoization
fn edit_distance_memo(s1: &str, s2: &str) -> usize {
let a: Vec<char> = s1.chars().collect();
let b: Vec<char> = s2.chars().collect();
fn solve(
i: usize,
j: usize,
a: &[char],
b: &[char],
cache: &mut HashMap<(usize, usize), usize>,
) -> usize {
if i == 0 {
return j;
}
if j == 0 {
return i;
}
if let Some(&v) = cache.get(&(i, j)) {
return v;
}
let v = if a[i - 1] == b[j - 1] {
solve(i - 1, j - 1, a, b, cache)
} else {
1 + solve(i - 1, j, a, b, cache)
.min(solve(i, j - 1, a, b, cache))
.min(solve(i - 1, j - 1, a, b, cache))
};
cache.insert((i, j), v);
v
}
let mut cache = HashMap::new();
solve(a.len(), b.len(), &a, &b, &mut cache)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_edit_distance() {
assert_eq!(edit_distance("kitten", "sitting"), 3);
assert_eq!(edit_distance("saturday", "sunday"), 3);
assert_eq!(edit_distance("", "abc"), 3);
assert_eq!(edit_distance("abc", "abc"), 0);
}
#[test]
fn test_edit_distance_opt() {
assert_eq!(edit_distance_opt("kitten", "sitting"), 3);
assert_eq!(edit_distance_opt("saturday", "sunday"), 3);
}
#[test]
fn test_edit_distance_memo() {
assert_eq!(edit_distance_memo("kitten", "sitting"), 3);
assert_eq!(edit_distance_memo("saturday", "sunday"), 3);
}
}Key Differences
Array.init with a conditional expression.min three-way**: Rust uses a.min(b).min(c); OCaml uses min (min a b) c. Both are equivalent.OCaml Approach
let edit_distance s1 s2 =
let a = Array.of_seq (String.to_seq s1) in
let b = Array.of_seq (String.to_seq s2) in
let m, n = Array.length a, Array.length b in
let dp = Array.init (m+1) (fun i ->
Array.init (n+1) (fun j -> if i = 0 then j else if j = 0 then i else 0)
) in
for i = 1 to m do
for j = 1 to n do
dp.(i).(j) <- if a.(i-1) = b.(j-1) then dp.(i-1).(j-1)
else 1 + min (min dp.(i-1).(j-1) dp.(i-1).(j)) dp.(i).(j-1)
done
done;
dp.(m).(n)
The algorithms are structurally identical — edit distance is a mathematical operation with one correct DP formulation.
Full Source
#![allow(clippy::all)]
// 1056: Edit Distance (Levenshtein) — 2D DP Table
use std::collections::HashMap;
// Approach 1: 2D DP table
fn edit_distance(s1: &str, s2: &str) -> usize {
let a: Vec<char> = s1.chars().collect();
let b: Vec<char> = s2.chars().collect();
let (m, n) = (a.len(), b.len());
let mut dp = vec![vec![0; n + 1]; m + 1];
for i in 0..=m {
dp[i][0] = i;
}
for j in 0..=n {
dp[0][j] = j;
}
for i in 1..=m {
for j in 1..=n {
dp[i][j] = if a[i - 1] == b[j - 1] {
dp[i - 1][j - 1]
} else {
1 + dp[i - 1][j].min(dp[i][j - 1]).min(dp[i - 1][j - 1])
};
}
}
dp[m][n]
}
// Approach 2: Space-optimized with two rows
fn edit_distance_opt(s1: &str, s2: &str) -> usize {
let a: Vec<char> = s1.chars().collect();
let b: Vec<char> = s2.chars().collect();
let (m, n) = (a.len(), b.len());
let mut prev: Vec<usize> = (0..=n).collect();
let mut curr = vec![0; n + 1];
for i in 1..=m {
curr[0] = i;
for j in 1..=n {
curr[j] = if a[i - 1] == b[j - 1] {
prev[j - 1]
} else {
1 + prev[j].min(curr[j - 1]).min(prev[j - 1])
};
}
std::mem::swap(&mut prev, &mut curr);
}
prev[n]
}
// Approach 3: Recursive with memoization
fn edit_distance_memo(s1: &str, s2: &str) -> usize {
let a: Vec<char> = s1.chars().collect();
let b: Vec<char> = s2.chars().collect();
fn solve(
i: usize,
j: usize,
a: &[char],
b: &[char],
cache: &mut HashMap<(usize, usize), usize>,
) -> usize {
if i == 0 {
return j;
}
if j == 0 {
return i;
}
if let Some(&v) = cache.get(&(i, j)) {
return v;
}
let v = if a[i - 1] == b[j - 1] {
solve(i - 1, j - 1, a, b, cache)
} else {
1 + solve(i - 1, j, a, b, cache)
.min(solve(i, j - 1, a, b, cache))
.min(solve(i - 1, j - 1, a, b, cache))
};
cache.insert((i, j), v);
v
}
let mut cache = HashMap::new();
solve(a.len(), b.len(), &a, &b, &mut cache)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_edit_distance() {
assert_eq!(edit_distance("kitten", "sitting"), 3);
assert_eq!(edit_distance("saturday", "sunday"), 3);
assert_eq!(edit_distance("", "abc"), 3);
assert_eq!(edit_distance("abc", "abc"), 0);
}
#[test]
fn test_edit_distance_opt() {
assert_eq!(edit_distance_opt("kitten", "sitting"), 3);
assert_eq!(edit_distance_opt("saturday", "sunday"), 3);
}
#[test]
fn test_edit_distance_memo() {
assert_eq!(edit_distance_memo("kitten", "sitting"), 3);
assert_eq!(edit_distance_memo("saturday", "sunday"), 3);
}
}#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_edit_distance() {
assert_eq!(edit_distance("kitten", "sitting"), 3);
assert_eq!(edit_distance("saturday", "sunday"), 3);
assert_eq!(edit_distance("", "abc"), 3);
assert_eq!(edit_distance("abc", "abc"), 0);
}
#[test]
fn test_edit_distance_opt() {
assert_eq!(edit_distance_opt("kitten", "sitting"), 3);
assert_eq!(edit_distance_opt("saturday", "sunday"), 3);
}
#[test]
fn test_edit_distance_memo() {
assert_eq!(edit_distance_memo("kitten", "sitting"), 3);
assert_eq!(edit_distance_memo("saturday", "sunday"), 3);
}
}
Deep Comparison
Edit Distance (Levenshtein) — Comparison
Core Insight
Edit distance finds the minimum number of single-character edits (insert, delete, replace) to transform one string into another. The 2D DP table has a clean recurrence with three operations mapping to three neighbors.
OCaml Approach
Array.init with conditional initialization for base casesArray.blit for row copying in space-optimized versionmin calls: min (min a b) cHashtbl memoization with tuple keysRust Approach
std::mem::swap for efficient row swapping (zero-copy).min() calls: a.min(b).min(c)HashMap with tuple keys for memoized versionComparison Table
| Aspect | OCaml | Rust |
|---|---|---|
| Row swap | Array.blit (copies) | std::mem::swap (zero-copy) |
| Min of 3 | min (min a b) c | a.min(b).min(c) |
| Base cases | Array.init with conditionals | Explicit init loops |
| Range collect | (0..=n).collect() equivalent via Array.init | (0..=n).collect::<Vec<_>>() |
| Space optimization | Two arrays + blit | Two vecs + swap |
Exercises
edit_distance_k(s1: &str, s2: &str, k: usize) -> bool that returns whether edit distance is ≤ k without computing the full table (using the diagonal band optimization).closest_word(target: &str, dictionary: &[&str]) -> &str that returns the word with minimum edit distance from the dictionary.