1061-word-break — Word Break
Functional Programming
Tutorial
The Problem
Given a string and a dictionary of words, can the string be segmented into a sequence of dictionary words? This is the fundamental problem in natural language tokenization, programming language lexers, and CJK (Chinese/Japanese/Korean) text segmentation where words are not separated by spaces.
The DP solution uses a boolean array where dp[i] = true means the first i characters form a valid segmentation.
🎯 Learning Outcomes
HashSet dictionarydp[j] && dict.contains(s[j..i]) as the core recurrenceCode Example
#![allow(clippy::all)]
// 1061: Word Break — DP + HashSet
use std::collections::{HashMap, HashSet, VecDeque};
// Approach 1: Bottom-up DP
fn word_break(s: &str, words: &[&str]) -> bool {
let dict: HashSet<&str> = words.iter().copied().collect();
let n = s.len();
let mut dp = vec![false; n + 1];
dp[0] = true;
for i in 1..=n {
for j in 0..i {
if dp[j] && dict.contains(&s[j..i]) {
dp[i] = true;
break;
}
}
}
dp[n]
}
// Approach 2: Recursive with memoization
fn word_break_memo(s: &str, words: &[&str]) -> bool {
let dict: HashSet<&str> = words.iter().copied().collect();
fn solve(
s: &str,
start: usize,
dict: &HashSet<&str>,
cache: &mut HashMap<usize, bool>,
) -> bool {
if start == s.len() {
return true;
}
if let Some(&v) = cache.get(&start) {
return v;
}
let mut result = false;
for end_ in (start + 1)..=s.len() {
if dict.contains(&s[start..end_]) && solve(s, end_, dict, cache) {
result = true;
break;
}
}
cache.insert(start, result);
result
}
let mut cache = HashMap::new();
solve(s, 0, &dict, &mut cache)
}
// Approach 3: BFS approach
fn word_break_bfs(s: &str, words: &[&str]) -> bool {
let dict: HashSet<&str> = words.iter().copied().collect();
let n = s.len();
let mut visited = vec![false; n + 1];
let mut queue = VecDeque::new();
queue.push_back(0);
visited[0] = true;
while let Some(start) = queue.pop_front() {
for end_ in (start + 1)..=n {
if !visited[end_] && dict.contains(&s[start..end_]) {
if end_ == n {
return true;
}
visited[end_] = true;
queue.push_back(end_);
}
}
}
false
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_word_break() {
assert!(word_break("leetcode", &["leet", "code"]));
assert!(word_break("applepenapple", &["apple", "pen"]));
assert!(!word_break(
"catsandog",
&["cats", "dog", "sand", "and", "cat"]
));
}
#[test]
fn test_word_break_memo() {
assert!(word_break_memo("leetcode", &["leet", "code"]));
assert!(!word_break_memo(
"catsandog",
&["cats", "dog", "sand", "and", "cat"]
));
}
#[test]
fn test_word_break_bfs() {
assert!(word_break_bfs("leetcode", &["leet", "code"]));
assert!(!word_break_bfs(
"catsandog",
&["cats", "dog", "sand", "and", "cat"]
));
}
}Key Differences
HashSet<&str> for O(1) dictionary lookup; OCaml's StringSet.mem is O(log n) — Rust's version has better asymptotic behavior.&s[j..i] is a zero-copy slice; OCaml's String.sub allocates a new string — this matters in hot DP loops.word_break_all uses VecDeque for BFS; OCaml would use Queue.t for the equivalent.OCaml Approach
module StringSet = Set.Make(String)
let word_break s words =
let dict = List.fold_left (fun s w -> StringSet.add w s) StringSet.empty words in
let n = String.length s in
let dp = Array.make (n + 1) false in
dp.(0) <- true;
for i = 1 to n do
for j = 0 to i - 1 do
if dp.(j) && StringSet.mem (String.sub s j (i - j)) dict then
dp.(i) <- true
done
done;
dp.(n)
Identical structure. StringSet.mem is O(log n) while Rust's HashSet::contains is O(1) amortized.
Full Source
#![allow(clippy::all)]
// 1061: Word Break — DP + HashSet
use std::collections::{HashMap, HashSet, VecDeque};
// Approach 1: Bottom-up DP
fn word_break(s: &str, words: &[&str]) -> bool {
let dict: HashSet<&str> = words.iter().copied().collect();
let n = s.len();
let mut dp = vec![false; n + 1];
dp[0] = true;
for i in 1..=n {
for j in 0..i {
if dp[j] && dict.contains(&s[j..i]) {
dp[i] = true;
break;
}
}
}
dp[n]
}
// Approach 2: Recursive with memoization
fn word_break_memo(s: &str, words: &[&str]) -> bool {
let dict: HashSet<&str> = words.iter().copied().collect();
fn solve(
s: &str,
start: usize,
dict: &HashSet<&str>,
cache: &mut HashMap<usize, bool>,
) -> bool {
if start == s.len() {
return true;
}
if let Some(&v) = cache.get(&start) {
return v;
}
let mut result = false;
for end_ in (start + 1)..=s.len() {
if dict.contains(&s[start..end_]) && solve(s, end_, dict, cache) {
result = true;
break;
}
}
cache.insert(start, result);
result
}
let mut cache = HashMap::new();
solve(s, 0, &dict, &mut cache)
}
// Approach 3: BFS approach
fn word_break_bfs(s: &str, words: &[&str]) -> bool {
let dict: HashSet<&str> = words.iter().copied().collect();
let n = s.len();
let mut visited = vec![false; n + 1];
let mut queue = VecDeque::new();
queue.push_back(0);
visited[0] = true;
while let Some(start) = queue.pop_front() {
for end_ in (start + 1)..=n {
if !visited[end_] && dict.contains(&s[start..end_]) {
if end_ == n {
return true;
}
visited[end_] = true;
queue.push_back(end_);
}
}
}
false
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_word_break() {
assert!(word_break("leetcode", &["leet", "code"]));
assert!(word_break("applepenapple", &["apple", "pen"]));
assert!(!word_break(
"catsandog",
&["cats", "dog", "sand", "and", "cat"]
));
}
#[test]
fn test_word_break_memo() {
assert!(word_break_memo("leetcode", &["leet", "code"]));
assert!(!word_break_memo(
"catsandog",
&["cats", "dog", "sand", "and", "cat"]
));
}
#[test]
fn test_word_break_bfs() {
assert!(word_break_bfs("leetcode", &["leet", "code"]));
assert!(!word_break_bfs(
"catsandog",
&["cats", "dog", "sand", "and", "cat"]
));
}
}
✓ Tests
Rust test suite
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_word_break() {
assert!(word_break("leetcode", &["leet", "code"]));
assert!(word_break("applepenapple", &["apple", "pen"]));
assert!(!word_break(
"catsandog",
&["cats", "dog", "sand", "and", "cat"]
));
}
#[test]
fn test_word_break_memo() {
assert!(word_break_memo("leetcode", &["leet", "code"]));
assert!(!word_break_memo(
"catsandog",
&["cats", "dog", "sand", "and", "cat"]
));
}
#[test]
fn test_word_break_bfs() {
assert!(word_break_bfs("leetcode", &["leet", "code"]));
assert!(!word_break_bfs(
"catsandog",
&["cats", "dog", "sand", "and", "cat"]
));
}
}
Deep Comparison
Word Break — Comparison
Core Insight
Word break checks if a string can be segmented into valid dictionary words. The DP approach marks reachable positions; BFS treats positions as graph nodes with dictionary words as edges.
OCaml Approach
StringSet via Set.Make(String) functor — ordered setString.sub s j (i-j) for substring extractionQueue module for BFSfind_opt for memoizationRust Approach
HashSet<&str> — O(1) average lookup&s[j..i] — zero-copy substring (valid for ASCII)VecDeque for BFSbreak in inner loops for optimizationComparison Table
| Aspect | OCaml | Rust |
|---|---|---|
| Dictionary type | Set.Make(String) (tree) | HashSet<&str> (hash) |
| Lookup complexity | O(log n) | O(1) average |
| Substring | String.sub s pos len (allocates) | &s[j..i] (zero-copy slice) |
| BFS queue | Queue.t (mutable) | VecDeque |
| Early termination | ref flag + check | break statement |
Exercises
word_break_shortest(s: &str, words: &[&str]) -> Option<Vec<&str>> that finds the segmentation using the fewest words.