Skip to content

Commit

Permalink
Improve Word Break (#804)
Browse files Browse the repository at this point in the history
* ref: improve word break

* ref: refactor implementation

- take `word_dict` as `&[&str]` so callers can pass a borrowed slice
without having to give up ownership of the vector
- grow the memoization vector to `s.len() + 1` entries, which allows us to
mark the `start == s.len()` case explicitly in the vector.

* feat(tests): update tests with edge cases
  • Loading branch information
sozelfist authored Oct 4, 2024
1 parent be27f2c commit 418bf15
Showing 1 changed file with 55 additions and 48 deletions.
103 changes: 55 additions & 48 deletions src/dynamic_programming/word_break.rs
Original file line number Diff line number Diff line change
@@ -1,27 +1,38 @@
// Given a string and a list of words, return true if the string can be
// segmented into a space-separated sequence of one or more words.

// Note that the same word may be reused
// multiple times in the segmentation.

// Implementation notes: Trie + top-down dynamic programming (memoized recursion).
// The Trie stores the dictionary words, which makes it easy to look up the
// words available at the current position in the string.

use crate::data_structures::Trie;

pub fn word_break(s: &str, word_dict: Vec<&str>) -> bool {
/// Checks if a string can be segmented into a space-separated sequence
/// of one or more words from the given dictionary.
///
/// # Arguments
/// * `s` - The input string to be segmented.
/// * `word_dict` - A slice of words forming the dictionary.
///
/// # Returns
/// * `bool` - `true` if the string can be segmented, `false` otherwise.
pub fn word_break(s: &str, word_dict: &[&str]) -> bool {
let mut trie = Trie::new();
for word in word_dict {
trie.insert(word.chars(), true); // Insert each word with a value `true`
for &word in word_dict {
trie.insert(word.chars(), true);
}

let mut memo = vec![None; s.len()];
// Memoization vector: one extra slot so that index `s.len()` (the end-of-string position) is a valid entry.
let mut memo = vec![None; s.len() + 1];
search(&trie, s, 0, &mut memo)
}

/// Recursively checks if the substring starting from `start` can be segmented
/// using words in the trie and memoizes the results.
///
/// # Arguments
/// * `trie` - The Trie containing the dictionary words.
/// * `s` - The input string.
/// * `start` - The starting index for the current substring.
/// * `memo` - A vector for memoization to store intermediate results.
///
/// # Returns
/// * `bool` - `true` if the substring can be segmented, `false` otherwise.
fn search(trie: &Trie<char, bool>, s: &str, start: usize, memo: &mut Vec<Option<bool>>) -> bool {
if start >= s.len() {
if start == s.len() {
return true;
}

Expand All @@ -30,7 +41,6 @@ fn search(trie: &Trie<char, bool>, s: &str, start: usize, memo: &mut Vec<Option<
}

for end in start + 1..=s.len() {
// Using trie.get to check if a substring is a word
if trie.get(s[start..end].chars()).is_some() && search(trie, s, end, memo) {
memo[start] = Some(true);
return true;
Expand All @@ -43,40 +53,37 @@ fn search(trie: &Trie<char, bool>, s: &str, start: usize, memo: &mut Vec<Option<

#[cfg(test)]
mod tests {
use super::word_break;

#[test]
fn typical_cases() {
assert!(word_break("applepenapple", vec!["apple", "pen"]));
assert!(!word_break(
"catsandog",
vec!["cats", "dog", "sand", "and", "cat"]
));
assert!(word_break("cars", vec!["car", "ca", "rs"]));
}
use super::*;

#[test]
fn edge_cases() {
assert!(!word_break("abc", vec![]));
assert!(word_break("a", vec!["a"]));
}

#[test]
fn repeated_words() {
assert!(word_break("aabb", vec!["a", "b"]));
assert!(word_break("aaaaaaa", vec!["a", "aa", "aaa"]));
}

#[test]
fn no_solution() {
assert!(!word_break("abcdef", vec!["ab", "abc", "cd"]));
assert!(!word_break("xyz", vec!["a", "b", "c"]));
/// Expands every `name: (input, dict, expected)` entry into its own
/// `#[test]` function, which checks that `word_break(input, &dict)`
/// returns `expected`.
macro_rules! test_cases {
    ($($test_name:ident: $case:expr,)*) => {
        $(
            #[test]
            fn $test_name() {
                // Destructure the case tuple, then compare the actual
                // result against the expected one.
                let (text, words, want) = $case;
                let got = word_break(text, &words);
                assert_eq!(got, want);
            }
        )*
    };
}

#[test]
fn long_string() {
let long_string = "a".repeat(100);
let words = vec!["a", "aa", "aaa", "aaaa"];
assert!(word_break(&long_string, words));
// Each entry has the form `name: (input, dict, expected)`; the `test_cases!`
// macro turns it into a test asserting `word_break(input, &dict) == expected`.
test_cases! {
typical_case_1: ("applepenapple", vec!["apple", "pen"], true),
typical_case_2: ("catsandog", vec!["cats", "dog", "sand", "and", "cat"], false),
typical_case_3: ("cars", vec!["car", "ca", "rs"], true),
edge_case_empty_string: ("", vec!["apple", "pen"], true),
edge_case_empty_dict: ("apple", vec![], false),
edge_case_single_char_in_dict: ("a", vec!["a"], true),
edge_case_single_char_not_in_dict: ("b", vec!["a"], false),
edge_case_all_words_larger_than_input: ("a", vec!["apple", "banana"], false),
// The input omits 'p' after "klmno", so no dictionary word matches the remainder.
edge_case_no_solution_large_string: ("abcdefghijklmnoqrstuv", vec!["a", "bc", "def", "ghij", "klmno", "pqrst"], false),
successful_segmentation_large_string: ("abcdefghijklmnopqrst", vec!["a", "bc", "def", "ghij", "klmno", "pqrst"], true),
long_string_repeated_pattern: (&"ab".repeat(100), vec!["a", "b", "ab"], true),
long_string_no_solution: (&"a".repeat(100), vec!["b"], false),
mixed_size_dict_1: ("pineapplepenapple", vec!["apple", "pen", "applepen", "pine", "pineapple"], true),
// NOTE(review): same input, dictionary and expectation as `typical_case_2`.
mixed_size_dict_2: ("catsandog", vec!["cats", "dog", "sand", "and", "cat"], false),
mixed_size_dict_3: ("abcd", vec!["a", "abc", "b", "cd"], true),
performance_stress_test_large_valid: (&"abc".repeat(1000), vec!["a", "ab", "abc"], true),
performance_stress_test_large_invalid: (&"x".repeat(1000), vec!["a", "ab", "abc"], false),
}
}

0 comments on commit 418bf15

Please sign in to comment.