Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve Word Break #804

Merged
merged 4 commits into from
Oct 4, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 55 additions & 48 deletions src/dynamic_programming/word_break.rs
Original file line number Diff line number Diff line change
@@ -1,27 +1,38 @@
// Given a string and a list of words, return true if the string can be
// segmented into a space-separated sequence of one or more words.

// Note that the same word may be reused
// multiple times in the segmentation.

// Implementation notes: Trie + top-down dynamic programming (memoized recursion).
// The Trie stores the dictionary words, which makes it easy to check which
// words match the string starting at the current position.

use crate::data_structures::Trie;

pub fn word_break(s: &str, word_dict: Vec<&str>) -> bool {
/// Checks if a string can be segmented into a space-separated sequence
/// of one or more words from the given dictionary.
///
/// # Arguments
/// * `s` - The input string to be segmented.
/// * `word_dict` - A slice of words forming the dictionary.
///
/// # Returns
/// * `bool` - `true` if the string can be segmented, `false` otherwise.
pub fn word_break(s: &str, word_dict: &[&str]) -> bool {
let mut trie = Trie::new();
for word in word_dict {
trie.insert(word.chars(), true); // Insert each word with a value `true`
for &word in word_dict {
trie.insert(word.chars(), true);
}

let mut memo = vec![None; s.len()];
// Memoization vector: one extra space to handle out-of-bound end case.
let mut memo = vec![None; s.len() + 1];
search(&trie, s, 0, &mut memo)
}

/// Recursively checks if the substring starting from `start` can be segmented
/// using words in the trie and memoizes the results.
///
/// # Arguments
/// * `trie` - The Trie containing the dictionary words.
/// * `s` - The input string.
/// * `start` - The starting index for the current substring.
/// * `memo` - A vector for memoization to store intermediate results.
///
/// # Returns
/// * `bool` - `true` if the substring can be segmented, `false` otherwise.
fn search(trie: &Trie<char, bool>, s: &str, start: usize, memo: &mut Vec<Option<bool>>) -> bool {
if start >= s.len() {
if start == s.len() {
return true;
}

Expand All @@ -30,7 +41,6 @@ fn search(trie: &Trie<char, bool>, s: &str, start: usize, memo: &mut Vec<Option<
}

for end in start + 1..=s.len() {
// Using trie.get to check if a substring is a word
if trie.get(s[start..end].chars()).is_some() && search(trie, s, end, memo) {
memo[start] = Some(true);
return true;
Expand All @@ -43,40 +53,37 @@ fn search(trie: &Trie<char, bool>, s: &str, start: usize, memo: &mut Vec<Option<

#[cfg(test)]
mod tests {
use super::word_break;

#[test]
fn typical_cases() {
assert!(word_break("applepenapple", vec!["apple", "pen"]));
assert!(!word_break(
"catsandog",
vec!["cats", "dog", "sand", "and", "cat"]
));
assert!(word_break("cars", vec!["car", "ca", "rs"]));
}
use super::*;

#[test]
fn edge_cases() {
assert!(!word_break("abc", vec![]));
assert!(word_break("a", vec!["a"]));
}

#[test]
fn repeated_words() {
assert!(word_break("aabb", vec!["a", "b"]));
assert!(word_break("aaaaaaa", vec!["a", "aa", "aaa"]));
}

#[test]
fn no_solution() {
assert!(!word_break("abcdef", vec!["ab", "abc", "cd"]));
assert!(!word_break("xyz", vec!["a", "b", "c"]));
macro_rules! test_cases {
($($name:ident: $test_case:expr,)*) => {
$(
#[test]
fn $name() {
let (input, dict, expected) = $test_case;
assert_eq!(word_break(input, &dict), expected);
}
)*
}
}

#[test]
fn long_string() {
let long_string = "a".repeat(100);
let words = vec!["a", "aa", "aaa", "aaaa"];
assert!(word_break(&long_string, words));
test_cases! {
typical_case_1: ("applepenapple", vec!["apple", "pen"], true),
typical_case_2: ("catsandog", vec!["cats", "dog", "sand", "and", "cat"], false),
typical_case_3: ("cars", vec!["car", "ca", "rs"], true),
edge_case_empty_string: ("", vec!["apple", "pen"], true),
edge_case_empty_dict: ("apple", vec![], false),
edge_case_single_char_in_dict: ("a", vec!["a"], true),
edge_case_single_char_not_in_dict: ("b", vec!["a"], false),
edge_case_all_words_larger_than_input: ("a", vec!["apple", "banana"], false),
edge_case_no_solution_large_string: ("abcdefghijklmnoqrstuv", vec!["a", "bc", "def", "ghij", "klmno", "pqrst"], false),
successful_segmentation_large_string: ("abcdefghijklmnopqrst", vec!["a", "bc", "def", "ghij", "klmno", "pqrst"], true),
long_string_repeated_pattern: (&"ab".repeat(100), vec!["a", "b", "ab"], true),
long_string_no_solution: (&"a".repeat(100), vec!["b"], false),
mixed_size_dict_1: ("pineapplepenapple", vec!["apple", "pen", "applepen", "pine", "pineapple"], true),
mixed_size_dict_2: ("catsandog", vec!["cats", "dog", "sand", "and", "cat"], false),
mixed_size_dict_3: ("abcd", vec!["a", "abc", "b", "cd"], true),
performance_stress_test_large_valid: (&"abc".repeat(1000), vec!["a", "ab", "abc"], true),
performance_stress_test_large_invalid: (&"x".repeat(1000), vec!["a", "ab", "abc"], false),
}
}