Problem Statement: Word Break II
Given a string s
and a list of words wordDict
, return all possible sentence(s) where each sentence is a valid segmentation of s
into words from wordDict
. The solution should return a list of all possible sentences.
Note:
- The returned sentences should be sorted in lexicographical order.
Example
Example 1:
Input:"catsanddog"
wordDict = ["cat", "cats", "and", "sand", "dog"]
Output:["cats and dog", "cat sand dog"]
Example 2:
Input:"pineapplepenapple"
wordDict = ["apple", "pen", "applepen", "pine", "pineapple"]
Output:["pine apple pen apple", "pineapple pen apple", "pine applepen apple"]
Example 3:
Input:"catsandog"
wordDict = ["cats", "dog", "sand", "and", "cat"]
Output:[]
Approach and Algorithm
- Backtracking with Memoization (DFS + DP):
- The idea is to recursively try all possible breaks of the string
s
into words fromwordDict
. - We can check if a substring
s[0...i]
is inwordDict
. If it is, then recursively call the function for the remaining strings[i+1...n]
. - We can use a memoization table to store intermediate results and avoid recomputing the results for the same substring.
- The idea is to recursively try all possible breaks of the string
- Steps:
- Start by checking if the prefix of the string matches any word in
wordDict
. - If it does, recursively try to break the remaining string.
- Combine the current word with the results from the recursive calls.
- Memoize the results to avoid redundant calculations.
- Once all possible sentences are found, return them.
- Start by checking if the prefix of the string matches any word in
- Time Complexity:
- The time complexity is roughly O(2^n) because in the worst case, we can try splitting at every character in the string.
- However, with memoization, the effective time complexity is reduced significantly.
- Space Complexity:
- The space complexity is O(n), due to the recursive call stack and memoization storage.
Code in Multiple Languages
C
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>
#define MAX 1000
void findWordBreaks(char* s, int start, char** wordDict, int wordDictSize, bool* dp, char*** result, int* returnSize) {
if (start == strlen(s)) {
result[*returnSize] = malloc(sizeof(char*) * 2);
result[*returnSize][0] = strdup("");
(*returnSize)++;
return;
}
if (!dp[start]) return; // Skip if this position cannot be reached
for (int i = 0; i < wordDictSize; i++) {
int len = strlen(wordDict[i]);
if (start + len <= strlen(s) && strncmp(s + start, wordDict[i], len) == 0) {
char** tempResult;
int tempSize = 0;
findWordBreaks(s, start + len, wordDict, wordDictSize, dp, &tempResult, &tempSize);
for (int j = 0; j < tempSize; j++) {
int newSize = tempSize + 1;
result[*returnSize] = realloc(result[*returnSize], sizeof(char*) * newSize);
char* newSentence = malloc(strlen(wordDict[i]) + strlen(tempResult[j]) + 2);
sprintf(newSentence, "%s %s", wordDict[i], tempResult[j]);
result[*returnSize][newSize - 1] = newSentence;
}
}
}
}
int main() {
char* s = "catsanddog";
char* wordDict[] = {"cat", "cats", "and", "sand", "dog"};
int wordDictSize = 5;
bool dp[MAX];
memset(dp, 0, sizeof(dp));
char*** result = malloc(sizeof(char**) * MAX);
int returnSize = 0;
findWordBreaks(s, 0, wordDict, wordDictSize, dp, result, &returnSize);
for (int i = 0; i < returnSize; i++) {
printf("%s\n", result[i][0]);
}
return 0;
}
C++
#include <iostream>
#include <vector>
#include <unordered_set>
#include <string>
#include <algorithm>
using namespace std;
void dfs(string s, unordered_set<string>& wordSet, vector<string>& currentSentence, vector<string>& result, vector<bool>& dp) {
if (s.empty()) {
string sentence = "";
for (int i = 0; i < currentSentence.size(); i++) {
sentence += currentSentence[i];
if (i != currentSentence.size() - 1) sentence += " ";
}
result.push_back(sentence);
return;
}
if (!dp[s.size()]) return;
for (int i = 1; i <= s.size(); i++) {
string word = s.substr(0, i);
if (wordSet.find(word) != wordSet.end()) {
currentSentence.push_back(word);
dfs(s.substr(i), wordSet, currentSentence, result, dp);
currentSentence.pop_back();
}
}
}
vector<string> wordBreak(string s, unordered_set<string>& wordSet) {
int n = s.length();
vector<bool> dp(n + 1, false);
dp[0] = true;
for (int i = 1; i <= n; i++) {
for (int j = 0; j < i; j++) {
if (dp[j] && wordSet.find(s.substr(j, i - j)) != wordSet.end()) {
dp[i] = true;
break;
}
}
}
vector<string> result;
vector<string> currentSentence;
if (dp[n]) dfs(s, wordSet, currentSentence, result, dp);
sort(result.begin(), result.end());
return result;
}
int main() {
unordered_set<string> wordSet = {"cat", "cats", "and", "sand", "dog"};
string s = "catsanddog";
vector<string> result = wordBreak(s, wordSet);
for (const auto& sentence : result) {
cout << sentence << endl;
}
return 0;
}
Java
import java.util.*;
public class WordBreakII {
public static void dfs(String s, Set<String> wordSet, List<String> currentSentence, List<String> result, boolean[] dp) {
if (s.isEmpty()) {
result.add(String.join(" ", currentSentence));
return;
}
if (!dp[s.length()]) return;
for (int i = 1; i <= s.length(); i++) {
String word = s.substring(0, i);
if (wordSet.contains(word)) {
currentSentence.add(word);
dfs(s.substring(i), wordSet, currentSentence, result, dp);
currentSentence.remove(currentSentence.size() - 1);
}
}
}
public static List<String> wordBreak(String s, Set<String> wordSet) {
int n = s.length();
boolean[] dp = new boolean[n + 1];
dp[0] = true;
for (int i = 1; i <= n; i++) {
for (int j = 0; j < i; j++) {
if (dp[j] && wordSet.contains(s.substring(j, i))) {
dp[i] = true;
break;
}
}
}
List<String> result = new ArrayList<>();
if (dp[n]) {
List<String> currentSentence = new ArrayList<>();
dfs(s, wordSet, currentSentence, result, dp);
}
Collections.sort(result);
return result;
}
public static void main(String[] args) {
Set<String> wordSet = new HashSet<>(Arrays.asList("cat", "cats", "and", "sand", "dog"));
String s = "catsanddog";
List<String> result = wordBreak(s, wordSet);
for (String sentence : result) {
System.out.println(sentence);
}
}
}
Python
pythonCopy codedef dfs(s, wordSet, currentSentence, result, dp):
if not s:
result.append(" ".join(currentSentence))
return
if not dp[len(s)]:
return
for i in range(1, len(s) + 1):
word = s[:i]
if word in wordSet:
currentSentence.append(word)
dfs(s[i:], wordSet, currentSentence, result, dp)
currentSentence.pop()
def wordBreak(s, wordDict):
wordSet = set(wordDict)
n = len(s)
dp = [False] * (n + 1)
dp[0] = True
for i in range(1, n + 1):
for j in range(i):
if dp[j] and s[j:i] in wordSet:
dp[i] = True
break
result = []
if dp[n]:
currentSentence = []
dfs(s, wordSet, currentSentence, result, dp)
result.sort()
return result
# Example
s = "catsanddog"
wordDict = ["cat", "cats", "and", "sand", "dog"]
print(wordBreak(s, wordDict)) # Output: ["cats and dog", "cat sand dog"]
C#
using System;
using System.Collections.Generic;
public class WordBreakII {
public static void Dfs(string s, HashSet<string> wordSet, List<string> currentSentence, List<string> result, bool[] dp) {
if (s.Length == 0) {
result.Add(string.Join(" ", currentSentence));
return;
}
if (!dp[s.Length]) return;
for (int i = 1; i <= s.Length; i++) {
string word = s.Substring(0, i);
if (wordSet.Contains(word)) {
currentSentence.Add(word);
Dfs(s.Substring(i), wordSet, currentSentence, result, dp);
currentSentence.RemoveAt(currentSentence.Count - 1);
}
}
}
public static List<string> WordBreak(string s, HashSet<string> wordSet) {
int n = s.Length;
bool[] dp = new bool[n + 1];
dp[0] = true;
for (int i = 1; i <= n; i++) {
for (int j = 0; j < i; j++) {
if (dp[j] && wordSet.Contains(s.Substring(j, i - j))) {
dp[i] = true;
break;
}
}
}
List<string> result = new List<string>();
if (dp[n]) {
List<string> currentSentence = new List<string>();
Dfs(s, wordSet, currentSentence, result, dp);
}
result.Sort();
return result;
}
public static void Main() {
HashSet<string> wordSet = new HashSet<string> { "cat", "cats", "and", "sand", "dog" };
string s = "catsanddog";
List<string> result = WordBreak(s, wordSet);
foreach (var sentence in result) {
Console.WriteLine(sentence);
}
}
}
JavaScript
function dfs(s, wordSet, currentSentence, result, dp) {
if (s === "") {
result.push(currentSentence.join(" "));
return;
}
if (!dp[s.length]) return;
for (let i = 1; i <= s.length; i++) {
const word = s.slice(0, i);
if (wordSet.has(word)) {
currentSentence.push(word);
dfs(s.slice(i), wordSet, currentSentence, result, dp);
currentSentence.pop();
}
}
}
function wordBreak(s, wordDict) {
const wordSet = new Set(wordDict);
const n = s.length;
const dp = Array(n + 1).fill(false);
dp[0] = true;
for (let i = 1; i <= n; i++) {
for (let j = 0; j < i; j++) {
if (dp[j] && wordSet.has(s.slice(j, i))) {
dp[i] = true;
break;
}
}
}
const result = [];
if (dp[n]) {
dfs(s, wordSet, [], result, dp);
}
return result.sort();
}
console.log(wordBreak("catsanddog", ["cat", "cats", "and", "sand", "dog"])); // Output: ["cats and dog", "cat sand dog"]
Summary
- Time Complexity: Approximately O(2^n) with memoization, where
n
is the length of the strings
. Memoization optimizes redundant calculations. - Space Complexity: O(n) for the recursion stack and the
dp[]
array. - Algorithm: Use DFS with backtracking and memoization to generate all possible valid sentences and sort them lexicographically.