// Source: dsa-programs/Algorithms/DynamicProgramming/WordBreak.java (GitHub: SrinivasVadige/dsa-programs, branch master)
package Algorithms.DynamicProgramming;

import java.util.*;
import java.util.stream.Collectors;

/**
 * <pre>
    Given a string s and a dictionary of strings wordDict, return true if s can be segmented into a space-separated sequence of one or more dictionary words.
    n^2 allowed
    wordDict is like a set as every word is unique

    Idea: loop over wordDict and, for each word found in "s", split "s" into the parts left and right of that word;
    then repeat the same split on each part with the next word, and so on,
    until every remaining part of "s" is itself a word in wordDict.

    looks like binary tree

    top-down memo dp? -- remove the matched word in wordDict i.e n-1 and send to child?

    t-d return? =

    note that word in wordDict can have multiple instances in "s" --> so n-1 the list? or hashmap to check if already validated?

    </pre>

 * @author Srinivas Vadige, srinivas.vadige@gmail.com
 * @since 26 Oct 2024
 * @link 139. Word Break <a href="https://leetcode.com/problems/word-break/">LeetCode link</a>
 * @topics Array, Hash Table, String, Dynamic Programming, Memoization, Trie
 * @companies Amazon, Meta, Google, Microsoft, Walmart Labs, Apple, MongoDB, Bloomberg, Uber, Netflix, TikTok, Adobe, Salesforce, Oracle, Intuit, Coupang, Yahoo, LinkedIn, Goldman Sachs, Zoho
 */
public class WordBreak {
    public static void main(String[] args) {
        // Demo driver: runs every approach on one sample input and prints each result.
        // Other sample inputs to try:
        // "ccaccc", ["cc","ac"]
        // "leetcode", ["leet", "code"]
        // "catsandog", ["cats","dog","sand","and","cat"]
        String s = "ccaccc";
        List<String> wordDict = Arrays.asList("cc","ac");

        // loops "s" ---> O(n^2) time
        System.out.println("wordBreak using backtracking over s: " + wordBreakUsingBacktrackingOverS(s, wordDict));
        System.out.println("wordBreak using topDownMemoDp over s: " + wordBreakUsingTopDownMemoWithDfsDpOverS(s, wordDict));
        System.out.println("wordBreak using bottomUpTabulationDp over s: " + wordBreakUsingBottomUpTabulationDpWithBfsOverS(s, wordDict));


        // loops "wordDict" ---> O(mn) time
        System.out.println("wordBreak using backtracking over wordDict: " + wordBreakStartsUsingBacktrackingOverWordDictLoop(s, wordDict));
        // Label previously read "wordBrea ..." (typo in the printed text).
        System.out.println("wordBreak using topDownMemoDp over WordDict: " + wordBreakUsingTopDownMemoDpOverWordDictLoop(s, wordDict));
        System.out.println("wordBreak using bottomUpTabulationDp over WordDict: " + wordBreakUsingBottomUpTabulationDpOverWordDictLoop(s, wordDict));


        // Trie ---> O(n^2+m⋅k) time
        System.out.println("wordBreakTrieApproach: " + wordBreakTrieApproach(s, wordDict));
    }


    // LOOP OVER "s"

    /**
     * @TimeComplexity O(2^n) -- TLE
     * @SpaceComplexity O(k), where k = wordDict.size()


     s = "leetcode", wordDict = ["lee", "code", "leetc", "ode"]
     so, only "leetc + ode" works


     ================= DECISION TREE ================


                     ✅ 2 matches      l e e t c o d e
                                       l
                            ________________|________________
                            |                               |
                      l e e t c o d e                l e e t c o d e
                      l   r                          l       r
                             |                              |
      ❌ 0 matches   l e e t c o d e                l e e t c o d e    ✅ 1 match
                            l         r                        l  r
                                                             |
                                                     l e e t c o d e
                                                                     l
                                                          ✅ IndexOutOfBound
                                                         DONE

     */
    public static boolean wordBreakUsingBacktrackingOverS(String s, List<String> wordDict) {
        // Unmemoized baseline: tries every split point of s, so it is exponential in the worst case.
        // Returns true when s is a concatenation of dictionary words; "" is trivially segmentable.
        return backtrack(s, 0, new HashSet<>(wordDict));
    }

    /**
     * Checks whether the suffix of {@code s} starting at {@code l} can be split into dictionary words.
     *
     * @param s       the full input string
     * @param l       inclusive start index of the suffix still to be segmented
     * @param wordSet dictionary words
     * @return {@code true} as soon as one complete segmentation of the suffix is found
     */
    private static boolean backtrack(String s, int l, Set<String> wordSet) {
        if (l == s.length()) {
            return true; // walked past the last char: everything before was matched
        }
        for (int r = l + 1; r <= s.length(); r++) {
            // Stop at the first successful split instead of scanning the remaining end indices.
            if (wordSet.contains(s.substring(l, r)) && backtrack(s, r, wordSet)) {
                return true;
            }
        }
        return false;
    }


    /**
     * @TimeComplexity O(n^2)
     * @SpaceComplexity O(n + k), n = s.length(), k = wordDict.size()
     */
    public static boolean wordBreakUsingTopDownMemoWithDfsDpOverS(String s, List<String> wordDict) {
        // Memoized DFS over start indices; returns true when s is a concatenation of dictionary words.
        // seen[l] caches the answer for the suffix starting at l (null = not computed yet).
        Set<String> wordSet = new HashSet<>(wordDict);
        return dfs(s, 0, wordSet, new Boolean[s.length()]);
    }

    /**
     * Checks whether the suffix of {@code s} starting at {@code l} can be split into dictionary words.
     *
     * @param s       the full input string
     * @param l       inclusive start index of the suffix still to be segmented
     * @param wordSet dictionary words
     * @param seen    memo indexed by start index; both successes and failures are cached
     * @return {@code true} if the suffix is segmentable
     */
    private static boolean dfs(String s, int l, Set<String> wordSet, Boolean[] seen) {
        if (l == s.length()) {
            return true;
        }
        if (seen[l] != null) {
            return seen[l];
        }

        boolean isFound = false;
        // "&& !isFound" ends the scan at the first end index that leads to a full segmentation.
        for (int r = l + 1; r <= s.length() && !isFound; r++) {
            isFound = wordSet.contains(s.substring(l, r)) && dfs(s, r, wordSet, seen);
        }
        return seen[l] = isFound;
    }


    /**
     * @TimeComplexity O(n^2)
     * @SpaceComplexity O(n + k), n = s.length(), k = wordDict.size()


        here we have to use combination of (seenSet and queue) to avoid -> duplication check
        or
        we can use boolean[] dp array just like {@link #wordBreakUsingBottomUpTabulationDpOverWordDictLoop}
     */
    public static boolean wordBreakUsingBottomUpTabulationDpWithBfsOverS(String s, List<String> wordDict) {
        // BFS over "next start" indices: index k is enqueued once s[0..k) is known to be segmentable.
        // Returns true when index n (just past the last char) is reached; "" therefore yields true
        // instead of failing on an out-of-range array access.
        Set<String> wordSet = new HashSet<>(wordDict);
        int n = s.length();
        boolean[] queued = new boolean[n + 1]; // queued[k]: k was already enqueued, so it is never processed twice

        Queue<Integer> starts = new ArrayDeque<>();
        starts.add(0);
        queued[0] = true;

        while (!starts.isEmpty()) {
            int l = starts.poll();
            if (l == n) {
                return true; // the whole string has been covered
            }
            for (int r = l + 1; r <= n; r++) { // r is the exclusive end of the candidate word
                if (!queued[r] && wordSet.contains(s.substring(l, r))) {
                    queued[r] = true;
                    starts.add(r);
                }
            }
        }

        return false;
    }


    /**
     * Breadth-first variant that processes reachable prefix lengths in discovery order.
     *
     * @param s        string to segment
     * @param wordDict dictionary words
     * @return {@code true} if the prefix of length {@code s.length()} is reachable from the empty prefix
     */
    public static boolean wordBreakUsingBottomUpTabulationDpWithBfsOverS2(String s, List<String> wordDict) {
        final Set<String> dictionary = new HashSet<>(wordDict);
        final int len = s.length();
        final boolean[] reachable = new boolean[len + 1];
        reachable[0] = true;

        // FIFO of prefix lengths that are known to be segmentable and still need expanding.
        final Deque<Integer> pending = new ArrayDeque<>();
        pending.addLast(0);

        while (!pending.isEmpty()) {
            final int from = pending.pollFirst();
            for (int to = from + 1; to <= len; to++) {
                if (reachable[to] || !dictionary.contains(s.substring(from, to))) {
                    continue; // either already discovered or s[from..to) is not a word
                }
                reachable[to] = true;
                pending.addLast(to);
            }
        }

        return reachable[len];
    }


    /**
     * Same reachability search as the other BFS variants, but the work list is grown through a
     * {@link ListIterator} while it is being traversed.
     *
     * @param s        string to segment
     * @param wordDict dictionary words
     * @return {@code true} if the whole of {@code s} can be covered by dictionary words
     */
    public static boolean wordBreakUsingBottomUpTabulationDpWithBfsOverS3(String s, List<String> wordDict) {
        Set<String> vocabulary = new HashSet<>(wordDict);
        int total = s.length();
        boolean[] reachable = new boolean[total + 1];
        reachable[0] = true;

        List<Integer> frontier = new ArrayList<>();
        frontier.add(0);
        ListIterator<Integer> cursor = frontier.listIterator();

        while (cursor.hasNext()) {
            int from = cursor.next();
            for (int to = from + 1; to <= total; to++) {
                boolean alreadyKnown = reachable[to];
                if (alreadyKnown || !vocabulary.contains(s.substring(from, to))) {
                    continue;
                }
                reachable[to] = true;
                // add() inserts before the cursor and steps over the new element;
                // previous() steps back so the element is still returned by a later next().
                cursor.add(to);
                cursor.previous();
            }
        }

        return reachable[total];
    }


    /**
        s = "helloworld", wordDict = ["world", "ello", "he", "lo", "ll"]

     initial dp of wordSizes
        dp =   "",  "h",   "he", "hel","hell","hello","hellow","hellowo", "hellowor", "helloworl", "helloworld"

           ""     h      e      l      l      o       w          o          r           l           d
         [true, false, false, false, false, false,  false,     false,     false,      false,      false]

         dp[0]=true i.e we can make "" without any wordDict words

        wSize=1, trues=[0], l=0
          l=0
          ""     h      e      l      l      o      w      o      r      l      d
        [true, false, false, false, false, false, false, false, false, false, false]    l=0 --> cause we have "he". So, break;
        [true, false, true, false, false, false, false, false, false, false, false]  wSize=2, trues=[], l=1
        [true, false, true, false, false, false, false, false, false, false, false]
        [true, false, true, false, false, false, false, false, false, false, false]

     */
    public static boolean wordBreakUsingBottomUpTabulationDpWithBfsOverS4(String s, List<String> wordDict) {
        // Purpose: decide whether s can be split into dictionary words by growing the prefix one char at a time.
        // Returns dp[n], i.e. whether the full-length prefix is segmentable; for "" that is dp[0] == true.
        Set<String> wordSet = new HashSet<>(wordDict);
        int n = s.length();
        boolean[] dp = new boolean[n + 1]; // dp[k] == true when the prefix s[0..k) is segmentable ("" up to the whole s)
        dp[0] = true; // the empty prefix needs no words

        List<Integer> trues = new ArrayList<>(); // every k with dp[k] == true, appended in increasing order
        trues.add(0); // l=0

        for (int wSize = 1; wSize <= n; wSize++) { // wSize = prefix length, used as the exclusive right bound
            for (int l : trues) { // l = inclusive left bound; every stored entry is smaller than wSize
                if (wordSet.contains(s.substring(l, wSize))) {
                    dp[wSize] = true;
                    trues.add(wSize);
                    break; // leave the for-each right after mutating the list: its iterator is never advanced again, so no CME
                }
            }
        }

        return dp[n];

        /*

        NOTE: variants that look similar but mutate the collection while it is still being iterated.

        (1) The break only leaves the inner loop, so the outer for-each advances its iterator after the add:

        List<Integer> trues = new ArrayList<>(); // --- just like queue
        trues.add(0);
        for (int l : trues) { // lInclusive
            for (int r = 1; r <= n; r++) { // rExclusive
                if (wordSet.contains(s.substring(l, r))) {
                    dp[r] = true;
                    trues.add(r); // ----> ❌ Throws ConcurrentModificationException
                    break;
                }
            }
        }


         and

        (2) Same problem with a HashSet:

        Set<Integer> trues = new HashSet<>();
        trues.add(0);
        for (int l : trues) {
            for (int r = l + 1; r <= n; r++) {
                if (wordSet.contains(s.substring(l, r))) {
                    dp[r] = true;
                    trues.add(r); // ❌ CME or silent skip
                }
            }
        }


        and

        (3) ListIterator.add() without a following previous(): the new element lands before the cursor:

        List<Integer> trues = new ArrayList<>();
        trues.add(0);
        ListIterator<Integer> it = trues.listIterator();

        while (it.hasNext()) {
            int l = it.next();
            for (int r = l + 1; r <= n; r++) {
                if (wordSet.contains(s.substring(l, r)) && !dp[r]) {
                    dp[r] = true;
                    it.add(r); // ❌ No CME, but the added element is silently skipped by the traversal
                }
            }
        }


         */
    }


    /**
     * working but TLE
     * check {@link #wordBreakIndexOfApproach} for more understanding
     * note that wordBreakIndexOfApproach() only works for unique words with unique chars


     s = "leetcode", wordDict = ["lee", "code", "leetc", "ode"]
     so, only "leetc + ode" works


        ============================================ DECISION TREE ===============================================


                       check "tcode"                         check "leetcode"      check "ode"
                         new i=3                                      i=0         new i=5
                          _____________________________________________|__________________________________________
                          |                            |                            |                            |
                         lee                         code                         leetc          check ""       ode
                          ✅                          ❌                           ✅            new i=8        ❌
       ___________________|__________________                  _____________________|__________________
       |           |           |            |                  |            |            |            |
      lee         code        leetc        ode                lee          code         leetc        ode
       ❌          ❌          ❌          ❌                  ❌          ❌           ❌          ✅
                                                                                                     DONE


    */
    public static boolean wordBreakStartsUsingBacktrackingOverWordDictLoop(String s, List<String> wordDict) {
        // Unmemoized search: for every dictionary word that prefixes s, recurse on the remainder.
        // Returns true when s is a concatenation of dictionary words ("" is trivially true).
        return backtrack(s, wordDict);
    }

    /**
     * Tries each dictionary word as a prefix of {@code s} and recurses on what is left.
     *
     * @param s    remaining, not yet matched part of the input
     * @param list dictionary words
     * @return {@code true} if {@code s} can be consumed completely
     */
    private static boolean backtrack(String s, List<String> list) {
        if (s.isEmpty()) {
            return true;
        }

        for (String w : list) {
            if (w.isEmpty()) {
                continue; // "" prefixes everything and consumes nothing: recursing on it would never terminate
            }
            if (s.startsWith(w) && backtrack(s.substring(w.length()), list)) {
                return true;
            }
        }
        return false;
    }


    /**
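     * @TimeComplexity O(n * k * L), where n = s.length(), k = wordDict.size(), L = longest word length
     * @SpaceComplexity O(n) for the memo array and the recursion stack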
     * Top-Down Memo DP but as we start from 0 index we call it as dfs i.e increase depth
     * and if matched then make the start index as "after that word"
     */
    public static boolean wordBreakUsingTopDownMemoDpOverWordDictLoop(String s, List<String> wordDict) {
        // Memoized DFS over start indices; returns true when s is a concatenation of dictionary words.
        return dfs(s, 0, wordDict, new Boolean[s.length()]);
    }

    /**
     * Checks whether the suffix of {@code s} starting at {@code i} can be split into dictionary words.
     *
     * @param s        the full input string
     * @param i        inclusive start index of the suffix still to be matched
     * @param wordDict dictionary words
     * @param dp       memo indexed by start index ({@code null} = not computed yet)
     * @return {@code true} if the suffix is segmentable
     */
    private static boolean dfs(String s, int i, List<String> wordDict, Boolean[] dp) {
        if (i == s.length()) {
            return true; // i.e., exactly matched "leetcode↓" ---> index just past the end
        }
        if (dp[i] != null) {
            return dp[i];
        }

        for (String word : wordDict) {
            if (word.isEmpty()) {
                continue; // "" matches at i without advancing; recursing would revisit i before dp[i] is set
            }
            if (s.startsWith(word, i) && dfs(s, i + word.length(), wordDict, dp)) { // continue right after the matched word
                return dp[i] = true;
            }
        }
        return dp[i] = false;
    }


    /**
     * <pre>
     * traverse only the custom start indices and check if any word in wordDict can be formed from that index
     * works on "future start indices" or "next validation start indices" - i.e up to index-1 substring is already calculated
     * so, after total validation, the future start index of the complete string s="leetcode" is s.length() => "leetcode↓" i.e not s.length()-1
     * Note: In this all possible future start index validations, we might have same start index
     * --> i.e in this kind of dp we do not return already calculated value but we might assign same value "true" to same index in multiple possibilities
     * so, using this we can also calculate count of all possibilities to form the s string with some extra logic
     * </pre>
     * @TimeComplexity O(n * k * L), where n = s.length(), k = wordDict.size(), L = longest word length
     * @SpaceComplexity O(n) for the dp array
     */
    public static boolean wordBreakUsingBottomUpTabulationDpOverWordDictLoop(String s, List<String> wordDict) {
        final int n = s.length();
        // canStartAt[k]: everything before index k is already covered, so a new word may begin at k.
        // Slot n stands for "just past the last char", i.e. the whole string is covered.
        final boolean[] canStartAt = new boolean[n + 1];
        canStartAt[0] = true;

        int start = 0;
        while (start < n) {
            if (canStartAt[start]) {
                for (String candidate : wordDict) {
                    int next = start + candidate.length();
                    if (next <= n && s.startsWith(candidate, start)) {
                        canStartAt[next] = true; // the char after this word becomes a valid start
                    }
                }
            }
            start++;
        }
        return canStartAt[n];
    }


    /**
     * Right-to-left tabulation: {@code dp[i]} tells whether the suffix {@code s.substring(i)} is segmentable.
     *
     * @param s        string to segment
     * @param wordDict dictionary words
     * @return {@code true} if the whole string is segmentable; the empty string yields {@code true}
     */
    public static boolean wordBreakUsingBottomUpTabulationDpOverWordDictLoop2(String s, List<String> wordDict) {
        int n = s.length();
        Set<String> wordSet = new HashSet<>(wordDict); // hashed lookup instead of a linear List.contains per substring
        boolean[] dp = new boolean[n + 1]; // one extra slot so that index n (empty suffix) exists even when n == 0
        dp[n] = true; // the empty suffix is trivially segmentable

        for (int i = n - 1; i >= 0; i--) {
            for (int j = i + 1; j <= n; j++) { // j == n covers "the whole suffix is a single word"
                if (dp[j] && wordSet.contains(s.substring(i, j))) { // dp[j] == true means s.substring(j) is valid
                    dp[i] = true;
                    break;
                }
            }
        }
        return dp[0]; // "[l]eetcode" i.e. the validation reached the first char, moving right to left
    }


    /**
     * @TimeComplexity O(n^2+m⋅k)
     * @SpaceComplexity O(n+m⋅k)

          s = "leetcode", wordDict = ["lee", "code", "leetc", "ode"]
          so, only "leetc + ode" works


                                        ================= TRIE NODE TREE ================


                                                              root
                                                               ""
                                          ______________________|_____________________
                                          |                     |                    |
                                          c                     l                    o
                                          |                     |                    |
                                          o                     e                    d
                                          |                     |                    |
                                          d                     e true               e true
                                          |                     |
                                          e true                t
                                                                |
                                                                c true

     */
    public static boolean wordBreakTrieApproach(String s, List<String> wordDict) {
        // Builds a trie of the dictionary, then walks it from every reachable start index of s.
        // Returns true when s is a concatenation of dictionary words; "" yields true via dp[0].
        class TrieNode {
            boolean isWord; // true when the path from the root to this node spells a dictionary word
            final Map<Character, TrieNode> children = new HashMap<>();
        }
        TrieNode root = new TrieNode();

        // Step 1: Build Trie -- one node visit per character of every dictionary word
        for (String word : wordDict) {
            TrieNode curr = root;
            for (char c : word.toCharArray()) {
                curr = curr.children.computeIfAbsent(c, k -> new TrieNode()); // create-or-get in a single lookup
            }
            curr.isWord = true;
        }

        // Step 2: Check if s can be covered -- O(n^2)
        int n = s.length();
        boolean[] dp = new boolean[n + 1]; // dp[k]: the prefix s[0..k) is segmentable; sized n + 1 so "" is representable
        dp[0] = true;
        for (int l = 0; l < n; l++) {
            if (!dp[l]) {
                continue; // nothing ends right before l, so no word may start here
            }
            TrieNode curr = root;
            for (int r = l; r < n; r++) {
                curr = curr.children.get(s.charAt(r));
                if (curr == null) {
                    break; // no dictionary word continues with this character
                }
                if (curr.isWord) {
                    dp[r + 1] = true; // a word ends at r, so r + 1 is a valid next start
                }
            }
        }
        return dp[n];
    }


    // -------------- MY THOUGHTS - 26/10/2024 -------------

    @SuppressWarnings("unused")
    public static boolean wordBreakIndexOfApproach(String s, List<String> wordDict) {
        // Early experiment: every dictionary word is mapped to a usage counter that starts at 0.
        Map<String, Integer> usageByWord = wordDict.stream()
            .collect(Collectors.groupingBy(word -> word, Collectors.summingInt(word -> 0)));
        return rec1(s, usageByWord);
    }

    // Known limitation -- gives wrong answers for "ccaccc" ["cc","ac"] and "aaaaaaa" ["aaaa","aaa"].
    // Takes the first map entry that occurs anywhere in s, bumps its counter, and recurses on the
    // text to the left and to the right of that occurrence; other entries are never tried for this s.
    public static boolean rec1(String s, Map<String, Integer> map) {
        if (s.isEmpty()) {
            return true;
        }
        for (Map.Entry<String, Integer> candidate : map.entrySet()) {
            String word = candidate.getKey();
            if (s.equals(word) || map.containsKey(s)) {
                return true; // s itself is a dictionary word
            }

            int at = s.indexOf(word);
            if (at < 0) {
                continue; // this word does not occur in s; try the next entry
            }

            map.put(word, candidate.getValue() + 1);
            String left = s.substring(0, at);
            return rec1(left, map) && rec1(s.substring(at + word.length()), map);
        }
        // Reached only when no entry occurs in s (or the map is empty).
        return false;
    }

    // success for "ccaccc" ["cc","ac"] and "aaaaaaa" ["aaaa","aaa"] but
    // --- failing for "catsandogcat" ["cats","dog","sand","and","cat","an"]
    // because it can divide it with "sand", "and"
    // write a logic to check with all start possibilities like cat and cats...
    // so, use startsWith() instead of indexOf??
    /**
     * Second indexOf-based experiment: like {@code rec1}, but a word whose counter is already
     * positive is skipped instead of being used for another split.
     *
     * @param s   text still to be covered
     * @param map dictionary word -> usage counter; the counters are modified by this call
     * @return {@code true} for empty {@code s}, when {@code s} equals an entry key, or when both
     *         recursive halves return {@code true}; see the note on the fall-through at the end
     */
    public boolean rec2(String s, Map<String, Integer> map ){
        //System.out.println(s);
        int i = -1; // index of the most recent indexOf() lookup; -1 when nothing was found (or no entry was visited)
        if(s.isEmpty()) return true;
        for (Map.Entry<String, Integer> entry: map.entrySet()){
            //System.out.println(s + " " + entry.getKey());
            if(s.equals(entry.getKey())){
                map.merge(s, 1, Integer::sum); // count this exact match
                return true;
            }

            i = s.indexOf(entry.getKey()); // first occurrence of the word anywhere in s

            if(i>-1){
                if(entry.getValue() > 0) continue; // word already counted once: move on to the next entry
                map.put(entry.getKey(), entry.getValue()+1);
                // split around the occurrence: the text before it and the text after it must both succeed
                return rec2( i==0? "" : s.substring(0, i), map)
                && rec2( s.substring(i+entry.getKey().length()), map );
            }
        }
        // Only the last visited entry decides here: false when it did not occur in s.
        if(i==-1) return false;
        // NOTE(review): reached when the last visited entry occurs in s but was skipped because its
        // counter was already positive -- returning true in that case looks unintended; confirm.
        return true;
    }


    /**
     * Top-down memoized version of {@link #wordBreakStartsUsingBacktrackingOverWordDictLoop}:
     * the answer for each start index is cached, so a suffix is examined at most once.
     * <p>
     * Example: "catsandogcat", ["cats","dog","sand","and","cat","an"] -- both "cats" + "and" and
     * "cat" + "sand" reach the suffix "ogcat"; the second arrival reuses the cached failure.
     *
     * @param s        string to segment
     * @param wordDict dictionary words; empty words are ignored
     * @return {@code true} if {@code s} is a concatenation of dictionary words ("" yields {@code true})
     */
    public static boolean wordBreakStartsWithTopDownMemoDp(String s, List<String> wordDict) {
        return rec(s, wordDict, new Boolean[s.length()], 0);
    }

    /**
     * Checks whether the suffix of {@code s} starting at {@code i} can be split into dictionary words.
     *
     * @param s    the full input string
     * @param list dictionary words
     * @param memo cached result per start index ({@code null} = not computed yet)
     * @param i    inclusive start index of the suffix still to be matched
     * @return {@code true} if the suffix is segmentable
     */
    private static boolean rec(String s, List<String> list, Boolean[] memo, int i) {
        if (i == s.length()) {
            return true; // consumed the whole string
        }
        if (memo[i] != null) {
            return memo[i]; // failures are cached too, which is what removes the exponential blow-up
        }

        boolean found = false;
        for (String w : list) {
            // Skip "": it matches without advancing and would recurse on the same index forever.
            if (!w.isEmpty() && s.startsWith(w, i) && rec(s, list, memo, i + w.length())) {
                found = true;
                break;
            }
        }
        return memo[i] = found;
    }

}