Find the starting indices of the substrings

Spread the love

You are given a string S and a list of words L, which is essentially an array or vector of strings with all the same length. Determine the beginning indices of the substrings in string S, which has every word found in list L. If string S is “barfooapplefoobar” and list of words (L) is [“ foo”, “barfu” then we have to search for substrings “foobar,” “barfu,” in string S. The sequence of words in list L does not matter. Note: Words within the list L can be repeated.

Table of Contents

Examples :

Input : S: "barfoothefoobarman" 
        L: ["foo", "bar"]                     
Output : 0 9
Explanation : 
// at index 0 : barfoo
// at index 9 : foobar 

Input : S: "catbatatecatatebat"
        L: ["cat", "ate", "bat"] 
Output : 0 3 9
Explanation : 
// at index 0 : catbatate
// at index 3 : batatecat
// at index 9 : catatebat    

Input : S : "abcdababcd"
        L : ["ab", "ab", "cd"]
Output : 0 2 4 
Explanation :
// at index 0 : abcdab
// at index 2 : cdabab
// at index 4 : ababcd

Input : S : "abcdababcd"
        L : ["ab", "ab"]
Output : 4

Approach : We can use Hashing Technique to solve the above problem.

Let’s walk through the procedures:

Declare a map (hash_map) containing every word from List L matching their occurrences within List L.

Go through all conceivable substrings in string S that match size_L (total character count generated if all the words in list L concatenated).

For every conceivable substring, first create a temporary map (temp_hash_map) from original map (hash_map).

Get the words from the substring; if the word appears in temp_hash_map we reduce its corresponding count; if it does not appear in temp_hash_map we merely break.

Following our substring, we search temp_hash_map for any key whose count exceeds 0. Should we discover no such key, all the words in list L were discovered in substring; should we discover a key whose count exceeds 0, we did not traverse the entire substring since we came across a word not in temp_hash_map.

C++

// CPP program to calculate the starting indices
// of substrings inside S which contains all the
// words present in List L.
#include <bits/stdc++.h>
using namespace std;

// Returns an integer vector consisting of starting
// indices of substrings present inside the string S
vector<int> findSubstringIndices(string S, 
							const vector<string>& L)
{

	// Number of a characters of a word in list L.
	int size_word = L[0].size();

	// Number of words present inside list L.
	int word_count = L.size();

	// Total characters present in list L.
	int size_L = size_word * word_count;

	// Resultant vector which stores indices.
	vector<int> res;

	// If the total number of characters in list L
	// is more than length of string S itself.
	if (size_L > S.size())
		return res;

	// Map stores the words present in list L
	// against it's occurrences inside list L
	unordered_map<string, int> hash_map;

	for (int i = 0; i < word_count; i++) 
		hash_map[L[i]]++; 

	for (int i = 0; i <= S.size() - size_L; i++) {
		unordered_map<string, int> temp_hash_map(hash_map);

		int j = i,count=word_count;

		// Traverse the substring
		while (j < i + size_L) {

			// Extract the word
			string word = S.substr(j, size_word);


			// If word not found or if frequency of current word is more than required simply break.
			if (hash_map.find(word) == hash_map.end()||temp_hash_map[word]==0)
				break;

			// Else decrement the count of word from hash_map
			else
			{ temp_hash_map[word]--;count--;} 


			j += size_word;
		}
	
		// Store the starting index of that substring when all the words in the list are in substring
		if (count == 0)
			res.push_back(i);
	}

	return res;
}

// Driver Code
int main()
{
	string S = "barfoothefoobarman";
	vector<string> L = { "foo", "bar" };
	vector<int> indices = findSubstringIndices(S, L);
	for (int i = 0; i < indices.size(); i++)
		cout << indices[i] << " ";
	return 0;
}

Java

// Java program to calculate the starting indices
// of substrings inside S which contains all the
// words present in List L.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;

class GFG 
{
	public static ArrayList<Integer> 
	findSubstring(String A, final List<String> B) 
	{

		// Number of a characters of a word in list L.
		int size_word = B.get(0).length();
		
		// Number of words present inside list L. 
		int word_count = B.size();

		// Total characters present in list L.
		int size_l = size_word * word_count;

		// Resultant vector which stores indices.
		ArrayList<Integer> res = new ArrayList<Integer>();
		int n = A.length();
		
		// If the total number of characters in list L 
		// is more than length of string S itself.
		if (size_l > n) 
		{
			return res;
		}

		// Map stores the words present in list L 
		// against it's occurrences inside list L 
		HashMap<String, Integer> hashMap = 
			new HashMap<String, Integer>();

		for (String word : B) 
		{
			hashMap.put(word, hashMap.getOrDefault(word, 0) + 1);
		}

		
		for (int i = 0; i <= n - size_l; i++) 
		{
			HashMap<String, Integer> tempMap = 
			(HashMap<String, Integer>) hashMap.clone();
			int j = i, count = word_count;
			
			// Traverse the substring 
			while (j < i + size_l) 
			{
				// Extract the word
				String word = A.substring(j, j + size_word);
			
				// If word not found or if frequency 
				// of current word is more than required simply break. 
				if (!hashMap.containsKey(word) || tempMap.get(word) == 0) 
				{
					break;
				} 
				
				// Else decrement the count of word from hash_map
				else
				{
					tempMap.put(word, tempMap.get(word) - 1);
					count--;
				}
				j += size_word;
			}
			
			// Store the starting index of that
			// substring when all the words in 
			// the list are in substring 
			if (count == 0)
			{
				res.add(i);
			}

		}
		return res;
	}

	// Driver code
	public static void main(String[] args) 
	{
		String S = "barfoothefoobarman";
		ArrayList<String> L = 
		new ArrayList<>(Arrays.asList("foo", "bar"));
		ArrayList<Integer> indices = findSubstring(S, L);
		for (Integer i : indices)
		{
			System.out.println(i);
		}
	}
}

// This code is contributed by Manish Sakariya

Python3

# Python3 program to calculate the starting indices
# of substrings inside S which contains all the
# words present in List L.

# Returns an integer vector consisting of starting
# indices of substrings present inside the string S
def findSubStringIndices(s, L):

	# Number of a characters of a word in list L.
	size_word = len(L[0])

	# Number of words present inside list L.
	word_count = len(L)

	# Total characters present in list L.
	size_L = size_word * word_count

	# Resultant vector which stores indices.
	res = []

	# If the total number of characters in list L
	# is more than length of string S itself.
	if size_L > len(s):
		return res

	# Map stores the words present in list L
	# against it's occurrences inside list L
	hash_map = dict()

	for i in range(word_count):
		if L[i] in hash_map:
			hash_map[L[i]] += 1
		else:
			hash_map[L[i]] = 1

	for i in range(0, len(s) - size_L + 1, 1):
		temp_hash_map = hash_map.copy()
		j = i
		count = word_count

		# Traverse the substring
		while j < i + size_L:

			# Extract the word
			word = s[j:j + size_word]

			# If word not found or if frequency of 
			# current word is more than required simply break.
			if (word not in hash_map or
				temp_hash_map[word] == 0):
				break

			# Else decrement the count of word
			# from hash_map
			else:
				temp_hash_map[word] -= 1
				count -= 1
			j += size_word

		# Store the starting index of that substring
		# when all the words in the list are in substring
		if count == 0:
			res.append(i)
	return res

# Driver Code
if __name__ == "__main__":
	s = "barfoothefoobarman"
	L = ["foo", "bar"]
	indices = findSubStringIndices(s, L)
	
	print(*indices)

# This code is contributed by
# sanjeev2552

JS

<script>

// JavaScript code to implement the above approach

// Returns an integer vector consisting of starting
// indices of substrings present inside the string S
function findSubStringIndices(s, L){

	// Number of a characters of a word in list L.
	let size_word = L[0].length

	// Number of words present inside list L.
	let word_count = L.length

	// Total characters present in list L.
	let size_L = size_word * word_count

	// Resultant vector which stores indices.
	let res = []

	// If the total number of characters in list L
	// is more than length of string S itself.
	if(size_L > s.length)
		return res

	// Map stores the words present in list L
	// against it's occurrences inside list L
	let hash_map = new Map()

	for(let i=0;i<word_count;i++){
		if(hash_map.has(L[i]))
			hash_map.set(L[i],hash_map.get(L[i]) + 1)
		else
			hash_map.set(L[i], 1)
	}

	for(let i=0;i< s.length - size_L + 1;i++){
		let temp_hash_map = new Map(hash_map)
		let j = i
		let count = word_count

		// Traverse the substring
		while(j < i + size_L){

			// Extract the word
			let word = s.substring(j,j+size_word)

			// If word not found or if frequency of
			// current word is more than required simply break.
			if (hash_map.has(word) == false || temp_hash_map.get(word) == 0)
				break

			// Else decrement the count of word
			// from hash_map
			else{
				temp_hash_map.set(word ,temp_hash_map.get(word)- 1)
				count -= 1
			}
			j += size_word
		}

		// Store the starting index of that substring
		// when all the words in the list are in substring
		if(count == 0)
			res.push(i)
	}
	return res
}

// Driver Code

let s = "barfoothefoobarman"
let L = ["foo", "bar"]
let indices = findSubStringIndices(s, L)

document.write(indices)

// This code is contributed by Shinjanpatra

</script>

Output

0 9

Time Complexity : O(N – K) * K N : length of string S. K : total length of list L if all the words are concatenated. If L : [“ab”, “cd”] then K = 4.

Space Complexity: O(K), where K : total length of list L