An array of N words is given. Each word consists of small letters ('a' − 'z'). Our goal is to concatenate the words in such a way as to obtain a single word with the longest possible substring composed of one particular letter. Find the length of such a substring.
Write a function:
def solution(words)
that, given an array words containing N strings, returns the length of the longest substring of a word created as described above.
Examples:
1. Given N=3 and words=["aabb", "aaaa", "bbab"], your function should return 6. One of the best concatenations is words[1] + words[0] + words[2] = "aaaaaabbbbab". The longest substring is composed of letter 'a' and its length is 6.
2. Given N=3 and words=["xxbxx", "xbx", "x"], your function should return 4. One of the best concatenations is words[0] + words[2] + words[1] = "xxbxxxxbx". The longest substring is composed of letter 'x' and its length is 4.
3. Given N=4 and words=["dd", "bb", "cc", "dd"], your function should return 4. One of the best concatenations is words[0] + words[3] + words[1] + words[2] = "ddddbbcc". The longest substring is composed of letter 'd' and its length is 4.
Write an efficient algorithm for the following assumptions:
- N is an integer within the range [1..100,000];
- all the words are non−empty and consist only of lowercase letters (a−z);
- S denotes the sum of the lengths of words; S is an integer within the range [1..100,000].
def add_complete(letter, lenght, index, complete_word):
words = complete_word.get(letter)
if not words:
words = []
complete_word[letter] = words
words.append((lenght, index))
def fast_solution(words):
prefixes = {}
complete_words = {}
letters = set()
suffixes = {}
best_concat = 0
for i in range(len(words)):
word = words[i]
initial_letter = word[0]
last_letter = word[-1]
letters.add(initial_letter)
letters.add(last_letter)
j = 0
while j < len(word) and word[j] == initial_letter:
j = j + 1
prefix_candidate = j
if prefix_candidate >= len(word):
add_complete(initial_letter, len(word), i, complete_words)
continue
best_prefix = prefixes.get(initial_letter)
if not best_prefix:
best_prefix = (0, -1, 0, -1)
if prefix_candidate >= best_prefix[0]:
prefixes[initial_letter] = (prefix_candidate, i, best_prefix[0], best_prefix[1])
else:
if prefix_candidate >= best_prefix[2]:
prefixes[initial_letter] = (best_prefix[0], best_prefix[1], prefix_candidate, i)
j = - 1
while word[j] == last_letter:
j = j - 1
suffix_candidate = - j - 1
best_suffix = suffixes.get(last_letter)
if not best_suffix:
best_suffix = (0, -1, 0, -1)
if suffix_candidate > best_suffix[0]:
suffixes[last_letter] = (suffix_candidate, i, best_suffix[0], best_suffix[1])
else:
if suffix_candidate >= best_suffix[2]:
suffixes[last_letter] = (best_suffix[0], best_suffix[1], suffix_candidate, i)
i = prefix_candidate
while i <= len(word) - suffix_candidate:
j = i + 1
count = 1
while j < len(word) and word[i] == word[j]:
count += 1
j += 1
if count > best_concat:
best_concat = count
i = j
for key in letters:
suffix = suffixes.get(key)
prefix = prefixes.get(key)
if not suffix:
suffix = (0, -1)
if not prefix:
prefix = (0, -1)
if suffix[1] == prefix[1] and suffix[0] > 0:
candidate = max(suffix[0] + prefix[2], prefix[0] + suffix[2])
else:
candidate = suffix[0] + prefix[0]
cwords = complete_words.get(key)
if not cwords:
cwords = []
for word in cwords:
candidate += word[0]
if candidate > best_concat:
best_concat = candidate
return best_concat
def add_complete(letter, lenght, index, complete_word):
words = complete_word.get(letter)
if not words:
words = []
complete_word[letter] = words
words.append((lenght, index))
def solution(words):
prefixes = {}
complete_words = {}
letters = set()
suffixes = {}
best_concat = 0
for i in range(len(words)):
word = words[i]
initial_letter = word[0]
last_letter = word[-1]
letters.add(initial_letter)
letters.add(last_letter)
j = 0
while j < len(word) and word[j] == initial_letter:
j = j + 1
prefix_candidate = j
if prefix_candidate >= len(word):
add_complete(initial_letter, len(word), i, complete_words)
continue
best_prefix = prefixes.get(initial_letter)
if not best_prefix:
best_prefix = (0, -1, 0, -1)
if prefix_candidate >= best_prefix[0]:
prefixes[initial_letter] = (prefix_candidate, i, best_prefix[0], best_prefix[1])
else:
if prefix_candidate >= best_prefix[2]:
prefixes[initial_letter] = (best_prefix[0], best_prefix[1], prefix_candidate, i)
j = - 1
while word[j] == last_letter:
j = j - 1
suffix_candidate = - j - 1
best_suffix = suffixes.get(last_letter)
if not best_suffix:
best_suffix = (0, -1, 0, -1)
if suffix_candidate > best_suffix[0]:
suffixes[last_letter] = (suffix_candidate, i, best_suffix[0], best_suffix[1])
else:
if suffix_candidate >= best_suffix[2]:
suffixes[last_letter] = (best_suffix[0], best_suffix[1], suffix_candidate, i)
i = prefix_candidate
while i <= len(word) - suffix_candidate:
j = i + 1
count = 1
while j < len(word) and word[i] == word[j]:
count += 1
j += 1
if count > best_concat:
best_concat = count
i = j
for key in letters:
suffix = suffixes.get(key)
prefix = prefixes.get(key)
if not suffix:
suffix = (0, -1)
if not prefix:
prefix = (0, -1)
if suffix[1] == prefix[1] and suffix[0] > 0:
candidate = max(suffix[0] + prefix[2], prefix[0] + suffix[2])
else:
candidate = suffix[0] + prefix[0]
cwords = complete_words.get(key)
if not cwords:
cwords = []
for word in cwords:
candidate += word[0]
if candidate > best_concat:
best_concat = candidate
return best_concat
def add_complete(letter, lenght, index, complete_word):
words = complete_word.get(letter)
if not words:
words = []
complete_word[letter] = words
words.append((lenght, index))
def solution(words):
prefixes = {}
complete_words = {}
letters = set()
suffixes = {}
best_concat = 0
for i in range(len(words)):
word = words[i]
initial_letter = word[0]
last_letter = word[-1]
letters.add(initial_letter)
letters.add(last_letter)
j = 0
while j < len(word) and word[j] == initial_letter:
j = j + 1
prefix_candidate = j
if prefix_candidate >= len(word):
add_complete(initial_letter, len(word), i, complete_words)
continue
best_prefix = prefixes.get(initial_letter)
if not best_prefix:
best_prefix = (0, -1, 0, -1)
if prefix_candidate >= best_prefix[0]:
prefixes[initial_letter] = (prefix_candidate, i, best_prefix[0], best_prefix[1])
else:
if prefix_candidate >= best_prefix[2]:
prefixes[initial_letter] = (best_prefix[0], best_prefix[1], prefix_candidate, i)
j = - 1
while word[j] == last_letter:
j = j - 1
suffix_candidate = - j - 1
best_suffix = suffixes.get(last_letter)
if not best_suffix:
best_suffix = (0, -1, 0, -1)
if suffix_candidate > best_suffix[0]:
suffixes[last_letter] = (suffix_candidate, i, best_suffix[0], best_suffix[1])
else:
if suffix_candidate >= best_suffix[2]:
suffixes[last_letter] = (best_suffix[0], best_suffix[1], suffix_candidate, i)
i = prefix_candidate
while i <= len(word) - suffix_candidate:
j = i + 1
count = 1
while j < len(word) and word[i] == word[j]:
count += 1
j += 1
if count > best_concat:
best_concat = count
i = j
for key in letters:
suffix = suffixes.get(key)
prefix = prefixes.get(key)
if not suffix:
suffix = (0, -1)
if not prefix:
prefix = (0, -1)
if suffix[1] == prefix[1] and suffix[0] > 0:
candidate = max(suffix[0] + prefix[2], prefix[0] + suffix[2])
else:
candidate = suffix[0] + prefix[0]
cwords = complete_words.get(key)
if not cwords:
cwords = []
for word in cwords:
candidate += word[0]
if candidate > best_concat:
best_concat = candidate
return best_concat
def add_complete(letter, lenght, index, complete_word):
words = complete_word.get(letter)
if not words:
words = []
complete_word[letter] = words
words.append((lenght, index))
def solution(words):
prefixes = {}
complete_words = {}
letters = set()
suffixes = {}
best_concat = 0
for i in range(len(words)):
word = words[i]
initial_letter = word[0]
last_letter = word[-1]
letters.add(initial_letter)
letters.add(last_letter)
j = 0
while j < len(word) and word[j] == initial_letter:
j = j + 1
prefix_candidate = j
if prefix_candidate >= len(word):
add_complete(initial_letter, len(word), i, complete_words)
continue
best_prefix = prefixes.get(initial_letter)
if not best_prefix:
best_prefix = (0, -1, 0, -1)
if prefix_candidate >= best_prefix[0]:
prefixes[initial_letter] = (prefix_candidate, i, best_prefix[0], best_prefix[1])
else:
if prefix_candidate >= best_prefix[2]:
prefixes[initial_letter] = (best_prefix[0], best_prefix[1], prefix_candidate, i)
j = - 1
while word[j] == last_letter:
j = j - 1
suffix_candidate = - j - 1
best_suffix = suffixes.get(last_letter)
if not best_suffix:
best_suffix = (0, -1, 0, -1)
if suffix_candidate > best_suffix[0]:
suffixes[last_letter] = (suffix_candidate, i, best_suffix[0], best_suffix[1])
else:
if suffix_candidate >= best_suffix[2]:
suffixes[last_letter] = (best_suffix[0], best_suffix[1], suffix_candidate, i)
i = prefix_candidate
while i <= len(word) - suffix_candidate:
j = i + 1
count = 1
while j < len(word) and word[i] == word[j]:
count += 1
j += 1
if count > best_concat:
best_concat = count
i = j
for key in letters:
suffix = suffixes.get(key)
prefix = prefixes.get(key)
if not suffix:
suffix = (0, -1)
if not prefix:
prefix = (0, -1)
if suffix[1] == prefix[1] and suffix[0] > 0:
candidate = max(suffix[0] + prefix[2], prefix[0] + suffix[2])
else:
candidate = suffix[0] + prefix[0]
cwords = complete_words.get(key)
if not cwords:
cwords = []
for word in cwords:
candidate += word[0]
if candidate > best_concat:
best_concat = candidate
return best_concat
The solution obtained perfect score.
S - sum of the lengths of words
N = 4, S = 30, each word contains long substring with one letter.
N = 6, S = 50, the longest prefix and the longest suffix occur in the same word.
N = 25, S = 400, each word contains long substring with one letter.
N = 25, S = 400, the longest prefix and the longest suffix occur in the same word.
S = 20,000, each word contains long substring with one letter.
S = 20,000, the longest prefix and the longest suffix occur in the same word.
S = 100,000, each word contains long substring with one letter.
S = 100,000, the longest prefix and the longest suffix occur in the same word.