I'm glad I wasn't alone in wanting to build a solver for letter-boxed.
For the initial wordlist filtering, you can use regular expressions to improve speed by 14-15x
here's an example with a profiler to compare speeds.
import cProfile
import regex as re
def regex_approach(word_file):
top = ['T','C','R']
right = ['I','U','O']
left = ['K','M','N']
bottom = ['S','A','L']
letters = {
'top':top,
'bottom':bottom,
'left': left,
'right': right
}
_top = "".join(top)
_left = "".join(left)
_right = "".join(right)
_bottom = "".join(bottom)
all_letters = [y for x in letters.values() for y in x]
ALLLETTERS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
letter_combos = "".join(all_letters)
# We don't actually need BADLETTERS, we can just do not (^) letter_combos,
# but this was a bit more demonstrative at the cost of performance
BADLETTERS = re.sub(f'[{letter_combos}]','',ALLLETTERS)
# we don't split the word into a list, we can just do fast regex instead
# step 1: remove words with bad letters -- re.multiline ensures we treat newlines as new lines.
_words = re.sub(rf'^[A-Z]*[{BADLETTERS}\']{{1,}}[A-Z]*$','', words, flags=re.MULTILINE)
valid_word_pattern = rf'^[A-Z]*([{_top}][{_top}]|[{_left}][{_left}]|[{_bottom}][{_bottom}]|[{_right}][{_right}])[A-Z]*$'
# step 2: remove words that violate the same-side rule
_words = re.sub(valid_word_pattern,'',_words,flags=re.MULTILINE)
# N.B. I'm pretty sure we can combine these two regexes somehow, which will be even more performant, but it started to get ugly
# remove blanks
_words = re.sub(f'\n+','\n', _words)
return _words.split()
def existing_approach(word_file):
top = ['T','C','R']
right = ['I','U','O']
left = ['K','M','N']
bottom = ['S','A','L']
pos = {
'top':top,
'bottom':bottom,
'left': left,
'right': right
}
chars = set(pos.keys())
actual_words = sorted(set(list(word.strip().upper() for word in word_file)))
valid_words = [w for w in actual_words if set(w)-chars==set()]
toss = []
for word in valid_words:
## make sure letters aren't adjacent and don't repeat
letters = list(word)
num = 1
while num < len(letters):
if (pos[letters[num]] == pos[letters[num-1]]):
toss.append(word)
num = len(letters)
else:
num += 1
return [w for w in valid_words if w not in toss]
if __name__ == "__main__":
with open('./words_hard.txt','r') as f1:
words = f1.read()
profiler = cProfile.Profile()
profiler.enable()
#existing_approach(words)
# Easy : 2354840 function calls in 0.622 seconds
# Hard: 8830337 function calls in 2.383 seconds
regex_approach(words)
# Easy : 3963 function calls (3910 primitive calls) in 0.044 seconds, 14x faster
# Hard: 3963 function calls (3910 primitive calls) in 0.150 seconds, 15x faster
profiler.disable()
profiler.print_stats(sort='cumulative')
I'm still poking at trying to find a much faster solver that solves it in n>=2 words. What I have so far is only marginally better -- still a lot of iterating through lists and using regex positive lookaheads.
However, for one word solutions, we can use the original, un-split word file directly and get a faster result with lookarounds.
def find_one_word_solution(all_letters: list,_words: str):
combo = [fr'(?=.*{letter})' for letter in all_letters]
_pattern = r'\b'+''.join(combo) + r'\w*\b'
return re.findall(_pattern,_words)
I just wanted to share this for fun, but if you want me to submit a pull request for any of this, let me know.
Hello!
I'm glad I wasn't alone in wanting to build a solver for letter-boxed. For the initial wordlist filtering, you can use regular expressions to improve speed by 14-15x here's an example with a profiler to compare speeds.
I'm still poking at trying to find a much faster solver that solves it in
n>=2
words. What I have so far is only marginally better -- still a lot of iterating through lists and using regex positive lookaheads.However, for one word solutions, we can use the original, un-split word file directly and get a faster result with lookarounds.
I just wanted to share this for fun, but if you want me to submit a pull request for any of this, let me know.