Home   About Me   Blog   Algorithms & datastructures Home  



import pathlib

import utils

# https://albertauyeung.github.io/2020/06/15/python-trie.html
# A Trie is commonly used to represent a dictionary for looking up words in a vocabulary.
# eg auto-completion or query suggestion
#
# Tries can be implemented using an array, see: https://www.techiedelight.com/trie-implementation-python/
# However, they waste space since they allocate space for the entire alphabet.
# The Trie implemented below is space efficient since it only allocates memory for alphabet in use.


class Node:
    def __init__(self, char):
        self.char = char
        self.is_end = False  # end of a word?
        self.counter = 0  # how may times word is inserted
        self.children: dict[str, Node] = {}  # keys are characters, values are Nodes


class Trie:
    def __init__(self):
        # root node does not store any character
        self.root = Node("")

    def insert(self, word: str):
        node = self.root
        for i in word:
            if i in node.children:
                node = node.children[i]
            else:
                # If a character is not found,
                # create a new node in the trie
                new_node = Node(i)
                node.children[i] = new_node
                node = new_node
        node.is_end = True
        node.counter += 1

    def depth_first_traverse(self, node, prefix, output):
        prefix = prefix + node.char
        if node.is_end:
            output.append((prefix, node.counter))

        for child in node.children.values():
            self.depth_first_traverse(child, prefix, output)

    def query(self, prefix):
        node = self.root
        for i in prefix:
            if i in node.children:
                node = node.children[i]
            else:
                # not found, return empty list
                return []
        # traverse trie to get all candidates
        output = []
        self.depth_first_traverse(node, prefix[:-1], output=output)
        return output


t = Trie()
t.insert("was")
t.insert("word")
t.insert("war")
t.insert("what")
t.insert("where")
t.insert("where")
t.insert("where")


utils.assert_r("trie", t.query("wh"), [("what", 1), ("where", 3)])
utils.assert_r("trie", t.query("nothing"), [])
utils.assert_r("trie", t.query("what"), [("what", 1)])


t = Trie()
f = pathlib.Path.cwd().joinpath("test_data/10_000_words.txt").open()
words = f.readlines()
f.close()
for w in words[:1000]:
    t.insert(w)


if t.query("bl") != [
    ("bl\n", 1),
    ("black\n", 1),
    ("blackberry\n", 1),
    ("blackjack\n", 1),
    ("blacks\n", 1),
    ("blade\n", 1),
    ("blades\n", 1),
    ("blah\n", 1),
    ("blair\n", 1),
    ("blake\n", 1),
    ("blame\n", 1),
    ("blank\n", 1),
    ("blanket\n", 1),
    ("blast\n", 1),
    ("bleeding\n", 1),
    ("blend\n", 1),
    ("bless\n", 1),
    ("blessed\n", 1),
    ("blind\n", 1),
    ("blink\n", 1),
    ("block\n", 1),
    ("blocked\n", 1),
]:
    utils.assert_r(
        "trie",
        t.query("bl"),
        [
            ("bl\n", 1),
            ("black\n", 1),
            ("blackberry\n", 1),
            ("blackjack\n", 1),
            ("blacks\n", 1),
            ("blade\n", 1),
            ("blades\n", 1),
            ("blah\n", 1),
            ("blair\n", 1),
            ("blake\n", 1),
            ("blame\n", 1),
            ("blank\n", 1),
            ("blanket\n", 1),
            ("blast\n", 1),
            ("bleeding\n", 1),
            ("blend\n", 1),
            ("bless\n", 1),
            ("blessed\n", 1),
            ("blind\n", 1),
            ("blink\n", 1),
            ("block\n", 1),
            ("blocked\n", 1),
        ],
    )