Home   About Me   Blog   Algorithms & datastructures Home  



import utils

h = __import__("6_hashmaps")


# https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings


# Rabin-Karp algorithm is a string searching algorithm that uses hashing to find an exact match of a pattern string in a text.
def rabin_karp(s: str, pattern: str):
    """
    https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm
    """
    # O(nm)

    n = len(s)
    m = len(pattern)
    hash_pat = h.paul_larson_hash(pattern)

    for i in range(0, (n - m + 1)):
        xx = s[i : i + m]
        hs = h.paul_larson_hash(xx)
        if hs == hash_pat:
            if s[i : i + m] == pattern[0:m]:
                return i
    return None


# Also have a look at this alternative implementation:
# https://github.com/mccricardo/Rabin-Karp/blob/master/rabin_karp.py

res = rabin_karp("komu", "mu")
utils.assert_r("strings", res, 2)


res = rabin_karp("komu", "k")
utils.assert_r("strings", res, 0)


res = rabin_karp("komu", "ko")
utils.assert_r("strings", res, 0)


res = rabin_karp("abcdefghijklmnopqrstuvwxyz", "w")
utils.assert_r("strings", res, 22)
utils.assert_r("strings", res, "abcdefghijklmnopqrstuvwxyz".index("w"))
utils.assert_r("strings", list("abcdefghijklmnopqrstuvwxyz")[22], "w")


res = rabin_karp("komu", "kw")
utils.assert_r("strings", res, None)


my_str = """So they build their world in great confusion
To force on us the devil's illusion.
But the stone that the builder refuse
Shall be the head cornerstone,
And no matter what game they play,
Eh, we got something they could never take away;
We got something they could never take away - Bob Marley"""
res = rabin_karp(my_str, "builder")
utils.assert_r("strings", res, 105)
utils.assert_r("strings", res, my_str.index("builder"))

res = rabin_karp(my_str, "king")
utils.assert_r("strings", res, None)


def check_if_anagram(word1: str, word2: str) -> bool:
    # https://twitter.com/fermatslibrary/status/1385957963429515266?s=21
    # 1. map each of the 26 chars of alphabet to a unique prime number.
    # 2. convert each char in each word into its corresponding prime number
    # 3. multiply each of those prime numbers per word.
    # 4. if product_of_word1 == product_of_word2 , then the two words are anagrams
    # note; this only works when u use primes and not natural numbers.
    # natural numbers work upto a point but they have corner cases.
    # strictly, natural numbers would work for when both words are of equal length
    # and since anagrams have to be of equal length, then natural numbers would also work.
    # primes work for words of all lengths

    # TODO: fully implement this and test
    primess = {"a": 2, "b": 5, "c": 7}  # etc
    _ = primess
    return False


# TODO: Levenshtein distance
# https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings

# TODO:  Sorensen-Dice coefficient(can be used in place of Levenshtein, it is also simpler)
# https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings


# TODO: Longest common subsequence
# https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings

# TODO: Longest common substring
# https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings

# TODO: Horspool Algorithm(alternative to rabin-karp)
# https://github.com/Th1nkK1D/horspoolPythonMIPS/blob/master/horspool.py