jeudi 25 mars 2021

matching the string with pattern with no of mismatches. My code is working correctly but i can't figure out how to optimize it and make it faster

#python3

import sys
from _collections import deque

def hashTable(s, prime, x):    # calcuate hash
    h = list([] for _ in range(len(s) + 1))
    h[0] = 0
    for i in range(1, len(s) + 1):
        h[i] = (h[i - 1] * x + ord(s[i - 1])) % prime
    return h

def subhash(table, start, length):    #calculate subhash
    y = pow(x, length, pr)
    hash_value = (table[start + length] - y * table[start]) % pr
    return hash_value

def nextmsm(le, i, n):        #find next mismatch
    c = 0
    st = deque()
    st.append((0, le))
    a = 0
    while len(st) != 0:
        if c > n:
            break
        a, le = st.pop()
        th = subhash(tx, a+i, le)
        ph = subhash(pt, a, le)
        if th == ph:
            continue
        if le == 1:
            if th != ph:
                c = c + 1
        else :
            st.append((a, le//2))
            st.append((a+le//2, le - le//2))

    if c > n:
        return False
    return True


def solve(o, text, pattern):     # find pattern with mismatch
    global tx, pt
    pos = []
    tx = hashTable(text, pr, x)
    pt = hashTable(pattern, pr, x)
    for i in range(len(text) - len(pattern)+1):
        if nextmsm(len(pattern), i, o):
            pos.append(i)

    return pos


pr = 1000000007
x = 263
while True:
    line = input()
    k, t, p = line.split()
    ans = solve(int(k), t, p)
    print(len(ans), *ans)

this sol first takes the input k that is no of max mismatch, t= text, p = pattern, it then passes to solve function which calculates the hash value and call function msmatch to find no of mismatch. Msmatach function calculates the subhash which then with the help of binary search helps to find no of mismatch

Aucun commentaire:

Enregistrer un commentaire