4
\$\begingroup\$

Is the algorithm correct? It seems to work.

print("Multiple sequence longest common subsequence implementation in Python.\nFor example a naive algorithm would perfom c*128^128 elementary operations on a problem set of 128 sequences of length 128 elements.")
print(" That is: ", 128**128, "operations.")

from collections import defaultdict
import sys

def ptable(m, n, table):
   for i in range(m):
       line = ""
       for j in range(n):
           line += str(table[i, j]) + " "
       print(line)

def lcs(x, y):
    table = defaultdict(int)
    n = len(x)
    m = len(y)

    for i in range(n):
        for j in range(m):
            if x[i] == y[j]:
                table[i, j] = table[i-1, j-1]+1
            else:
                table[i, j] = max(table[i-1, j], table[i, j-1])

    return table

def mlcs(strings):
    strings = list(set(strings))
    tables = dict()
    for i in range(1, len(strings)):
        x = strings[i]
        for y in strings[:i]:
            lcsxy = lcs(x, y)
            tables[x,y] = lcsxy

    def rec(indexes):
        for v in indexes.values():
            if v < 0:
                return ""
        same = True
        for i in range(len(strings)-1):
            x = strings[i]
            y = strings[i+1]
            same &= x[indexes[x]] == y[indexes[y]]
        if same:
            x = strings[0]
            c = x[indexes[x]]
            for x in strings:
                indexes[x] -= 1
            return rec(indexes) + c
        else:
            v = -1
            ind = None
            for x in strings:
                indcopy = indexes.copy()
                indcopy[x] -= 1
                icv = valueat(indcopy)
                if icv > v:
                    ind = indcopy
                    v = icv
            indexes = ind
            return rec(indexes)

    def valueat(indexes):
        m = 12777216
        for i in range(1, len(strings)):
            x = strings[i]
            for y in strings[:i]:
                lcsxy = tables[x,y]
                p = lcsxy[indexes[x],indexes[y]]
                m = min(m, p)
        return m         

    indexes = dict()
    for x in strings:
        indexes[x] = len(x) - 1
    return rec(indexes)

def check(string, seq):
    i = 0
    j = 0
    while i < len(string) and j < len(seq):
        if string[i] == seq[j]:
            j += 1
        i += 1
    return len(seq) - j

def checkall(strings, seq):
    for x in strings:
        a = check(x, seq)
        if not a == 0:
            print(x, seq, a)
            return False
    return True

strings = []
sigma = ["a", "b", "c", "d"]
#sigma = ["a", "b"]
import random
for i in range(5):    
    string = ""
    for j in range(128):
        string += random.choice(sigma)
    strings += [string]

#strings = ["abbab", "ababa", "abbba"]
#strings = ["abab", "baba", "abaa"]
#strings = ["bacda", "abcde", "decac"]
#strings = ["babbabbb", "bbbaabaa", "abbbabab", "abbababa"]
#strings = ["human", "chimpanzee", "woman"]
#strings = ["ababa", "babab", "bbbab"]
#strings = ["ababaaba", "abbabbaa", "aaaaabba"]
#strings = ["aabbbaba", "aaaabbba", "aabbbaab"]
print("Strings:", strings)
l = mlcs(strings)
print("Lcs:", l, checkall(strings, l))`
\$\endgroup\$
2
  • \$\begingroup\$ Is it working as intended or is it not working as intended? \$\endgroup\$ Commented May 5, 2015 at 15:42
  • \$\begingroup\$ It seems to work as intended with the methods of testing provided. Maybe one needs to find larger test cases. \$\endgroup\$ Commented May 5, 2015 at 15:48

1 Answer 1

1
\$\begingroup\$

This test case generation method produces cases where the result is sometimes sub-optimal. Thus there is a flaw in the algorithm.

strings = []
sigma = ["a", "b", "c", "d"]
#sigma = ["a", "b"]
sigmax = ["e", " f", "g"]
import random
Nx = 8
N = 16
M = 5 # Number of strings
x = ""
for i in range(Nx):
    x += random.choice(sigmax)
for i in range(M):    
    string = ""
    for j in range(N):
        string += random.choice(sigma)
    indexes = list(range(N))
    random.shuffle(indexes)
    indexes = sorted(indexes[:len(x)])
    for j in range(len(x)):
        string = string[:indexes[j]]+x[j]+string[indexes[j]:]
    strings += [string]

So, back to the drawing board, so to speak.

\$\endgroup\$
2
  • \$\begingroup\$ Could you add either an example or an analysis? \$\endgroup\$ Commented May 5, 2015 at 19:42
  • \$\begingroup\$ Added example generating code. The idea of the lcs algorithm is an extension of the 2-string lcs. The function rec does the backtracking and valueat is supposed to produce the correct value at a position of an M-dimensional space (M is the number of sequences). For this purpose the values at each of the "coordinate planes" (each a pairing of two strings) are produced using the regular 2-dim lcs algorithm. There are (m^2-m)/2 such pairings/planes. \$\endgroup\$ Commented May 6, 2015 at 3:51

You must log in to answer this question.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.