当前位置: 移动技术网 > IT编程>脚本编程>Python > 读书笔记 Bioinformatics Algorithms Chapter5

读书笔记 Bioinformatics Algorithms Chapter5

2018年09月23日  | 移动技术网IT编程  | 我要评论

云南省公务员网,合肥建材市场有哪些,海螺网

chapter5  how do we compare dna sequences 

bioinformatics algorithms-an_active learning approach

 
一、
1983年,russell doolitte 将血小板源生长因子[platelet derived growth factor(pdgf),一种刺激细胞增殖的物质]和其它已知基因比对,发现它的序列和原癌基因(oncogene)的序列有很高的相似度。这让科学家猜测某些癌症是因为基因在不合适的时机作用所致(scientists hypothesized that some forms of cancer might be caused by a good gene doing the right thing at the wrong time.)。
二、提出问题 序列比对:动态规划法
 
1.全局比对:
状态转移方程
 
  1 '''
  2 code challenge: solve the global alignment problem.
  3      input: two protein strings written in the single-letter amino acid alphabet.
  4      output: the maximum alignment score of these strings followed by an alignment achieving this maximum score. use the
  5     blosum62 scoring matrix for matches and mismatches as well as the indel penalty σ = 5.
  6 ----------
  7 sample input:
  8 pleasantly
  9 meanly
 10 ----------
 11 sample output:
 12 8
 13 pleasantly
 14 -mea--n-ly
 15 ----------
 16 @ lo kowngho  2018.9
 17 '''
 18 import numpy
 19 from os.path import dirname
 20 
 21 def grade(symb1,symb2):
 22     indx1 = symbollist[symb1]
 23     indx2 = symbollist[symb2]
 24     return matrix[indx1][indx2]
 25     
 26 def init_graph_global(l1,l2):
 27     graph = numpy.zeros([l2,l1])
 28     for x in range(1,l2):
 29         graph[x][0] = graph[x-1][0]-5
 30     for y in range(1,l1):
 31         graph[0][y] = graph[0][y-1]-5
 32     return graph
 33         
 34 def init_path(l1,l2):
 35     path = numpy.zeros([l2,l1])
 36     for x in range(1,l2):
 37         path[x][0] = 1
 38     for y in range(1,l1):
 39         path[0][y] = 2
 40     return path
 41 
 42 def buildglobalalignmentgraph(text1,text2):
 43     
 44     l1 = len(text1)
 45     l2 = len(text2)
 46     graph = init_graph_global(l1, l2)
 47     path = init_path(l1, l2)
 48         
 49     for x in range(1,l2):
 50         for y in range(1,l1):
 51             graph[x][y] = max(graph[x-1][y]-5, graph[x][y-1]-5, graph[x-1][y-1]+grade(text1[y],text2[x]))
 52             if graph[x-1][y]-5==graph[x][y]:
 53                 path[x][y]=1
 54             elif graph[x][y-1]-5==graph[x][y]:
 55                 path[x][y]=2
 56             else:
 57                 path[x][y]=3
 58     return [graph,path]
 59     
 60     
 61 def outputglobalaligement(path,graph,text1,text2):
 62     outt1 = ''
 63     outt2 = ''
 64     l1 = len(text1)
 65     l2 = len(text2)
 66     x = l2-1
 67     y = l1-1
 68     while(x!=0 or y!=0):
 69         if path[x][y]==1:
 70             outt1 += '-'
 71             outt2 += text2[x]
 72             x -= 1            
 73         elif path[x][y]==2:
 74             outt1 += text1[y]
 75             outt2 += '-'
 76             y -= 1
 77         else:
 78             outt1 += text1[y]
 79             outt2 += text2[x]
 80             x -= 1
 81             y -= 1
 82     return [outt1[::-1],outt2[::-1]]    
 83     
 84 def importscorematrix():
 85     dataset = open(dirname(__file__)+'blosum62.txt').read().strip().split('\n')
 86     symbollist = dataset[0].split()
 87     for i in range(len(symbollist)):
 88         symbollist[i]=[symbollist[i],i]
 89     symbollist = dict(symbollist)
 90     print(symbollist)
 91     matrix = []
 92     for i in range(1,len(dataset)):
 93         matrix.append(dataset[i].split()[1:])
 94     for l in range(len(matrix)):
 95         for i in range(len(matrix[l])):
 96             matrix[l][i]=int(matrix[l][i])
 97     return [matrix,symbollist]
 98 
 99 if __name__ == '__main__':
100     
101     [matrix,symbollist] = importscorematrix()
102 
103     dataset = open(dirname(__file__)+'dataset.txt').read().strip().split()
104     text1 = '0'+dataset[0]
105     text2 = '0'+dataset[1]
106     
107     [graph,path] = buildglobalalignmentgraph(text1, text2)
108     
109     [outt1,outt2] = outputglobalaligement(path,graph,text1,text2)
110     
111     print(int(graph[-1][-1]))
112     print(outt1)
113     print(outt2)
全局比对 python
 
2. 局部比对
可以把局部比对想象成下面的free taxi场景,在开始和结尾都不受罚分约束,只在中间的某一过程受罚分约束。
              
在全局比对的基础上,状态转移方程在加上一个0,表示每一个点,既可以由→、↓、↘经过罚分得到,也可以直接由起点,不经罚分得到(free taxi)。
  1 '''
  2 code challenge: solve the local alignment problem.
  3      input: two protein strings written in the single-letter amino acid alphabet.
  4      output: the maximum score of a local alignment of the strings, followed by a local alignment of these strings achieving the maximum
  5      score. use the pam250 scoring matrix for matches and mismatches as well as the indel penalty σ = 5.
  6 ---------------
  7 sample input:
  8 meanly
  9 penalty
 10 ---------------
 11 sample output:
 12 15
 13 eanl-y
 14 enalty
 15 ---------------
 16 lo kwongho 2018.9
 17 '''
 18 import numpy
 19 from os.path import dirname
 20 
 21 def grade(symb1,symb2):
 22     indx1 = symbollist[symb1]
 23     indx2 = symbollist[symb2]
 24     return matrix[indx1][indx2]
 25 
 26 def init_graph_local(l1,l2):
 27     graph = numpy.zeros([l1,l2])
 28     return graph
 29         
 30 def init_path(l1,l2):
 31     path = numpy.ones([l1,l2])*-1
 32     for x in range(1,l1):
 33         path[x][0] = 1
 34     for y in range(1,l2):
 35         path[0][y] = 2
 36     return path
 37 
 38 def buildlocalalignmentgraph(text1,text2):
 39     l1 = len(text1)
 40     l2 = len(text2)
 41     graph = init_graph_local(l1, l2)
 42     path = init_path(l1, l2)
 43     
 44     for x in range(1,l1):
 45         for y in range(1,l2):
 46             graph[x][y] = max(graph[x-1][y]-5, graph[x][y-1]-5, graph[x-1][y-1]+grade(text1[x],text2[y]),0)
 47             if graph[x-1][y]-5 == graph[x][y]:
 48                 path[x][y] = 1
 49             elif graph[x][y-1]-5==graph[x][y]:
 50                 path[x][y] = 2
 51             elif graph[x][y] == 0:
 52                 path[x][y] = 0
 53             else:
 54                 path[x][y] = 3
 55     maxval = 0
 56     maxindx = [-1,-1]
 57     for x in range(1,l1):
 58         for y in range(1,l2):
 59             if graph[x][y]>maxval:
 60                 maxval=graph[x][y]
 61                 maxindx=[x,y]
 62                 
 63     return [graph,path,maxindx]
 64 
 65 def outputlocalaligement(path,graph,text1,text2,maxindx):
 66     outt1 = ''
 67     outt2 = ''
 68     l1 = len(text1)
 69     l2 = len(text2)
 70     x = maxindx[0]
 71     y = maxindx[1]
 72     while(x!=0 or y!=0):
 73         if path[x][y]==1:
 74             outt1 += text1[x]
 75             outt2 += '-'
 76             x -= 1            
 77         elif path[x][y]==2:
 78             outt1 += '-'
 79             outt2 += text2[y]
 80             y -= 1
 81         elif path[x][y]==3:
 82             outt1 += text1[x]
 83             outt2 += text2[y]
 84             x -= 1
 85             y -= 1
 86         else:
 87             x=0
 88             y=0
 89     return [outt1[::-1],outt2[::-1]]    
 90 
 91 
 92 def importscorematrix():
 93     dataset = open(dirname(__file__)+'pam250.txt').read().strip().split('\n')
 94     symbollist = dataset[0].split()
 95     for i in range(len(symbollist)):
 96         symbollist[i]=[symbollist[i],i]
 97     symbollist = dict(symbollist)
 98     matrix = []
 99     for i in range(1,len(dataset)):
100         matrix.append(dataset[i].split()[1:])
101     for l in range(len(matrix)):
102         for i in range(len(matrix[l])):
103             matrix[l][i]=int(matrix[l][i])
104     return [matrix,symbollist]
105     
106 if __name__ == '__main__':
107     [matrix,symbollist] = importscorematrix()
108     
109     dataset = open(dirname(__file__)+'dataset.txt').read().strip().split()
110     text1 = '0'+dataset[0]
111     text2 = '0'+dataset[1]
112     
113     [graph,path,maxindx] = buildlocalalignmentgraph(text1,text2)
114     
115     [outt1,outt2]=outputlocalaligement(path,graph,text1,text2,maxindx)
116     print(int(graph[maxindx[0]][maxindx[1]]))
117     print(outt1)
118     print(outt2)
局部比对 python
3. overlarp alignment
  1 '''
  2 code challenge: solve the overlap alignment problem.
  3    >>input: two strings v and w, each of length at most 1000.
  4    >>output: the score of an optimal overlap alignment of v and w, followed by an alignment of a suffix v' of v and a prefix w' of w.
  5     achieving this maximum score. use an alignment score in which matches count +1 and both the mismatch and indel penalties are 2.
  6 -------------------
  7 sample input:
  8 pawheae
  9 heagawghee
 10 -------------------
 11 sample output:
 12 1
 13 heae
 14 heag
 15 -------------------
 16 coder: lo kwongho
 17 '''
 18 
 19 import numpy
 20 from os.path import dirname
 21 
 22 def init_graph_overlap(l1,l2):
 23     graph = numpy.zeros([l1,l2])
 24     for y in range(1,l2):
 25         graph[0][y] = graph[0][y-1]-1
 26     return graph
 27         
 28 def init_path(l1,l2):
 29     path = numpy.ones([l1,l2])*-1
 30     for x in range(1,l1):
 31         path[x][0] = 1
 32     for y in range(1,l2):
 33         path[0][y] = 2
 34     return path
 35 
 36 def buildoverlapalignmentgraph(text1,text2):
 37     l1 = len(text1)
 38     l2 = len(text2)
 39     graph = init_graph_overlap(l1, l2)
 40     path = init_path(l1,l2)
 41     for x in range(1,l1):
 42         for y in range(1,l2):
 43             if text1[x]==text2[y]:
 44                 graph[x][y]=max(graph[x-1][y-1]+1,graph[x-1][y]-2,graph[x][y-1]-2)
 45             else:
 46                 graph[x][y]=max(graph[x-1][y-1]-2,graph[x-1][y]-2,graph[x][y-1]-2)
 47             if graph[x][y]==graph[x-1][y]-2:
 48                 path[x][y]=1
 49             elif graph[x][y]==graph[x][y-1]-2:
 50                 path[x][y]=2
 51             else:
 52                 path[x][y]=3
 53         
 54     maxval = 0
 55     maxindx = -1
 56     for i in range(l2):
 57         if graph[l1-1][i]>maxval:
 58             maxval=graph[l1-1][i]
 59             maxindx=i
 60                     
 61     return [graph,path,maxindx,maxval]    
 62     
 63 def outputoverlapaligement(path,graph,text1,text2,maxindx):
 64     outt1 = ''
 65     outt2 = ''
 66     l1 = len(text1)
 67     l2 = len(text2)
 68     x = l1-1
 69     y = maxindx
 70     while(y!=0):
 71         if path[x][y]==1:
 72             outt1 += text1[x]
 73             outt2 += '-'
 74             x -= 1            
 75         elif path[x][y]==2:
 76             outt1 += '-'
 77             outt2 += text2[y]
 78             y -= 1
 79         elif path[x][y]==3:
 80             outt1 += text1[x]
 81             outt2 += text2[y]
 82             x -= 1
 83             y -= 1
 84         else:
 85             x=0
 86             y=0
 87     return [outt1[::-1],outt2[::-1]]    
 88         
 89 
 90 if __name__ == '__main__':
 91     dataset = open(dirname(__file__)+'dataset.txt').read().strip().split()
 92     text1 = '0'+dataset[0]
 93     text2 = '0'+dataset[1]
 94     l1 = len(text1)
 95     l2 = len(text2)
 96     [graph,path,maxindx,maxval] = buildoverlapalignmentgraph(text1,text2)
 97     #print(graph)
 98         
 99     [outtext1,outtext2]=outputoverlapaligement(path, graph, text1, text2, maxindx)
100 
101     print(int(maxval))
102     print(outtext1)
103     print(outtext2)
overlarp in python
4.fitting alignment 
  1 '''
  2 fitting alignment problem: construct a highest-scoring fitting alignment between two strings.
  3    >>input: strings v and w as well as a matrix score.
  4    >>output: a highest-scoring fitting alignment of v and w as defined by the scoring matrix score.
  5 -------------------
  6 sample input:
  7 gtaggcttaaggtta
  8 tagata
  9 -------------------
 10 sample output:
 11 2
 12 taggctta
 13 taga--ta
 14 -------------------
 15 coder: lo kwongho
 16 '''
 17 
 18 import numpy
 19 from os.path import dirname
 20 
 21 def init_graph_fiting(l1,l2):
 22     graph = numpy.zeros([l1,l2])
 23     for y in range(1,l2):
 24         graph[0][y] = graph[0][y-1]-1
 25     return graph
 26         
 27 def init_path(l1,l2):
 28     path = numpy.ones([l1,l2])*-1
 29     for x in range(1,l1):
 30         path[x][0] = 1
 31     for y in range(1,l2):
 32         path[0][y] = 2
 33     return path
 34 
 35 def buildfittingalignmentgraph(text1,text2):
 36     l1 = len(text1)
 37     l2 = len(text2)
 38     graph = init_graph_fiting(l1, l2)
 39     path = init_path(l1,l2)
 40     for x in range(1,l1):
 41         for y in range(1,l2):
 42             if text1[x]==text2[y]:
 43                 graph[x][y]=max(graph[x-1][y-1]+1,graph[x-1][y]-1,graph[x][y-1]-1)
 44             else:
 45                 graph[x][y]=max(graph[x-1][y-1]-1,graph[x-1][y]-1,graph[x][y-1]-1)
 46             if graph[x][y]==graph[x-1][y]-1:
 47                 path[x][y]=1
 48             elif graph[x][y]==graph[x][y-1]-1:
 49                 path[x][y]=2
 50             else:
 51                 path[x][y]=3
 52     
 53     maxval = 0
 54     maxindx = -1
 55     for i in range(l1):
 56         if graph[i][l2-1]>maxval:
 57             maxval=graph[i][l2-1]
 58             maxindx=i
 59                         
 60     return [graph,path,maxindx,maxval]    
 61     
 62 def outputfittingaligement(path,graph,text1,text2,maxindx):
 63     outt1 = ''
 64     outt2 = ''
 65     l1 = len(text1)
 66     l2 = len(text2)
 67     x = maxindx
 68     y = l2-1
 69     while(y!=0):
 70         if path[x][y]==1:
 71             outt1 += text1[x]
 72             outt2 += '-'
 73             x -= 1            
 74         elif path[x][y]==2:
 75             outt1 += '-'
 76             outt2 += text2[y]
 77             y -= 1
 78         elif path[x][y]==3:
 79             outt1 += text1[x]
 80             outt2 += text2[y]
 81             x -= 1
 82             y -= 1
 83         else:
 84             x=0
 85             y=0
 86     return [outt1[::-1],outt2[::-1]]    
 87         
 88 
 89 if __name__ == '__main__':
 90     dataset = open(dirname(__file__)+'dataset.txt').read().strip().split()
 91     text1 = '0'+dataset[0]
 92     text2 = '0'+dataset[1]
 93     l1 = len(text1)
 94     l2 = len(text2)
 95     [graph,path,maxindx,maxval] = buildfittingalignmentgraph(text1,text2)
 96     
 97     [outtext1,outtext2]=outputfittingaligement(path, graph, text1, text2, maxindx)
 98     #print(graph)
 99     print(int(maxval))
100     print(outtext1)
101     print(outtext2)
fitting alignment in python
这四种比对的关系如图:
 
全局比对                    局部比对
overlarp alignment                 fitting alignment
5、基因的插入和删除,通常都是连续的一段,故在比对出现的连续空位,应该把它们当作一个整体看待。在比对的空位罚分中,生物学家认为,在每一条链上新开一个空位,应罚重分,而空位的延续,罚分应较少:
解决问题的方法是:开三个矩阵,每个矩阵代表一种方向。在→、↓方向上行走,代表产生空位。故每当从↘转移到→、↓,或者→、↓间转移,代表在某链上产生新空位,重罚,而在→、↓内转移,代表空位延续,轻罚。
 
                     
  1 '''
  2 code challenge: solve the alignment with affine gap penalties problem.
  3    >>input: two amino acid strings v and w (each of length at most 100).
  4    >>output: the maximum alignment score between v and w, followed by an alignment of v and w achieving this maximum score. use the
  5      blosum62 scoring matrix, a gap opening penalty of 11, and a gap extension penalty of 1.
  6 ---------------------
  7 sample input:
  8 prteins
  9 prtwpsein
 10 ---------------------
 11 sample output:
 12 8
 13 prt---eins
 14 prtwpsein-
 15 ---------------------
 16 coder: lo kwongho
 17 '''
 18 import numpy
 19 from os.path import dirname
 20 neginfinity = -999
 21 #penalties
 22 gs = -10 #gap_start
 23 ge = -1  #gap_extend
 24 #
 25 def grade(symb1,symb2):
 26     indx1 = symbollist[symb1]
 27     indx2 = symbollist[symb2]
 28     return matrix[indx1][indx2]
 29 
 30 def initgraph(l1,l2):
 31     graph = [numpy.zeros([l1,l2]    ,dtype=int) for i in range(3)]
 32 
 33     graph[1][0][0] = neginfinity
 34     graph[2][0][0] = neginfinity
 35     for x in range(1,l1):
 36         graph[0][x][0]=neginfinity
 37         if x==1:
 38             graph[1][x][0]=ge+gs
 39         else:
 40             graph[1][x][0]=graph[1][x-1][0]+ge
 41         graph[2][x][0]=neginfinity
 42     for y in range(1,l2):
 43         graph[0][0][y]=neginfinity
 44         if y ==1:
 45             graph[2][0][y]=ge+gs
 46         else:
 47             graph[2][0][y]=graph[2][0][y-1]+ge
 48         graph[1][0][y]=neginfinity
 49     return graph
 50     
 51 def init_path(l1,l2):
 52     path = [numpy.ones([l1,l2])*-1 for i in range(3)]
 53     '''for x in range(1,l1):
 54         path[x][0] = 1
 55     for y in range(1,l2):
 56         path[0][y] = 2'''
 57     return path
 58     
 59 def buildalignmentgraph(text1,text2,l1,l2):
 60 
 61     graph = initgraph(l1,l2)
 62     #path = #init_path(l1,l2)
 63     for x in range(1,l1):
 64         for y in range(1,l2):                
 65             # x ######
 66             graph[1][x][y]=max(gs+ge+graph[0][x-1][y],gs+ge+graph[2][x-1][y],ge+graph[1][x-1][y])
 67 
 68                 
 69             # y ###### 
 70             graph[2][x][y]=max(gs+ge+graph[0][x][y-1],gs+ge+graph[1][x][y-1],ge+graph[2][x][y-1])
 71 
 72             # m ######
 73             graph[0][x][y]=grade(text1[x], text2[y])+max(graph[0][x-1][y-1],graph[1][x-1][y-1],graph[2][x-1][y-1])
 74 
 75     maxval = 0
 76     maxindx = -1
 77     for i in range(3):
 78         if graph[i][l1-1][l2-1]>maxval:
 79             maxval=graph[i][l1-1][l2-1]
 80             maxindx=i
 81     return [graph,maxindx,maxval]
 82 
 83 def trackback(graph,maxindx,text1,text2):
 84     x = len(text1)-1
 85     y = len(text2)-1
 86     otext1 = ''
 87     otext2 = ''
 88     indx = maxindx
 89     while(1):
 90         if indx==0:
 91             otext1 += text1[x]
 92             otext2 += text2[y]
 93             if x ==1:
 94                 break
 95             if graph[0][x][y]==graph[1][x-1][y-1]+grade(text1[x], text2[y]):
 96                 indx = 1
 97             elif graph[0][x][y]==graph[2][x-1][y-1]+grade(text1[x], text2[y]):
 98                 indx = 2
 99             else:
100                 indx = 0
101             x -= 1
102             y -= 1
103         elif indx==1:
104             otext1 += text1[x]
105             otext2 += '-'
106             if x == 1:
107                 break
108             if graph[1][x][y]==graph[0][x-1][y]+ge+gs:
109                 indx = 0
110             elif graph[1][x][y]==graph[2][x-1][y]+ge+gs:
111                 indx = 2
112             else:
113                 indx = 1
114             x -= 1
115         else:
116             otext1 += '-'
117             otext2 += text2[y]
118             if y == 1:
119                 break
120             if graph[2][x][y]==graph[0][x][y-1]+ge+gs:
121                 indx = 0
122             elif graph[2][x][y]==graph[1][x][y-1]+ge+gs:
123                 indx = 1
124             else:
125                 indx = 2
126             y -= 1
127                 
128     return [otext1[::-1],otext2[::-1]]
129         
130 def importscorematrix():
131     dataset = open(dirname(__file__)+'blosum62.txt').read().strip().split('\n')
132     symbollist = dataset[0].split()
133     for i in range(len(symbollist)):
134         symbollist[i]=[symbollist[i],i]
135     symbollist = dict(symbollist)
136     matrix = []
137     for i in range(1,len(dataset)):
138         matrix.append(dataset[i].split()[1:])
139     for l in range(len(matrix)):
140         for i in range(len(matrix[l])):
141             matrix[l][i]=int(matrix[l][i])
142     return [matrix,symbollist]
143 
144 
145 if __name__ == '__main__':
146     [matrix,symbollist] = importscorematrix() # 打分矩阵
147     
148     dataset = open(dirname(__file__)+'dataset.txt').read().strip().split()
149     text1 = '0'+dataset[0]
150     text2 = '0'+dataset[1]
151     l1 = len(text1)
152     l2 = len(text2)
153     [graph,maxindx,maxval] = buildalignmentgraph(text1, text2, l1, l2)
154     #print(graph)
155     
156     [output_text1,output_text2] = trackback(graph,maxindx,text1,text2)
157     print(maxval)
158     print(output_text1)
159     print(output_text2)
160     
alignment with affine gap penalties

 

6 * 一种线性空间的比对方法 space-efficient sequence alignment(分治+动态规划)
https://www.cs.rit.edu/~rlaz/algorithms20082/slides/spaceefficientalignment.pdf
  1 '''
  2 code challenge: implement linearspacealignment to solve the global alignment problem for a large dataset.
  3 >>>input: two long (10000 amino acid) protein strings written in the single-letter amino acid alphabet.
  4 >>>output: the maximum alignment score of these strings, followed by an alignment achieving this maximum score. use the
  5 
  6 blosum62 scoring matrix and indel penalty σ = 5.
  7 ------------
  8 sample input:
  9 pleasantly
 10 meanly
 11 ------------
 12 sample output:
 13 8
 14 pleasantly
 15 -mea--n-ly
 16 ------------
 17 coder: lo kwongho in 2018.9
 18 '''
 19 from os.path import dirname
 20 import numpy
 21 #
 22 indel = -5
 23 neginf = -9999
 24 #
 25 #
 26 def grade(symb1,symb2):
 27     indx1 = symbollist[symb1]
 28     indx2 = symbollist[symb2]
 29     return matrix[indx1][indx2]
 30 
 31 def importscorematrix():
 32     dataset = open(dirname(__file__)+'blosum62.txt').read().strip().split('\n')
 33     symbollist = dataset[0].split()
 34     for i in range(len(symbollist)):
 35         symbollist[i]=[symbollist[i],i]
 36     symbollist = dict(symbollist)
 37     matrix = []
 38     for i in range(1,len(dataset)):
 39         matrix.append(dataset[i].split()[1:])
 40     for l in range(len(matrix)):
 41         for i in range(len(matrix[l])):
 42             matrix[l][i]=int(matrix[l][i])
 43     return [matrix,symbollist]
 44 #
 45 def half_alignment(v,w):
 46     nv = len(v)
 47     mw = len(w)
 48     s = numpy.zeros(shape=(nv+1,2),dtype=int)
 49     for i in range(nv+1):
 50         s[i,0] = indel*i
 51     if mw==0:
 52         return s[:,0] #
 53     for j in range(mw):
 54         s[0,1]=s[0,0]+indel
 55         for i in range(nv):
 56             s[i+1,1]=max(s[i,1]+indel,s[i+1,0]+indel,s[i,0]+grade(w[j],v[i]))
 57         s[:,0]=s[:,1]
 58     return s[:,1]
 59 
 60 def midedge(v,w):
 61     nv = len(v)
 62     mw = len(w)
 63     mid = int((mw-1)/2)
 64     wl = w[:mid]
 65     wr = w[mid+1:]
 66     pre = half_alignment(v,wl)
 67     suf = half_alignment(v[::-1],wr[::-1])[::-1]
 68     hs = [pre[i]+suf[i]+indel  for i in range(nv+1)]
 69     ds = [pre[i]+suf[i+1]+grade(w[mid],v[i])  for i in range(nv)]
 70     maxhs = max(hs)
 71     maxds = max(ds)
 72     if maxhs>maxds:
 73         return ( (hs.index(maxhs),mid) ,(hs.index(maxhs),mid+1) )
 74     else:
 75         return ( (ds.index(maxds),mid) ,(ds.index(maxds)+1,mid+1) )
 76 
 77 def build_alignment_track(v,w):
 78     vn = len(v)
 79     wm = len(w)
 80     if vn==0 and wm==0:
 81         return []
 82     elif vn==0:
 83         return ['-']*wm
 84     elif wm==0:
 85         return ['|']*vn
 86     ((x1,y1),(x2,y2)) = midedge(v,w)
 87     if x1==x2:
 88         edge = ['-']
 89     else:
 90         edge = ['\\']
 91     wleft = w[:y1]
 92     wright = w[y2:]
 93     vupper = v[:x1]
 94     vbotm = v[x2:]
 95 
 96     upper_left_track = build_alignment_track(vupper,wleft)
 97     bottom_right_track = build_alignment_track(vbotm,wright)
 98     return upper_left_track+edge+bottom_right_track
 99 
100 def tracktostring(v,w,track):
101     vi = 0
102     wj = 0
103     outv = ''
104     outw = ''
105     score = 0
106     for i in track:
107         if i == '|':
108             outv += v[vi]
109             outw += '-'
110             score += indel
111             vi += 1    
112         elif i == '-':
113             outv += '-'
114             outw += w[wj]
115             score += indel
116             wj += 1            
117         else:
118             outv += v[vi]
119             outw += w[wj]
120             score += grade(v[vi], w[wj])
121             vi += 1
122             wj += 1
123             
124     return [score,outv,outw]
125 
126 def linearspacealignment(v,w):
127     track = build_alignment_track(v,w)
128     [score,outv, outw] = tracktostring(v,w,track)
129     print(score)
130     print(outv)
131     print(outw)
132 
133 if __name__ == '__main__':
134     dataset = open(dirname(__file__)+'dataset.txt').read().strip().split()
135     [matrix,symbollist] = importscorematrix()
136     v = dataset[0]
137     w = dataset[1]
138     linearspacealignment(v,w)
linear-space alignment

 

如对本文有疑问,请在下面进行留言讨论,广大热心网友会与你互动!! 点击进行留言回复

相关文章:

验证码:
移动技术网