def smith_waterman(seq1, seq2, match_score=3, mismatch_penalty=-1, gap_penalty=-2):
    """Find a best-scoring local alignment of two sequences (Smith-Waterman).

    Args:
        seq1: First sequence (indexable, e.g. a string).
        seq2: Second sequence (indexable, e.g. a string).
        match_score: Score added when two aligned symbols are equal.
        mismatch_penalty: Score added when two aligned symbols differ.
        gap_penalty: Score added for aligning a symbol against a gap.

    Returns:
        A tuple ``(max_score, align1, align2)``. ``max_score`` is the highest
        cell score found (0 if no cell scores above zero). ``align1`` and
        ``align2`` are the aligned segments of ``seq1`` and ``seq2``, with
        ``'-'`` marking gaps; both are empty when ``max_score`` is 0.

    When several cells share the maximum score, the first one met in
    row-major order is used as the traceback start. During traceback a
    diagonal move is preferred, then a gap in ``seq2``, then a gap in ``seq1``.
    """
    rows, cols = len(seq1) + 1, len(seq2) + 1

    # Initialise the scoring matrix; row 0 and column 0 stay at zero.
    matrix = [[0] * cols for _ in range(rows)]
    max_score = 0
    max_i, max_j = 0, 0

    # Fill the matrix.
    for i in range(1, rows):
        for j in range(1, cols):
            if seq1[i - 1] == seq2[j - 1]:
                substitution = match_score
            else:
                substitution = mismatch_penalty
            # Always weigh the diagonal move against both gap moves. Skipping
            # the gap moves on a match is only safe for conventional scores
            # (match >= mismatch, non-positive gap score); the full recurrence
            # stays optimal for any scoring scheme.
            score = max(
                matrix[i - 1][j - 1] + substitution,
                matrix[i - 1][j] + gap_penalty,
                matrix[i][j - 1] + gap_penalty,
            )
            if score > max_score:
                max_score = score
                max_i, max_j = i, j
            # Local alignment: a cell never drops below zero.
            matrix[i][j] = max(score, 0)

    # Trace back from the best cell until a zero cell (or the border) is hit.
    # Symbols are collected back-to-front and reversed once at the end, which
    # avoids the quadratic cost of repeatedly prepending to a string.
    aligned1, aligned2 = [], []
    i, j = max_i, max_j
    while i > 0 and j > 0 and matrix[i][j] != 0:
        if seq1[i - 1] == seq2[j - 1]:
            substitution = match_score
        else:
            substitution = mismatch_penalty
        if matrix[i][j] == matrix[i - 1][j - 1] + substitution:
            aligned1.append(seq1[i - 1])
            aligned2.append(seq2[j - 1])
            i -= 1
            j -= 1
        elif matrix[i][j] == matrix[i - 1][j] + gap_penalty:
            # Symbol of seq1 aligned against a gap in seq2.
            aligned1.append(seq1[i - 1])
            aligned2.append('-')
            i -= 1
        else:
            # Symbol of seq2 aligned against a gap in seq1.
            aligned1.append('-')
            aligned2.append(seq2[j - 1])
            j -= 1

    return max_score, ''.join(reversed(aligned1)), ''.join(reversed(aligned2))
# Example usage
seq1 = "AGCACACA"
seq2 = "ACACACTA"
score, align1, align2 = smith_waterman(seq1, seq2)
print("Alignment score:", score)
print("Sequence 1:", align1)
print("Sequence 2:", align2)