Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ subject to a decay to prioritize long gaps.
Input sequences can be supplied either as FASTA-format files or directly as sequence strings.

Here is some skeleton code to get you started:

```
import swalign
# choose your own values here… 2 and -1 are common.
match = 2
Expand All @@ -19,5 +19,11 @@ Here is some skeleton code to get you started:
sw = swalign.LocalAlignment(scoring) # you can also choose gap penalties, etc...
alignment = sw.align('ACACACTA','AGCACACA')
alignment.dump()
```

To compile this package with Cython for faster alignment, run the following command (Cython must be installed), then copy the resulting `.so` file to the desired Python library path.
```
python setup.py build_ext --inplace
```

For other uses, see the script in bin/swalign or https://compgen.io/projects/swalign
13 changes: 10 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,25 @@
# Packaging script for swalign.
#
# Builds the pure-Python package and, when Cython is available, additionally
# compiles swalign/__init__.py into an extension module for faster alignment.
from setuptools import setup, Extension

# Cython is only needed for the optional accelerated build; without it the
# package still installs as plain Python instead of failing at import time.
try:
    from Cython.Build import cythonize
except ImportError:
    cythonize = None

# README.md contains non-ASCII characters, so read it as UTF-8 explicitly
# rather than relying on the platform's default encoding.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

# Extension compiled from the package's own source file when cythonizing.
ext_modules = [
    Extension(
        r'swalign',
        [r'swalign/__init__.py']
    ),
]

setup(name='swalign',
      version='0.3.6',
      description='Smith-Waterman local aligner',
      long_description=long_description,
      long_description_content_type="text/markdown",
      author='Marcus Breese',
      author_email='marcus@breese.com',
      url='http://github.com/mbreese/swalign/',
      packages=['swalign'],
      scripts=['bin/swalign'],
      python_requires='>=3.1',

      # Only request the compiled extension when Cython could be imported.
      ext_modules=cythonize(ext_modules) if cythonize is not None else [],
      )
39 changes: 26 additions & 13 deletions swalign/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ def __init__(self, scoring_matrix, gap_penalty=-1, gap_extension_penalty=-1, gap
self.full_query = full_query

def align(self, ref, query, ref_name='', query_name='', rc=False):

orig_ref = ref
orig_query = query

Expand All @@ -122,38 +123,44 @@ def align(self, ref, query, ref_name='', query_name='', rc=False):

# calculate matrix
for row in range(1, matrix.rows):

# saving matrix values and re-using them across columns as locals improves performance
left = matrix.get(row, 0)
diag = matrix.get(row - 1, 0)
up = matrix.get(row - 1, 1)

for col in range(1, matrix.cols):
mm_val = matrix.get(row - 1, col - 1)[0] + self.scoring_matrix.score(query[row - 1], ref[col - 1], self.wildcard)
mm_val = diag[0] + self.scoring_matrix.score(query[row - 1], ref[col - 1], self.wildcard)

ins_run = 0
del_run = 0

if matrix.get(row - 1, col)[1] == 'i':
ins_run = matrix.get(row - 1, col)[2]
if matrix.get(row - 1, col)[0] == 0:
if up[1] == 'i':
ins_run = up[2]
if up[0] == 0:
# no penalty to start the alignment
ins_val = 0
else:
if not self.gap_extension_decay:
ins_val = matrix.get(row - 1, col)[0] + self.gap_extension_penalty
ins_val = up[0] + self.gap_extension_penalty
else:
ins_val = matrix.get(row - 1, col)[0] + min(0, self.gap_extension_penalty + ins_run * self.gap_extension_decay)
ins_val = up[0] + min(0, self.gap_extension_penalty + ins_run * self.gap_extension_decay)
else:
ins_val = matrix.get(row - 1, col)[0] + self.gap_penalty
ins_val = up[0] + self.gap_penalty

if matrix.get(row, col - 1)[1] == 'd':
del_run = matrix.get(row, col - 1)[2]
if matrix.get(row, col - 1)[0] == 0:
if left[1] == 'd':
del_run = left[2]
if left[0] == 0:
# no penalty to start the alignment
del_val = 0
else:
if not self.gap_extension_decay:
del_val = matrix.get(row, col - 1)[0] + self.gap_extension_penalty
del_val = left[0] + self.gap_extension_penalty
else:
del_val = matrix.get(row, col - 1)[0] + min(0, self.gap_extension_penalty + del_run * self.gap_extension_decay)
del_val = left[0] + min(0, self.gap_extension_penalty + del_run * self.gap_extension_decay)

else:
del_val = matrix.get(row, col - 1)[0] + self.gap_penalty
del_val = left[0] + self.gap_penalty

if self.globalalign or self.full_query:
cell_val = max(mm_val, del_val, ins_val)
Expand Down Expand Up @@ -184,6 +191,12 @@ def align(self, ref, query, ref_name='', query_name='', rc=False):

matrix.set(row, col, val)

# adjust temp values to reduce total matrix accesses
diag = up
left = val
if(col + 1 < matrix.cols):
up = matrix.get(row - 1, col + 1)

# backtrack
if self.globalalign:
# backtrack from last cell
Expand Down