Source code for sequana.restriction
#
# This file is part of Sequana software
#
# Copyright (c) 2016-2022 - Sequana Development Team
#
# Distributed under the terms of the 3-clause BSD license.
# The full license is in the LICENSE file, distributed with this software.
#
# website: https://github.com/sequana/sequana
# documentation: http://sequana.readthedocs.io
#
##############################################################################
# Define restriction enzymes and their recognition sites
# sources: https://www.neb.com/en/tools-and-resources/selection-charts/alphabetized-list-of-recognition-specificities
# https://en.wikipedia.org/wiki/List_of_restriction_enzyme_cutting_sites:_O%E2%80%93R
restriction_enzymes = {
"EcoRI": "GAATTC",
"BamHI": "GGATCC",
"HindIII": "AAGCTT",
"NotI": "GCGGCCGC",
"Sau3AI": "GATC",
"MluCI": "AATT",
}
# Function to find restriction sites
[docs]
def find_restriction_sites(sequence, enzymes):
"""Find restriction sites in a DNA sequence.
Args:
sequence (str): The DNA sequence.
enzymes (dict): Dictionary of enzyme names and recognition sequences.
Returns:
dict: A dictionary with enzyme names as keys and lists of start positions as values.
"""
sites = {}
sequence = sequence.upper() # Ensure case consistency
for enzyme, motif in enzymes.items():
positions = []
start = 0
while True:
start = sequence.find(motif, start)
if start == -1:
break
positions.append(start + 1) # Use 1-based indexing
start += 1 # Move to the next position
sites[enzyme] = positions
return sites