Source code for sequana.cpg_islands

def CpG(sequence, window=200):
    """Return CpG statistics for *sequence*.

    The sequence is handed unchanged to :func:`compute_cpg_content` and the
    dictionary it produces is returned as is.

    :param sequence: nucleotide sequence to analyse.
    :param window: window size; accepted for API compatibility but not used
        by this function at present.
    :return: whatever :func:`compute_cpg_content` returns for *sequence*.

    For comparison, The Sequence Manipulation Suite (CpG Islands) reports the
    following for a 1200 residue sequence "sample sequence" starting
    "taacatactt", searching with a window size of 200::

        Range, value
        32 to 231, the y-value is 1.75 and the %GC content is 50.5
        33 to 232, the y-value is 1.75 and the %GC content is 50.5

    Reference: Gardiner-Garden M, Frommer M. J Mol Biol. 1987 Jul
    20;196(2):261-82.
    """
    stats = compute_cpg_content(sequence)
    return stats
def compute_cpg_content(seq: str) -> dict:
    """Compute CpG dinucleotide statistics for a sequence.

    The sequence is upper-cased first, so the counts are case-insensitive.

    :param seq: nucleotide sequence as a string.
    :return: dictionary with three entries:

        * ``"CpG count"``: number of ``CG`` dinucleotides,
        * ``"Observed/Expected CpG"``: CpG count divided by the expected
          count ``(#C * #G) / len(seq)``; 0 when the expected count is 0,
        * ``"GC content"``: fraction of bases that are ``C`` or ``G``;
          0 for an empty sequence.
    """
    seq = seq.upper()
    length = len(seq)

    c_count = seq.count("C")
    g_count = seq.count("G")
    cg_count = seq.count("CG")

    # Every ratio is guarded so that an empty sequence (or one without any
    # C or G) yields zeros instead of raising ZeroDivisionError.
    expected_cg = (c_count * g_count) / length if length > 0 else 0
    obs_exp_ratio = cg_count / expected_cg if expected_cg > 0 else 0
    gc_content = (c_count + g_count) / length if length > 0 else 0

    return {
        "CpG count": cg_count,
        "Observed/Expected CpG": obs_exp_ratio,
        "GC content": gc_content,
    }