002 生信基础题
001 给定序列 'GATCCAGATCCCCATAC'，找出其中出现次数最多的 2-mer（长度为 2 的子串）及其出现次数。
# Method 1: list every overlapping 2-mer of t and report the most frequent one.
from collections import Counter

t = 'GATCCAGATCCCCATAC'
# All overlapping 2-mers of t, in order of appearance.
L = [t[i:i + 2] for i in range(len(t) - 1)]
# Tally once up front instead of calling L.count() for every comparison.
counts = Counter(L)
# max() keeps the first maximal item it sees, so scanning L from the end
# resolves ties in favour of the 2-mer that appears last in L.
x = max(reversed(L), key=counts.__getitem__)
# A single formatted string prints the same text on Python 2 and Python 3.
print('%s appeared %d times! It is the most frequent 2-mer.' % (x, counts[x]))
方法二:
def PatternCount(Pattern, Text):
    """Count the occurrences of Pattern in Text, overlapping ones included.

    Text is the sequence being searched; Pattern supplies both the string to
    look for and the width of the sliding window.

    Args:
        Pattern: The substring to look for.
        Text: The string to search in.

    Returns:
        The number of start positions i where Text[i:i + len(Pattern)]
        equals Pattern. This is 0 when Pattern is longer than Text, and
        len(Text) + 1 when Pattern is empty (an empty window matches at
        every position).
    """
    width = len(Pattern)
    # Only start positions where a full-width window still fits are visited.
    return sum(
        1
        for start in range(len(Text) - width + 1)
        if Text[start:start + width] == Pattern
    )
def CountDict(Text, k):
    """Map every k-mer start index in Text to that k-mer's occurrence count.

    Args:
        Text: The string to scan.
        k: Length of the substrings (k-mers) to count; must be >= 0.

    Returns:
        A dict with one entry per start index i in
        range(len(Text) - k + 1); the value is how many times the k-mer
        Text[i:i + k] occurs in Text, overlapping occurrences included.
        The dict is empty when k is larger than len(Text).

    Raises:
        ValueError: If k is negative, since a negative window length does
            not describe a k-mer.
    """
    from collections import Counter

    if k < 0:
        raise ValueError("k must be non-negative")
    kmers = [Text[i:i + k] for i in range(len(Text) - k + 1)]
    # One pass over the windows replaces a full rescan of Text per index.
    totals = Counter(kmers)
    return {i: totals[kmer] for i, kmer in enumerate(kmers)}
def FrequentWords(Text, k):
    """Return the most frequent k-mers of Text.

    Args:
        Text: The string to scan.
        k: Length of the substrings (k-mers) to consider; must be >= 0.

    Returns:
        A list of the k-mers that share the highest occurrence count
        (overlapping occurrences included). Each k-mer is listed once, in
        order of its first appearance in Text. The list is empty when no
        k-mer fits, i.e. when k is larger than len(Text).

    Raises:
        ValueError: If k is negative, since a negative window length does
            not describe a k-mer.
    """
    from collections import Counter

    if k < 0:
        raise ValueError("k must be non-negative")
    kmers = [Text[i:i + k] for i in range(len(Text) - k + 1)]
    if not kmers:
        # Nothing to rank; max() would raise on an empty sequence.
        return []
    totals = Counter(kmers)
    best = max(totals.values())
    FrequentPatterns = []
    reported = set()
    for kmer in kmers:
        # Report each top-scoring k-mer once, not once per occurrence.
        if totals[kmer] == best and kmer not in reported:
            reported.add(kmer)
            FrequentPatterns.append(kmer)
    return FrequentPatterns
import sys
# Demo for method 2: print the most frequent 2-mers of the sample sequence.
print(FrequentWords("GATCCAGATCCCCATAC", 2))
002 Reverse Complement Problem:
Find the reverse complement of a DNA string.
Input: A DNA string Pattern.
Output: The reverse complement of Pattern.
Sample Input:
AAAACCCGGT
Sample Output:
ACCGGGTTTT

浙公网安备 33010602011771号