import re
from asyncio.windows_events import NULL
# Output file for the translated protein; Sequence.pAA writes to it and the
# script closes it after the result has been written.
f = open("out.txt", "w")
# Number of bases per codon; used as the step size when walking a sequence.
condon_len = 3
# Sample DNA string; not referenced anywhere else in the visible source.
test= "TCTCGAGAGGCATATGGTCACATGATCGGTCGAGCGTGTTTCAAAGTTTGCGCCTAG"
# DNA codon -> one-letter amino-acid code. The three stop codons
# (TAA, TGA, TAG) map to the marker string "STOP" instead of a letter.
dna_map = {
"TTT":"F", "TCT":"S", "TAT":"Y", "TGT":"C",
"TTC":"F", "TCC":"S", "TAC":"Y", "TGC":"C",
"TTA":"L", "TCA":"S", "TAA":"STOP", "TGA":"STOP",
"TTG":"L", "TCG":"S", "TAG":"STOP", "TGG":"W",
"CTT":"L", "CCT":"P", "CAT":"H", "CGT":"R",
"CTC":"L", "CCC":"P", "CAC":"H", "CGC":"R",
"CTA":"L", "CCA":"P", "CAA":"Q", "CGA":"R",
"CTG":"L", "CCG":"P", "CAG":"Q", "CGG":"R",
"ATT":"I", "ACT":"T", "AAT":"N", "AGT":"S",
"ATC":"I", "ACC":"T", "AAC":"N", "AGC":"S",
"ATA":"I", "ACA":"T", "AAA":"K", "AGA":"R",
"ATG":"M", "ACG":"T", "AAG":"K", "AGG":"R",
"GTT":"V", "GCT":"A", "GAT":"D", "GGT":"G",
"GTC":"V", "GCC":"A", "GAC":"D", "GGC":"G",
"GTA":"V", "GCA":"A", "GAA":"E", "GGA":"G",
"GTG":"V", "GCG":"A", "GAG":"E", "GGG":"G"
}
def GetAA(condon):
    """Look up one DNA codon in ``dna_map``.

    Returns the table entry for ``condon`` (an amino-acid letter or the
    string ``"STOP"``). Raises ``KeyError`` when ``condon`` is not a key of
    the table.
    """
    amino_acid = dna_map[condon]
    return amino_acid
class Sequence:
    """A named DNA string together with the protein strings translated from it."""

    def __init__(self, name, seq):
        """Store the record name and its sequence; start with no translations."""
        self.name = name
        self.seq = seq
        self.found = []

    def p(self):
        """Print the name, the length, the raw sequence and every stored translation."""
        summary = "seq name:" + self.name + "len:" + str(len(self.seq))
        print(summary)
        print("seq:" + self.seq)
        for translated in self.found:
            print(translated)

    def pAA(self):
        """Print the first stored translation and write it to the output file ``f``.

        Raises ``IndexError`` when nothing has been translated yet.
        """
        first = self.found[0]
        print(first)
        f.write(first)

    def Translate(self):
        """Translate ``seq`` from offset 0, one codon at a time.

        Translation stops at the first stop codon or when fewer than
        ``condon_len`` bases remain. The resulting protein string (possibly
        empty) is appended to ``found``.
        """
        residues = []
        last_start = len(self.seq) - condon_len
        for start in range(0, last_start + 1, condon_len):
            residue = GetAA(self.seq[start:start + condon_len])
            if residue == "STOP":
                break
            residues.append(residue)
        self.found.append("".join(residues))
# RNA-splicing driver: the first FASTA record is the DNA string, every later
# record is an intron that is cut out of it before translation.
try:
    # Read everything up front so the input handle is closed promptly.
    with open("rosalind_splc.txt", "r") as input_file:
        lines = input_file.readlines()

    # `records` replaces the old module-level name `list`, which shadowed the
    # builtin. `None` replaces the Windows-only `NULL` sentinel.
    records = []
    s = None
    for l in lines:
        l = l.rstrip("\r\n")
        if not l:
            continue  # blank lines carry no data
        if l[0] == ">":
            # A new header finishes the record collected so far.
            if s is not None:
                records.append(s)
            s = Sequence(l, '')
        elif s is None:
            raise ValueError("sequence data found before the first FASTA header")
        else:
            s.seq = s.seq + l
    if s is not None:
        records.append(s)
    if not records:
        raise ValueError("rosalind_splc.txt contains no FASTA records")

    dna, introns = records[0], records[1:]
    for intron in introns:
        # Removes every occurrence of the intron from the DNA string.
        dna.seq = dna.seq.replace(intron.seq, '')
    dna.Translate()
    dna.pAA()
finally:
    # `f` is the output file opened at the top of the script.
    f.close()
27 February 2022
ROSALIND RNA Splicing solution - https://rosalind.info/problems/splc/
26 February 2022
ROSALIND - Locating Restriction Sites solution to https://rosalind.info/problems/revp/
# DNA base -> complementary base (A<->T, G<->C).
c_map = {"A": "T", "G": "C", "C": "G", "T": "A"}


def GetComplement(seq):
    """Return ``seq`` with every base replaced by its partner in ``c_map``.

    The order of the input is kept (the result is not reversed). A character
    that is not a key of ``c_map`` raises ``KeyError``.
    """
    return ''.join(map(c_map.__getitem__, seq))
def GetRevComplement(seq):
    """Return the reverse complement of ``seq``.

    The sequence is complemented with ``GetComplement`` and the result is
    then reversed.
    """
    return GetComplement(seq)[::-1]
# Restriction-site driver: report every position/length of a reverse
# palindrome whose total length is between 2*start_len and 2*end_len.
with open("rosalind_revp.txt", "r") as input_file:
    lines = input_file.readlines()

# Concatenate all non-header lines into one DNA string.
seq = "".join(
    l.replace("\n", "").replace("\r", "")
    for l in lines
    if not l.startswith(">")
)
seq_len = len(seq)
start_len = 2  # half-length of the shortest palindrome (total length 4)
end_len = 6    # half-length of the longest palindrome (total length 12)

# A list of (position, length) pairs. The previous dict keyed by position
# kept only one length per position, silently dropping shorter palindromes
# that start where a longer one also starts.
found_list = []
for i in range(start_len, end_len + 1):
    for j in range(0, seq_len - (i * 2) + 1):
        first_half = seq[j:j + i]
        second_half = seq[j + i:j + i * 2]
        # The window is a reverse palindrome when its first half equals the
        # reverse complement of its second half.
        if first_half == GetRevComplement(second_half):
            found_list.append((j + 1, i * 2))  # positions are 1-based

sorted_found_list = sorted(found_list)
with open("out.txt", "w") as f:
    for key, value in sorted_found_list:
        print(str(key) + " " + str(value))
        f.write(str(key) + " " + str(value) + '\n')
25 February 2022
Enumerating Gene Orders - https://rosalind.info/problems/perm/
from itertools import permutations
import math
# Gene-order driver: print the number of permutations of s..s+num-1 followed
# by every permutation, mirroring the output into out.txt.
num = 5
s = 1
e = s + num
p = math.factorial(num)
l = list(permutations(range(s, e)))

# The context manager closes the file even if printing fails part-way.
with open("out.txt", "w") as f:
    print(p)
    f.write(str(p) + '\n')
    for ordering in l:
        # Each value is followed by a single space, as in the original output.
        row = ''.join(str(value) + ' ' for value in ordering)
        print(row)
        f.write(row + '\n')
Calculating Protein Mass - https://rosalind.info/problems/prtm/
# One-letter amino-acid code -> mass value used by the protein-mass sum.
# NOTE(review): presumably the Rosalind monoisotopic mass table — confirm.
mmt = {
"A":71.03711,
"C":103.00919,
"D":115.02694,
"E":129.04259,
"F":147.06841,
"G":57.02146,
"H":137.05891,
"I":113.08406,
"K":128.09496,
"L":113.08406,
"M":131.04049,
"N":114.04293,
"P":97.05276,
"Q":128.05858,
"R":156.10111,
"S":87.03203,
"T":101.04768,
"V":99.06841,
"W":186.07931,
"Y":163.06333 }
# Protein-mass driver: sum the table mass of every residue in the input.
with open("rosalind_prtm.txt", "r") as input_file:
    # Drop all whitespace (including "\r" from CRLF files), not just "\n",
    # so stray characters do not raise KeyError in the lookup below.
    AA = "".join(input_file.read().split())
print(sum([mmt[x] for x in AA]))
Answer for ROSALIND Open Reading Frames https://rosalind.info/problems/orf/
import re
# Number of bases per codon; GetAASeq advances by this many bases per step.
condon_len = 3
# DNA base -> complementary base.
c_map = {
"A":"T",
"G":"C",
"C":"G",
"T":"A"
}
# DNA codon -> one-letter amino-acid code. The three stop codons
# (TAA, TGA, TAG) map to the marker string "STOP" instead of a letter.
dna_map = {
"TTT":"F", "TCT":"S", "TAT":"Y", "TGT":"C",
"TTC":"F", "TCC":"S", "TAC":"Y", "TGC":"C",
"TTA":"L", "TCA":"S", "TAA":"STOP", "TGA":"STOP",
"TTG":"L", "TCG":"S", "TAG":"STOP", "TGG":"W",
"CTT":"L", "CCT":"P", "CAT":"H", "CGT":"R",
"CTC":"L", "CCC":"P", "CAC":"H", "CGC":"R",
"CTA":"L", "CCA":"P", "CAA":"Q", "CGA":"R",
"CTG":"L", "CCG":"P", "CAG":"Q", "CGG":"R",
"ATT":"I", "ACT":"T", "AAT":"N", "AGT":"S",
"ATC":"I", "ACC":"T", "AAC":"N", "AGC":"S",
"ATA":"I", "ACA":"T", "AAA":"K", "AGA":"R",
"ATG":"M", "ACG":"T", "AAG":"K", "AGG":"R",
"GTT":"V", "GCT":"A", "GAT":"D", "GGT":"G",
"GTC":"V", "GCC":"A", "GAC":"D", "GGC":"G",
"GTA":"V", "GCA":"A", "GAA":"E", "GGA":"G",
"GTG":"V", "GCG":"A", "GAG":"E", "GGG":"G"
}
# Protein strings collected by GetAASeq; duplicates are removed when printing.
found = []
# FASTA input for the open-reading-frame search; read further down the script.
input_file = open("rosalind_orf.txt", "r")
def GetComplement(seq):
    """Return ``seq`` with each base replaced by its ``c_map`` partner.

    Input order is preserved; a character missing from ``c_map`` raises
    ``KeyError``.
    """
    complemented = (c_map[base] for base in seq)
    return ''.join(complemented)
def GetAA(condon):
    """Return the ``dna_map`` entry for ``condon``.

    The entry is an amino-acid letter or the string ``"STOP"``. Raises
    ``KeyError`` when the codon is not in the table.
    """
    translated = dna_map[condon]
    return translated
def GetAASeq(dna_seq, pos):
    """Translate the reading frame that starts at ``pos`` up to a stop codon.

    Args:
        dna_seq: DNA string to read codons from.
        pos: 0-based index of the first base of the first codon.

    Returns:
        The protein string for the frame when a stop codon is reached; that
        string is also appended to the module-level ``found`` list. Returns
        ``""`` (and records nothing) when the sequence ends before any stop
        codon is found.
    """
    residues = []
    # The last usable codon starts at len - condon_len. The earlier check
    # `end_pos > len - 1` was off by one and rejected a stop codon sitting in
    # the final three bases, so that reading frame was lost.
    last_start = len(dna_seq) - condon_len
    for start in range(pos, last_start + 1, condon_len):
        AA = GetAA(dna_seq[start:start + condon_len])
        if AA == "STOP":
            seq = "".join(residues)
            found.append(seq)
            return seq
        residues.append(AA)
    return ""  # end of seq reached without finding stop condon
# Open-reading-frame driver: translate from every ATG on both strands and
# print each distinct protein once.
lines = input_file.readlines()
# Join all non-header lines into one DNA string, dropping line terminators.
dna_seq = "".join(
    l.replace("\n", "").replace("\r", "") for l in lines if l[0] != ">"
)
# Reverse complement: reverse first, then complement each base.
r_dna_seq = GetComplement(dna_seq[::-1])
seq_regex = "ATG"
p = re.compile(seq_regex)
# Forward strand first, then the reverse complement.
for strand in (dna_seq, r_dna_seq):
    for m in p.finditer(strand):
        GetAASeq(strand, m.start())
for s in set(found):
    print(s)
17 February 2022
ROSALIND - Inferring mRNA from Protein
Answer to https://rosalind.info/problems/mrna/
def GetMRna(AA):
    """Return the set of ``aa_map`` codons whose table value equals ``AA``.

    The set is empty when no codon in the table maps to ``AA``.
    """
    matching = set()
    for codon, residue in aa_map.items():
        if residue == AA:
            matching.add(codon)
    return matching
# mRNA-count driver: count the RNA strings that could translate to the input
# protein, modulo 1,000,000.
with open("rosalind_mrna.txt", "r") as input_file:
    # Remove all whitespace, not only "\n": a stray "\r" or space matches no
    # codon and would silently force the answer to 0.
    aa_seq = "".join(input_file.read().split())
print(aa_seq)
# RNA codon -> one-letter amino-acid code; stop codons map to "STOP".
aa_map = {
    "UUU":"F", "UCU":"S", "UAU":"Y", "UGU":"C",
    "UUC":"F", "UCC":"S", "UAC":"Y", "UGC":"C",
    "UUA":"L", "UCA":"S", "UAA":"STOP", "UGA":"STOP",
    "UUG":"L", "UCG":"S", "UAG":"STOP", "UGG":"W",
    "CUU":"L", "CCU":"P", "CAU":"H", "CGU":"R",
    "CUC":"L", "CCC":"P", "CAC":"H", "CGC":"R",
    "CUA":"L", "CCA":"P", "CAA":"Q", "CGA":"R",
    "CUG":"L", "CCG":"P", "CAG":"Q", "CGG":"R",
    "AUU":"I", "ACU":"T", "AAU":"N", "AGU":"S",
    "AUC":"I", "ACC":"T", "AAC":"N", "AGC":"S",
    "AUA":"I", "ACA":"T", "AAA":"K", "AGA":"R",
    "AUG":"M", "ACG":"T", "AAG":"K", "AGG":"R",
    "GUU":"V", "GCU":"A", "GAU":"D", "GGU":"G",
    "GUC":"V", "GCC":"A", "GAC":"D", "GGC":"G",
    "GUA":"V", "GCA":"A", "GAA":"E", "GGA":"G",
    "GUG":"V", "GCG":"A", "GAG":"E", "GGG":"G",
}
# Starts at 3 because any of the three stop codons can terminate the mRNA.
stop_condons_len = 3
aa_seq_len = len(aa_seq)
# Number of codons per residue, looked up once per distinct letter instead of
# rescanning the whole table for every position.
codon_counts = {}
for residue in aa_seq:
    if residue not in codon_counts:
        codon_counts[residue] = len(GetMRna(residue))
    # Reduce at every step so the running product stays small.
    stop_condons_len = (stop_condons_len * codon_counts[residue]) % 1000000
print("Answer:" + str(stop_condons_len % 1000000) )
06 February 2022
Rosalind - Answer to Finding a Protein Motif
Answer to Finding a Protein Motif https://rosalind.info/problems/mprt/
import urllib.request
import re
from numpy import mat
# N-glycosylation motif as a regex: N, then any character except P, then
# S or T, then any character except P.
MotifRegex = "N[^P][ST][^P]"
# Results file; Sequence.PrintOutcome writes to it and the script closes it
# after all proteins have been processed.
file_out = open("output.txt", "w")
#Class to process protein sequence
class Sequence:
    """A named protein sequence that can be searched for a motif."""

    def __init__(self, name, seq):
        """Store the protein identifier and its amino-acid string."""
        self.name = name
        self.seq = seq

    def FindReg(self, reg):
        """Return the 1-based start positions where ``reg`` matches ``seq``.

        Args:
            reg: regular-expression source to search for. Previously this
                argument was ignored and the global ``MotifRegex`` was always
                used; it is now honoured.

        Returns:
            The positions joined by single spaces, or ``""`` when there is
            no match.
        """
        # Wrapping the pattern in a lookahead makes each match zero-width, so
        # overlapping occurrences are all reported.
        pattern = re.compile('(?={0})'.format(reg))
        return " ".join(str(match.start() + 1) for match in pattern.finditer(self.seq))

    def PrintOutcome(self):
        """Print and write the name and motif positions, if any were found.

        Nothing is printed or written for a protein without a match. Output
        goes to stdout and to the module-level ``file_out``.
        """
        match_string = self.FindReg(MotifRegex)
        if match_string:
            print(self.name)
            print(match_string)
            file_out.write(self.name + '\n')
            file_out.write(match_string + '\n')
#Get protein sequence from https://www.uniprot.org/uniprot/
def GetProtein(name):
    """Download the FASTA record for ``name`` and return it as a ``Sequence``.

    Args:
        name: identifier appended to the UniProt base URL; it also becomes
            the ``name`` of the returned ``Sequence``.

    Returns:
        A ``Sequence`` whose ``seq`` is the concatenation of the lines that
        follow the FASTA header. If the response holds several headers, the
        last record wins.

    Raises:
        ValueError: the response contains no FASTA header line.
        urllib.error.URLError: the request itself failed.
    """
    url_base = "https://www.uniprot.org/uniprot/"
    url_suffix = ".fasta"
    url = url_base + name + url_suffix
    with urllib.request.urlopen(url) as response:
        text = response.read().decode()

    protein = None
    # splitlines() also removes "\r", which split("\n") left in the sequence.
    for line in text.splitlines():
        if line.startswith(">"):
            protein = Sequence(name, "")
        elif protein is not None:
            protein.seq = protein.seq + line
    if protein is None:
        # Previously this surfaced as an UnboundLocalError.
        raise ValueError("no FASTA header in response for " + repr(name))
    return protein
# Protein-motif driver: fetch every protein listed in the input file, then
# report the motif positions for each one that has a match.
ProteinList = []
try:
    with open("rosalind_mprt.txt", "r") as input_file:
        lines = input_file.readlines()
    for line in lines:
        protein_id = line.strip()
        if not protein_id:
            continue  # a blank line would otherwise trigger a bogus download
        Protein = GetProtein(protein_id)
        ProteinList.append(Protein)
    for Protein in ProteinList:
        Protein.PrintOutcome()
finally:
    # Close the results file even when a download or lookup fails.
    file_out.close()
Subscribe to:
Posts (Atom)