27 February 2022

ROSALIND RNA Splicing solution - https://rosalind.info/problems/splc/

 import re
 # NULL is only used further down as an "unset" sentinel for the record being
 # parsed. asyncio.windows_events exists on Windows only and raises ImportError
 # everywhere else, so fall back to None to keep the script portable.
 try:
     from asyncio.windows_events import NULL
 except ImportError:
     NULL = None
 # Output file that receives the translated protein string.
 f = open("out.txt", "w")
 # Number of nucleotides per codon.
 condon_len = 3
 # Sample strand kept for manual experiments.
 test= "TCTCGAGAGGCATATGGTCACATGATCGGTCGAGCGTGTTTCAAAGTTTGCGCCTAG"
 # DNA codon -> one-letter amino acid ("STOP" for the three stop codons).
 # Codons are generated row by row (first base, then third base, then second
 # base, each in T/C/A/G order); the string lists the matching amino acids in
 # that same order with "*" standing for a stop codon.
 dna_map = {
     codon: ("STOP" if amino == "*" else amino)
     for codon, amino in zip(
         (first + second + third
          for first in "TCAG" for third in "TCAG" for second in "TCAG"),
         "FSYCFSYCLS**LS*W"
         "LPHRLPHRLPQRLPQR"
         "ITNSITNSITKRMTKR"
         "VADGVADGVAEGVAEG",
     )
 }
 def GetAA(condon):
     """Return the dna_map entry for a codon (an amino acid letter or "STOP").

     Raises KeyError when the codon is not a key of dna_map.
     """
     amino_acid = dna_map[condon]
     return amino_acid
 class Sequence:
     """A named DNA string plus the protein strings translated from it."""

     def __init__(self, name, seq):
         self.name, self.seq = name, seq
         # Protein strings appended by Translate().
         self.found = []

     def p(self):
         """Print the name, length and bases, then every translated protein."""
         header = "seq name:" + self.name + "len:" + str(len(self.seq))
         print(header)
         print("seq:" + self.seq)
         for protein in self.found:
             print(protein)

     def pAA(self):
         """Print the first translated protein and write it to the output file."""
         first = self.found[0]
         print(first)
         f.write(first)

     def Translate(self):
         """Translate seq codon by codon from position 0 and store the result.

         Translation stops at the first stop codon; an incomplete codon at the
         end of the string is ignored.
         """
         residues = []
         last_start = len(self.seq) - condon_len
         for start in range(0, last_start + 1, condon_len):
             residue = GetAA(self.seq[start:start + condon_len])
             if residue == "STOP":
                 break
             residues.append(residue)
         self.found.append("".join(residues))
 # Parse the FASTA input: the first record is the DNA strand, every following
 # record is an intron to cut out of it.
 records = []
 s = None  # record currently being read; None until the first header is seen
 with open("rosalind_splc.txt", "r") as input_file:
     for l in input_file:
         l = l.rstrip("\r\n")
         if not l:
             continue
         if l.startswith(">"):
             if s is not None:
                 records.append(s)
             s = Sequence(l, '')
         elif s is not None:
             s.seq = s.seq + l
 if s is not None:
     records.append(s)
 if not records:
     raise ValueError("rosalind_splc.txt contains no FASTA records")
 # Splice: delete each intron from the strand, then translate what is left.
 dna = records[0]
 for intron in records[1:]:
     dna.seq = dna.seq.replace(intron.seq, '')
 dna.Translate()
 dna.pAA()
 f.close()

26 February 2022

ROSALIND - Locating Restriction Sites solution to https://rosalind.info/problems/revp/

 # Nucleotide -> complementary nucleotide (A<->T, G<->C).
 c_map = dict(zip("AGCT", "TCGA"))
 def GetComplement(seq):
     """Return seq with every base replaced by its c_map complement.

     Raises KeyError for a character that is not a key of c_map.
     """
     complemented = []
     for base in seq:
         complemented.append(c_map[base])
     return ''.join(complemented)
 def GetRevComplement(seq):
     """Return the reverse complement of seq: complement it, then reverse."""
     return ''.join(reversed(GetComplement(seq)))
 # Read the DNA string, skipping FASTA header lines.
 with open("rosalind_revp.txt", "r") as input_file:
     seq = "".join(l.strip() for l in input_file if not l.startswith(">"))
 seq_len = len(seq)
 # Half-lengths to try: a reverse palindrome of length 2*i is a window whose
 # first half equals the reverse complement of its second half.
 start_len = 2
 end_len = 6
 # (position, length) pairs. A list is required here: one position can start
 # palindromes of several lengths (e.g. "ATATAT" holds lengths 4 and 6), and a
 # dict keyed by position would keep only the last one found.
 found_list = []
 for i in range(start_len, end_len + 1):
     for j in range(0, seq_len - (i * 2) + 1):
         first_half = seq[j:j + i]
         second_half = seq[j + i:j + i * 2]
         if first_half == GetRevComplement(second_half):
             found_list.append((j + 1, i * 2))  # positions are 1-based
 sorted_found_list = sorted(found_list)
 with open("out.txt", "w") as f:
     for key, value in sorted_found_list:
         print(str(key) + " " + str(value))
         f.write(str(key) + " " + str(value) + '\n')

25 February 2022

Enumerating Gene Orders - https://rosalind.info/problems/perm/

 from itertools import permutations  
 import math  
 # Permute the integers s .. s+num-1.
 num = 5
 s = 1
 e = s + num
 # Number of permutations, reported on the first output line.
 p = math.factorial(num)
 l = list(permutations(range(s, e)))
 with open("out.txt", "w") as f:
     print(p)
     f.write(str(p) + '\n')
     for ordering in l:
         # Every value is followed by a single space, including the last one.
         row = ''.join(str(value) + ' ' for value in ordering)
         print(row)
         f.write(row + '\n')

Calculating Protein Mass - https://rosalind.info/problems/prtm/

 # Mass table: one-letter amino acid code -> mass added per residue.
 mmt = dict(
     A=71.03711,
     C=103.00919,
     D=115.02694,
     E=129.04259,
     F=147.06841,
     G=57.02146,
     H=137.05891,
     I=113.08406,
     K=128.09496,
     L=113.08406,
     M=131.04049,
     N=114.04293,
     P=97.05276,
     Q=128.05858,
     R=156.10111,
     S=87.03203,
     T=101.04768,
     V=99.06841,
     W=186.07931,
     Y=163.06333,
 )
 # Read the protein string. All whitespace is dropped (not just "\n") so CRLF
 # line endings or trailing blanks do not raise KeyError in the mass lookup.
 with open("rosalind_prtm.txt", "r") as input_file:
     AA = "".join(input_file.read().split())
 # Total mass is the sum of the per-residue masses.
 print(sum(mmt[x] for x in AA))

Answer for ROSALIND Open Reading Frames https://rosalind.info/problems/orf/

 import re  
 # Number of nucleotides per codon.
 condon_len = 3
 # Nucleotide -> complementary nucleotide.
 c_map = dict(A="T", G="C", C="G", T="A")
 # DNA codon -> one-letter amino acid, with "STOP" for the stop codons.
 # Keys are produced in table-row order (first base, third base, second base,
 # each running over T, C, A, G); "*" in the value string marks a stop codon.
 dna_map = dict(zip(
     (first + second + third
      for first in "TCAG" for third in "TCAG" for second in "TCAG"),
     ("STOP" if symbol == "*" else symbol
      for symbol in ("FSYCFSYCLS**LS*W"
                     "LPHRLPHRLPQRLPQR"
                     "ITNSITNSITKRMTKR"
                     "VADGVADGVAEGVAEG")),
 ))
 # Protein strings collected by GetAASeq (duplicates are removed when printing).
 found = list()
 # FASTA input holding the DNA string to scan.
 input_file = open("rosalind_orf.txt")
 def GetComplement(seq):
     """Return seq with each base swapped for its complement from c_map.

     Raises KeyError for a character that is not a key of c_map.
     """
     return ''.join(c_map[base] for base in seq)
 def GetAA(condon):
     """Translate one DNA codon via dna_map; the result may be "STOP".

     Raises KeyError when the codon is not a key of dna_map.
     """
     translated = dna_map[condon]
     return translated
 def GetAASeq(dna_seq, pos):
     """Translate the reading frame of dna_seq that starts at index pos.

     Codons are read from pos until a stop codon is met. On success the
     protein string is appended to the module-level ``found`` list and
     returned. If the strand ends before a stop codon is reached, nothing is
     recorded and "" is returned.
     """
     dna_seq_len = len(dna_seq)
     residues = []
     start_pos = pos
     end_pos = pos + condon_len
     # end_pos is an exclusive slice bound, so a codon is complete as long as
     # end_pos <= len(dna_seq). This also accepts a stop codon that occupies
     # the final three bases of the strand.
     while end_pos <= dna_seq_len:
         AA = GetAA(dna_seq[start_pos:end_pos])
         if AA == "STOP":
             seq = "".join(residues)
             found.append(seq)
             return seq
         residues.append(AA)
         start_pos += condon_len
         end_pos += condon_len
     return ""  # end of seq reached without finding a stop codon
 # Join the sequence lines of the FASTA input, skipping header lines.
 lines = input_file.readlines()
 dna_seq = "".join(
     row.replace("\n", "").replace("\r", "") for row in lines if row[0] != ">"
 )
 # Reverse complement, so that both strands are scanned.
 r_dna_seq = GetComplement(dna_seq[::-1])
 seq_regex = "ATG"
 p = re.compile(seq_regex)
 # Try a translation from every start codon on each strand; GetAASeq records
 # the successful ones in found.
 for strand in (dna_seq, r_dna_seq):
     for m in p.finditer(strand):
         GetAASeq(strand, m.start())
 # Print each distinct protein once.
 for s in set(found):
     print(s)

17 February 2022

ROSALIND - Inferring mRNA from Protein

Answer to https://rosalind.info/problems/mrna/
 def GetMRna(AA):
     """Return the set of aa_map codons whose value equals AA."""
     codons = set()
     for codon, residue in aa_map.items():
         if residue == AA:
             codons.add(codon)
     return codons
 # Read the protein string. All whitespace is removed, not only "\n": a stray
 # "\r" or blank matches no codon, which would silently turn the final product
 # into 0.
 with open("rosalind_mrna.txt", "r") as input_file:
     aa_seq = "".join(input_file.read().split())
 print(aa_seq)
 # RNA codon -> one-letter amino acid, with "STOP" for the stop codons.
 # Codons are generated row by row (first base, third base, second base, each
 # in U/C/A/G order); "*" in the amino-acid string marks a stop codon.
 aa_map = {
     first + second + third: ("STOP" if amino == "*" else amino)
     for (first, third, second), amino in zip(
         ((a, c, b) for a in "UCAG" for c in "UCAG" for b in "UCAG"),
         "FSYCFSYCLS**LS*W"
         "LPHRLPHRLPQRLPQR"
         "ITNSITNSITKRMTKR"
         "VADGVADGVAEGVAEG",
     )
 }
 condon_len = []
 # Running product of codon choices; it starts at 3 for the three stop codons.
 stop_condons_len = 3
 aa_seq_len = len(aa_seq)
 # Multiply in the number of codons that encode each residue.
 for residue in aa_seq:
     stop_condons_len *= len(GetMRna(residue))
 # The answer is reported modulo 1,000,000.
 print("Answer:" + str(stop_condons_len % 1000000) )

06 February 2022

Rosalind - Answer to Finding a Protein Motif

Answer to Finding a Protein Motif https://rosalind.info/problems/mprt/
 import urllib.request  
 import re  
 from numpy import mat  
 #N-glycosylation motif: N, any residue except P, S or T, any residue except P
 MotifRegex = "N[^P][ST][^P]"
 #Results file: written by Sequence.PrintOutcome, closed at the end of the script
 file_out = open("output.txt", mode="w")
 #Class to process protein sequence
 class Sequence:
     """A named protein sequence that can be searched for a regex motif."""

     def __init__(self, name, seq):
         self.name = name
         self.seq = seq

     def FindReg(self, reg):
         """Return the 1-based start positions of every match of reg in seq.

         reg is a regular-expression string. Matches may overlap. The
         positions are returned as one space-separated string, which is empty
         when nothing matches.
         """
         # A zero-width lookahead lets the search resume one character after
         # each hit, so overlapping occurrences are reported too.
         pattern = re.compile('(?={0})'.format(reg))
         return " ".join(
             str(match.start() + 1) for match in pattern.finditer(self.seq)
         )

     def PrintOutcome(self):
         """Print and write the name and motif positions, if any were found."""
         match_string = self.FindReg(MotifRegex)
         if match_string:
             print(self.name)
             print(match_string)
             file_out.write(self.name + '\n')
             file_out.write(match_string + '\n')
 #Get protein sequence from https://www.uniprot.org/uniprot/
 def GetProtein(name):
     """Download the FASTA record for a UniProt ID and return it as a Sequence.

     The returned Sequence carries the given name and the concatenated
     sequence lines of the response. If the response holds no FASTA header,
     the sequence is built from whatever lines were returned (possibly none)
     instead of failing on an unbound variable.

     Errors raised by urllib.request.urlopen are propagated unchanged.
     """
     # NOTE(review): UniProt has reorganised its web endpoints; confirm that
     # this legacy URL form is still served.
     url_base = "https://www.uniprot.org/uniprot/"
     url_suffix = ".fasta"
     url = url_base + name + url_suffix
     with urllib.request.urlopen(url) as response:
         lines = response.read().decode().split("\n")
     # Created up front so sequence lines always have somewhere to go.
     protein = Sequence(name, "")
     for line in lines:
         line = line.rstrip("\r")
         if len(line) > 1 and line[0] == ">":
             # A header starts a new record: drop anything collected so far.
             protein = Sequence(name, "")
         else:
             protein.seq = protein.seq + line
     return protein
 ProteinList = []
 #input_file = open("input.txt", "r")
 with open("rosalind_mprt.txt", "r") as input_file:
     for line in input_file:
         protein_id = line.strip()
         # A blank line would otherwise trigger a request for an empty ID.
         if not protein_id:
             continue
         Protein = GetProtein(protein_id)
         ProteinList.append(Protein)
 # Report after all downloads so the output follows the input order.
 for Protein in ProteinList:
     Protein.PrintOutcome()
 file_out.close()