06 February 2022

Rosalind - Answer to Finding a Protein Motif

Answer to Finding a Protein Motif https://rosalind.info/problems/mprt/
 import urllib.request  
 import re  
 from numpy import mat  
 #N-glycosylation motif: N, then any residue except P, then S or T, then any residue except P
 MotifRegex = "N[^P][ST][^P]"
 #Results file shared by the whole script; Sequence.PrintOutcome writes to it
 file_out = open("output.txt", mode="w")
 #Class to process protein sequence
 class Sequence:
   """A named protein sequence that can be scanned for a regex motif."""

   def __init__(self, name, seq):
     """Store the identifier `name` and the residue string `seq`."""
     self.name = name
     self.seq = seq

   def FindReg(self, reg):
     """Return the 1-based start positions of every occurrence of `reg`.

     `reg` is a regular-expression pattern string. Positions are joined
     with single spaces; an empty string is returned when nothing matches.
     """
     # Wrapping the pattern in a zero-width lookahead makes finditer report
     # overlapping occurrences too (a plain match would consume the residues
     # and skip any motif starting inside the previous one).
     lookahead = re.compile('(?={0})'.format(reg))
     # +1 converts the 0-based match offset to a 1-based position.
     return " ".join(str(m.start() + 1) for m in lookahead.finditer(self.seq))

   def PrintOutcome(self):
     """Print the name and motif positions, and append both to `file_out`.

     Nothing is printed or written when the sequence has no match for the
     module-level `MotifRegex`.
     """
     match_string = self.FindReg(MotifRegex)
     if match_string:
       print(self.name)
       print(match_string)
       file_out.write(self.name + '\n')
       file_out.write(match_string + '\n')
 #Get protein sequence from https://www.uniprot.org/uniprot/
 def GetProtein(name):
   """Download the FASTA record for `name` and return it as a Sequence.

   The URL is built as "https://www.uniprot.org/uniprot/<name>.fasta".
   The returned Sequence keeps `name` as its identifier and holds the
   residue lines following the header, concatenated into one string. If
   the response contains several header lines, only the residues after
   the last one are kept.

   Raises:
     ValueError: if the response contains no FASTA header line.
     urllib.error.URLError: propagated from urlopen on network/HTTP failure.
   """
   url_base = "https://www.uniprot.org/uniprot/"
   url_suffix = ".fasta"
   url = url_base + name + url_suffix
   with urllib.request.urlopen(url) as response:
     text = response.read().decode()
   protein = None
   residues = []
   # splitlines() + strip() cope with CRLF line endings and stray whitespace,
   # which would otherwise end up inside the residue string.
   for line in text.splitlines():
     line = line.strip()
     if line.startswith(">"):
       # A header starts a (new) record: discard anything gathered so far.
       protein = Sequence(name, "")
       residues = []
     elif line:
       residues.append(line)
   if protein is None:
     # Without this check a header-less response surfaced as an opaque
     # UnboundLocalError on the return statement.
     raise ValueError("No FASTA header found in response for " + repr(name))
   protein.seq = "".join(residues)
   return protein
 #Driver: read one protein ID per line, download each record, report motif positions
 ProteinList = []
 #Use "input.txt" instead of "rosalind_mprt.txt" to run against a local sample
 try:
   with open("rosalind_mprt.txt", "r") as input_file:
     for line in input_file:
       # strip() drops the newline (and any "\r"); blank lines are skipped so
       # they do not turn into a request for an empty ID.
       protein_id = line.strip()
       if not protein_id:
         continue
       ProteinList.append(GetProtein(protein_id))
   for Protein in ProteinList:
     Protein.PrintOutcome()
 finally:
   # Close the results file even when a download or parse step fails, so
   # whatever was already written is flushed to disk.
   file_out.close()

No comments:

Post a Comment