import urllib.request
import re
from numpy import mat
# N-glycosylation motif: N, then any residue except P, then S or T,
# then any residue except P.
MotifRegex = "N[^P][ST][^P]"
# Results file shared by the rest of the script. Opening in "w" mode
# truncates any existing output.txt; the handle is closed at the end of
# the script.
file_out = open("output.txt", "w")
# Class to process protein sequence
class Sequence:
    """A named protein sequence that can be scanned for a regex motif.

    Attributes:
        name: Identifier the sequence is reported under.
        seq: The residue string that FindReg searches.
    """

    def __init__(self, name, seq):
        self.name = name
        self.seq = seq

    def FindReg(self, reg):
        """Return the 1-based start positions of every match of *reg*.

        Args:
            reg: Regular-expression source for the motif to look for.

        Returns:
            The positions joined by single spaces, in ascending order, or
            an empty string when the motif does not occur in ``self.seq``.
        """
        # Wrapping the pattern in a zero-width lookahead makes finditer
        # advance one character at a time, so overlapping occurrences are
        # all reported instead of only the non-overlapping ones.
        lookahead = '(?={0})'.format(reg)
        return " ".join(
            str(match.start() + 1)
            for match in re.finditer(lookahead, self.seq)
        )

    def PrintOutcome(self):
        """Report the motif positions for this sequence, if there are any.

        Searches with the module-level ``MotifRegex``. When at least one
        match exists, the name and the position list are printed to stdout
        and written (one per line) to the module-level ``file_out``.
        Sequences without a match produce no output at all.
        """
        match_string = self.FindReg(MotifRegex)
        if match_string:
            print(self.name)
            print(match_string)
            file_out.write(self.name + '\n')
            file_out.write(match_string + '\n')
# Get protein sequence from https://www.uniprot.org/uniprot/
def GetProtein(name):
    """Download the FASTA record for *name* and return it as a Sequence.

    Args:
        name: Identifier appended to the UniProt base URL, followed by
            ".fasta", to form the download address.

    Returns:
        A Sequence whose ``name`` is the identifier passed in and whose
        ``seq`` is the concatenation of all non-header lines of the
        response. If the response contains no residue lines (for example
        an empty body), ``seq`` is the empty string.

    Any exception raised by ``urllib.request.urlopen`` or by decoding the
    response propagates to the caller unchanged.
    """
    url = "https://www.uniprot.org/uniprot/" + name + ".fasta"
    with urllib.request.urlopen(url) as response:
        fasta_text = response.read().decode()

    # Collect the residue lines and join once at the end. splitlines()
    # also copes with CRLF line endings, which would otherwise leave a
    # stray "\r" inside the sequence.
    residues = []
    for line in fasta_text.splitlines():
        if line.startswith(">"):
            # A header line starts a new record: drop anything gathered so
            # far, so the last record in the response is the one returned.
            residues = []
        else:
            residues.append(line)
    return Sequence(name, "".join(residues))
# Driver: read one protein identifier per line, fetch every protein, then
# report the motif positions for each of them in input order.
ProteinList = []
try:
    # For a local test run, point this at "input.txt" instead.
    with open("rosalind_mprt.txt", "r") as input_file:
        lines = input_file.readlines()
    for line in lines:
        # strip() removes the newline (and any "\r" from CRLF files) so it
        # cannot end up in the request URL.
        protein_id = line.strip()
        if not protein_id:
            # A blank line (commonly a trailing one) names no protein.
            continue
        Protein = GetProtein(protein_id)
        ProteinList.append(Protein)
    for Protein in ProteinList:
        Protein.PrintOutcome()
finally:
    # Close the results file even when a download or lookup fails, so
    # whatever was already written is flushed to disk.
    file_out.close()
# 06 February 2022
# Rosalind - Answer to Finding a Protein Motif
# Problem statement: https://rosalind.info/problems/mprt/
# Subscribe to:
# Post Comments (Atom)
# No comments:
# Post a Comment