04 February 2023

Practical Restriction Mapping Algorithm in C#

https://github.com/leocfong/bio2
 using System;  
 using System.Collections.Generic;  
 using System.Linq;  
 namespace bio2  
 {  
   public class PartialDigest  
   {  
     int width; List<int> X = new List<int>();  
     List<int> L = new List<int> { 2, 2, 3, 3, 4, 5, 6, 7, 8, 10 };  
     public void Main()  
     {  
       width = L.Max();  
       L.Remove(L.Single(x => x == width));  
       X.Add(0);  
       X.Add(width);  
       DisplayStatus();  
       Place();  
     }  
     public void DisplayStatus()  
     {  
       Console.Write("L:");  
       Utility.print(L);  
       Console.Write("X:");  
       Utility.print(X);  
     }  
     public void Place()  
     {  
       if (L.Count() == 0)  
       {  
         Console.WriteLine("Solution found");  
         Console.WriteLine("****************");  
         Utility.print(X);  
         Console.WriteLine("****************");  
         return;  
       }  
       int y1 = L.Max();  
       List<int> DX1 = (from v in X select Math.Abs(y1 - v)).ToList();  
       if (Utility.IsAInB(DX1, L))  
       {  
         X.Add(y1);  
         Utility.RemoveAFromB(DX1, L);  
         DisplayStatus();  
         Place();  
       }  
       int y2 = width - y1;  
       List<int> DX2 = (from v in X select Math.Abs(y2 - v)).ToList();  
       if (Utility.IsAInB(DX2, L))  
       {  
         X.Add(y2);  
         Utility.RemoveAFromB(DX2, L);  
         DisplayStatus();  
         Place();  
       }  
     }  
   }  
 }  

11 July 2022

Partial Digest C++ implementation

https://github.com/leocfong/bio/blob/main/bio/digest.cpp
The rest of the code is in GitHub
#include "digest.h"
#include <iostream>
#include <vector>
#include "Util.h"
using namespace std;
vector<int> FindDigest(vector<int>& X)
{
    vector<int> v;
    size_t n = X.size();
    for (int i = 0; i < n; i++)
    {
        for (int j = 0; j < i; j++)
        {
            int g = abs(X[i] - X[j]);

            v.push_back(g);
        }
    }
    return v;
}
void Place(vector<int>& L, vector<int>& X, const int width, std::vector<int>& Answer)
{
    if (L.size() == 0)
    {
        Answer = X;
        cout << endl << "*****************" << endl;
        cout << "Answer:" << endl;
        Render(Answer);
        cout << endl<<  "*****************" << endl;
        return;
    }
    int y = FindMax(L);
    vector<int> r1 = DeltaToVector(X, y);
    if (IsSubsetOf(r1, L))
    { 
        X.push_back(y);
        RemoveSubset(L, r1);
        cout << "A-X:"; Render(X); cout << "  L:"; Render(L); cout << "  r1:"; Render(r1); cout << endl;
        Place(L, X, width, Answer);
        cout << "B-X:"; Render(X); cout << "  L:"; Render(L); cout << "  r1:"; Render(r1); cout << endl;
        RemoveFromVector(X, y);
        Add(L, r1);
    }
    int wy = width - y;
    vector<int> r2 = DeltaToVector(X, wy);
    if (IsSubsetOf(r2, L))
    {
        X.push_back(wy);
        RemoveSubset(L, r2);
        cout << "A-X:"; Render(X); cout << "  L:"; Render(L); cout << "  r2:"; Render(r2); cout << endl;
        Place(L, X, width, Answer);
        cout << "B-X:"; Render(X); cout << "  L:"; Render(L); cout << "  r2:"; Render(r2); cout << endl;
        RemoveFromVector(X, wy);
        Add(L, r2);
    }
    return;
}
void PartialDigest(vector<int>& L, vector<int>& X, std::vector<int>& Answer)
{
   int width = FindMax(L);
   RemoveFromVector(L, width);
   X.push_back(0); X.push_back(width);
   cout << "X:";Render(X);cout << "  L:";Render(L); cout << endl;
   Place(L, X, width, Answer);
}

05 March 2022

Enumerating k-mers Lexicographically solution for ROSALIND - https://rosalind.info/problems/lexf/

 f = open("out.txt", "w")  
 input_file = open("rosalind_lexf.txt", "r")  
 lines = input_file.readlines()  
 char_array = lines[0].replace('\n', '').split(' ')  
 seq_len = int(lines[1])  
 char_array.sort()  
 char_array_len = len(char_array)  
 rep_len = []  
 rep_inc_counter = []  
 rep_char_index = []  
 rep_len_i = 1  
 for i in range(seq_len):  
   rep_inc_counter.append(0)  
   rep_char_index.append(0)  
   rep_len.append(rep_len_i)  
   rep_len_i = rep_len_i * char_array_len  
 num_combination = char_array_len ** seq_len  
 C = []  
 for i in range(num_combination):  
   s = ""  
   for l in range(seq_len):   
     s = char_array[rep_char_index[l]] + s  
     rep_inc_counter[l] = rep_inc_counter[l] + 1  
     if rep_inc_counter[l] == rep_len[l]:  
       rep_char_index[l] = rep_char_index[l] + 1  
       rep_inc_counter[l] = 0   
     if rep_char_index[l]==char_array_len:  
       rep_char_index[l] = 0  
   C.append(s)  
 for s in C:  
   print(s)  
   f.write(s+'\n')  
 f.close()     

27 February 2022

ROSALIND RNA Splicing solution - https://rosalind.info/problems/splc/

 import re  
 from asyncio.windows_events import NULL  
 f = open("out.txt", "w")  
 condon_len = 3  
 test= "TCTCGAGAGGCATATGGTCACATGATCGGTCGAGCGTGTTTCAAAGTTTGCGCCTAG"  
 dna_map = {  
   "TTT":"F", "TCT":"S", "TAT":"Y", "TGT":"C",   
 "TTC":"F", "TCC":"S", "TAC":"Y", "TGC":"C",   
 "TTA":"L", "TCA":"S", "TAA":"STOP", "TGA":"STOP",   
 "TTG":"L", "TCG":"S", "TAG":"STOP", "TGG":"W",   
 "CTT":"L", "CCT":"P", "CAT":"H", "CGT":"R",   
 "CTC":"L", "CCC":"P", "CAC":"H", "CGC":"R",   
 "CTA":"L", "CCA":"P", "CAA":"Q", "CGA":"R",   
 "CTG":"L", "CCG":"P", "CAG":"Q", "CGG":"R",   
 "ATT":"I", "ACT":"T", "AAT":"N", "AGT":"S",   
 "ATC":"I", "ACC":"T", "AAC":"N", "AGC":"S",   
 "ATA":"I", "ACA":"T", "AAA":"K", "AGA":"R",   
 "ATG":"M", "ACG":"T", "AAG":"K", "AGG":"R",   
 "GTT":"V", "GCT":"A", "GAT":"D", "GGT":"G",   
 "GTC":"V", "GCC":"A", "GAC":"D", "GGC":"G",   
 "GTA":"V", "GCA":"A", "GAA":"E", "GGA":"G",   
 "GTG":"V", "GCG":"A", "GAG":"E", "GGG":"G"  
 }  
 def GetAA(condon):  
   return dna_map[condon]  
 class Sequence:  
   def __init__(self, name, seq):  
     self.name = name  
     self.seq = seq  
     self.found = []  
   def p(self):  
     print("seq name:" + self.name + "len:" + str(len(self.seq)))   
     print("seq:" + self.seq)  
     for s in self.found:  
       print(s)   
   def pAA(self):  
     print (self.found[0])   
     f.write(self.found[0])     
   def Translate(self):  
     seq_len = len(self.seq)  
     aa_seq=""  
     start_pos = 0  
     end_pos = condon_len  
     while end_pos <= seq_len:  
       condon = self.seq[start_pos:end_pos]  
       AA = GetAA(condon)  
       if AA=="STOP":  
         break  
       aa_seq = aa_seq + AA  
       start_pos = start_pos + condon_len  
       end_pos = end_pos + condon_len  
     self.found.append(aa_seq)    
 input_file = open("rosalind_splc.txt", "r")  
 lines = input_file.readlines()  
 list = []  
 s = NULL  
 for l in lines:  
   if (l[0]==">"):  
     if s != NULL:  
       list.append(s)  
     s = Sequence(l.replace('\n', ''), '')  
   else:  
     s.seq = s.seq + l.replace('\n', '')  
 list.append(s)      
 list_len = len(list)  
 for i in range(1, list_len):  
   list[0].seq = list[0].seq.replace(list[i].seq, '')  
 list[0].Translate()  
 list[0].pAA()    
 f.close()     

26 February 2022

ROSALIND - Locating Restriction Sites solution to https://rosalind.info/problems/revp/

 c_map = {  
   "A":"T",  
   "G":"C",  
   "C":"G",  
   "T":"A"  
 }  
 def GetComplement(seq):  
   r = ''.join([c_map[x] for x in seq])  
   return r  
 def GetRevComplement(seq):  
   c = GetComplement(seq)   
   return c[::-1]    
 f = open("out.txt", "w")  
 input_file = open("rosalind_revp.txt", "r")  
 seq = ""  
 lines = input_file.readlines()  
 for l in lines:  
   if (l[0]!=">"):  
     seq = seq + l.replace("\n", "").replace("\r","")  
 seq_len = len(seq)  
 start_len = 2  
 end_len = 6  
 found_list = {}  
 for i in range(start_len, end_len+1):   
   for j in range(0, seq_len-(i*2)+1):  
     first_half = seq[j:j+i]  
     second_half =seq[j+i:j+i*2]  
     rc_second_half = GetRevComplement(second_half)  
     if (first_half == rc_second_half):  
       #print(first_half + " " + second_half + " " + rc_second_half + " " + str(j+1) + " " +str(i*2))  
       found_list[j+1]=i*2  
 sorted_found_list = sorted(found_list.items(), key=lambda item: item[0])  
 for key, value in sorted_found_list:  
   print(str(key) + " " + str(value))        
   f.write(str(key) + " " + str(value) + '\n')  
 f.close()    

25 February 2022

Enumerating Gene Orders - https://rosalind.info/problems/perm/

 from itertools import permutations  
 import math  
 f = open("out.txt", "w")  
 num=5  
 s=1   
 e=s+num  
 p=math.factorial(num)  
 print(p)  
 f.write(str(p) + '\n')  
 l = list(permutations(range(s, e)))  
 for i in range(p):  
   for j in range(num):  
     print(l[i][j], end=' ')  
     f.write(str(l[i][j]) + ' ')  
   print('')    
   f.write('\n')  
 f.close()    

Calculating Protein Mass - https://rosalind.info/problems/prtm/

 mmt = {  
 "A":71.03711,  
 "C":103.00919,  
 "D":115.02694,  
 "E":129.04259,  
 "F":147.06841,  
 "G":57.02146,  
 "H":137.05891,  
 "I":113.08406,  
 "K":128.09496,  
 "L":113.08406,  
 "M":131.04049,  
 "N":114.04293,  
 "P":97.05276,  
 "Q":128.05858,  
 "R":156.10111,  
 "S":87.03203,  
 "T":101.04768,  
 "V":99.06841,  
 "W":186.07931,  
 "Y":163.06333 }  
 input_file = open("rosalind_prtm.txt", "r")  
 AA = input_file.read().replace('\n','')  
 print(sum([mmt[x] for x in AA]))