The following codebase is related to biology; it is from
https://github.com/geohot/corona . I am looking to apply one of the creational design patterns to it,
either by applying it to the existing code or by adding an extra file
(Singleton / Builder / Prototype). I am not sure how to begin.
Any pointers on how to go about it? Should I look at the codebase or at the overall domain of the
problem? Is there any place where I could apply the Singleton or Builder pattern here?

from lib import cc, translate
# Named regions of the sequence held in cc. The first entry is a raw slice of
# cc; every other entry is the result of translate() on a slice of cc.
# Slice starts are written as "N-1", presumably converting 1-based annotation
# coordinates to Python's 0-based slicing.
corona = {
    'untranslated_region': cc[0:265],
    'orf1a': translate(cc[266-1:13483], True),
    # chop off the stop, note this doesn't have a start
    'orf1b': translate(cc[13468-1:21555], False).strip("*"),
    'spike_glycoprotein': translate(cc[21563-1:25384], True),
    'orf3a': translate(cc[25393-1:26220], True),
    # also known as small membrane
    'envelope_protein': translate(cc[26245-1:26472], True),
    'membrane_glycoprotein': translate(cc[26523-1:27191], True),
    'orf6': translate(cc[27202-1:27387], True),
    'orf7a': translate(cc[27394-1:27759], True),
    # is this one real?
    'orf7b': translate(cc[27756-1:27887], True),
    'orf8': translate(cc[27894-1:28259], True),
    'nucleocapsid_phosphoprotein': translate(cc[28274-1:29533], True),
    'orf10': translate(cc[29558-1:29674], True),
}
import random
# Codon table, one entry per line: "<three-letter> / <one-letter>" followed by
# the RNA codons for that amino acid. The last line lists the stop codons.
tt = """Ala / A GCU, GCC, GCA, GCG
Ile / I AUU, AUC, AUA
Arg / R CGU, CGC, CGA, CGG; AGA, AGG
Leu / L CUU, CUC, CUA, CUG; UUA, UUG
Asn / N AAU, AAC
Lys / K AAA, AAG
Asp / D GAU, GAC
Met / M AUG
Phe / F UUU, UUC
Cys / C UGU, UGC
Pro / P CCU, CCC, CCA, CCG
Gln / Q CAA, CAG
Ser / S UCU, UCC, UCA, UCG; AGU, AGC
Glu / E GAA, GAG
Thr / T ACU, ACC, ACA, ACG
Trp / W UGG
Gly / G GGU, GGC, GGA, GGG
Tyr / Y UAU, UAC
His / H CAU, CAC
Val / V GUU, GUC, GUA, GUG
STOP UAA, UGA, UAG
""".strip()

# dec maps each codon, written in lowercase with "u" replaced by "t", to the
# one-letter amino acid code from tt; stop codons map to "*".
dec = {}
for t in tt.split("\n"):
    # Tokenise on whitespace instead of slicing at a fixed column: the label
    # is "Ala / A" on most lines but just "STOP" on the last one, so a fixed
    # width cuts into the first stop codon.
    fields = t.replace(",", " ").replace(";", " ").split()
    if not fields:
        continue
    if "/" in fields:
        # "Ala / A GCU ..." -> the one-letter code is the token after "/".
        slash = fields.index("/")
        k = fields[slash + 1]
        v = fields[slash + 2:]
    else:
        # "STOP UAA ..." -> a single label token followed by the codons.
        k = fields[0]
        v = fields[1:]
    k = k.replace("STOP", "*")
    for vv in v:
        vv = vv.lower().replace("u", "t")
        if vv in dec:
            print("dup", vv)
        dec[vv] = k
def translate(x, protein=False):
    """Translate the nucleotide string x into one-letter amino acid codes.

    x is lowercased and read three characters at a time from offset 0; each
    triplet is looked up in dec. Trailing characters that do not fill a whole
    triplet are ignored.

    When protein is true the translation must begin with "M" and end with the
    stop marker "*". If it does, the trailing "*" is removed; otherwise
    "BAD PROTEIN" and the translation are printed and None is returned.

    Raises KeyError if a triplet is not present in dec.
    """
    x = x.lower()
    aa = ''.join(dec[x[i:i+3]] for i in range(0, len(x)-2, 3))
    if protein:
        # startswith/endswith rather than aa[0]/aa[-1]: an input shorter than
        # one codon gives an empty translation, which must be reported as a
        # bad protein instead of raising IndexError.
        if not aa.startswith("M") or not aa.endswith("*"):
            print("BAD PROTEIN")
            print(aa)
            return None
        aa = aa[:-1]
    return aa
# Lookup from one-letter amino acid code to its three-letter abbreviation.
# The literal alternates "<three-letter> <one-letter>"; zipping one shared
# iterator with itself walks the tokens two at a time, and each pair is
# flipped so the single letter becomes the key.
ltl = {
    one_letter: three_letter
    for three_letter, one_letter in zip(
        *[iter('Asp D Glu E Arg R Lys K His H Asn N Gln Q Ser S Thr T Tyr Y Ala A Gly G Val V Leu L Ile I Pro P Phe F Met M Trp W Cys C'.split(" "))] * 2
    )
}
def get_atoms():
    """Return a dict mapping each residue name to a list of its atom names.

    The residues come from the document returned by data.get_amber99sb():
    every "Residue" element contributes one entry keyed by its 'name'
    attribute, whose value lists the 'name' attribute of each "Atom" element
    beneath it. If two residues share a name, the later one wins.
    """
    from data import get_amber99sb

    def _name_of(node):
        return node.attributes['name'].value

    return {
        _name_of(residue): [
            _name_of(atom) for atom in residue.getElementsByTagName("Atom")
        ]
        for residue in get_amber99sb().getElementsByTagName("Residue")
    }
def write_unfolded(fasta, fn):
    """Write one "ATOM" line per atom of every residue in fasta to the file fn.

    fasta is iterated as a sequence of one-letter residue codes; each is mapped
    through ltl to an upper-cased three-letter name and its atoms are taken
    from get_atoms(). The last residue additionally gets an "OXT" atom.

    Coordinates are small random offsets in [-0.5, 0.5), with x shifted by
    5 * residue number so residues are spread along the x axis. The global
    random generator is seeded with 1337, so output is reproducible.
    The line layout looks like PDB ATOM records (not verified here).
    """
    atoms = get_atoms()
    random.seed(1337)

    def jitter():
        return 1.0*(random.random()-0.5)

    residue_count = len(fasta)
    records = []
    serial = 1
    for res_num, aa in enumerate(fasta, start=1):
        tl = ltl[aa].upper()
        # Only the final residue carries the extra terminal atom.
        names = atoms[tl] + (["OXT"] if res_num == residue_count else [])
        for a in names:
            # Names shorter than four characters get a leading space.
            pa = a if len(a) >= 4 else " " + a
            x, y, z = jitter(), jitter(), jitter()
            x += res_num*5
            records.append(
                "ATOM %6d %-4s %3s A %3d %8.3f%8.3f%8.3f 1.00 1.00 %s"
                % (serial, pa, tl, res_num, x, y, z, a[0:1])
            )
            serial += 1

    with open(fn, "w") as f:
        f.write('\n'.join(records))
def invert(dd):
    """Return the reverse complement of the DNA string dd, in upper case.

    dd is upper-cased, each base is swapped with its partner (A<->T, C<->G),
    and the result is reversed.

    Raises ValueError if dd contains a character other than A, C, G or T
    (in either case).
    """
    complement = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
    try:
        paired = [complement[base] for base in dd.upper()]
    except KeyError as err:
        # Name the offending character; without this check an unknown base
        # would surface as an opaque TypeError from str.join.
        raise ValueError(
            "cannot invert unknown nucleotide %r" % (err.args[0],)
        ) from None
    return ''.join(reversed(paired))
import pathlib
import os
import json
# Load the sequences stored in data/allseq.json, located relative to this
# file's directory, and expose the 'MN908947' entry as cc.
# The file is decoded explicitly as UTF-8 (the encoding JSON interchange
# uses) so the result does not depend on the platform's locale default.
with open(
    pathlib.Path(__file__).parent.absolute() / "data" / "allseq.json",
    encoding="utf-8",
) as f:
    allseq = json.load(f)
cc = allseq['MN908947']
Aucun commentaire:
Enregistrer un commentaire