#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''Provides functions for assembly of sequences by homologous recombination.
Given a list of sequences (Drecords), all sequences will be analyzed for
overlapping regions of DNA (common substrings).
The assembly algorithm is based on forming a network where each
overlapping sequence forms a node and intervening sequences form edges.
Then all possible linear or circular assemblies will be returned in the
order of their length.
'''
import networkx as nx
import sys
import operator
import Bio.SeqIO
import itertools
import copy
from Bio.Seq import Seq
from Bio.Seq import reverse_complement as rc
from Bio.SeqUtils.CheckSum import seguid
from findsubstrings_suffix_arrays_python import common_sub_strings
from Bio.Alphabet.IUPAC import ambiguous_dna
from Bio.SeqFeature import SeqFeature, FeatureLocation, ExactPosition
from pydna._simple_paths7 import all_circular_paths_edges
from pydna._simple_paths8 import all_simple_paths_edges
from pydna.dsdna import Drecord
def circular_assembly(form_rec_list, limit=25):
    '''Accepts a list of Drecords and tries to assemble them into
    circular products by homologous recombination based on shared
    regions of homology with a minimum length given by limit.

    The graph built by _make_graph is stripped of its artificial '5' and
    '3' end nodes, every cycle is additionally collapsed into a single
    node, and each path reported by all_circular_paths_edges becomes one
    candidate product. Candidates of equal length that are rotations (or
    reverse complement rotations) of the first candidate of that length
    are discarded.

    Parameters
    ----------
    form_rec_list : list
        a list of Drecord objects.
    limit : int, optional
        limit is set to 25 by default.

    Returns
    -------
    frecs, cp : tuple
        frecs are the records returned by _make_graph, with the
        regions of homology added to the features.
        cp is a list of Drecords representing the circular products
        sorted by length (long -> short).
    '''
    frecs, G = _make_graph(form_rec_list, limit)
    # A circular product can not start or end at the artificial end nodes.
    G.remove_nodes_from(('5','3'))

    # NOTE(review): the first cycle is skipped here but not in
    # linear_assembly; the reason is not visible in this file -- confirm.
    for cycle in list(nx.simple_cycles(G))[1:]:
        # All rotations of the cycle; assumes each cycle repeats its first
        # node at the end, so each rotation starts and ends on one node.
        circular_paths = [(cycle+cycle[1:])[n:len(cycle)+n] for n in range(len(cycle)-1)]
        for circular_path in circular_paths:
            keynode = circular_path[0]
            edge_seqs = [G[u][v][0]['sek'] for u, v in zip(circular_path, circular_path[1:])]
            node_seqs = [G.node[node]['sek'] for node in circular_path]
            # node + edge + node + ... + node, walking once around the cycle
            sm = node_seqs[0]
            for edge_seq, node_seq in zip(edge_seqs, node_seqs[1:]):
                sm = sm + edge_seq + node_seq
            incoming = [(n, keynode) for n in G.predecessors(keynode) if n not in circular_path]
            outgoing = [(keynode, n) for n in G.successors(keynode) if n not in circular_path]
            # The collapsed cycle becomes a node of its own that inherits
            # the key node's connections to the rest of the graph.
            new_node = seguid(sm.seq)
            G.add_node(new_node, sek=sm)
            G.add_edges_from([(p[0], new_node, {'sek': G[p[0]][p[1]][0]['sek']}) for p in incoming])
            G.add_edges_from([(new_node, p[1], {'sek': G[p[0]][p[1]][0]['sek']}) for p in outgoing])

    circular_products = []
    for path in all_circular_paths_edges(G):
        result = Drecord(Seq("", ambiguous_dna))
        for first_node, second_node, edgedict in path:
            result += edgedict['sek'] + G.node[second_node]['sek']
        circular_products.append(result)
    unique_lengths = set(len(product) for product in circular_products)

    # NOTE(review): only the first product of each length is used as the
    # reference, so two equal products that both differ from it survive.
    unique_circular_products = circular_products[:]
    for le in unique_lengths:
        lst = [se for se in circular_products if len(se) == le]
        a = lst.pop(0)
        reference = str(a.seq).lower()
        for b in lst:
            # a rotation of b is always a substring of b+b
            doubled = (b+b).seq
            if (reference in str(doubled).lower()
                    or reference in str(doubled.reverse_complement()).lower()):
                unique_circular_products.remove(b)
    unique_circular_products.sort(key=len, reverse=True)

    for cp in unique_circular_products:
        length = len(cp)
        cp.description = "circular assembly product {}".format(length)
        # The doubled sequence lets a feature that crosses the origin be
        # compared with a plain slice. The record is flagged linear before
        # concatenation (presumably required by Drecord -- confirm).
        cp.linear = True
        cp2 = cp + cp
        cp.circular = True

        # Markers left by _make_graph: the feature starts at the marker
        # and continues to the right.
        osf = [feature for feature in cp.features if "from_left" in feature.qualifiers]
        cp.features = [feature for feature in cp.features if "from_left" not in feature.qualifiers]
        for feature in osf:
            seq = feature.qualifiers["from_left"]
            begin = feature.location.start
            end = begin + len(seq)
            if not _same_sequence(seq, cp2[begin:end].seq, feature.strand):
                continue
            cp.features.append(_circular_feature(feature, begin, end, length, "from_left"))

        # Markers left by _make_graph: the feature ends at the marker and
        # started somewhere to the left.
        osf = [feature for feature in cp.features if "to_right" in feature.qualifiers]
        cp.features = [feature for feature in cp.features if "to_right" not in feature.qualifiers]
        for feature in osf:
            seq = feature.qualifiers["to_right"]
            end = feature.location.start
            begin = end - len(seq)
            # Shifting by one full length keeps the slice valid when begin
            # is negative, i.e. when the feature crosses the origin. This
            # is done for both strands.
            if not _same_sequence(seq, cp2[length+begin:length+end].seq, feature.strand):
                continue
            cp.features.append(_circular_feature(feature, begin, end, length, "to_right"))

    return frecs, unique_circular_products


def _same_sequence(expected, found, strand):
    '''Return True if found (or its reverse complement for strand -1)
    equals expected, ignoring case. Any strand other than 1 or -1 gives
    False.'''
    if strand == 1:
        return str(expected).lower() == str(found).lower()
    if strand == -1:
        return str(expected).lower() == str(rc(found)).lower()
    return False


def _circular_feature(feature, begin, end, length, marker):
    '''Return a copy of feature located at begin..end on a circular
    sequence of the given length, without the internal marker qualifier.

    begin may be negative or end may exceed length when the feature
    crosses the origin; such a feature is returned as a "join" of the
    part up to the end of the sequence and the part from position 0.
    '''
    qualifiers = {k: v for k, v in feature.qualifiers.items() if k != marker}

    def build(start, stop, location_operator, sub_features):
        return SeqFeature(FeatureLocation(start, stop),
                          type=feature.type,
                          location_operator=location_operator,
                          strand=feature.strand,
                          id=feature.id,
                          qualifiers=qualifiers,
                          sub_features=sub_features)

    if begin >= 0 and end <= length:
        return build(begin, end, feature.location_operator, None)
    # Fold whichever coordinate fell outside 0..length back onto the circle.
    if begin < 0:
        begin = begin + length
    if end > length:
        end = end - length
    a = build(begin, length, feature.location_operator, None)
    b = build(0, end, feature.location_operator, None)
    return build(a.location.start, b.location.end, "join", [a, b])
def linear_assembly(form_rec_list, limit=25):
    '''Accepts a list of Drecords and tries to assemble them into
    linear products by homologous recombination based on shared
    regions of homology with a minimum length given by limit.

    Every cycle in the graph built by _make_graph is additionally
    collapsed into a single node, and each path from the '5' node to the
    '3' node reported by all_simple_paths_edges becomes one candidate
    product. Candidates of equal length that are identical to (or the
    reverse complement of) the last candidate of that length are
    discarded.

    Parameters
    ----------
    form_rec_list : list
        a list of Drecord objects.
    limit : int, optional
        limit is set to 25 by default.

    Returns
    -------
    frecs, lp : tuple
        frecs are the records returned by _make_graph, with the
        regions of homology added to the features.
        lp is a list of Drecords representing the linear products
        sorted by length (long -> short).
    '''
    frecs, G = _make_graph(form_rec_list, limit)

    for cycle in list(nx.simple_cycles(G)):
        # All rotations of the cycle; assumes each cycle repeats its first
        # node at the end, so each rotation starts and ends on one node.
        circular_paths = [(cycle+cycle[1:])[n:len(cycle)+n] for n in range(len(cycle)-1)]
        for circular_path in circular_paths:
            keynode = circular_path[0]
            edge_seqs = [G[u][v][0]['sek'] for u, v in zip(circular_path, circular_path[1:])]
            node_seqs = [G.node[node]['sek'] for node in circular_path]
            # node + edge + node + ... + node, walking once around the cycle
            sm = node_seqs[0]
            for edge_seq, node_seq in zip(edge_seqs, node_seqs[1:]):
                sm = sm + edge_seq + node_seq
            incoming = [(n, keynode) for n in G.predecessors(keynode) if n not in circular_path]
            outgoing = [(keynode, n) for n in G.successors(keynode) if n not in circular_path]
            # The collapsed cycle becomes a node of its own that inherits
            # the key node's connections to the rest of the graph.
            new_node = seguid(sm.seq)
            G.add_node(new_node, sek=sm)
            G.add_edges_from([(p[0], new_node, {'sek': G[p[0]][p[1]][0]['sek']}) for p in incoming])
            G.add_edges_from([(new_node, p[1], {'sek': G[p[0]][p[1]][0]['sek']}) for p in outgoing])

    linear_products = []
    for path in all_simple_paths_edges(G, '5', '3', data=True):
        result = Drecord(Seq("", ambiguous_dna))
        for first_node, second_node, edgedict in path:
            # edgedict maps edge keys to attribute dicts; the last one is used
            result += list(edgedict.values()).pop()['sek']
            result += G.node[second_node]['sek']
        result.circular = False
        linear_products.append(result)
    unique_lengths = set(len(product) for product in linear_products)

    # NOTE(review): only the last product of each length is used as the
    # reference, so two equal products that both differ from it survive.
    unique_linear_products = linear_products[:]
    for le in unique_lengths:
        lst = [se for se in linear_products if len(se) == le]
        a = lst.pop()
        reference = str(a.seq).lower()
        for b in lst:
            if (reference == str(b.seq).lower()
                    or reference == str(b.seq.reverse_complement()).lower()):
                unique_linear_products.remove(b)
    unique_linear_products.sort(key=len, reverse=True)

    def agrees(expected, found, strand):
        # Case-insensitive comparison; minus strand features are compared
        # with the reverse complement. Other strand values never agree.
        if strand == 1:
            return str(expected).lower() == str(found).lower()
        if strand == -1:
            return str(expected).lower() == str(rc(found)).lower()
        return False

    def rebuilt(feature, begin, end, marker):
        # Copy of feature at begin..end without the internal marker qualifier.
        return SeqFeature(FeatureLocation(begin, end),
                          type=feature.type,
                          location_operator=feature.location_operator,
                          strand=feature.strand,
                          id=feature.id,
                          qualifiers={k: v for k, v in feature.qualifiers.items() if k != marker},
                          sub_features=None)

    for lp in unique_linear_products:
        lp.description = "linear assembly product {}".format(len(lp))

        # Markers left by _make_graph: the feature starts at the marker
        # and continues to the right. It is restored only if the product
        # really carries the complete feature sequence there.
        osf = [feature for feature in lp.features if "from_left" in feature.qualifiers]
        lp.features = [feature for feature in lp.features if "from_left" not in feature.qualifiers]
        for feature in osf:
            seq = feature.qualifiers["from_left"]
            begin = feature.location.start
            end = begin + len(seq)
            if not agrees(seq, lp[begin:end].seq, feature.strand):
                continue
            lp.features.append(rebuilt(feature, begin, end, "from_left"))

        # Markers left by _make_graph: the feature ends at the marker and
        # started somewhere to the left.
        osf = [feature for feature in lp.features if "to_right" in feature.qualifiers]
        lp.features = [feature for feature in lp.features if "to_right" not in feature.qualifiers]
        for feature in osf:
            seq = feature.qualifiers["to_right"]
            end = feature.location.start
            begin = end - len(seq)
            if not agrees(seq, lp[begin:end].seq, feature.strand):
                continue
            lp.features.append(rebuilt(feature, begin, end, "to_right"))

    return frecs, unique_linear_products
def _make_graph(recs, limit=25):
    '''Build the graph of shared sequences used by the assembly functions.

    The records are deep copied, stripped of features of type "overlap"
    and passed through seq.fill_in(). Every pair of records is searched
    with common_sub_strings (minimum length limit), once as given and
    once against the reverse complement of the second record.

    Each shared region becomes a node keyed by the seguid checksum of its
    sequence, and both records receive an "overlap" feature for it. The
    nodes "5" and "3" (empty sequences) stand for the ends of a record.
    For every record and reverse complement, the sequence between two
    consecutive overlaps becomes an edge carrying that sequence as 'sek'.

    A feature that crosses the boundary of such a stretch is represented
    on the stretch by a zero-length marker feature whose 'to_right' or
    'from_left' qualifier holds the sequence of the whole feature.

    Returns a tuple (records, graph), where records holds the copies
    followed by their reverse complements.
    '''
    records = list(copy.deepcopy(recs))
    for record in records:
        record.features = [f for f in record.features if f.type != "overlap"]
        record.seq = record.seq.fill_in() # !!!

    revcomp = {record: record.reverse_complement() for record in records}

    graph = nx.MultiDiGraph(multiedges=True, selfloops=False)
    graph.add_node("5", sek=Drecord(Seq("", ambiguous_dna)))
    graph.add_node("3", sek=Drecord(Seq("", ambiguous_dna)))

    # (first record, second record, [(start in first, start in second, length), ...])
    matches = []
    for a, b in itertools.combinations(records, 2):
        upper_a = str(a.seq).upper()

        same_strand = common_sub_strings(upper_a, str(b.seq).upper(), limit)
        if same_strand:
            matches.append((a, b, same_strand))

        other_strand = common_sub_strings(upper_a, str(revcomp[b].seq).upper(), limit)
        if other_strand:
            matches.append((a, revcomp[b], other_strand))
            # the same hits seen from rc(a) and b: coordinates are mirrored
            mirrored = [(len(a)-sa-le, len(b)-sb-le, le) for sa, sb, le in other_strand]
            matches.append((revcomp[a], b, mirrored))

    for a, b, match in matches:
        for start_in_a, start_in_b, length in match:
            stop_in_a = start_in_a + length
            stop_in_b = start_in_b + length
            node_seq = a[start_in_a:stop_in_a]
            node_seq2 = b[start_in_b:stop_in_b]
            assert str(node_seq.seq).lower() == str(node_seq2.seq).lower()
            # the node carries the features of both copies of the region
            node_seq.features.extend(node_seq2.features)
            chksum = seguid(node_seq.seq)
            graph.add_node(chksum, sek=node_seq)
            qual = {"note" : "olp_{}".format(chksum),
                    "chksum" : chksum,
                    "ApEinfo_fwdcolor" : "green",
                    "ApEinfo_revcolor" : "red",}
            a.features.append(SeqFeature(FeatureLocation(start_in_a, stop_in_a),
                                         type="overlap",
                                         qualifiers=qual))
            b.features.append(SeqFeature(FeatureLocation(start_in_b, stop_in_b),
                                         type="overlap",
                                         qualifiers=qual))

    def boundary_marker(feature, position, key, record):
        # Zero-length stand-in for a feature cut by an edge boundary.
        # NOTE(review): the qualifiers dict is shared with the source
        # feature, so the key set below also shows up on that feature and
        # on any other marker made from it -- confirm this is intended.
        stub = SeqFeature(FeatureLocation(position, position),
                          type=feature.type,
                          location_operator=feature.location_operator,
                          strand=feature.strand,
                          id=feature.id,
                          qualifiers=feature.qualifiers,
                          sub_features=None)
        stub.qualifiers[key] = feature.extract(record).seq
        return stub

    records.extend(revcomp.values())

    for record in records:
        # one overlap feature per checksum, ordered along the record
        by_checksum = {f.qualifiers["chksum"]: f for f in record.features if f.type == "overlap"}
        overlaps = sorted(by_checksum.values(), key=lambda f: f.location.start)
        if not overlaps:
            continue
        five_prime = SeqFeature(FeatureLocation(0, 0),
                                type="overlap",
                                qualifiers={"chksum": "5"})
        three_prime = SeqFeature(FeatureLocation(len(record), len(record)),
                                 type="overlap",
                                 qualifiers={"chksum": "3"})
        overlaps = [five_prime] + overlaps + [three_prime]

        for olp1, olp2 in zip(overlaps, overlaps[1:]):
            n1 = olp1.qualifiers["chksum"]
            n2 = olp2.qualifiers["chksum"]
            start, end = olp1.location.end, olp2.location.start
            sek = record[start:end]
            for feature in record.features:
                f_start = feature.location.start
                f_end = feature.location.end
                if start < f_end < end and f_start < start:
                    # feature begins left of this stretch and ends inside it
                    sek.features.append(boundary_marker(feature, f_end-start, 'to_right', record))
                if start < f_start < end and f_end > end:
                    # feature begins inside this stretch and ends right of it
                    sek.features.append(boundary_marker(feature, f_start-start, 'from_left', record))
            graph.add_edge(n1, n2, sek=sek)

    return records, graph
if __name__=="__main__":
    # Ad hoc self test: assemble three fragments into a circular product
    # and compare it with the expected sequence.
    from dsdna import parse
    # eq is used for the comparison below, so it has to be imported
    # before the sys.exit() call.
    from utils import eq

    text2 = '''
>693
ttctagaactagtggatcccccgggctgcagatgagtgaaggccccgtcaaattcgaaaaaaataccgtcatatctgtctttggtgcgtcaggtgatctggcaaagaagaagacttttcccgccttatttgggcttttcagagaaggttaccttgatccatctaccaagatcttcggttatgcccggtccaaattgtccatggaggaggacctgaagtcccgtgtcctaccccacttgaaaaaacctcacggtgaagccgatgactctaaggtcgaacagttcttcaagatggtcagctacatttcgggaaattacgacacagatgaaggcttcgacgaattaagaacgcagatcgagaaattcgagaaaagtgccaacgtcgatgtcccacaccgtctcttctatctggccttgccgccaagcgtttttttgacggtggccaagcagatcaagagtcgtgtgtacgcagagaatggcatcacccgtgtaatcgtagagaaacctttcggccacgacctggcctctgccagggagctgcaaaaaaacctggggcccctctttaaagaagaagagttgtacagaattgaccattacttgggtaaagagttggtcaagaatcttttagtcttgaggttcggtaaccagtttttgaatgcctcgtggaatagagacaacattcaaagcgttcagat
>934
tatcgataagcttgatatcgaattcctgcagctaattatccttcgtatcttctggcttagtcacgggccaagcgtaagggtgcttttcgggcataacatacttgtgtttttgcatatattccttcaatccctttggacctcttgatccgtaggggtaaatttccggtgttggaccgtccggacgctctatgtgcttcagtaatggggtgaatatgccccaactgatatccaattcgtcatctctgacaaagttggaatggtcacccagtagggcgtctcttatcaacacctcgtaagcctctggaatccaaaagtcttggtacctgcttgcgtaagttagattcagatctgtgacttgggtagcatttgacagaccaggggtcttagcattaaactttaggtacacagcggcatcgggctgcactctgatgaccagttcgttatttggaatgtctttgaagacacccgatgcgaccgctttgtactgcagtctgatctccaccttggactcattcaaagccttaccggcacgcatcatgatggggacgccctcccaacgctcgttttcgatgttgaaagtcattgctgcaaaagtgacacatttagagtccttgtctacagtgtcatcatccacgtaggcgggcttagacccgtcctcagatttaccgtactggcccaagaggacgtcgtccgtgtcgatgggggccacggcctttagaaccttaaccttttcgtcacgaatagattccgggtcaaaagacaccggtctttccatagtcaagagagtcatgatttgtaacagatggttctgcatcacgtctctgattatgcctatagagtcgaaatagccgccacggccttcggtgccgaacctctctttaaacgaaatctgaacgctttgaatgttgtctctattccacgaggcattcaaaaa
>7729
gaattcgatatcaagcttatcgataccgtcgacctcgagtcatgtaattagttatgtcacgcttacattcacgccctccccccacatccgctctaaccgaaaaggaaggagttagacaacctgaagtctaggtccctatttatttttttatagttatgttagtattaagaacgttatttatatttcaaatttttcttttttttctgtacagacgcgtgtacgcatgtaacattatactgaaaaccttgcttgagaaggttttgggacgctcgaaggctttaatttgcggccggtacccaattcgccctatagtgagtcgtattacgcgcgctcactggccgtcgttttacaacgtcgtgactgggaaaaccctggcgttacccaacttaatcgccttgcagcacatccccctttcgccagctggcgtaatagcgaagaggcccgcaccgatcgcccttcccaacagttgcgcagcctgaatggcgaatggcgcgacgcgccctgtagcggcgcattaagcgcggcgggtgtggtggttacgcgcagcgtgaccgctacacttgccagcgccctagcgcccgctcctttcgctttcttcccttcctttctcgccacgttcgccggctttccccgtcaagctctaaatcgggggctccctttagggttccgatttagtgctttacggcacctcgaccccaaaaaacttgattagggtgatggttcacgtagtgggccatcgccctgatagacggtttttcgccctttgacgttggagtccacgttctttaatagtggactcttgttccaaactggaacaacactcaaccctatctcggtctattcttttgatttataagggattttgccgatttcggcctattggttaaaaaatgagctgatttaacaaaaatttaacgcgaattttaacaaaatattaacgtttacaatttcctgatgcggtattttctccttacgcatctgtgcggtatttcacaccgcatatcgacggtcgaggagaacttctagtatatccacatacctaatattattgccttattaaaaatggaatcccaacaattacatcaaaatccacattctcttcaaaatcaattgtcctgtacttccttgttcatgtgtgttcaaaaacgttatatttataggataattatactctatttctcaacaagtaattggttgtttggccgagcggtctaaggcgcctgattcaagaaatatcttgaccgcagttaactgtgggaatactcaggtatcgtaagatgcaagagttcgaatctcttagcaaccattatttttttcctcaacataacgagaacacacaggggcgctatcgcacagaatcaaattcgatgactggaaattttttgttaatttcagaggtcgcctgacgcatatacctttttcaactgaaaaattgggagaaaaaggaaaggtgagaggccggaaccggcttttcatatagaatagagaagcgttcatgactaaatgcttgcatcacaatacttgaagttgacaatattatttaaggacctattgttttttccaataggtggttagcaatcgtcttactttctaacttttcttaccttttacatttcagcaatatatatatatatttcaaggatataccattctaatgtctgcccctatgtctgcccctaagaagatcgtcgttttgccaggtgaccacgttggtcaagaaatcacagccgaagccattaaggttcttaaagctatttctgatgttcgttccaatgtcaagttcgatttcgaaaatcatttaattggtggtgctgctatcgatgctacaggtgtcccacttccagatgaggcgctggaagcctccaagaaggttgatgccgttttgttaggtgctgtggctggtcctaaatggggtaccggtagtgttagacctgaacaaggtttactaaaaatccgtaaagaacttcaattgtacgccaacttaagaccatgtaactttg
catccgactctcttttagacttatctccaatcaagccacaatttgctaaaggtactgacttcgttgttgtcagagaattagtgggaggtatttactttggtaagagaaaggaagacgatggtgatggtgtcgcttgggatagtgaacaatacaccgttccagaagtgcaaagaatcacaagaatggccgctttcatggccctacaacatgagccaccattgcctatttggtccttggataaagctaatcttttggcctcttcaagattatggagaaaaactgtggaggaaaccatcaagaacgaattccctacattgaaggttcaacatcaattgattgattctgccgccatgatcctagttaagaacccaacccacctaaatggtattataatcaccagcaacatgtttggtgatatcatctccgatgaagcctccgttatcccaggttccttgggtttgttgccatctgcgtccttggcctctttgccagacaagaacaccgcatttggtttgtacgaaccatgccacggttctgctccagatttgccaaagaataaggttgaccctatcgccactatcttgtctgctgcaatgatgttgaaattgtcattgaacttgcctgaagaaggtaaggccattgaagatgcagttaaaaaggttttggatgcaggtatcagaactggtgatttaggtggttccaacagtaccaccgaagtcggtgatgctgtcgccgaagaagttaagaaaatccttgcttaaaaagattctctttttttatgatatttgtacataaactttataaatgaaattcataatagaaacgacacgaaattacaaaatggaatatgttcatagggtagacgaaactatatacgcaatctacatacatttatcaagaaggagaaaaaggaggatagtaaaggaatacaggtaagcaaattgatactaatggctcaacgtgataaggaaaaagaattgcactttaacattaatattgacaaggaggagggcaccacacaaaaagttaggtgtaacagaaaatcatgaaactacgattcctaatttgatattggaggattttctctaaaaaaaaaaaaatacaacaaataaaaaacactcaatgacctgaccatttgatggagtttaagtcaataccttcttgaagcatttcccataatggtgaaagttccctcaagaattttactctgtcagaaacggccttacgacgtagtcgatatggtgcactctcagtacaatctgctctgatgccgcatagttaagccagccccgacacccgccaacacccgctgacgcgccctgacgggcttgtctgctcccggcatccgcttacagacaagctgtgaccgtctccgggagctgcatgtgtcagaggttttcaccgtcatcaccgaaacgcgcgagacgaaagggcctcgtgatacgcctatttttataggttaatgtcatgataataatggtttcttagtatgatccaatatcaaaggaaatgatagcattgaaggatgagactaatccaattgaggagtggcagcatatagaacagctaaagggtagtgctgaaggaagcatacgataccccgcatggaatgggataatatcacaggaggtactagactacctttcatcctacataaatagacgcatataagtacgcatttaagcataaacacgcactatgccgttcttctcatgtatatatatatacaggcaacacgcagatataggtgcgacgtgaacagtgagctgtatgtgcgcagctcgcgttgcattttcggaagcgctcgttttcggaaacgctttgaagttcctattccgaagttcctattctctagaaagtataggaacttcagagcgcttttgaaaaccaaaagcgctctgaagacgcactttcaaaaaaccaaaaacgcaccggactgtaacgagctactaaaatattgcgaataccgcttccacaaacattgctcaaaagtatctctt
tgctatatatctctgtgctatatccctatataacctacccatccacctttcgctccttgaacttgcatctaaactcgacctctacattttttatgtttatctctagtattactctttagacaaaaaaattgtagtaagaactattcatagagtgaatcgaaaacaatacgaaaatgtaaacatttcctatacgtagtatatagagacaaaatagaagaaaccgttcataattttctgaccaatgaagaatcatcaacgctatcactttctgttcacaaagtatgcgcaatccacatcggtatagaatataatcggggatgcctttatcttgaaaaaatgcacccgcagcttcgctagtaatcagtaaacgcgggaagtggagtcaggctttttttatggaagagaaaatagacaccaaagtagccttcttctaaccttaacggacctacagtgcaaaaagttatcaagagactgcattatagagcgcacaaaggagaaaaaaagtaatctaagatgctttgttagaaaaatagcgctctcgggatgcatttttgtagaacaaaaaagaagtatagattctttgttggtaaaatagcgctctcgcgttgcatttctgttctgtaaaaatgcagctcagattctttgtttgaaaaattagcgctctcgcgttgcatttttgttttacaaaaatgaagcacagattcttcgttggtaaaatagcgctttcgcgttgcatttctgttctgtaaaaatgcagctcagattctttgtttgaaaaattagcgctctcgcgttgcatttttgttctacaaaatgaagcacagatgcttcgttcaggtggcacttttcggggaaatgtgcgcggaacccctatttgtttatttttctaaatacattcaaatatgtatccgctcatgagacaataaccctgataaatgcttcaataatattgaaaaaggaagagtatgagtattcaacatttccgtgtcgcccttattcccttttttgcggcattttgccttcctgtttttgctcacccagaaacgctggtgaaagtaaaagatgctgaagatcagttgggtgcacgagtgggttacatcgaactggatctcaacagcggtaagatccttgagagttttcgccccgaagaacgttttccaatgatgagcacttttaaagttctgctatgtggcgcggtattatcccgtattgacgccgggcaagagcaactcggtcgccgcatacactattctcagaatgacttggttgagtactcaccagtcacagaaaagcatcttacggatggcatgacagtaagagaattatgcagtgctgccataaccatgagtgataacactgcggccaacttacttctgacaacgatcggaggaccgaaggagctaaccgcttttttgcacaacatgggggatcatgtaactcgccttgatcgttgggaaccggagctgaatgaagccataccaaacgacgagcgtgacaccacgatgcctgtagcaatggcaacaacgttgcgcaaactattaactggcgaactacttactctagcttcccggcaacaattaatagactggatggaggcggataaagttgcaggaccacttctgcgctcggcccttccggctggctggtttattgctgataaatctggagccggtgagcgtgggtctcgcggtatcattgcagcactggggccagatggtaagccctcccgtatcgtagttatctacacgacggggagtcaggcaactatggatgaacgaaatagacagatcgctgagataggtgcctcactgattaagcattggtaactgtcagaccaagtttactcatatatactttagattgatttaaaacttcatttttaatttaaaaggatctaggtgaagatcctttttgataatctcatgaccaaaatcccttaacgtgagttttcgttccactgagcgtcagaccccgtagaaaagatcaaaggatcttcttga
gatcctttttttctgcgcgtaatctgctgcttgcaaacaaaaaaaccaccgctaccagcggtggtttgtttgccggatcaagagctaccaactctttttccgaaggtaactggcttcagcagagcgcagataccaaatactgtccttctagtgtagccgtagttaggccaccacttcaagaactctgtagcaccgcctacatacctcgctctgctaatcctgttaccagtggctgctgccagtggcgataagtcgtgtcttaccgggttggactcaagacgatagttaccggataaggcgcagcggtcgggctgaacggggggttcgtgcacacagcccagcttggagcgaacgacctacaccgaactgagatacctacagcgtgagctatgagaaagcgccacgcttcccgaagggagaaaggcggacaggtatccggtaagcggcagggtcggaacaggagagcgcacgagggagcttccagggggaaacgcctggtatctttatagtcctgtcgggtttcgccacctctgacttgagcgtcgatttttgtgatgctcgtcaggggggcggagcctatggaaaaacgccagcaacgcggcctttttacggttcctggccttttgctggccttttgctcacatgttctttcctgcgttatcccctgattctgtggataaccgtattaccgcctttgagtgagctgataccgctcgccgcagccgaacgaccgagcgcagcgagtcagtgagcgaggaagcggaagagcgcccaatacgcaaaccgcctctccccgcgcgttggccgattcattaatgcagctggcacgacaggtttcccgactggaaagcgggcagtgagcgcaacgcaattaatgtgagttacctcactcattaggcaccccaggctttacactttatgcttccggctcctatgttgtgtggaattgtgagcggataacaatttcacacaggaaacagctatgaccatgattacgccaagcgcgcaattaaccctcactaaagggaacaaaagctggagctcagtttatcattatcaatactcgccatttcaaagaatacgtaaataattaatagtagtgattttcctaactttatttagtcaaaaaattagccttttaattctgctgtaacccgtacatgcccaaaatagggggcgggttacacagaatatataacatcgtaggtgtctgggtgaacagtttattcctggcatccactaaatataatggagcccgctttttaagctggcatccagaaaaaaaaagaatcccagcaccaaaatattgttttcttcaccaaccatcagttcataggtccattctcttagcgcaactacagagaacaggggcacaaacaggcaaaaaacgggcacaacctcaatggagtgatgcaacctgcctggagtaaatgatgacacaaggcaattgacccacgcatgtatctatctcattttcttacaccttctattaccttctgctctctctgatttggaaaaagctgaaaaaaaaggttgaaaccagttccctgaaattattcccctacttgactaataagtatataaagacggtaggtattgattgtaattctgtaaatctatttcttaaacttcttaaattctacttttatagttagtcttttttttagttttaaaacaccagaacttagtttcgacggattctagaactagtggatcccccgggctgcag
'''
    list_of_formatted_seq_records = parse(text2)
    frecs,circ = circular_assembly(list_of_formatted_seq_records, limit=25)
    candidate = circ[0]
    # One sequence written as adjacent string literals; the pieces are
    # joined without any separator.
    correct = ('TTCTAGAACTAGTGGATCCCCCGGGCTGCAGATGAGTGAAGGCCCCGTCAAATTCGAAAAAAATACCGTCATATCTGTCTTTGGTGCGTCAGGTGATCTGGCAAAGAAGAAGACTTTTCCCGCCTTATTTGGGCTTTTCAGAGAAGGTTACCTTGATCCATCTACCAAGATCTTCGGTTATGCCCGGTCCAAATTGTCCATGGAGGAGGACCTGAAGTCCCGTGTCCTACCCCACTTGAAAAAACCTCACGGTGAAGCCGATGACTCTAAGGTCGAACAGTTCTTCAAGATGGTCAGCTACATTTCGGGAAATTACGACACAGATGAAGGCTTCGACGAATTAAGAACGCAGATCGAGAAATTCGAGAAAAGTGCCAACGTCGATGTCCCACACCGTCTCTTCTATCTGGCCTTGCCGCCAAGCGTTTTTTTGACGGTGGCCAAGCAGATCAAGAGTCGTGTGTACGCAGAGAATGGCATCACCCGTGTAATCGTAGAGAAACCTTTCGGCCACGACCTGGCCTCTGCCAGGGAGCTGCAAAAAAACCTGGGGCCCCTCTTTAAAGAAGAAGAGTTGTACAGAATTGACCATTACTTGGGTAAAGAGTTGGTCAAGAATCTTTTAGTCTTGAGGTTCGGTAACCAGTTTTTGAATGCCTCGTGGAATAGAGACAACATTCAAAGCGTTCAGATTTCGTTTAAAGAGAGGTTCGGCACCGAAGGCCGTGGCGGCTATTTCGACTCTATAGGCATAATCAGAGACGTGATGCAGAACCATCTGTTACAAATCATGACTCTCTTGACTATGGAAAGACCGGTGTCTTTTGACCCGGAATCTATTCGTGACGAAAAGGTTAAGGTTCTAAAGGCCGTGGCCCCCATCGACACGGACGACGTCCTCTTGGGCCAGTACGGTAAATCTGAGGACGGGTCTAAGCCCGCCTACGTGGATGATGACACTGTAGACAAGGACTCTAAATGTGTCACTTTTGCAGCAATGACTTTCAACATCGAAAACGAGCGTTGGGAGGGCGTCCCCATCATGATGCGTGCCGGTAAGGCTTTGAATGAGTCCAAGGTGGAGATCAGACTGCAGTACAAAGCGGTCGCATCGGGTGTCTTCAAAGACATTCCAAATAACGAACTGGTCATCAGAGTGCAGCCCGATGCCGCTGTGTACCTAAAGTTTAATGCTAAGACCCCTGGTCTGTCAAATGCTACCCAAGTCACAGATCTGAATCTAACTTACGCAAGCAGGTACCAAGACTTTTGGATTCCAGAGGCTTACGAGGTGTTGATAAGAGACGCCCTACTGGGTGACCATTCCAACTTTGTCAGAGATGACGAATTGGATATCAGTTGGGGCATATTCACCCCATTACTGAAGCACATAGAGCGTCCGGACGGTCCAACACCGGAAATTTACCCCTACGGATCAAGAGGTCCAAAGGGATTGAAGGAATATATGCAAAAACACAAGTATGTTATGCCCGAAAAGCACCCTTACGCTTGGCCCGTGACTAAGCCAGAAGATACGAAGGATAATTAGCTGCAGGAATTCGATATCAAGCTTATCGATACCGTCGACCTCGAGTCATGTAATTAGTTATGTCACGCTTACATTCACGCCCTCCCCCCACATCCGCTCTAACCGAAAAGGAAGGAGTTAGACAACCTGAAGTCTAGGTCCCTATTTATTTTTTTATAGTTATGTTAGTATTAAGAACGTTATTTATATTTCAAATTTTTCTTTTTTTTCTGTACAGACGCGTGTACGCATGTAACATTATACTGAAAACCTTGCTTGAGAAGGTTTTGGGACGCTCGAAGGCTTTAATTTGCGGCCGGTACCCAATTCGCCCTATAGTGAGTCGTATTACGCGCGCTCACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAGCTGGCGTAATAGCG'
               'AAGAGGCCCGCACCGATCGCCCTTCCCAACAGTTGCGCAGCCTGAATGGCGAATGGCGCGACGCGCCCTGTAGCGGCGCATTAAGCGCGGCGGGTGTGGTGGTTACGCGCAGCGTGACCGCTACACTTGCCAGCGCCCTAGCGCCCGCTCCTTTCGCTTTCTTCCCTTCCTTTCTCGCCACGTTCGCCGGCTTTCCCCGTCAAGCTCTAAATCGGGGGCTCCCTTTAGGGTTCCGATTTAGTGCTTTACGGCACCTCGACCCCAAAAAACTTGATTAGGGTGATGGTTCACGTAGTGGGCCATCGCCCTGATAGACGGTTTTTCGCCCTTTGACGTTGGAGTCCACGTTCTTTAATAGTGGACTCTTGTTCCAAACTGGAACAACACTCAACCCTATCTCGGTCTATTCTTTTGATTTATAAGGGATTTTGCCGATTTCGGCCTATTGGTTAAAAAATGAGCTGATTTAACAAAAATTTAACGCGAATTTTAACAAAATATTAACGTTTACAATTTCCTGATGCGGTATTTTCTCCTTACGCATCTGTGCGGTATTTCACACCGCATATCGACGGTCGAGGAGAACTTCTAGTATATCCACATACCTAATATTATTGCCTTATTAAAAATGGAATCCCAACAATTACATCAAAATCCACATTCTCTTCAAAATCAATTGTCCTGTACTTCCTTGTTCATGTGTGTTCAAAAACGTTATATTTATAGGATAATTATACTCTATTTCTCAACAAGTAATTGGTTGTTTGGCCGAGCGGTCTAAGGCGCCTGATTCAAGAAATATCTTGACCGCAGTTAACTGTGGGAATACTCAGGTATCGTAAGATGCAAGAGTTCGAATCTCTTAGCAACCATTATTTTTTTCCTCAACATAACGAGAACACACAGGGGCGCTATCGCACAGAATCAAATTCGATGACTGGAAATTTTTTGTTAATTTCAGAGGTCGCCTGACGCATATACCTTTTTCAACTGAAAAATTGGGAGAAAAAGGAAAGGTGAGAGGCCGGAACCGGCTTTTCATATAGAATAGAGAAGCGTTCATGACTAAATGCTTGCATCACAATACTTGAAGTTGACAATATTATTTAAGGACCTATTGTTTTTTCCAATAGGTGGTTAGCAATCGTCTTACTTTCTAACTTTTCTTACCTTTTACATTTCAGCAATATATATATATATTTCAAGGATATACCATTCTAATGTCTGCCCCTATGTCTGCCCCTAAGAAGATCGTCGTTTTGCCAGGTGACCACGTTGGTCAAGAAATCACAGCCGAAGCCATTAAGGTTCTTAAAGCTATTTCTGATGTTCGTTCCAATGTCAAGTTCGATTTCGAAAATCATTTAATTGGTGGTGCTGCTATCGATGCTACAGGTGTCCCACTTCCAGATGAGGCGCTGGAAGCCTCCAAGAAGGTTGATGCCGTTTTGTTAGGTGCTGTGGCTGGTCCTAAATGGGGTACCGGTAGTGTTAGACCTGAACAAGGTTTACTAAAAATCCGTAAAGAACTTCAATTGTACGCCAACTTAAGACCATGTAACTTTGCATCCGACTCTCTTTTAGACTTATCTCCAATCAAGCCACAATTTGCTAAAGGTACTGACTTCGTTGTTGTCAGAGAATTAGTGGGAGGTATTTACTTTGGTAAGAGAAAGGAAGACGATGGTGATGGTGTCGCTTGGGATAGTGAACAATACACCGTTCCAGAAGTGCAAAGAATCACAAGAATGGCCGCTTTCATGGCCCTACAACATGAGCCACCATTGCCTATTTGGTCCTTGGATAAAGCTAATCTTTTGGCCTCTTCAAGATTATGGAGAAAAACTGTGGAGGAAACCATCAAGAACGAATTCCCTACATTGAAGGTTCAACATCAATTGATTGATTCTGCCGCCATGATCCTAGTTAAGAACCCAACCCACCTAAATGGTATTATAATCACCAGCAACATGTTTGGTGATATCATCTCCGATGAAGCC'
               'TCCGTTATCCCAGGTTCCTTGGGTTTGTTGCCATCTGCGTCCTTGGCCTCTTTGCCAGACAAGAACACCGCATTTGGTTTGTACGAACCATGCCACGGTTCTGCTCCAGATTTGCCAAAGAATAAGGTTGACCCTATCGCCACTATCTTGTCTGCTGCAATGATGTTGAAATTGTCATTGAACTTGCCTGAAGAAGGTAAGGCCATTGAAGATGCAGTTAAAAAGGTTTTGGATGCAGGTATCAGAACTGGTGATTTAGGTGGTTCCAACAGTACCACCGAAGTCGGTGATGCTGTCGCCGAAGAAGTTAAGAAAATCCTTGCTTAAAAAGATTCTCTTTTTTTATGATATTTGTACATAAACTTTATAAATGAAATTCATAATAGAAACGACACGAAATTACAAAATGGAATATGTTCATAGGGTAGACGAAACTATATACGCAATCTACATACATTTATCAAGAAGGAGAAAAAGGAGGATAGTAAAGGAATACAGGTAAGCAAATTGATACTAATGGCTCAACGTGATAAGGAAAAAGAATTGCACTTTAACATTAATATTGACAAGGAGGAGGGCACCACACAAAAAGTTAGGTGTAACAGAAAATCATGAAACTACGATTCCTAATTTGATATTGGAGGATTTTCTCTAAAAAAAAAAAAATACAACAAATAAAAAACACTCAATGACCTGACCATTTGATGGAGTTTAAGTCAATACCTTCTTGAAGCATTTCCCATAATGGTGAAAGTTCCCTCAAGAATTTTACTCTGTCAGAAACGGCCTTACGACGTAGTCGATATGGTGCACTCTCAGTACAATCTGCTCTGATGCCGCATAGTTAAGCCAGCCCCGACACCCGCCAACACCCGCTGACGCGCCCTGACGGGCTTGTCTGCTCCCGGCATCCGCTTACAGACAAGCTGTGACCGTCTCCGGGAGCTGCATGTGTCAGAGGTTTTCACCGTCATCACCGAAACGCGCGAGACGAAAGGGCCTCGTGATACGCCTATTTTTATAGGTTAATGTCATGATAATAATGGTTTCTTAGTATGATCCAATATCAAAGGAAATGATAGCATTGAAGGATGAGACTAATCCAATTGAGGAGTGGCAGCATATAGAACAGCTAAAGGGTAGTGCTGAAGGAAGCATACGATACCCCGCATGGAATGGGATAATATCACAGGAGGTACTAGACTACCTTTCATCCTACATAAATAGACGCATATAAGTACGCATTTAAGCATAAACACGCACTATGCCGTTCTTCTCATGTATATATATATACAGGCAACACGCAGATATAGGTGCGACGTGAACAGTGAGCTGTATGTGCGCAGCTCGCGTTGCATTTTCGGAAGCGCTCGTTTTCGGAAACGCTTTGAAGTTCCTATTCCGAAGTTCCTATTCTCTAGAAAGTATAGGAACTTCAGAGCGCTTTTGAAAACCAAAAGCGCTCTGAAGACGCACTTTCAAAAAACCAAAAACGCACCGGACTGTAACGAGCTACTAAAATATTGCGAATACCGCTTCCACAAACATTGCTCAAAAGTATCTCTTTGCTATATATCTCTGTGCTATATCCCTATATAACCTACCCATCCACCTTTCGCTCCTTGAACTTGCATCTAAACTCGACCTCTACATTTTTTATGTTTATCTCTAGTATTACTCTTTAGACAAAAAAATTGTAGTAAGAACTATTCATAGAGTGAATCGAAAACAATACGAAAATGTAAACATTTCCTATACGTAGTATATAGAGACAAAATAGAAGAAACCGTTCATAATTTTCTGACCAATGAAGAATCATCAACGCTATCACTTTCTGTTCACAAAGTATGCGCAATCCACATCGGTATAGAATATAATCGGGGATGCCTTTATCTTGAAAAAATGCACCCGCAGCTTCGCTAGTAATCAGTAAACGCGGGAAGTGGAGTCAGGCTTTTTTTATGGAAGAGAAAATAGACACCAAAGTAGCCTTCTTCTAA'
               'CCTTAACGGACCTACAGTGCAAAAAGTTATCAAGAGACTGCATTATAGAGCGCACAAAGGAGAAAAAAAGTAATCTAAGATGCTTTGTTAGAAAAATAGCGCTCTCGGGATGCATTTTTGTAGAACAAAAAAGAAGTATAGATTCTTTGTTGGTAAAATAGCGCTCTCGCGTTGCATTTCTGTTCTGTAAAAATGCAGCTCAGATTCTTTGTTTGAAAAATTAGCGCTCTCGCGTTGCATTTTTGTTTTACAAAAATGAAGCACAGATTCTTCGTTGGTAAAATAGCGCTTTCGCGTTGCATTTCTGTTCTGTAAAAATGCAGCTCAGATTCTTTGTTTGAAAAATTAGCGCTCTCGCGTTGCATTTTTGTTCTACAAAATGAAGCACAGATGCTTCGTTCAGGTGGCACTTTTCGGGGAAATGTGCGCGGAACCCCTATTTGTTTATTTTTCTAAATACATTCAAATATGTATCCGCTCATGAGACAATAACCCTGATAAATGCTTCAATAATATTGAAAAAGGAAGAGTATGAGTATTCAACATTTCCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGATGAGCACTTTTAAAGTTCTGCTATGTGGCGCGGTATTATCCCGTATTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGTAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCGGTGAGCGTGGGTCTCGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTAACTGTCAGACCAAGTTTACTCATATATACTTTAGATTGATTTAAAACTTCATTTTTAATTTAAAAGGATCTAGGTGAAGATCCTTTTTGATAATCTCATGACCAAAATCCCTTAACGTGAGTTTTCGTTCCACTGAGCGTCAGACCCCGTAGAAAAGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTCCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTA'
               'TCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGTTCTTTCCTGCGTTATCCCCTGATTCTGTGGATAACCGTATTACCGCCTTTGAGTGAGCTGATACCGCTCGCCGCAGCCGAACGACCGAGCGCAGCGAGTCAGTGAGCGAGGAAGCGGAAGAGCGCCCAATACGCAAACCGCCTCTCCCCGCGCGTTGGCCGATTCATTAATGCAGCTGGCACGACAGGTTTCCCGACTGGAAAGCGGGCAGTGAGCGCAACGCAATTAATGTGAGTTACCTCACTCATTAGGCACCCCAGGCTTTACACTTTATGCTTCCGGCTCCTATGTTGTGTGGAATTGTGAGCGGATAACAATTTCACACAGGAAACAGCTATGACCATGATTACGCCAAGCGCGCAATTAACCCTCACTAAAGGGAACAAAAGCTGGAGCTCAGTTTATCATTATCAATACTCGCCATTTCAAAGAATACGTAAATAATTAATAGTAGTGATTTTCCTAACTTTATTTAGTCAAAAAATTAGCCTTTTAATTCTGCTGTAACCCGTACATGCCCAAAATAGGGGGCGGGTTACACAGAATATATAACATCGTAGGTGTCTGGGTGAACAGTTTATTCCTGGCATCCACTAAATATAATGGAGCCCGCTTTTTAAGCTGGCATCCAGAAAAAAAAAGAATCCCAGCACCAAAATATTGTTTTCTTCACCAACCATCAGTTCATAGGTCCATTCTCTTAGCGCAACTACAGAGAACAGGGGCACAAACAGGCAAAAAACGGGCACAACCTCAATGGAGTGATGCAACCTGCCTGGAGTAAATGATGACACAAGGCAATTGACCCACGCATGTATCTATCTCATTTTCTTACACCTTCTATTACCTTCTGCTCTCTCTGATTTGGAAAAAGCTGAAAAAAAAGGTTGAAACCAGTTCCCTGAAATTATTCCCCTACTTGACTAATAAGTATATAAAGACGGTAGGTATTGATTGTAATTCTGTAAATCTATTTCTTAAACTTCTTAAATTCTACTTTTATAGTTAGTCTTTTTTTTAGTTTTAAAACACCAGAACTTAGTTTCGACGGA')
    print(eq(correct,candidate, circular=True))
    import sys; sys.exit()

    # NOTE(review): everything below is unreachable as long as the
    # sys.exit() call above is in place; it is kept as a manual demo of
    # linear_assembly.
    import time
    start = time.time()
    import textwrap,sys
    from utils import eq
    a='''
LOCUS New_DNA 48 bp ds-DNA linear 20-NOV-2012
DEFINITION .
SOURCE .
ORGANISM .
COMMENT
COMMENT ApEinfo:methylated:1
FEATURES Location/Qualifiers
misc_feature 10..29
/note=fw1
/ApEinfo_fwdcolor=cyan
/ApEinfo_revcolor=green
/ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
width 5 offset 0
misc_feature complement(9..30)
/note=rv1
/ApEinfo_fwdcolor=cyan
/ApEinfo_revcolor=green
/ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
width 5 offset 0
ORIGIN
1 atagtcacgt atgcattcgc CAGGAACAGT AGTTATATAC GTGTCGTA
//
LOCUS New_DNA 40 bp ds-DNA linear 20-NOV-2012
DEFINITION .
SOURCE .
ORGANISM .
COMMENT New DNA from 1 to 48
COMMENT
COMMENT ApEinfo:methylated:1
FEATURES Location/Qualifiers
misc_feature 21..35
/note=fw2
/ApEinfo_fwdcolor=cyan
/ApEinfo_revcolor=green
/ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
width 5 offset 0
misc_feature complement(17..37)
/note=rv2
/ApEinfo_fwdcolor=cyan
/ApEinfo_revcolor=green
/ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
width 5 offset 0
ORIGIN
1 CAGGAACAGT AGTTATATAC GTGTCGTAcc tctttctctc
//
'''
    list_of_formatted_seq_records = parse(a)
    frecs,lin = linear_assembly(list_of_formatted_seq_records, limit=25)
    from helper import ape
    ape(lin[0])