File: Restriction.html

package info (click to toggle)
python-biopython 1.68%2Bdfsg-3~bpo8%2B1
  • links: PTS, VCS
  • area: main
  • in suites: jessie-backports
  • size: 46,856 kB
  • sloc: python: 160,306; xml: 93,216; ansic: 9,118; sql: 1,208; makefile: 155; sh: 63
file content (1096 lines) | stat: -rw-r--r-- 113,149 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  <meta http-equiv="Content-Style-Type" content="text/css" />
  <meta name="generator" content="pandoc" />
  <title>Working with restriction enzymes</title>
  <style type="text/css">code{white-space: pre;}</style>
  <style type="text/css">
div.sourceCode { overflow-x: auto; background-color: #e9e9e9; border-radius: 5px; padding: 0 1rem; }
code.inline { color: #bf616a; background-color: #e9e9e9; border-radius: 3px; padding: 0.1em 0.2em; }
table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode {
  margin: 0; padding: 0; vertical-align: baseline; border: none; }
table.sourceCode { width: 100%; line-height: 100%; }
td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; }
td.sourceCode { padding-left: 5px; }
code > span.kw { color: #007020; font-weight: bold; } /* Keyword */
code > span.dt { color: #902000; } /* DataType */
code > span.dv { color: #40a070; } /* DecVal */
code > span.bn { color: #40a070; } /* BaseN */
code > span.fl { color: #40a070; } /* Float */
code > span.ch { color: #4070a0; } /* Char */
code > span.st { color: #4070a0; } /* String */
code > span.co { color: #60a0b0; font-style: italic; } /* Comment */
code > span.ot { color: #007020; } /* Other */
code > span.al { color: #ff0000; font-weight: bold; } /* Alert */
code > span.fu { color: #06287e; } /* Function */
code > span.er { color: #ff0000; font-weight: bold; } /* Error */
code > span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
code > span.cn { color: #880000; } /* Constant */
code > span.sc { color: #4070a0; } /* SpecialChar */
code > span.vs { color: #4070a0; } /* VerbatimString */
code > span.ss { color: #bb6688; } /* SpecialString */
code > span.im { } /* Import */
code > span.va { color: #19177c; } /* Variable */
code > span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code > span.op { color: #666666; } /* Operator */
code > span.bu { } /* BuiltIn */
code > span.ex { } /* Extension */
code > span.pp { color: #bc7a00; } /* Preprocessor */
code > span.at { color: #7d9029; } /* Attribute */
code > span.do { color: #ba2121; font-style: italic; } /* Documentation */
code > span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code > span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code > span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
  </style>
</head>
<body>
<h1 id="working-with-restriction-enzymes">Working with restriction enzymes</h1>
<h2 id="table-of-contents">Table of contents</h2>
<ol style="list-style-type: decimal">
<li><a href="#1">The restriction enzymes classes</a>
<ol style="list-style-type: decimal">
<li><a href="#1.1">Importing the enzymes</a></li>
<li><a href="#1.2">Naming convention</a></li>
<li><a href="#1.3">Searching for restriction sites</a></li>
<li><a href="#1.4">Retrieving the sequences produced by a digestion</a></li>
<li><a href="#1.5">Analysing circular sequences</a></li>
<li><a href="#1.6">Comparing enzymes with each other</a></li>
<li><a href="#1.7">Other facilities provided by the enzyme classes</a></li>
</ol></li>
<li><a href="#2">The RestrictionBatch class: a class to deal with several enzymes</a>
<ol style="list-style-type: decimal">
<li><a href="#2.1">Creating a RestrictionBatch</a></li>
<li><a href="#2.2">Restricting a RestrictionBatch to a particular supplier</a></li>
<li><a href="#2.3">Adding enzymes to a RestrictionBatch</a></li>
<li><a href="#2.4">Removing enzymes from a RestrictionBatch</a></li>
<li><a href="#2.5">Manipulating RestrictionBatch</a></li>
<li><a href="#2.6">Analysing sequences with a RestrictionBatch</a></li>
<li><a href="#2.7">Other RestrictionBatch methods</a></li>
</ol></li>
<li><a href="#3">AllEnzymes and CommOnly: two preconfigured RestrictionBatches</a></li>
<li><a href="#4">The Analysis class: even simpler restriction analysis</a>
<ol style="list-style-type: decimal">
<li><a href="#4.1">Setting up an Analysis</a></li>
<li><a href="#4.2">Full restriction analysis</a></li>
<li><a href="#4.3">Changing the title</a></li>
<li><a href="#4.4">Customising the output</a></li>
<li><a href="#4.5">Fancier restriction analysis</a></li>
<li><a href="#4.6">More complex analysis</a></li>
</ol></li>
<li><a href="#5">Advanced features: the FormattedSeq class</a>
<ol style="list-style-type: decimal">
<li><a href="#5.1">Creating a FormattedSeq</a></li>
<li><a href="#5.2">Unlike Bio.Seq, FormattedSeq retains information about their shape</a></li>
<li><a href="#5.3">Changing the shape of a FormattedSeq</a></li>
<li><a href="#5.4">Using / and // operators with FormattedSeq</a></li>
</ol></li>
<li><a href="#6">More advanced features</a>
<ol style="list-style-type: decimal">
<li><a href="#6.1">Updating the enzymes from Rebase</a>
<ol style="list-style-type: decimal">
<li><a href="#6.1.1">Fetching the recent enzyme files manually from Rebase</a></li>
<li><a href="#6.1.2">Fetching the recent enzyme files with rebase_update.py</a></li>
<li><a href="#6.1.3">Compiling a new dictionary with ranacompiler.py</a></li>
</ol></li>
<li><a href="#6.2">Subclassing the class Analysis</a></li>
</ol></li>
<li><a href="#7">Limitation and caveat</a>
<ol style="list-style-type: decimal">
<li><a href="#7.1">All DNA are non methylated</a></li>
<li><a href="#7.2">No support for star activity</a></li>
<li><a href="#7.3">Safe to use with degenerated DNA</a></li>
<li><a href="#7.4">Non standard bases in DNA are not allowed</a></li>
<li><a href="#7.5">Sites found at the edge of linear DNA might not be accessible in a real digestion</a></li>
<li><a href="#7.6">Restriction reports cutting sites not enzyme recognition sites</a></li>
</ol></li>
<li><a href="#8">Annexe: modifying dir() to use with from Bio.Restriction import *</a></li>
</ol>
<h3 id="the-restriction-enzymes-classes"><a name="1"></a>1. The restriction enzymes classes</h3>
<p>The restriction enzyme package is situated in <code class="inline">Bio.Restriction</code>. This package will allow you to work with restriction enzymes and perform restriction analysis on your sequence. Restriction makes use of the facilities offered by <strong>REBASE</strong> and contains classes for more than 800 restriction enzymes. This chapter will lead you through a quick overview of the facilities offered by the <code class="inline">Restriction</code> package of Biopython. The chapter is constructed as an interactive Python session and the best way to read it is with a Python shell open alongside you.</p>
<h4 id="importing-the-enzymes"><a name="1.1"></a> 1.1 Importing the enzymes</h4>
<p>To import the enzymes, open a Python shell and type:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> <span class="im">from</span> Bio <span class="im">import</span> Restriction
<span class="op">&gt;&gt;&gt;</span> <span class="bu">dir</span>()
[<span class="st">&#39;Restriction&#39;</span>, <span class="st">&#39;__builtins__&#39;</span>, <span class="st">&#39;__doc__&#39;</span>, <span class="st">&#39;__name__&#39;</span>, <span class="st">&#39;__package__&#39;</span>]
<span class="op">&gt;&gt;&gt;</span> Restriction.EcoRI
EcoRI
<span class="op">&gt;&gt;&gt;</span> Restriction.EcoRI.site
<span class="co">&#39;GAATTC&#39;</span>
<span class="op">&gt;&gt;&gt;</span></code></pre></div>
<p>You will certainly notice that the package is quite slow to load. This is normal as each enzyme possesses its own class and there are a lot of them. This will not affect the speed of Python after the initial import.</p>
<p>I don't know about you, but I find it quite cumbersome to have to prefix each operation with <code class="inline">Restriction.</code>, so here is another way to import the package.</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> <span class="im">from</span> Bio.Restriction <span class="im">import</span> <span class="op">*</span>
<span class="op">&gt;&gt;&gt;</span> EcoRI
EcoRI
<span class="op">&gt;&gt;&gt;</span> EcoRI.site
<span class="co">&#39;GAATTC&#39;</span>
<span class="op">&gt;&gt;&gt;</span></code></pre></div>
<p>However, this method has one big disadvantage: it is almost impossible to use the command <code class="inline">dir()</code> anymore, as there are so many enzymes that the result is hardly readable. A workaround is provided at the end of this tutorial. I let you decide which method you prefer, but in this tutorial I will use the second. If you prefer the first method you will need to prefix each call to a restriction enzyme with <code class="inline">Restriction.</code> in the remainder of the tutorial.</p>
<h4 id="naming-convention"><a name="1.2"></a>1.2 Naming convention</h4>
<p>To access an enzyme simply enter its name. You must respect the usual naming convention with the upper case letters and Roman numerals (in upper case as well):</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> EcoRI
EcoRI
<span class="op">&gt;&gt;&gt;</span> ecori

Traceback (most recent call last):
  File <span class="st">&quot;&lt;pyshell#25&gt;&quot;</span>, line <span class="dv">1</span>, <span class="op">in</span> <span class="op">-</span>toplevel<span class="op">-</span>
    ecori
<span class="pp">NameError</span>: name <span class="st">&#39;ecori&#39;</span> <span class="op">is</span> <span class="op">not</span> defined
<span class="op">&gt;&gt;&gt;</span> EcoR1

Traceback (most recent call last):
  File <span class="st">&quot;&lt;pyshell#26&gt;&quot;</span>, line <span class="dv">1</span>, <span class="op">in</span> <span class="op">-</span>toplevel<span class="op">-</span>
    EcoR1
<span class="pp">NameError</span>: name <span class="st">&#39;EcoR1&#39;</span> <span class="op">is</span> <span class="op">not</span> defined
<span class="op">&gt;&gt;&gt;</span> KpnI
KpnI
<span class="op">&gt;&gt;&gt;</span></code></pre></div>
<p><code class="inline">ecori</code> or <code class="inline">EcoR1</code> are not enzymes, <code class="inline">EcoRI</code> and <code class="inline">KpnI</code> are.</p>
<h4 id="searching-for-restriction-sites"><a name="1.3"></a>1.3 Searching for restriction sites</h4>
<p>So what can we do with these restriction enzymes? To see that we will need a DNA sequence. Restriction enzymes support both <code class="inline">Bio.Seq.MutableSeq</code> and <code class="inline">Bio.Seq.Seq</code> objects. You can use any DNA alphabet which complies with the IUPAC alphabet.</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> <span class="im">from</span> Bio.Seq <span class="im">import</span> Seq
<span class="op">&gt;&gt;&gt;</span> <span class="im">from</span> Bio.Alphabet.IUPAC <span class="im">import</span> IUPACAmbiguousDNA
<span class="op">&gt;&gt;&gt;</span> amb <span class="op">=</span> IUPACAmbiguousDNA()
<span class="op">&gt;&gt;&gt;</span> my_seq <span class="op">=</span> Seq(<span class="st">&#39;AAAAAAAAAAAAAA&#39;</span>, amb)
<span class="op">&gt;&gt;&gt;</span> my_seq
Seq(<span class="st">&#39;AAAAAAAAAAAAAA&#39;</span>, IUPACAmbiguousDNA())</code></pre></div>
<p>Searching a sequence for the presence of restriction site for your preferred enzyme is as simple as:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> EcoRI.search(my_seq)
[]</code></pre></div>
<p>The result is a list. Here the list is empty since there is obviously no EcoRI site in <em>my_seq</em>. Let's try to get a sequence with an EcoRI site.</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> ecoseq <span class="op">=</span> my_seq <span class="op">+</span> Seq(EcoRI.site, amb) <span class="op">+</span> my_seq
<span class="op">&gt;&gt;&gt;</span> ecoseq
Seq(<span class="st">&#39;AAAAAAAAAAAAAAGAATTCAAAAAAAAAAAAAA&#39;</span>, IUPACAmbiguousDNA())
<span class="op">&gt;&gt;&gt;</span> EcoRI.search(ecoseq)
[<span class="dv">16</span>]</code></pre></div>
<p>We therefore have a site at position 16 of the sequence <em>ecoseq</em>. The position returned by the method search is the first base of the downstream segment produced by a restriction (i.e. the first base after the position where the enzyme will cut). The <code class="inline">Restriction</code> package follows biological convention (the first base of a sequence is base 1). No need to make difficult conversions between your recorded biological data and the results produced by the enzymes in this package.</p>
<h4 id="retrieving-the-sequences-produced-by-a-digestion"><a name="1.4"></a>1.4 Retrieving the sequences produced by a digestion</h4>
<p><code class="inline">Seq</code> objects, like all Python sequences, follow a different convention: the first base of a sequence is base 0. Therefore to get the sequences produced by an EcoRI digestion of <em>ecoseq</em>, one should do the following:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> ecoseq[:<span class="dv">15</span>], ecoseq[<span class="dv">15</span>:]
(Seq(<span class="st">&#39;AAAAAAAAAAAAAAG&#39;</span>, IUPACAmbiguousDNA()), Seq(<span class="st">&#39;AATTCAAAAAAAAAAAAAA&#39;</span>, IUPACAm
biguousDNA()))</code></pre></div>
<p>I hear you thinking &quot;this is a cumbersome and error prone method to get these sequences&quot;. To simplify your life, <code class="inline">Restriction</code> provides another method to get these sequences without hassle: <code class="inline">catalyse</code>. This method will return a tuple containing all the fragments produced by a complete digestion of the sequence. Using it is as simple as before:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> EcoRI.catalyse(ecoseq)
(Seq(<span class="st">&#39;AAAAAAAAAAAAAAG&#39;</span>, IUPACAmbiguousDNA()), Seq(<span class="st">&#39;AATTCAAAAAAAAAAAAAA&#39;</span>, IUPACAm
biguousDNA()))</code></pre></div>
<p>BTW, you can also spell it the American way, <code class="inline">catalyze</code>:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> EcoRI.catalyze(ecoseq)
(Seq(<span class="st">&#39;AAAAAAAAAAAAAAG&#39;</span>, IUPACAmbiguousDNA()), Seq(<span class="st">&#39;AATTCAAAAAAAAAAAAAA&#39;</span>, IUPACAm
biguousDNA()))</code></pre></div>
<h4 id="analysing-circular-sequences"><a name="1.5"></a>1.5 Analysing circular sequences</h4>
<p>Now, if you have entered the previous command in your shell you may have noticed that both <code class="inline">search</code> and <code class="inline">catalyse</code> can take a second argument <code class="inline">linear</code> which defaults to <code class="inline">True</code>. Using this will allow you to simulate circular sequences such as plasmids. Setting <code class="inline">linear</code> to <code class="inline">False</code> informs the enzyme to make the search over a circular sequence and to search for potential sites spanning over the boundaries of the sequence.</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> EcoRI.search(ecoseq, linear<span class="op">=</span><span class="va">False</span>)
[<span class="dv">16</span>]
<span class="op">&gt;&gt;&gt;</span> EcoRI.catalyse(ecoseq, linear<span class="op">=</span><span class="va">False</span>)
(Seq(<span class="st">&#39;AATTCAAAAAAAAAAAAAAAAAAAAAAAAAAAAG&#39;</span>, IUPACAmbiguousDNA()),)
<span class="op">&gt;&gt;&gt;</span> ecoseq  <span class="co"># for memory</span>
Seq(<span class="st">&#39;AAAAAAAAAAAAAAGAATTCAAAAAAAAAAAAAA&#39;</span>, IUPACAmbiguousDNA())</code></pre></div>
<p>OK, this is quite a difference: we only get one fragment, which corresponds to the linearised sequence. The beginning of the sequence has been shifted to take this fact into account. Moreover we can see another difference:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> new_seq <span class="op">=</span> Seq(<span class="st">&#39;TTCAAAAAAAAAAAAAAAAAAAAAAAAAAAAGAA&#39;</span>, IUPACAmbiguousDNA())
<span class="op">&gt;&gt;&gt;</span> EcoRI.search(new_seq)
[]
<span class="op">&gt;&gt;&gt;</span> EcoRI.search(new_seq, linear<span class="op">=</span><span class="va">False</span>)
[<span class="dv">33</span>]</code></pre></div>
<p>As you can see, using <code class="inline">linear=False</code> makes a site appear in the sequence <em>new_seq</em>. This site does not exist in a linear sequence as the EcoRI site is split into two halves at the start and the end of the sequence. In a circular sequence however, the site is effectively present when the beginning and end of the sequence are joined.</p>
<h4 id="comparing-enzymes-with-each-others"><a name="1.6"></a>1.6 Comparing enzymes with each other</h4>
<p><code class="inline">Restriction</code> enzymes define 4 comparative operators <code class="inline">==</code>, <code class="inline">!=</code>, <code class="inline">&gt;&gt;</code> and <code class="inline">%</code>. All these operators compare two enzymes with each other and return either <code class="inline">True</code> or <code class="inline">False</code>.</p>
<dl>
<dt><code class="inline">==</code> (test identity)</dt>
<dd>It will return <code class="inline">True</code> if the two sides of the operator are the same. &quot;Same&quot; is defined as: same name, same site, same overhang (i.e. the only thing which is equal to <code class="inline">EcoRI</code> is <code class="inline">EcoRI</code>).
</dd>
<dt><code class="inline">!=</code> (test for different site or cutting)</dt>
<dd>It will return <code class="inline">True</code> if the two sides of the operator are different. Two enzymes are not different if the result produced by one enzyme will always be the same as the result produced by the other (i.e. true isoschizomers, while not being the same enzymes, are not different since they are interchangeable).
</dd>
<dt><code class="inline">&gt;&gt;</code> (test for neoschizomer)</dt>
<dd><code class="inline">True</code> if the enzymes recognise the same site, but cut it in a different way (i.e. the enzymes are neoschizomers).
</dd>
<dt><code class="inline">%</code> (test compatibility)</dt>
<dd>Test the compatibility of the ends produced by the enzymes (will be <code class="inline">True</code> if the fragments produced with one of the enzymes can directly be ligated to fragments produced by the other).
</dd>
</dl>
<p>Let's use <code class="inline">Acc65I</code> and its isoschizomers as example:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> Acc65I.isoschizomers()
[Asp718I, KpnI]
<span class="op">&gt;&gt;&gt;</span> Acc65I.elucidate()
<span class="co">&#39;G^GTAC_C&#39;</span>
<span class="op">&gt;&gt;&gt;</span> Asp718I.elucidate()
<span class="co">&#39;G^GTAC_C&#39;</span>
<span class="op">&gt;&gt;&gt;</span> KpnI.elucidate()
<span class="co">&#39;G_GTAC^C&#39;</span>
<span class="op">&gt;&gt;&gt;</span> <span class="co"># Asp718I and Acc65I are true isoschizomers,</span>
<span class="op">&gt;&gt;&gt;</span> <span class="co"># they recognise the same site and cut it the</span>
<span class="op">&gt;&gt;&gt;</span> <span class="co"># same way.</span>
<span class="op">&gt;&gt;&gt;</span> <span class="co"># KpnI is a neoschizomers of the 2 others.</span>
<span class="op">&gt;&gt;&gt;</span> <span class="co"># Here are the results of the 4 operators</span>
<span class="op">&gt;&gt;&gt;</span> <span class="co"># for each pair of enzymes:</span>
<span class="op">&gt;&gt;&gt;</span>
<span class="op">&gt;&gt;&gt;</span> <span class="co">############# x == y  (x is y)</span>
<span class="op">&gt;&gt;&gt;</span> Acc65I <span class="op">==</span> Acc65I    <span class="co"># same enzyme =&gt; True</span>
<span class="va">True</span>
<span class="op">&gt;&gt;&gt;</span> Acc65I <span class="op">==</span> KpnI      <span class="co"># all other cases =&gt; False</span>
<span class="va">False</span>
<span class="op">&gt;&gt;&gt;</span> Acc65I <span class="op">==</span> Asp718I
<span class="va">False</span>
<span class="op">&gt;&gt;&gt;</span> Acc65I <span class="op">==</span> EcoRI
<span class="va">False</span>
<span class="op">&gt;&gt;&gt;</span> <span class="co">############ x != y  (x and y are not true isoschizomers)</span>
<span class="op">&gt;&gt;&gt;</span> Acc65I <span class="op">!=</span> Acc65I    <span class="co"># same enzyme =&gt; False</span>
<span class="va">False</span>
<span class="op">&gt;&gt;&gt;</span> Acc65I <span class="op">!=</span> Asp718I   <span class="co"># different enzymes, but cut same manner =&gt; False</span>
<span class="va">False</span>
<span class="op">&gt;&gt;&gt;</span> Acc65I <span class="op">!=</span> KpnI      <span class="co"># all other cases =&gt; True</span>
<span class="va">True</span>
<span class="op">&gt;&gt;&gt;</span> Acc65I <span class="op">!=</span> EcoRI
<span class="va">True</span>
<span class="op">&gt;&gt;&gt;</span> <span class="co">###########  x &gt;&gt; y (x is neoschizomer of y)</span>
<span class="op">&gt;&gt;&gt;</span> Acc65I <span class="op">&gt;&gt;</span> Acc65I    <span class="co"># same enzyme =&gt; False</span>
<span class="va">False</span>
<span class="op">&gt;&gt;&gt;</span> Acc65I <span class="op">&gt;&gt;</span> Asp718I   <span class="co"># same site, same cut =&gt; False</span>
<span class="va">False</span>
<span class="op">&gt;&gt;&gt;</span> Acc65I <span class="op">&gt;&gt;</span> EcoRI     <span class="co"># different site =&gt; False</span>
<span class="va">False</span>
<span class="op">&gt;&gt;&gt;</span> Acc65I <span class="op">&gt;&gt;</span> KpnI      <span class="co"># same site, different cut =&gt; True</span>
<span class="va">True</span>
<span class="op">&gt;&gt;&gt;</span> <span class="co">########### x % y   (fragments produced by x and fragments produced by y</span>
<span class="op">&gt;&gt;&gt;</span> <span class="co">#            can be directly ligated to each other)</span>
<span class="op">&gt;&gt;&gt;</span> Acc65I <span class="op">%</span> Asp718I
<span class="va">True</span>
<span class="op">&gt;&gt;&gt;</span> Acc65I <span class="op">%</span> Acc65I
<span class="va">True</span>
<span class="op">&gt;&gt;&gt;</span> Acc65I <span class="op">%</span> KpnI   <span class="co"># KpnI -&gt; 3&#39; overhang, Acc65I -&gt; 5&#39; overhang =&gt; False</span>
<span class="va">False</span>
<span class="op">&gt;&gt;&gt;</span>
<span class="op">&gt;&gt;&gt;</span> SunI.elucidate()
<span class="co">&#39;C^GTAC_G&#39;</span>
<span class="op">&gt;&gt;&gt;</span> SunI <span class="op">==</span> Acc65I
<span class="va">False</span>
<span class="op">&gt;&gt;&gt;</span> SunI <span class="op">!=</span> Acc65I
<span class="va">True</span>
<span class="op">&gt;&gt;&gt;</span> SunI <span class="op">&gt;&gt;</span> Acc65I
<span class="va">False</span>
<span class="op">&gt;&gt;&gt;</span> SunI <span class="op">%</span> Acc65I  <span class="co"># different site, same overhang (5&#39; GTAC) =&gt; True</span>
<span class="va">True</span>
<span class="op">&gt;&gt;&gt;</span> SmaI <span class="op">%</span> EcoRV   <span class="co"># 2 Blunt enzymes, all blunt enzymes are compatible =&gt; True</span>
<span class="va">True</span></code></pre></div>
<h4 id="other-facilities-provided-by-the-enzyme-classes"><a name="1.7"></a>1.7 Other facilities provided by the enzyme classes</h4>
<p>The <code class="inline">Restriction</code> class provides quite a number of other methods. We will not go through all of them, but only have a quick look at the most useful ones.</p>
<p>Not all enzymes possess the same properties when it comes to the way they digest DNA. If you want to know more about the way a particular enzyme cuts you can use the three following methods. They are fairly straightforward to understand and refer to the ends that the enzyme produces: blunt, 5' overhanging (also called 3' recessed) sticky end and 3' overhanging (or 5' recessed) sticky end.</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> EcoRI.is_blunt()
<span class="va">False</span>
<span class="op">&gt;&gt;&gt;</span> EcoRI.is_5overhang()
<span class="va">True</span>
<span class="op">&gt;&gt;&gt;</span> EcoRI.is_3overhang()
<span class="va">False</span></code></pre></div>
<p>A more detailed view of the restriction site can be produced using the <code class="inline">elucidate()</code> method. The <code class="inline">^</code> refers to the position of the cut in the sense strand of the sequence, <code class="inline">_</code> to the cut on the antisense or complementary strand. <code class="inline">^_</code> means blunt.</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> EcoRI.elucidate()
<span class="co">&#39;G^AATT_C&#39;</span>
<span class="op">&gt;&gt;&gt;</span> KpnI.elucidate()
<span class="co">&#39;G_GTAC^C&#39;</span>
<span class="op">&gt;&gt;&gt;</span> EcoRV.elucidate()
<span class="co">&#39;GAT^_ATC&#39;</span></code></pre></div>
<p>The method <code class="inline">frequency()</code> will give you the statistical frequency of the enzyme site.</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> EcoRI.frequency()
<span class="dv">4096</span>
<span class="op">&gt;&gt;&gt;</span> XhoII.elucidate()
<span class="co">&#39;R^GATC_Y&#39;</span>
<span class="op">&gt;&gt;&gt;</span> XhoII.frequency()
<span class="dv">1024</span></code></pre></div>
<p>To get the length of the recognition sequence of an enzyme use the built-in function <code class="inline">len()</code>:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> <span class="bu">len</span>(EcoRI)
<span class="dv">6</span>
<span class="op">&gt;&gt;&gt;</span> BstXI.elucidate()
<span class="co">&#39;CCAN_NNNN^NTGG&#39;</span>
<span class="op">&gt;&gt;&gt;</span> <span class="bu">len</span>(BstXI)
<span class="dv">12</span>
<span class="op">&gt;&gt;&gt;</span> FokI.site
<span class="co">&#39;GGATG&#39;</span>
<span class="op">&gt;&gt;&gt;</span> FokI.elucidate()    <span class="co"># FokI cut well outside its recognition site</span>
<span class="co">&#39;GGATGNNNNNNNNN^NNNN_N&#39;</span>
<span class="op">&gt;&gt;&gt;</span> <span class="bu">len</span>(FokI)       <span class="co"># its length is the length of the recognition site</span>
<span class="dv">5</span></code></pre></div>
<p>Also interesting are the methods dealing with isoschizomers. As a reminder, two enzymes are <em>isoschizomers</em> if they share the same recognition site. A further division is made between isoschizomers (same name, recognise the same sequence and cut the same way) and <em>neoschizomers</em> which cut at different positions. <em>Equischizomer</em> is an arbitrary choice to designate &quot;isoschizomers_that_are_not_neoschizomers&quot; as this last one was a bit long. Another set of methods, <code class="inline">one_enzyme.is_*schizomers(one_other_enzyme)</code>, allows you to test 2 enzymes against each other.</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> Acc65I.isoschizomers()
[Asp718I, KpnI]
<span class="op">&gt;&gt;&gt;</span> Acc65I.neoschizomers()
[KpnI]
<span class="op">&gt;&gt;&gt;</span> Acc65I.equischizomers()
[Asp718I]
<span class="op">&gt;&gt;&gt;</span> KpnI.elucidate()
<span class="co">&#39;G_GTAC^C&#39;</span>
<span class="op">&gt;&gt;&gt;</span> Acc65I.elucidate()
<span class="co">&#39;G^GTAC_C&#39;</span>
<span class="op">&gt;&gt;&gt;</span> KpnI.is_neoschizomer(Acc65I)
<span class="va">True</span>
<span class="op">&gt;&gt;&gt;</span> KpnI.is_neoschizomer(KpnI)
<span class="va">False</span>
<span class="op">&gt;&gt;&gt;</span> KpnI.is_isoschizomer(Acc65I)
<span class="va">True</span>
<span class="op">&gt;&gt;&gt;</span> KpnI.is_isoschizomer(KpnI)
<span class="va">True</span>
<span class="op">&gt;&gt;&gt;</span> KpnI.is_equischizomer(Acc65I)
<span class="va">False</span>
<span class="op">&gt;&gt;&gt;</span> KpnI.is_equischizomer(KpnI)
<span class="va">True</span></code></pre></div>
<p><code class="inline">suppliers()</code> will get you the list of all the suppliers of the enzyme. <code class="inline">all_suppliers()</code> will give you all the suppliers in the database.</p>
<h3 id="the-restrictionbatch-class-a-class-to-deal-with-several-enzymes"><a name="2"></a>2. The RestrictionBatch class: a class to deal with several enzymes</h3>
<p>If you want to make a restriction map of a sequence, using individual enzymes can become tedious and will incur a big overhead due to the repeated conversion of the sequence to a <code class="inline">FormattedSeq</code> (see <a href="#5">Chapter 5</a>). <code class="inline">Restriction</code> provides a class to make it easier to use a large number of enzymes in one go: <code class="inline">RestrictionBatch</code>. <code class="inline">RestrictionBatch</code> will help you to manipulate lots of enzymes with a single command. Moreover, all the enzymes in the restriction batch will share the same converted sequence, reducing the overhead.</p>
<h4 id="creating-a-restrictionbatch"><a name="2.1"></a><span class="mozTocH4"></span>2.1 Creating a RestrictionBatch</h4>
<p>You can initiate a restriction batch by passing it a list of enzymes or enzyme names as argument.</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> rb <span class="op">=</span> RestrictionBatch([EcoRI])
<span class="op">&gt;&gt;&gt;</span> rb
RestrictionBatch([<span class="st">&#39;EcoRI&#39;</span>])
<span class="op">&gt;&gt;&gt;</span> rb2 <span class="op">=</span> RestrictionBatch([<span class="st">&#39;EcoRI&#39;</span>])
<span class="op">&gt;&gt;&gt;</span> rb2
RestrictionBatch([<span class="st">&#39;EcoRI&#39;</span>])
<span class="op">&gt;&gt;&gt;</span> rb <span class="op">==</span> rb2
<span class="va">True</span></code></pre></div>
<p>Adding a new enzyme to a restriction batch is easy:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> rb.add(KpnI)
<span class="op">&gt;&gt;&gt;</span> rb
RestrictionBatch([<span class="st">&#39;EcoRI&#39;</span>, <span class="st">&#39;KpnI&#39;</span>])
<span class="op">&gt;&gt;&gt;</span> rb <span class="op">+=</span> EcoRV
<span class="op">&gt;&gt;&gt;</span> rb
RestrictionBatch([<span class="st">&#39;EcoRI&#39;</span>, <span class="st">&#39;EcoRV&#39;</span>, <span class="st">&#39;KpnI&#39;</span>])</code></pre></div>
<p>Another way to create a RestrictionBatch is by simply adding restriction enzymes together, this is particularly useful for small batches:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> rb3 <span class="op">=</span> EcoRI <span class="op">+</span> KpnI <span class="op">+</span> EcoRV
<span class="op">&gt;&gt;&gt;</span> rb3
RestrictionBatch([<span class="st">&#39;EcoRI&#39;</span>, <span class="st">&#39;EcoRV&#39;</span>, <span class="st">&#39;KpnI&#39;</span>])</code></pre></div>
<h4 id="restricting-a-restrictionbatch-to-a-particular-supplier"><a name="2.2"></a>2.2 Restricting a RestrictionBatch to a particular supplier</h4>
<p>The Restriction package is based upon the <strong>REBASE</strong> database. This database gives a list of suppliers for each enzyme. It would be a shame not to make use of this facility. You can produce a <code class="inline">RestrictionBatch</code> containing only enzymes from one or a few supplier(s). Here is how to do it:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> rb_supp <span class="op">=</span> RestrictionBatch(first<span class="op">=</span>[], suppliers<span class="op">=</span>[<span class="st">&#39;C&#39;</span>,<span class="st">&#39;B&#39;</span>,<span class="st">&#39;E&#39;</span>,<span class="st">&#39;I&#39;</span>,<span class="st">&#39;K&#39;</span>,<span class="st">&#39;J&#39;</span>,<span class="st">&#39;M&#39;</span>,
<span class="co">&#39;O&#39;</span>,<span class="st">&#39;N&#39;</span>,<span class="st">&#39;Q&#39;</span>,<span class="st">&#39;S&#39;</span>,<span class="st">&#39;R&#39;</span>,<span class="st">&#39;V&#39;</span>,<span class="st">&#39;Y&#39;</span>,<span class="st">&#39;X&#39;</span>])
<span class="op">&gt;&gt;&gt;</span> <span class="co"># This will create a RestrictionBatch with all the enzymes which possess a supplier.</span>
<span class="op">&gt;&gt;&gt;</span> <span class="bu">len</span>(rb_supp)  <span class="co"># May 2016</span>
<span class="dv">622</span></code></pre></div>
<p>The argument <code class="inline">suppliers</code> takes a list of one or several single-letter codes corresponding to the supplier(s). The codes are the same as defined in REBASE. As it would be a pain to have to remember each supplier code, <code class="inline">RestrictionBatch</code> provides a method which shows the code &lt;=&gt; supplier pairs:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> RestrictionBatch.show_codes()  <span class="co"># as of May 2016 REBASE release.</span>
C <span class="op">=</span> Minotech Biotechnology
B <span class="op">=</span> Life Technologies
E <span class="op">=</span> Agilent Technologies
I <span class="op">=</span> SibEnzyme Ltd.
K <span class="op">=</span> Takara Bio Inc.
J <span class="op">=</span> Nippon Gene Co., Ltd.
M <span class="op">=</span> Roche Applied Science
O <span class="op">=</span> Toyobo Biochemicals
N <span class="op">=</span> New England Biolabs
Q <span class="op">=</span> Molecular Biology Resources <span class="op">-</span> CHIMERx
S <span class="op">=</span> Sigma Chemical Corporation
R <span class="op">=</span> Promega Corporation
V <span class="op">=</span> Vivantis Technologies
Y <span class="op">=</span> SinaClon BioScience Co.
X <span class="op">=</span> EURx Ltd.
<span class="op">&gt;&gt;&gt;</span> <span class="co"># You can now choose a code and build your RestrictionBatch</span></code></pre></div>
<p>This way of producing a <code class="inline">RestrictionBatch</code> can drastically reduce the amount of useless output from a restriction analysis, limiting the search to enzymes that you can get hold of and limiting the risks of nervous breakdown. Nothing is more frustrating than to get the perfect enzyme for a sub-cloning only to find it's not commercially available.</p>
<h4 id="adding-enzymes-to-a-restrictionbatch"><a name="2.3"></a>2.3 Adding enzymes to a RestrictionBatch</h4>
<p>Adding an enzyme to a batch if the enzyme is already present will not raise an exception, but will have no effect. Sometimes you want to get an enzyme from a <code class="inline">RestrictionBatch</code> or add it to the batch if it is not present. To do so, use the <code class="inline">get</code> method, setting the second argument <code class="inline">add</code> to <code class="inline">True</code>.</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> rb3
RestrictionBatch([<span class="st">&#39;EcoRI&#39;</span>, <span class="st">&#39;EcoRV&#39;</span>, <span class="st">&#39;KpnI&#39;</span>])
<span class="op">&gt;&gt;&gt;</span> rb3.add(EcoRI)
<span class="op">&gt;&gt;&gt;</span> rb3
RestrictionBatch([<span class="st">&#39;EcoRI&#39;</span>, <span class="st">&#39;EcoRV&#39;</span>, <span class="st">&#39;KpnI&#39;</span>])
<span class="op">&gt;&gt;&gt;</span> rb3.get(EcoRI)
EcoRI
<span class="op">&gt;&gt;&gt;</span> rb3.get(SmaI)

Traceback (most recent call last):
  File <span class="st">&quot;&lt;pyshell#4&gt;&quot;</span>, line <span class="dv">1</span>, <span class="op">in</span> <span class="op">-</span>toplevel<span class="op">-</span>
    rb3.get(SmaI)
  File <span class="st">&quot;/usr/lib/Python2.3/site-packages/Bio/Restriction/Restriction.py&quot;</span>, line <span class="dv">1800</span>, <span class="op">in</span> get
    <span class="cf">raise</span> <span class="pp">ValueError</span>, <span class="st">&#39;enzyme </span><span class="sc">%s</span><span class="st"> is not in RestrictionBatch&#39;</span><span class="op">%</span>e.<span class="va">__name__</span>
<span class="pp">ValueError</span>: enzyme SmaI <span class="op">is</span> <span class="op">not</span> <span class="op">in</span> RestrictionBatch
<span class="op">&gt;&gt;&gt;</span> rb3.get(SmaI, add<span class="op">=</span><span class="va">True</span>)
SmaI
<span class="op">&gt;&gt;&gt;</span> rb3
RestrictionBatch([<span class="st">&#39;EcoRI&#39;</span>, <span class="st">&#39;EcoRV&#39;</span>, <span class="st">&#39;KpnI&#39;</span>, <span class="st">&#39;SmaI&#39;</span>])</code></pre></div>
<h4 id="removing-enzymes-from-a-restrictionbatch"><a name="2.4"></a>2.4 Removing enzymes from a RestrictionBatch</h4>
<p>Removing enzymes from a batch is done using the <code class="inline">remove()</code> method. If the enzyme is not present in the batch this will raise a <code class="inline">KeyError</code>. If the value you want to remove is not an enzyme this will raise a <code class="inline">ValueError</code>.</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> rb3.remove(EcoRI)
<span class="op">&gt;&gt;&gt;</span> rb3
RestrictionBatch([<span class="st">&#39;EcoRV&#39;</span>, <span class="st">&#39;KpnI&#39;</span>, <span class="st">&#39;SmaI&#39;</span>])
<span class="op">&gt;&gt;&gt;</span> rb3.remove(EcoRI)

Traceback (most recent call last):
  File <span class="st">&quot;&lt;pyshell#14&gt;&quot;</span>, line <span class="dv">1</span>, <span class="op">in</span> <span class="op">-</span>toplevel<span class="op">-</span>
    rb3.remove(<span class="st">&#39;EcoRI&#39;</span>)
  File <span class="st">&quot;/usr/lib/Python2.3/site-packages/Bio/Restriction/Restriction.py&quot;</span>, line <span class="dv">1839</span>, <span class="op">in</span> remove
    <span class="cf">return</span> Set.remove(<span class="va">self</span>, <span class="va">self</span>.<span class="bu">format</span>(other))
  File <span class="st">&quot;/usr/lib/Python2.3/sets.py&quot;</span>, line <span class="dv">534</span>, <span class="op">in</span> remove
    <span class="kw">del</span> <span class="va">self</span>._data[element]
<span class="pp">KeyError</span>: EcoRI
<span class="op">&gt;&gt;&gt;</span> rb3 <span class="op">+=</span> EcoRI
<span class="op">&gt;&gt;&gt;</span> rb3
RestrictionBatch([<span class="st">&#39;EcoRI&#39;</span>, <span class="st">&#39;EcoRV&#39;</span>, <span class="st">&#39;KpnI&#39;</span>, <span class="st">&#39;SmaI&#39;</span>])
<span class="op">&gt;&gt;&gt;</span> rb3.remove(<span class="st">&#39;EcoRI&#39;</span>)
<span class="op">&gt;&gt;&gt;</span> rb3
RestrictionBatch([<span class="st">&#39;EcoRV&#39;</span>, <span class="st">&#39;KpnI&#39;</span>, <span class="st">&#39;SmaI&#39;</span>])
<span class="op">&gt;&gt;&gt;</span> rb3.remove(<span class="st">&#39;spam&#39;</span>)

Traceback (most recent call last):
  File <span class="st">&quot;&lt;pyshell#18&gt;&quot;</span>, line <span class="dv">1</span>, <span class="op">in</span> <span class="op">-</span>toplevel<span class="op">-</span>
    rb3.remove(<span class="st">&#39;spam&#39;</span>)
  File <span class="st">&quot;/usr/lib/Python2.3/site-packages/Bio/Restriction/Restriction.py&quot;</span>, line <span class="dv">1839</span>, <span class="op">in</span> remove
    <span class="cf">return</span> Set.remove(<span class="va">self</span>, <span class="va">self</span>.<span class="bu">format</span>(other))
  File <span class="st">&quot;/usr/lib/Python2.3/site-packages/Bio/Restriction/Restriction.py&quot;</span>, line <span class="dv">1871</span>, <span class="op">in</span> <span class="bu">format</span>
    <span class="cf">raise</span> <span class="pp">ValueError</span>, <span class="st">&#39;</span><span class="sc">%s</span><span class="st"> is not a RestrictionType&#39;</span><span class="op">%</span>y.__class__
<span class="pp">ValueError</span>: <span class="op">&lt;</span><span class="bu">type</span> <span class="st">&#39;str&#39;</span><span class="op">&gt;</span> <span class="op">is</span> <span class="op">not</span> a RestrictionType</code></pre></div>
<h4 id="manipulating-restrictionbatch"><a name="2.5"></a>2.5 Manipulating RestrictionBatch</h4>
<p>You cannot, however, add batches together, as they are Python <code class="inline">sets</code>. You must use the pipe operator <code class="inline">|</code> instead. You can find the intersection between 2 batches using <code class="inline">&amp;</code> (see the Python documentation about <code class="inline">sets</code> for more information).</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> rb3 <span class="op">=</span> EcoRI <span class="op">+</span> KpnI <span class="op">+</span> EcoRV
<span class="op">&gt;&gt;&gt;</span> rb3
RestrictionBatch([<span class="st">&#39;EcoRI&#39;</span>, <span class="st">&#39;EcoRV&#39;</span>, <span class="st">&#39;KpnI&#39;</span>])
<span class="op">&gt;&gt;&gt;</span> rb4 <span class="op">=</span> SmaI <span class="op">+</span> PstI
<span class="op">&gt;&gt;&gt;</span> rb4
RestrictionBatch([<span class="st">&#39;PstI&#39;</span>, <span class="st">&#39;SmaI&#39;</span>])
<span class="op">&gt;&gt;&gt;</span> rb3 <span class="op">+</span> rb4

Traceback (most recent call last):
  File <span class="st">&quot;&lt;pyshell#23&gt;&quot;</span>, line <span class="dv">1</span>, <span class="op">in</span> <span class="op">-</span>toplevel<span class="op">-</span>
    rb3 <span class="op">+</span> rb4
  File <span class="st">&quot;/usr/lib/Python2.3/site-packages/Bio/Restriction/Restriction.py&quot;</span>, line <span class="dv">1829</span>, <span class="op">in</span> <span class="fu">__add__</span>
    new.add(other)
  File <span class="st">&quot;/usr/lib/Python2.3/site-packages/Bio/Restriction/Restriction.py&quot;</span>, line <span class="dv">1848</span>, <span class="op">in</span> add
    <span class="cf">return</span> Set.add(<span class="va">self</span>, <span class="va">self</span>.<span class="bu">format</span>(other))
  File <span class="st">&quot;/usr/lib/Python2.3/site-packages/Bio/Restriction/Restriction.py&quot;</span>, line <span class="dv">1871</span>, <span class="op">in</span> <span class="bu">format</span>
    <span class="cf">raise</span> <span class="pp">ValueError</span>, <span class="st">&#39;</span><span class="sc">%s</span><span class="st"> is not a RestrictionType&#39;</span><span class="op">%</span>y.__class__
<span class="pp">ValueError</span>: <span class="op">&lt;</span><span class="kw">class</span> <span class="st">&#39;Bio.Restriction.Restriction.RestrictionBatch&#39;</span><span class="op">&gt;</span> <span class="op">is</span> <span class="op">not</span> a RestrictionType
<span class="op">&gt;&gt;&gt;</span> rb3 <span class="op">|</span> rb4
RestrictionBatch([<span class="st">&#39;EcoRI&#39;</span>, <span class="st">&#39;EcoRV&#39;</span>, <span class="st">&#39;KpnI&#39;</span>, <span class="st">&#39;PstI&#39;</span>, <span class="st">&#39;SmaI&#39;</span>])
<span class="op">&gt;&gt;&gt;</span> rb3 <span class="op">&amp;</span> rb4
RestrictionBatch([])
<span class="op">&gt;&gt;&gt;</span> rb4 <span class="op">+=</span> EcoRI
<span class="op">&gt;&gt;&gt;</span> rb4
RestrictionBatch([<span class="st">&#39;EcoRI&#39;</span>, <span class="st">&#39;PstI&#39;</span>, <span class="st">&#39;SmaI&#39;</span>])
<span class="op">&gt;&gt;&gt;</span> rb3 <span class="op">&amp;</span> rb4
RestrictionBatch([<span class="st">&#39;EcoRI&#39;</span>])</code></pre></div>
<h4 id="analysing-sequences-with-a-restrictionbatch"><a name="2.6"></a>2.6 Analysing sequences with a RestrictionBatch</h4>
<p>To analyse a sequence for potential sites, you can use the <code class="inline">search</code> method of the batch, the same way you did for restriction enzymes. The result is no longer a list, however, but a dictionary. The keys of the dictionary are the names of the enzymes and the values are lists of site positions. <code class="inline">RestrictionBatch</code> does not implement a <code class="inline">catalyse</code> method, as it would not have a real meaning when used with large batches.</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> new_seq <span class="op">=</span> Seq(<span class="st">&#39;TTCAAAAAAAAAAAAAAAAAAAAAAAAAAAAGAA&#39;</span>, IUPACAmbiguousDNA())
<span class="op">&gt;&gt;&gt;</span> rb.search(new_seq)
{<span class="st">&#39;KpnI&#39;</span>: [], <span class="st">&#39;EcoRV&#39;</span>: [], <span class="st">&#39;EcoRI&#39;</span>: []}
<span class="op">&gt;&gt;&gt;</span> rb.search(new_seq, linear<span class="op">=</span><span class="va">False</span>)
{<span class="st">&#39;KpnI&#39;</span>: [], <span class="st">&#39;EcoRV&#39;</span>: [], <span class="st">&#39;EcoRI&#39;</span>: [<span class="dv">33</span>]}</code></pre></div>
<h4 id="other-restrictionbatch-methods"><a name="2.7"></a>2.7 Other RestrictionBatch methods</h4>
<p>Amongst the other methods provided by <code class="inline">RestrictionBatch</code>, <code class="inline">elements()</code>, which returns a list of all the element names alphabetically sorted, is certainly the most useful.</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> rb <span class="op">=</span> EcoRI <span class="op">+</span> KpnI <span class="op">+</span> EcoRV
<span class="op">&gt;&gt;&gt;</span> rb.elements()
[<span class="st">&#39;EcoRI&#39;</span>, <span class="st">&#39;EcoRV&#39;</span>, <span class="st">&#39;KpnI&#39;</span>]</code></pre></div>
<p>If you don't care about the alphabetical order, use the method <code class="inline">as_string()</code> to get the same thing a bit faster. The list is not sorted. The order is arbitrary, as Python sets are hash-based, like dictionaries.</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> rb <span class="op">=</span> EcoRI <span class="op">+</span> KpnI <span class="op">+</span> EcoRV
<span class="op">&gt;&gt;&gt;</span> rb.as_string()
[<span class="st">&#39;EcoRI&#39;</span>, <span class="st">&#39;KpnI&#39;</span>, <span class="st">&#39;EcoRV&#39;</span>]</code></pre></div>
<p>Other <code class="inline">RestrictionBatch</code> methods are generally used for particular purposes and will not be discussed here. See the <a href="https://github.com/biopython/biopython/tree/master/Bio/Restriction">source</a> if you are interested.</p>
<h3 id="allenzymes-and-commonly-two-preconfigured-restrictionbatches"><a name="3"></a>3. AllEnzymes and CommOnly: two preconfigured RestrictionBatches</h3>
<p>While it is sometimes practical to produce a <code class="inline">RestrictionBatch</code> of your own, you will certainly more frequently use the two batches provided with the <code class="inline">Restriction</code> package: <code class="inline">AllEnzymes</code> and <code class="inline">CommOnly</code>. These two batches contain respectively all the enzymes in the database and only the enzymes which have a commercial supplier. They are rather big, but that's what makes them useful. With these batches you can produce a full description of a sequence with a single command. You can use these two batches like any other batch.</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> <span class="bu">len</span>(AllEnzymes)
<span class="dv">778</span>
<span class="op">&gt;&gt;&gt;</span> <span class="bu">len</span>(CommOnly)
<span class="dv">622</span>
<span class="op">&gt;&gt;&gt;</span> AllEnzymes.search(new_seq) ...</code></pre></div>
<p>There is not a lot to say about them apart from the fact that they are present. They are really normal batches, and you can use them like any other batch.</p>
<h3 id="the-analysis-class-even-simpler-restriction-analysis"><a name="4"></a>4. The Analysis class: even simpler restriction analysis</h3>
<p><code class="inline">RestrictionBatch</code> can give you a dictionary with the sites for all the enzymes in a batch. However, it is sometimes nice to get something a bit easier to read than a Python dictionary. Complex restriction analyses are not easy with <code class="inline">RestrictionBatch</code>. Some refinements in the way to search a sequence for restriction sites will help. <code class="inline">Analysis</code> provides a series of commands to customise the results obtained from a restriction batch/sequence pair and some facilities to make the output slightly more human-readable.</p>
<h4 id="setting-up-an-analysis"><a name="4.1"></a>4.1 Setting up an Analysis</h4>
<p>To build a restriction analysis you will need a <code class="inline">RestrictionBatch</code> and a sequence and to tell it if the sequence is linear or circular. The first argument <code class="inline">Analysis</code> takes is the restriction batch, the second is the sequence. If the third argument is not provided, <code class="inline">Analysis</code> will assume the sequence is linear.</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> new_seq <span class="op">=</span> Seq(<span class="st">&#39;TTCAAAAAAAAAAAAAAAAAAAAAAAAAAAAGAA&#39;</span>, IUPACAmbiguousDNA())
<span class="op">&gt;&gt;&gt;</span> rb <span class="op">=</span> RestrictionBatch([EcoRI, KpnI, EcoRV])
<span class="op">&gt;&gt;&gt;</span> Ana <span class="op">=</span> Analysis(rb, new_seq, linear<span class="op">=</span><span class="va">False</span>)
<span class="op">&gt;&gt;&gt;</span> Ana
Analysis(RestrictionBatch([<span class="st">&#39;EcoRI&#39;</span>, <span class="st">&#39;EcoRV&#39;</span>, <span class="st">&#39;KpnI&#39;</span>]),Seq(<span class="st">&#39;TTCAAAAAAAAAAAAAAAAAA</span>
<span class="st">AAAAAAAAAAGAA&#39;</span>, IUPACAmbiguousDNA()),<span class="va">False</span>)</code></pre></div>
<h4 id="full-restriction-analysis"><a name="4.2"></a>4.2 Full restriction analysis</h4>
<p>Once you have created your new <code class="inline">Analysis</code>, you can use it to get a restriction analysis of your sequence. The way to make a full restriction analysis of the sequence is:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> Ana.full()
{<span class="st">&#39;KpnI&#39;</span>: [], <span class="st">&#39;EcoRV&#39;</span>: [], <span class="st">&#39;EcoRI&#39;</span>: [<span class="dv">33</span>]}</code></pre></div>
<p>This is much the same as the output of the <code class="inline">RestrictionBatch.search</code> method. You will get an easier-to-read output with <code class="inline">print_that</code> used without arguments:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> <span class="co"># let&#39;s create something a bit more complex to analyse.</span>
<span class="op">&gt;&gt;&gt;</span>
<span class="op">&gt;&gt;&gt;</span> rb <span class="op">=</span> RestrictionBatch([], [<span class="st">&#39;C&#39;</span>])  <span class="co"># we will explain the meaning of the</span>
<span class="op">&gt;&gt;&gt;</span>                               <span class="co"># double list argument later.</span>
<span class="op">&gt;&gt;&gt;</span>
<span class="op">&gt;&gt;&gt;</span> multi_site <span class="op">=</span> Seq.Seq(<span class="st">&#39;AAA&#39;</span> <span class="op">+</span> EcoRI.site <span class="op">+</span> <span class="st">&#39;G&#39;</span> <span class="op">+</span> KpnI.site <span class="op">+</span> EcoRV.site <span class="op">+</span>
                     <span class="co">&#39;CT&#39;</span> <span class="op">+</span> SmaI.site <span class="op">+</span> <span class="st">&#39;GT&#39;</span> <span class="op">+</span> FokI.site <span class="op">+</span> <span class="st">&#39;GAAAGGGC&#39;</span> <span class="op">+</span>
                      EcoRI.site <span class="op">+</span> <span class="st">&#39;ACGT&#39;</span>, IUPACAmbiguousDNA())
<span class="op">&gt;&gt;&gt;</span> Analong <span class="op">=</span> Analysis(rb, multi_site)
<span class="op">&gt;&gt;&gt;</span> Analong.full()
{BglI: [], BstEII: [], AsuII: [], HinfI: [], SfiI: [], PspPI: [], BsiSI: [<span class="dv">27</span>], S
alI: [], SlaI: [], NcoI: [], NotI: [], PstI: [], StyI: [], BseBI: [], PvuII: [],
HindIII: [], BglII: [], ApaLI: [], TaqI: [], BssAI: [], AluI: [], SstI: [], Bse
CI: [], Sau3AI: [], HpaI: [], SnaBI: [], NheI: [], BclI: [], KpnI: [<span class="dv">16</span>], NruI: [
], MspCI: [], BshFI: [], CspAI: [], RsaI: [<span class="dv">14</span>], EcoRV: [<span class="dv">20</span>], SphI: [], BamHI: []
, MboI: [], SgrBI: [], SspI: [], ScaI: [], XbaI: [], SseBI: [], NaeI: [], EcoRI:
[<span class="dv">5</span>, <span class="dv">47</span>], SmaI: [<span class="dv">28</span>], BseAI: []}
<span class="op">&gt;&gt;&gt;</span>
<span class="op">&gt;&gt;&gt;</span> <span class="co"># The results are here but it is difficult to read. let&#39;s try print_that</span>
<span class="op">&gt;&gt;&gt;</span>
<span class="op">&gt;&gt;&gt;</span> Analong.print_that()

BsiSI      :  <span class="dv">27</span>.
RsaI       :  <span class="dv">14</span>.
EcoRI      :  <span class="dv">5</span>, <span class="dv">47</span>.
EcoRV      :  <span class="dv">20</span>.
KpnI       :  <span class="dv">16</span>.
SmaI       :  <span class="dv">28</span>.

   Enzymes which do <span class="op">not</span> cut the sequence.

AluI      BshFI     MboI      Sau3AI    TaqI      BseBI     HinfI     PspPI
ApaLI     AsuII     BamHI     BclI      BglII     BseAI     BseCI     BssAI
CspAI     HindIII   HpaI      MspCI     NaeI      NcoI      NheI      NruI
PstI      PvuII     SalI      ScaI      SgrBI     SlaI      SnaBI     SphI
SseBI     SspI      SstI      StyI      XbaI      BstEII    NotI      BglI
SfiI</code></pre></div>
<p>Much clearer, isn't it? The output is optimised for a shell 80 columns wide. If the output seems odd, check that the width of your shell is at least 80 columns.</p>
<h4 id="changing-the-title"><a name="4.3"></a>4.3 Changing the title</h4>
<p>You can provide a title to the analysis and modify the sentence 'Enzymes which do not cut the sequence', by setting the two optional arguments of <code class="inline">print_that</code>, <code class="inline">title</code> and <code class="inline">s1</code>. No formatting will be done on these strings, so you have to include the newlines (<code class="inline">\n</code>) yourself as you see fit:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> Analong.print_that(<span class="va">None</span>, title<span class="op">=</span><span class="st">&#39;sequence = multi_site</span><span class="ch">\n\n</span><span class="st">&#39;</span>)

sequence <span class="op">=</span> multi_site

BsiSI      :  <span class="dv">27</span>.
RsaI       :  <span class="dv">14</span>.
EcoRI      :  <span class="dv">5</span>, <span class="dv">47</span>.
EcoRV      :  <span class="dv">20</span>.
KpnI       :  <span class="dv">16</span>.
SmaI       :  <span class="dv">28</span>.

   Enzymes which do <span class="op">not</span> cut the sequence.

AluI      BshFI     MboI      Sau3AI    TaqI      BseBI     HinfI     PspPI
ApaLI     AsuII     BamHI     BclI      BglII     BseAI     BseCI     BssAI
CspAI     HindIII   HpaI      MspCI     NaeI      NcoI      NheI      NruI
PstI      PvuII     SalI      ScaI      SgrBI     SlaI      SnaBI     SphI
SseBI     SspI      SstI      StyI      XbaI      BstEII    NotI      BglI
SfiI

<span class="op">&gt;&gt;&gt;</span> Analong.print_that(<span class="va">None</span>, title<span class="op">=</span><span class="st">&#39;sequence = multi_site</span><span class="ch">\n\n</span><span class="st">&#39;</span>,
                   s1<span class="op">=</span><span class="st">&#39;</span><span class="ch">\n</span><span class="st"> no site:</span><span class="ch">\n\n</span><span class="st">&#39;</span>)

sequence <span class="op">=</span> multi_site

BsiSI      :  <span class="dv">27</span>.
RsaI       :  <span class="dv">14</span>.
EcoRI      :  <span class="dv">5</span>, <span class="dv">47</span>.
EcoRV      :  <span class="dv">20</span>.
KpnI       :  <span class="dv">16</span>.
SmaI       :  <span class="dv">28</span>.

 no site:

AluI      BshFI     MboI      Sau3AI    TaqI      BseBI     HinfI     PspPI
ApaLI     AsuII     BamHI     BclI      BglII     BseAI     BseCI     BssAI
CspAI     HindIII   HpaI      MspCI     NaeI      NcoI      NheI      NruI
PstI      PvuII     SalI      ScaI      SgrBI     SlaI      SnaBI     SphI
SseBI     SspI      SstI      StyI      XbaI      BstEII    NotI      BglI
SfiI</code></pre></div>
<h4 id="customising-the-output"><a name="4.4"></a>4.4 Customising the output</h4>
<p>You can modify some aspects of the output interactively. There are three main types of output: two listing types (alphabetically sorted and sorted by number of sites) and a map-like type. To change the output, use the method <code class="inline">print_as()</code> of <code class="inline">Analysis</code>. The change of output is permanent for the instance of <code class="inline">Analysis</code> (that is until the next time you use <code class="inline">print_as()</code>). The arguments of <code class="inline">print_as()</code> are strings: <code class="inline">'map'</code>, <code class="inline">'number'</code> or <code class="inline">'alpha'</code>. As you have seen previously, the default behaviour is an alphabetical list (<code class="inline">'alpha'</code>).</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> Analong.print_as(<span class="st">&#39;map&#39;</span>)
<span class="op">&gt;&gt;&gt;</span> Analong.print_that()

    <span class="dv">5</span> EcoRI
    <span class="op">|</span>
    <span class="op">|</span>        <span class="dv">14</span> RsaI
    <span class="op">|</span>        <span class="op">|</span>
    <span class="op">|</span>        <span class="op">|</span> <span class="dv">16</span> KpnI
    <span class="op">|</span>        <span class="op">|</span> <span class="op">|</span>
    <span class="op">|</span>        <span class="op">|</span> <span class="op">|</span>   <span class="dv">20</span> EcoRV
    <span class="op">|</span>        <span class="op">|</span> <span class="op">|</span>   <span class="op">|</span>
    <span class="op">|</span>        <span class="op">|</span> <span class="op">|</span>   <span class="op">|</span>      <span class="dv">27</span> BsiSI
    <span class="op">|</span>        <span class="op">|</span> <span class="op">|</span>   <span class="op">|</span>      <span class="op">|</span>
    <span class="op">|</span>        <span class="op">|</span> <span class="op">|</span>   <span class="op">|</span>      <span class="op">|</span><span class="dv">28</span> SmaI
    <span class="op">|</span>        <span class="op">|</span> <span class="op">|</span>   <span class="op">|</span>      <span class="op">||</span>
    <span class="op">|</span>        <span class="op">|</span> <span class="op">|</span>   <span class="op">|</span>      <span class="op">||</span>                  <span class="dv">47</span> EcoRI
    <span class="op">|</span>        <span class="op">|</span> <span class="op">|</span>   <span class="op">|</span>      <span class="op">||</span>                  <span class="op">|</span>
AAAGAATTCGGGTACCGATATCCTCCCGGGGTGGATGGAAAGGGCGAATTCACGT
<span class="op">|||||||||||||||||||||||||||||||||||||||||||||||||||||||</span>
TTTCTTAAGCCCATGGCTATAGGAGGGCCCCACCTACCTTTCCCGCTTAAGTGCA
<span class="dv">1</span>                                                    <span class="dv">55</span>


   Enzymes which do <span class="op">not</span> cut the sequence.

AluI      BshFI     MboI      Sau3AI    TaqI      BseBI     HinfI     PspPI
ApaLI     AsuII     BamHI     BclI      BglII     BseAI     BseCI     BssAI
CspAI     HindIII   HpaI      MspCI     NaeI      NcoI      NheI      NruI
PstI      PvuII     SalI      ScaI      SgrBI     SlaI      SnaBI     SphI
SseBI     SspI      SstI      StyI      XbaI      BstEII    NotI      BglI
SfiI

<span class="op">&gt;&gt;&gt;</span> Analong.print_as(<span class="st">&#39;number&#39;</span>)
<span class="op">&gt;&gt;&gt;</span> Analong.print_that()



enzymes which cut <span class="dv">1</span> times :

BsiSI      :  <span class="dv">27</span>.
RsaI       :  <span class="dv">14</span>.
EcoRV      :  <span class="dv">20</span>.
KpnI       :  <span class="dv">16</span>.
SmaI       :  <span class="dv">28</span>.


enzymes which cut <span class="dv">2</span> times :

EcoRI      :  <span class="dv">5</span>, <span class="dv">47</span>.

   Enzymes which do <span class="op">not</span> cut the sequence.

AluI      BshFI     MboI      Sau3AI    TaqI      BseBI     HinfI     PspPI
ApaLI     AsuII     BamHI     BclI      BglII     BseAI     BseCI     BssAI
CspAI     HindIII   HpaI      MspCI     NaeI      NcoI      NheI      NruI
PstI      PvuII     SalI      ScaI      SgrBI     SlaI      SnaBI     SphI
SseBI     SspI      SstI      StyI      XbaI      BstEII    NotI      BglI
SfiI

<span class="op">&gt;&gt;&gt;</span></code></pre></div>
<p>To come back to the previous behaviour:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> Analong.print_as(<span class="st">&#39;alpha&#39;</span>)
<span class="op">&gt;&gt;&gt;</span> Analong.print_that()

BsiSI      :  <span class="dv">27</span>.
RsaI       :  <span class="dv">14</span>.
EcoRI      :  <span class="dv">5</span>, <span class="dv">47</span>.
EcoRV      :  <span class="dv">20</span>.
etc ...</code></pre></div>
<h4 id="fancier-restriction-analysis"><a name="4.5"></a>4.5 Fancier restriction analysis</h4>
<p>I will not go into detail for every single method; here are all the methods that are available. Most are perfectly self-explanatory and the others are fairly well documented (use <code class="inline">help('Analysis.command_name')</code>). The methods are:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python">full(<span class="va">self</span>,linear<span class="op">=</span><span class="va">True</span>)
blunt(<span class="va">self</span>,dct <span class="op">=</span> <span class="va">None</span>)
overhang5(<span class="va">self</span>, dct<span class="op">=</span><span class="va">None</span>)
overhang3(<span class="va">self</span>, dct<span class="op">=</span><span class="va">None</span>)
defined(<span class="va">self</span>,dct<span class="op">=</span><span class="va">None</span>)
with_sites(<span class="va">self</span>, dct<span class="op">=</span><span class="va">None</span>)
without_site(<span class="va">self</span>, dct<span class="op">=</span><span class="va">None</span>)
with_N_sites(<span class="va">self</span>, N, dct<span class="op">=</span><span class="va">None</span>)
with_number_list(<span class="va">self</span>, <span class="bu">list</span>, dct<span class="op">=</span><span class="va">None</span>)
with_name(<span class="va">self</span>, names, dct<span class="op">=</span><span class="va">None</span>)
with_site_size(<span class="va">self</span>, site_size, dct<span class="op">=</span><span class="va">None</span>)
only_between(<span class="va">self</span>, start, end, dct<span class="op">=</span><span class="va">None</span>)
between(<span class="va">self</span>,start, end, dct<span class="op">=</span><span class="va">None</span>)
show_only_between(<span class="va">self</span>, start, end, dct<span class="op">=</span><span class="va">None</span>)
only_outside(<span class="va">self</span>, start, end, dct <span class="op">=</span><span class="va">None</span>)
outside(<span class="va">self</span>, start, end, dct<span class="op">=</span><span class="va">None</span>)
do_not_cut(<span class="va">self</span>, start, end, dct <span class="op">=</span><span class="va">None</span>)</code></pre></div>
<p>Using these methods is simple:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> new_seq <span class="op">=</span> Seq(<span class="st">&#39;TTCAAAAAAAAAAAAAAAAAAAAAAAAAAAAGAA&#39;</span>, IUPACAmbiguousDNA())
<span class="op">&gt;&gt;&gt;</span> rb <span class="op">=</span> RestrictionBatch([EcoRI, KpnI, EcoRV])
<span class="op">&gt;&gt;&gt;</span> Ana <span class="op">=</span> Analysis(rb, new_seq, linear<span class="op">=</span><span class="va">False</span>)
<span class="op">&gt;&gt;&gt;</span> Ana
Analysis(RestrictionBatch([<span class="st">&#39;EcoRI&#39;</span>, <span class="st">&#39;EcoRV&#39;</span>, <span class="st">&#39;KpnI&#39;</span>]),Seq(<span class="st">&#39;TTCAAAAAAAAAAAAAAAAAA</span>
<span class="st">AAAAAAAAAAGAA&#39;</span>, IUPACAmbiguousDNA()),<span class="va">False</span>)
<span class="op">&gt;&gt;&gt;</span> Ana.blunt()  <span class="co"># output only the result for enzymes which cut blunt</span>
{<span class="st">&#39;EcoRV&#39;</span>: []}
<span class="op">&gt;&gt;&gt;</span> Ana.full()  <span class="co"># all the enzymes in the RestrictionBatch</span>
{<span class="st">&#39;KpnI&#39;</span>: [], <span class="st">&#39;EcoRV&#39;</span>: [], <span class="st">&#39;EcoRI&#39;</span>: [<span class="dv">33</span>]}
<span class="op">&gt;&gt;&gt;</span> Ana.with_sites()  <span class="co"># output only the result for enzymes which have a site</span>
{<span class="st">&#39;EcoRI&#39;</span>: [<span class="dv">33</span>]}
<span class="op">&gt;&gt;&gt;</span> Ana.without_site()  <span class="co"># output only the enzymes which have no site</span>
{<span class="st">&#39;KpnI&#39;</span>: [], <span class="st">&#39;EcoRV&#39;</span>: []}
<span class="op">&gt;&gt;&gt;</span> Ana.only_between(<span class="dv">1</span>, <span class="dv">20</span>)  <span class="co"># the enzymes which cut between position 1 and 20</span>
{}
<span class="op">&gt;&gt;&gt;</span> Ana.only_between(<span class="dv">20</span>, <span class="dv">34</span>)  <span class="co"># etc...</span>
{<span class="st">&#39;EcoRI&#39;</span>: [<span class="dv">33</span>]}
<span class="op">&gt;&gt;&gt;</span> Ana.only_outside(<span class="dv">20</span>, <span class="dv">34</span>)
{}
<span class="op">&gt;&gt;&gt;</span> Ana.with_name([EcoRI])
{<span class="st">&#39;EcoRI&#39;</span>: [<span class="dv">33</span>]}
<span class="op">&gt;&gt;&gt;</span></code></pre></div>
<p>To get a nice output, you still use <code class="inline">print_that</code> but this time with the command you want executed as argument.</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> Ana.print_that(Ana.blunt())

   Enzymes which do <span class="op">not</span> cut the sequence.

EcoRV

<span class="op">&gt;&gt;&gt;</span> pt <span class="op">=</span> Ana.print_that
<span class="op">&gt;&gt;&gt;</span> pt(Ana.with_sites())

EcoRI      :  <span class="dv">33</span>.

<span class="op">&gt;&gt;&gt;</span> pt(Ana.without_site())

   Enzymes which do <span class="op">not</span> cut the sequence.

EcoRV     KpnI

<span class="op">&gt;&gt;&gt;</span> <span class="co"># etc ...</span></code></pre></div>
<h4 id="more-complex-analysis"><a name="4.6"></a>4.6 More complex analysis</h4>
<p>All of these methods (except <code class="inline">full()</code> which, well ... does a full restriction analysis) can be supplied with an additional dictionary. If no dictionary is supplied, a full restriction analysis is used as the starting point. Otherwise the dictionary provided by the argument <code class="inline">dct</code> is used. The dictionary must be formatted like the result of <code class="inline">RestrictionBatch.search</code>, that is, of the form <code class="inline">{'enzyme_name': [position1, position2],...}</code>, where <em>position1</em> and <em>position2</em> are integers. All the methods listed previously output such dictionaries, and their results can be used as a starting point.</p>
<p>Using this approach you can build really complex queries by chaining several methods one after the other. For example, if you want all the enzymes which produce a 5' overhang and cut the sequence only once, you have two ways to go:</p>
<p>The hard way consists of building a restriction batch containing only 5' overhang enzymes, using this batch to create a new <code class="inline">Analysis</code> instance and then using the method <code class="inline">with_N_sites()</code> as follows:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> rbov5 <span class="op">=</span> RestrictionBatch([x <span class="cf">for</span> x <span class="op">in</span> rb <span class="cf">if</span> x.is_5overhang()])
<span class="op">&gt;&gt;&gt;</span> Anaov5 <span class="op">=</span> Analysis(rbov5, new_seq, linear<span class="op">=</span><span class="va">False</span>)
<span class="op">&gt;&gt;&gt;</span> Anaov5.with_N_sites(<span class="dv">1</span>)
{<span class="st">&#39;EcoRI&#39;</span> : [<span class="dv">33</span>]}</code></pre></div>
<p>The easy solution is to chain several <code class="inline">Analysis</code> methods. This is possible since each method returns a dictionary as its result and is able to take a dictionary as input:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> Ana.with_N_sites(<span class="dv">1</span>, Ana.overhang5())
{<span class="st">&#39;EcoRI&#39;</span>: [<span class="dv">33</span>]}</code></pre></div>
<p>The dictionary is always the last argument whatever the command you use.</p>
<p>Which way to prefer certainly depends on the conditions in which you will use your <code class="inline">Analysis</code> instance. If you are likely to frequently reuse the same batch with different sequences, using a dedicated <code class="inline">RestrictionBatch</code> might be faster as the batch is likely to be smaller. Chaining methods is generally quicker when working with an interactive shell. In a script, the extended syntax may be easier to understand in a few months.</p>
<h3 id="advanced-features-the-formattedseq-class"><a name="5"></a>5. Advanced features: the FormattedSeq class</h3>
<p>Restriction enzymes require a much stricter formatting of the DNA sequences than a <code class="inline">Bio.Seq</code> object provides. For example, the restriction enzymes expect to find an ungapped (no space) upper-case sequence, while <code class="inline">Bio.Seq</code> objects allow sequences to be in lower-case separated by spaces. Therefore when a restriction enzyme analyses a <code class="inline">Bio.Seq</code> object (be it a <code class="inline">Seq</code> or a <code class="inline">MutableSeq</code>), the object undergoes a conversion. The class <code class="inline">FormattedSeq</code> ensures the smooth conversion from a <code class="inline">Bio.Seq</code> object to something which can safely be used by the enzyme.</p>
<p>While this conversion is done automatically by the enzymes if you provide them with a <code class="inline">Seq</code> or a <code class="inline">MutableSeq</code>, there are times when it will be more efficient to perform the conversion beforehand. Each time a <code class="inline">Seq</code> object is passed to an enzyme for analysis you pay an overhead due to the conversion. When analysing the same sequence over and over, it will be faster to convert the sequence, store the conversion and then use only the converted sequence.</p>
<h4 id="creating-a-formattedseq"><a name="5.1"></a>5.1 Creating a FormattedSeq</h4>
<p>Creating a <code class="inline">FormattedSeq</code> from a <code class="inline">Bio.Seq</code> object is simple. The first argument of <code class="inline">FormattedSeq</code> is the sequence you wish to convert. You can specify a shape with the second argument <code class="inline">linear</code>; if you don't, the <code class="inline">FormattedSeq</code> will be linear:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> <span class="im">from</span> Bio.Restriction <span class="im">import</span> <span class="op">*</span>
<span class="op">&gt;&gt;&gt;</span> <span class="im">from</span> Bio.Seq <span class="im">import</span> Seq
<span class="op">&gt;&gt;&gt;</span> seq <span class="op">=</span> Seq(<span class="st">&#39;TTCAAAAAAAAAAGAATTCAAAAGAA&#39;</span>)
<span class="op">&gt;&gt;&gt;</span> linear_fseq <span class="op">=</span> FormattedSeq(seq, linear<span class="op">=</span><span class="va">True</span>)
<span class="op">&gt;&gt;&gt;</span> default_fseq <span class="op">=</span> FormattedSeq(seq)
<span class="op">&gt;&gt;&gt;</span> circular_fseq <span class="op">=</span> FormattedSeq(seq, linear<span class="op">=</span><span class="va">False</span>)
<span class="op">&gt;&gt;&gt;</span> linear_fseq
FormattedSeq(Seq(<span class="st">&#39;TTCAAAAAAAAAAGAATTCAAAAGAA&#39;</span>, Alphabet()), linear<span class="op">=</span><span class="va">True</span>)
<span class="op">&gt;&gt;&gt;</span> linear_fseq.is_linear()
<span class="va">True</span>
<span class="op">&gt;&gt;&gt;</span> default_fseq.is_linear()
<span class="va">True</span>
<span class="op">&gt;&gt;&gt;</span> circular_fseq.is_linear()
<span class="va">False</span>
<span class="op">&gt;&gt;&gt;</span> circular_fseq
FormattedSeq(Seq(<span class="st">&#39;TTCAAAAAAAAAAGAATTCAAAAGAA&#39;</span>, Alphabet()), linear<span class="op">=</span><span class="va">False</span>)</code></pre></div>
<h4 id="unlike-bio.seq-formattedseq-retains-information-about-their-shape"><a name="5.2"></a>5.2 Unlike Bio.Seq, FormattedSeq retains information about their shape</h4>
<p><code class="inline">FormattedSeq</code> retains information about the shape of the sequence. Therefore unlike with <code class="inline">Seq</code> and <code class="inline">MutableSeq</code> you don't need to specify the shape of the sequence when using <code class="inline">search()</code> or <code class="inline">catalyse()</code>:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> EcoRI.search(linear_fseq)
[<span class="dv">15</span>]
<span class="op">&gt;&gt;&gt;</span> EcoRI.search(circular_fseq)  <span class="co"># no need to specify the shape</span>
[<span class="dv">15</span>, <span class="dv">25</span>]</code></pre></div>
<p>In fact, the shape of a FormattedSeq is not altered by the second argument of the commands <code class="inline">search()</code> and <code class="inline">catalyse()</code>:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> <span class="co"># In fact the shape is blocked.</span>
<span class="op">&gt;&gt;&gt;</span> <span class="co"># The 3 following commands give the same results</span>
<span class="op">&gt;&gt;&gt;</span> <span class="co"># which correspond to a circular sequence</span>
<span class="op">&gt;&gt;&gt;</span> EcoRI.search(circular_fseq)
[<span class="dv">15</span>, <span class="dv">25</span>]
<span class="op">&gt;&gt;&gt;</span> EcoRI.search(circular_fseq, linear<span class="op">=</span><span class="va">True</span>)
[<span class="dv">15</span>, <span class="dv">25</span>]
<span class="op">&gt;&gt;&gt;</span> EcoRI.search(circular_fseq, linear<span class="op">=</span><span class="va">False</span>)
[<span class="dv">15</span>, <span class="dv">25</span>]</code></pre></div>
<h4 id="changing-the-shape-of-a-formattedseq"><a name="5.3"></a>5.3 Changing the shape of a FormattedSeq</h4>
<p>You can however change the shape of the <code class="inline">FormattedSeq</code>. The commands to use are:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python">FormattedSeq.to_circular() <span class="op">=&gt;</span> new FormattedSeq, shape will be circular.
FormattedSeq.to_linear()   <span class="op">=&gt;</span> new FormattedSeq, shape will be linear
FormattedSeq.circularise() <span class="op">=&gt;</span> change the shape of FormattedSeq to circular
FormattedSeq.linearise()   <span class="op">=&gt;</span> change the shape of FormattedSeq to linear</code></pre></div>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> circular_fseq
FormattedSeq(Seq(<span class="st">&#39;TTCAAAAAAAAAAGAATTCAAAAGAA&#39;</span>, Alphabet()), linear<span class="op">=</span><span class="va">False</span>)
<span class="op">&gt;&gt;&gt;</span> circular_fseq.is_linear()
<span class="va">False</span>
<span class="op">&gt;&gt;&gt;</span> circular_fseq <span class="op">==</span> linear_fseq
<span class="va">False</span>
<span class="op">&gt;&gt;&gt;</span> newseq <span class="op">=</span> circular_fseq.to_linear()
<span class="op">&gt;&gt;&gt;</span> circular_fseq
FormattedSeq(Seq(<span class="st">&#39;TTCAAAAAAAAAAGAATTCAAAAGAA&#39;</span>, Alphabet()), linear<span class="op">=</span><span class="va">False</span>)
<span class="op">&gt;&gt;&gt;</span> newseq
FormattedSeq(Seq(<span class="st">&#39;TTCAAAAAAAAAAGAATTCAAAAGAA&#39;</span>, Alphabet()), linear<span class="op">=</span><span class="va">True</span>)
<span class="op">&gt;&gt;&gt;</span> circular_fseq.linearise()
<span class="op">&gt;&gt;&gt;</span> circular_fseq
FormattedSeq(Seq(<span class="st">&#39;TTCAAAAAAAAAAGAATTCAAAAGAA&#39;</span>, Alphabet()), linear<span class="op">=</span><span class="va">True</span>)
<span class="op">&gt;&gt;&gt;</span> circular_fseq.is_linear()
<span class="va">True</span>
<span class="op">&gt;&gt;&gt;</span> circular_fseq <span class="op">==</span> linear_fseq
<span class="va">True</span>
<span class="op">&gt;&gt;&gt;</span> EcoRI.search(circular_fseq) <span class="co"># which is now linear</span>
[<span class="dv">15</span>]</code></pre></div>
<h4 id="using-and-operators-with-formattedseq"><a name="5.4"></a>5.4 Using / and // operators with FormattedSeq</h4>
<p>Not having to specify the shape of the sequence to analyse gives you the opportunity to use the shorthand '/' and '//' with restriction enzymes:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> EcoRI<span class="op">/</span>linear_fseq  <span class="co"># &lt;=&gt; EcoRI.search(linear_fseq)</span>
[<span class="dv">15</span>]
<span class="op">&gt;&gt;&gt;</span> linear_fseq<span class="op">/</span>EcoRI  <span class="co"># &lt;=&gt; EcoRI.search(linear_fseq)</span>
[<span class="dv">15</span>]
<span class="op">&gt;&gt;&gt;</span> EcoRI<span class="op">//</span>linear_fseq <span class="co"># &lt;=&gt; linear_fseq//EcoRI &lt;=&gt; EcoRI.catalyse(linear_fseq)</span>
(Seq(<span class="st">&#39;TTCAAAAAAAAAAG&#39;</span>, Alphabet()), Seq(<span class="st">&#39;AATTCAAAAGAA&#39;</span>, Alphabet()))</code></pre></div>
<p>Another way to avoid the overhead due to a repetitive conversion from a <code class="inline">Seq</code> object to a <code class="inline">FormattedSeq</code> is to use a <a href="#2"><code class="inline">RestrictionBatch</code></a>.</p>
<p>To conclude, the performance gain achieved when using a <code class="inline">FormattedSeq</code> instead of a <code class="inline">Seq</code> is not huge. The analysis of a 10 kb sequence by all the enzymes in <code class="inline">AllEnzymes</code> (<code class="inline">for x in AllEnzymes: x.search(seq)</code>, 867 enzymes) is 7 % faster when using a <code class="inline">FormattedSeq</code> than a <code class="inline">Seq</code>. Using a <code class="inline">RestrictionBatch</code> (<code class="inline">AllEnzymes.search(seq)</code>) is about as fast as using a <code class="inline">FormattedSeq</code> the first time the search is run. This however is dramatically reduced in subsequent runs with the same sequence (<code class="inline">RestrictionBatch</code> keeps in memory the result of their last run while the sequence is not changed).</p>
<h3 id="more-advanced-features"><a name="6"></a>6. More advanced features</h3>
<p>This chapter addresses some more advanced features of the package; most users can safely ignore it.</p>
<h4 id="updating-the-enzymes-from-rebase"><a name="6.1"></a>6.1 Updating the enzymes from REBASE</h4>
<p>Most people will certainly not need to update the enzymes. The restriction enzyme package will be updated with each new release of Biopython. But if you wish to get an update in between Biopython releases, here is how to do it.</p>
<p>First, you have to download the two scripts <code class="inline">rebase_update.py</code> and <code class="inline">ranacompiler.py</code>: Go to <a href="https://github.com/biopython/biopython/tree/master/Scripts/Restriction">https://github.com/biopython/biopython/tree/master/Scripts/Restriction</a>, click on the respective file and press the '<strong>Raw</strong>' button on the top right of the code window. Then, with right-click, save the file. Both scripts must be in the same directory.</p>
<h5 id="fetching-the-recent-enzyme-files-manually-from-rebase"><a name="6.1.1"></a>6.1.1 Fetching the recent enzyme files manually from REBASE</h5>
<p>Each month, <a href="http://rebase.neb.com/rebase/rebase.html">REBASE</a> releases a new compilation of data about restriction enzymes. While the enzymes do not change so frequently, you may wish to update the restriction enzyme classes. The first thing to do is to get the latest REBASE files. You can find the releases of REBASE at <a href="http://rebase.neb.com/rebase/rebase.files.html">http://rebase.neb.com/rebase/rebase.files.html</a>. The files you are interested in are in the EMBOSS format. You can download the files directly from the REBASE ftp server using your browser. The files are located at <a href="ftp://ftp.neb.com/pub/rebase">ftp://ftp.neb.com/pub/rebase</a>. You will have to download 3 files: <code class="inline">emboss_e.###</code>, <code class="inline">emboss_r.###</code>, and <code class="inline">emboss_s.###</code>. The <code class="inline">###</code> is a 3-digit number corresponding to the year and month of the release. The first digit is the last digit of the year, the last two are the month: so July 2015 will be: 507; October 2016: 610, etc... Download the three files corresponding to the current month and place them in the same folder as your <code class="inline">rebase_update.py</code> and <code class="inline">ranacompiler.py</code> scripts.</p>
<h5 id="fetching-the-recent-enzyme-files-with-rebase_update.py"><a name="6.1.2"></a>6.1.2 Fetching the recent enzyme files with rebase_update.py</h5>
<p>Another way to do the same thing is to use the <code class="inline">rebase_update.py</code> script. It will connect directly to the rebase ftp server and download the last batch of emboss files. From a DOS or Unix shell do the following:</p>
<div class="sourceCode"><pre class="sourceCode bash"><code class="sourceCode bash">$ <span class="kw">cd</span> path_to_the_update_script
$ <span class="kw">rebase_update.py</span> -p http://www.somewhere.com:8000

<span class="kw">Please</span> wait, trying to connect to Rebase

<span class="kw">copying</span> ftp://ftp.neb.com/pub/rebase/emboss_e.407
<span class="kw">to</span> /cvsroot/bioPython/Bio/Restriction/Scripts/emboss_e.407
<span class="kw">copying</span> ftp://ftp.neb.com/pub/rebase/emboss_s.407
<span class="kw">to</span> /cvsroot/bioPython/Bio/Restriction/Scripts/emboss_s.407
<span class="kw">copying</span> ftp://ftp.neb.com/pub/rebase/emboss_r.407
<span class="kw">to</span> /cvsroot/bioPython/Bio/Restriction/Scripts/emboss_r.407</code></pre></div>
<p>Some explanation is needed: <code class="inline">-p</code> is the switch that tells the script you are using a proxy. If you use an ftp proxy, enter its address, with the connection port after the '<code class="inline">:</code>'.</p>
<h5 id="compiling-a-new-dictionary-with-ranacompiler.py"><a name="6.1.3"></a>6.1.3 Compiling a new dictionary with ranacompiler.py</h5>
<p>Once you have got the recent emboss files you can compile a new module containing the data necessary to create the restriction enzymes.</p>
<p>Note: if the emboss files are not present in the current directory or if they are not up to date, <code class="inline">ranacompiler.py</code> will invoke the script <a href="#6.1.2"><code class="inline">rebase_update.py</code></a>, which needs to be installed in the same folder. You will need to use the same options as before (ie <code class="inline">-m</code> and <code class="inline">-p</code>). See the previous paragraph on <a href="#6.1.2"><code class="inline">rebase_update.py</code></a> for more details.</p>
<p>For simplicity, let's assume we have put the emboss files in the same folder as the script <code class="inline">ranacompiler.py</code>. You may have to change the mode of the file to make it executable:</p>
<div class="sourceCode"><pre class="sourceCode bash"><code class="sourceCode bash">$ <span class="kw">cd</span> path_to_the_ranacompiler_script
$ <span class="kw">chmod</span> <span class="st">&#39;+x&#39;</span> ranacompiler.py</code></pre></div>
<p>Now execute the script:</p>
<div class="sourceCode"><pre class="sourceCode bash"><code class="sourceCode bash">$ <span class="kw">python</span> ranacompiler.py  <span class="co"># or ./ranacompiler.py</span></code></pre></div>
<p>You normally get the following message:</p>
<div class="sourceCode"><pre class="sourceCode bash"><code class="sourceCode bash">$ <span class="kw">./ranacompiler.py</span>

 <span class="kw">Using</span> the files : emboss_e.407, emboss_r.407, emboss_s.407

<span class="kw">WARNING</span> : HaeIV cut twice with different overhang length each time.
        <span class="kw">Unable</span> to deal with this behaviour.
        <span class="kw">This</span> enzyme will not be included in the database. Sorry.
        <span class="kw">Checking</span> :
<span class="kw">Anyway</span>, HaeIV is not commercially available.

<span class="kw">WARNING</span> : HpyUM037X has two different sites.


<span class="kw">The</span> new database contains 867 enzymes.

<span class="kw">Writing</span> the dictionary containing the new Restriction classes...
<span class="kw">OK.</span>

<span class="kw">Writing</span> the dictionary containing the suppliers datas...
<span class="kw">OK.</span>

<span class="kw">Writing</span> the dictionary containing the Restriction types....
<span class="kw">OK.</span>

 <span class="kw">******************************************************************************</span>

                <span class="kw">Compilation</span> of the new dictionary : OK.
                <span class="kw">Installation</span> : No.

 <span class="kw">You</span> will find the newly created <span class="st">&#39;Restriction_Dictionary.py&#39;</span> file
 <span class="kw">in</span> <span class="kw">the</span>  :

        <span class="kw">/path/where/you/run/ranacompiler.py</span>

 <span class="kw">Make</span> a copy of <span class="st">&#39;Restriction_Dictionary.py&#39;</span> and place it with
 <span class="kw">the</span> other Restriction libraries.

 <span class="kw">note</span> :
 <span class="kw">This</span> folder should be :

        <span class="kw">path_to_python/site-packages/Bio/Restriction</span>

 <span class="kw">******************************************************************************</span></code></pre></div>
<p>The first line indicates which emboss files have been used for the present compilation. You can safely ignore the warnings as long as the message <code class="inline">compilation of the new dictionary : OK.</code> is present in the last part of the output. They are there for debugging purposes. The number of enzymes in the new module is indicated, as well as a list of the dictionaries which have been compiled. The last part indicates that the module has been successfully created but not installed. To finish the update you must copy the file <code class="inline">Restriction_Dictionary.py</code> into the folder <code class="inline">/your_python_path/site-packages/Bio/Restriction/</code> as indicated by the script. Looking into the present folder, you will see two new files: the newly created dictionary <code class="inline">Restriction_Dictionary.py</code> and <code class="inline">Restriction_Dictionary.old</code>. This last file contains the old dictionary, to which you can revert in case the new file is corrupted (this should not happen since the script is satisfied that the new dictionary is good, but if there is a problem it is always nice to know you can revert to the previous setting without having to reinstall the whole thing).</p>
<p>If you wish, the script can install the new file for you as well, but you will have to run it as root if your normal user has no write access to your Python installation (and it shouldn't). Use the command <code class="inline">ranacompiler.py -i</code> or <code class="inline">ranacompiler.py --install</code> for this.</p>
<p>If anything goes wrong (you have no write access to the destination folder for example) the script will let you know it did not perform the installation. It will however still save the new module in the current directory.</p>
<p>As you can see the script is not very bright and will redo the compilation each time it is invoked, no matter if a previous version of the module is already present.</p>
<h4 id="subclassing-the-class-analysis"><a name="6.2"></a>6.2 Subclassing the class Analysis</h4>
<p>As seen previously, you can modify some aspects of the <code class="inline">Analysis</code> output interactively. However, if you want to write your own <code class="inline">Analysis</code> class, you may wish to provide other output facilities than those given in this package. Depending on what you want to do, you may get away with simply changing the <code class="inline">make_format</code> method of your derived class, or you may need to provide new methods. Rather than get into a long explanation, here is the implementation of a rather useless <code class="inline">Analysis</code> class:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> <span class="kw">class</span> UselessAnalysis(Analysis):

    <span class="kw">def</span> <span class="fu">__init__</span>(<span class="va">self</span>, rb<span class="op">=</span>RestrictionBatch(), seq<span class="op">=</span>Seq(<span class="st">&#39;&#39;</span>), lin<span class="op">=</span><span class="va">True</span>):
    <span class="co">&quot;&quot;&quot;UselessAnalysis -&gt; A class that waste your time&quot;&quot;&quot;</span>
    <span class="co">#</span>
    <span class="co">#    Unless you want to do something more fancy all</span>
    <span class="co">#    you need to do here is instantiate Analysis.</span>
    <span class="co">#    Don&#39;t forget the self in __init__</span>
    <span class="co">#</span>
        Analysis.<span class="fu">__init__</span>(<span class="va">self</span>, rb, seq, lin)

    <span class="kw">def</span> make_format(<span class="va">self</span>, cut<span class="op">=</span>[], t<span class="op">=</span><span class="st">&#39;&#39;</span>, nc<span class="op">=</span>[], s<span class="op">=</span><span class="st">&#39;&#39;</span>):
    <span class="co">&quot;&quot;&quot;not funny&quot;&quot;&quot;</span>
    <span class="co">#</span>
    <span class="co">#    Generally, you don&#39;t need to do anything else here</span>
    <span class="co">#    This will tell to your new class to default to the</span>
    <span class="co">#    _make_joke format.</span>
    <span class="co">#</span>
        <span class="cf">return</span> <span class="va">self</span>._make_joke(cut, t, nc, s)

    <span class="kw">def</span> print_as(<span class="va">self</span>, what<span class="op">=</span><span class="st">&#39;joke&#39;</span>):
    <span class="co">&quot;&quot;&quot;Never know somebody might want to change the behaviour of</span>
<span class="co">    this class.&quot;&quot;&quot;</span>
    <span class="co">#</span>
    <span class="co">#    add your new option to print_as</span>
    <span class="co">#</span>
        <span class="cf">if</span> what <span class="op">==</span> <span class="st">&#39;joke&#39;</span>:
        <span class="va">self</span>.make_format <span class="op">=</span> <span class="va">self</span>._make_joke
            <span class="cf">return</span>
    <span class="cf">else</span>:
        <span class="co">#</span>
        <span class="co">#   The other options will be treated as before</span>
        <span class="co">#</span>
        <span class="cf">return</span> Analysis.print_as(<span class="va">self</span>, what)

    <span class="kw">def</span> _make_joke(<span class="va">self</span>, cut<span class="op">=</span>[], title<span class="op">=</span><span class="st">&#39;&#39;</span>, nc<span class="op">=</span>[], s1<span class="op">=</span><span class="st">&#39;&#39;</span>):
    <span class="co">&quot;&quot;&quot;UA._make_joke(cut, t, nc, s) -&gt; new analysis output&quot;&quot;&quot;</span>
    <span class="co">#</span>
    <span class="co">#    starting your new method with &#39;_make_&#39;</span>
    <span class="co">#    will give a hint to what it is suppose to do</span>
    <span class="co">#</span>
    <span class="co">#    We will not process the non-cutting enzymes</span>
    <span class="co">#    Their names are in nc</span>
    <span class="co">#    s1 is the string printed before them</span>
    <span class="co">#</span>
    <span class="cf">if</span> <span class="op">not</span> title:
        title <span class="op">=</span> <span class="st">&#39;</span><span class="ch">\n</span><span class="st">You have guessed right the following enzymes:</span><span class="ch">\n\n</span><span class="st">&#39;</span>
    <span class="cf">for</span> name, sites <span class="op">in</span> cut:
        <span class="co">#</span>
        <span class="co">#    cut contains:</span>
        <span class="co">#    - the name of the enzymes which cut the sequence (name)</span>
        <span class="co">#    - a list of the site positions (sites)</span>
        <span class="co">#</span>
        guess <span class="op">=</span> <span class="bu">raw_input</span>(<span class="st">&quot;next enzyme is </span><span class="sc">%s</span><span class="st">, Guess how many sites ?</span><span class="ch">\n</span><span class="st">&gt;&gt;&gt; &quot;</span><span class="op">%</span>name)
            <span class="cf">try</span>:
                guess <span class="op">=</span> <span class="bu">int</span>(guess)
            <span class="cf">except</span>:
                guess <span class="op">=</span> <span class="va">None</span>
            <span class="cf">if</span> guess <span class="op">==</span> <span class="bu">len</span>(sites):
                <span class="bu">print</span> <span class="st">&#39;You did guess right. Good. Next.&#39;</span>
        result <span class="op">=</span> <span class="st">&#39;</span><span class="sc">%i</span><span class="st"> site&#39;</span> <span class="op">%</span> guess
        <span class="cf">if</span> guess <span class="op">&gt;</span> <span class="dv">1</span>:
            result <span class="op">+=</span> <span class="st">&#39;s&#39;</span>

        <span class="co">#</span>
        <span class="co">#    now we format the line. See the PrintFormat module</span>
        <span class="co">#    for some examples</span>
        <span class="co">#   PrintFormat.__section_list and _make_map are good start.</span>
        <span class="co">#</span>
                title<span class="op">=</span><span class="st">&#39;&#39;</span>.join((title, <span class="bu">str</span>(name).ljust(<span class="va">self</span>.NameWidth),
                <span class="st">&#39; :  &#39;</span>, result, <span class="st">&#39;.</span><span class="ch">\n</span><span class="st">&#39;</span>))
    <span class="bu">print</span> <span class="st">&#39;</span><span class="ch">\n</span><span class="st">No more enzyme.&#39;</span>
        <span class="cf">return</span>  title
    <span class="co">#</span>
    <span class="co">#    I you want to print the non cutting enzymes use</span>
    <span class="co">#    the following return instead of the previous one:</span>
    <span class="co">#</span>
    <span class="co">#return  title + t + self._make_nocut_only(nc,s1)</span>

<span class="op">&gt;&gt;&gt;</span> <span class="co"># You initiate and use it as before</span>
<span class="op">&gt;&gt;&gt;</span> rb <span class="op">=</span> RestrictionBatch([], [<span class="st">&#39;A&#39;</span>])
<span class="op">&gt;&gt;&gt;</span> multi_site <span class="op">=</span> Seq(<span class="st">&#39;AAA&#39;</span> <span class="op">+</span> EcoRI.site <span class="op">+</span><span class="st">&#39;G&#39;</span> <span class="op">+</span> KpnI.site <span class="op">+</span> EcoRV.site <span class="op">+</span> <span class="st">&#39;CT&#39;</span> <span class="op">+\</span>
SmaI.site <span class="op">+</span> <span class="st">&#39;GT&#39;</span> <span class="op">+</span> FokI.site <span class="op">+</span> <span class="st">&#39;GAAAGGGC&#39;</span> <span class="op">+</span> EcoRI.site <span class="op">+</span> <span class="st">&#39;ACGT&#39;</span>, IUPACAmbiguousDNA())
<span class="op">&gt;&gt;&gt;</span>
<span class="op">&gt;&gt;&gt;</span> b <span class="op">=</span> UselessAnalysis(rb, multi_site)
<span class="op">&gt;&gt;&gt;</span> b.print_that() <span class="co"># Well, I let you discover if you haven&#39;t already guessed</span></code></pre></div>
<p>Using this example as a template, you should now be able to subclass <code class="inline">Analysis</code> as you wish. You will find more implementation details in the module <code class="inline">Bio.Restriction.PrintFormat</code>, which contains the class providing all the <code class="inline">_make_*</code> methods.</p>
<h3 id="limitation-and-caveat"><a name="7"></a>7. Limitation and caveat</h3>
<p>Particularly, the class <code class="inline">Analysis</code> is a quick and dirty implementation based on the facilities furnished by the package. Please check your results and report any fault.</p>
<p>On a more general basis, <code class="inline">Restriction</code> has some other limitations:</p>
<h4 id="all-dna-are-non-methylated"><a name="7.1"></a>7.1 All DNA are non methylated</h4>
<p>No facility to work with methylated DNA has been implemented yet. As far as the enzyme classes are concerned, all DNA is non-methylated DNA. Implementation of methylation sensitivity will possibly occur in the future. But for now, if your sequence is methylated, you will have to check if the site is methylated using other means.</p>
<h4 id="no-support-for-star-activity"><a name="7.2"></a>7.2 No support for star activity</h4>
<p>As before, no support has yet been implemented to find sites mis-recognised by enzymes under high salt concentration conditions, the so-called star activity. This will be implemented as soon as I can get a good source of information for that.</p>
<h4 id="safe-to-use-with-degenerated-dna"><a name="7.3"></a>7.3 Safe to use with degenerated DNA</h4>
<p>It is safe to use degenerated DNA as input for the query. You will not be flooded with meaningless results. But this comes at a price: GAA<strong><em>N</em></strong>TC will not be recognised as a potential EcoRI site for example; in fact it will not be recognised at all. Degenerated sequences will not be analysed. If your sequence is not fully sequenced, you will certainly miss restriction sites:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> a <span class="op">=</span> Seq(<span class="st">&#39;nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnGAATTCrrrrrrrrrrr&#39;</span>, IUPACAmbiguou
sDNA())
<span class="op">&gt;&gt;&gt;</span> EcoRI.search(a)
[<span class="dv">36</span>]
<span class="op">&gt;&gt;&gt;</span> b <span class="op">=</span> Seq(<span class="st">&#39;nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnGAAnTCrrrrrrrrrrr&#39;</span>, IUPACAmbiguou
sDNA())
<span class="op">&gt;&gt;&gt;</span> EcoRI.search(b)
[]</code></pre></div>
<h4 id="non-standard-bases-in-dna-are-not-allowed"><a name="7.4"></a>7.4 Non standard bases in DNA are not allowed</h4>
<p>While you can use degenerated DNA, using a non-standard base alphabet will make the enzymes choke, even if <code class="inline">Bio.Seq.Seq</code> accepts it. However, space-like characters (' ', '\n', '\t', ...) and digits will be removed and will not stop the enzyme analysing the sequence. You can use them, but the fragments produced by <code class="inline">catalyse</code> will have lost any formatting. <code class="inline">catalyse</code> tries to keep the original case of the sequence (i.e. lower case sequences will generate lower case fragments, upper case sequences upper case fragments), but mixed case will return upper case fragments:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> c <span class="op">=</span> Seq(<span class="st">&#39;xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxGAANTCrrrrrrrrrrr&#39;</span>, IUPACAmbiguou
sDNA())
<span class="op">&gt;&gt;&gt;</span> EcoRI.search(c)

Traceback (most recent call last):
  File <span class="st">&quot;&lt;pyshell#110&gt;&quot;</span>, line <span class="dv">1</span>, <span class="op">in</span> <span class="op">-</span>toplevel<span class="op">-</span>
    EcoRI.search(b)
  File <span class="st">&quot;/usr/lib/Python2.3/site-packages/Bio/Restriction/Restriction.py&quot;</span>, line <span class="dv">3</span>
  <span class="dv">96</span>, <span class="op">in</span> search
    cls.dna <span class="op">=</span> FormatedSeq(dna, linear)
  File <span class="st">&quot;/usr/lib/Python2.3/site-packages/Bio/Restriction/Restriction.py&quot;</span>, line <span class="dv">1</span>
  <span class="dv">37</span>, <span class="op">in</span> <span class="fu">__init__</span>
    <span class="va">self</span>.<span class="bu">format</span>()
  File <span class="st">&quot;/usr/lib/Python2.3/site-packages/Bio/Restriction/Restriction.py&quot;</span>, line <span class="dv">1</span>
  <span class="dv">53</span>, <span class="op">in</span> <span class="bu">format</span>
    <span class="cf">raise</span> AlphabetError, <span class="st">&quot; &#39;</span><span class="sc">%s</span><span class="st">&#39; is not in the IUPAC alphabet&quot;</span> <span class="op">%</span> s
AlphabetError: <span class="st">&#39;X&#39;</span> <span class="op">is</span> <span class="op">not</span> <span class="op">in</span> the IUPAC alphabet
<span class="op">&gt;&gt;&gt;</span> d <span class="op">=</span> Seq(<span class="st">&#39;1 nnnnn nnnnn nnnnn nnnnn nnnnn </span><span class="ch">\n</span><span class="st">\</span>
<span class="st">26 nnnnn nnnnG AATTC rrrrr rrrrr </span><span class="ch">\n</span><span class="st">\</span>
<span class="st">51 r&#39;</span>, IUPACAmbiguousDNA())
<span class="op">&gt;&gt;&gt;</span> d
Seq(<span class="st">&#39;1 nnnnn nnnnn nnnnn nnnnn nnnnn </span><span class="ch">\n</span><span class="st">26 nnnnn nnnnG AATTC rrrrr rrrrr </span><span class="ch">\n</span><span class="st">51 r&#39;</span>,
 IUPACAmbiguousDNA())
<span class="op">&gt;&gt;&gt;</span> EcoRI.search(d)
[<span class="dv">36</span>]
<span class="op">&gt;&gt;&gt;</span> EcoRI.catalyse(d)
(Seq(<span class="st">&#39;AATTCRRRRRRRRRRR&#39;</span>, IUPACAmbiguousDNA()), Seq(<span class="st">&#39;NNNNNNNNNNNNNNNNNNNNNNNNNNNN</span>
<span class="st">NNNNNNG&#39;</span>, IUPACAmbiguousDNA()))
<span class="op">&gt;&gt;&gt;</span> e <span class="op">=</span> Seq(<span class="st">&#39;nnnnGAATTCrr&#39;</span>, IUPACAmbiguousDNA())
<span class="op">&gt;&gt;&gt;</span> f <span class="op">=</span> Seq(<span class="st">&#39;NNNNGAATTCRR&#39;</span>, IUPACAmbiguousDNA())
<span class="op">&gt;&gt;&gt;</span> g <span class="op">=</span> Seq(<span class="st">&#39;nnnngaattcrr&#39;</span>, IUPACAmbiguousDNA())
<span class="op">&gt;&gt;&gt;</span> EcoRI.catalyse(e)
(Seq(<span class="st">&#39;NNNNG&#39;</span>, IUPACAmbiguousDNA()), Seq(<span class="st">&#39;AATTCRR&#39;</span>, IUPACAmbiguousDNA()))
<span class="op">&gt;&gt;&gt;</span> EcoRI.catalyse(f)
(Seq(<span class="st">&#39;NNNNG&#39;</span>, IUPACAmbiguousDNA()), Seq(<span class="st">&#39;AATTCRR&#39;</span>, IUPACAmbiguousDNA()))
<span class="op">&gt;&gt;&gt;</span> EcoRI.catalyse(g)
(Seq(<span class="st">&#39;nnnng&#39;</span>, IUPACAmbiguousDNA()), Seq(<span class="st">&#39;aattcrr&#39;</span>, IUPACAmbiguousDNA()))</code></pre></div>
<p>Not allowing letters other than the IUPAC ones might seem drastic, but this is really to limit errors. It is not totally foolproof but it does help.</p>
<h4 id="sites-found-at-the-edge-of-linear-dna-might-not-be-accessible-in-a-real-digestion"><a name="7.5"></a>7.5 Sites found at the edge of linear DNA might not be accessible in a real digestion</h4>
<p>While sites clearly outside a sequence will not be reported, nothing has been done to try to determine if a restriction site at the end of a linear sequence is valid:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> d <span class="op">=</span> Seq(<span class="st">&#39;GAATTCAAAAAAAAAAAAAAAAAAAAAAAAAAGGATG&#39;</span>, IUPACAmbiguousDNA())
<span class="op">&gt;&gt;&gt;</span> FokI.site           <span class="co"># site present</span>
<span class="co">&#39;GGATG&#39;</span>
<span class="op">&gt;&gt;&gt;</span> FokI.elucidate()        <span class="co"># but cut outside the sequence</span>
<span class="co">&#39;GGATGNNNNNNNNN^NNNN_N&#39;</span>
<span class="op">&gt;&gt;&gt;</span> FokI.search(d)      <span class="co"># therefore no site found</span>
[]
<span class="op">&gt;&gt;&gt;</span> EcoRI.search(d)
[<span class="dv">2</span>]</code></pre></div>
<p><code class="inline">EcoRI</code> finds a site at position 2 even if it is highly unlikely that EcoRI would cut this site in a tube. It is generally considered that about 5 nucleotides must separate the site from the edge of the sequence to be reasonably sure the enzyme will work correctly. This &quot;security margin&quot; varies from one enzyme to another. When in doubt, consult the documentation for the enzyme.</p>
<h4 id="restriction-reports-cutting-sites-not-enzyme-recognition-sites"><a name="7.6"></a>7.6 Restriction reports cutting sites not enzyme recognition sites</h4>
<p>Some enzymes will cut twice each time they encounter a restriction site. The enzymes in this package report both cuts, not the site. Other software may only report restriction sites. Therefore the output given for some enzymes might seem to be double when compared with the results of such software. It is not a bug.</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> AloI.cut_twice()
<span class="va">True</span>
<span class="op">&gt;&gt;&gt;</span> AloI.fst5              <span class="co"># first cut</span>
<span class="op">-</span><span class="dv">7</span>
<span class="op">&gt;&gt;&gt;</span> AloI.scd5          <span class="co"># second cut</span>
<span class="dv">25</span>
<span class="op">&gt;&gt;&gt;</span> AloI.site
<span class="co">&#39;GAACNNNNNNTCC&#39;</span>
<span class="op">&gt;&gt;&gt;</span> b <span class="op">=</span> Seq(<span class="st">&#39;AAAAAAAAAAA&#39;</span><span class="op">+</span> AloI.site <span class="op">+</span> <span class="st">&#39;AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA&#39;</span>)
<span class="op">&gt;&gt;&gt;</span> b
Seq(<span class="st">&#39;AAAAAAAAAAAGAACNNNNNNTCCAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA&#39;</span>, Alphabet())
<span class="op">&gt;&gt;&gt;</span> AloI.search(b)  <span class="co"># one site, two cuts -&gt; two positions</span>
[<span class="dv">5</span>, <span class="dv">37</span>]</code></pre></div>
<h3 id="annex-modifying-dir-to-use-with-from-bio.restriction-import"><a name="8"></a>8. Annex: modifying dir() to use with from Bio.Restriction import *</h3>
<p>Having all the enzymes imported directly in the shell is useful when working in an interactive shell (even if it is not recommended by the purists). Here is a little hack to get some sanity back when using dir() in those conditions:</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="op">&gt;&gt;&gt;</span> <span class="co"># we will change the builtin dir() function to get ride of the enzyme names.</span>
<span class="op">&gt;&gt;&gt;</span> <span class="im">import</span> sys
<span class="op">&gt;&gt;&gt;</span> <span class="kw">def</span> <span class="bu">dir</span>(<span class="bu">object</span><span class="op">=</span><span class="va">None</span>):
    <span class="co">&quot;&quot;&quot;dir([object]) -&gt; list of string.</span>

<span class="co">    over-ride the built-in function to get some clarity.&quot;&quot;&quot;</span>
    <span class="cf">if</span> <span class="bu">object</span>:
        <span class="co"># we only want to modify dir(),</span>
        <span class="co"># so here we return the result of the builtin function.</span>
        <span class="cf">return</span> __builtins__.<span class="bu">dir</span>(<span class="bu">object</span>)
    <span class="cf">else</span>:
        <span class="co"># now the part we want to modify.</span>
        <span class="co"># All the enzymes are in a RestrictionBatch (we will talk about</span>
        <span class="co"># that later, for the moment simply believe me).</span>
        <span class="co"># So if we remove from the results of dir() everything which is</span>
        <span class="co"># in AllEnzymes we will get a much shorter list when we do dir()</span>
        <span class="co">#</span>
        <span class="co"># the current level is __main__ ie dir() is equivalent to</span>
        <span class="co"># ask what&#39;s in __main__ at the moment.</span>
        <span class="co"># we can&#39;t access __main__ directly.</span>
        <span class="co"># so we will use sys.modules[&#39;__main__&#39;] to reach it.</span>
        <span class="co"># the following list comprehension remove from the result of</span>
        <span class="co"># dir() everything which is also present in AllEnzymes.</span>
        <span class="co">#</span>
        <span class="cf">return</span> [x <span class="cf">for</span> x <span class="op">in</span> __builtins__.<span class="bu">dir</span>(sys.modules[<span class="st">&#39;__main__&#39;</span>])
            <span class="cf">if</span> <span class="op">not</span> x <span class="op">in</span> AllEnzymes]

<span class="op">&gt;&gt;&gt;</span> <span class="co"># now let&#39;s see if it works.</span>
<span class="op">&gt;&gt;&gt;</span> <span class="bu">dir</span>()
[<span class="st">&#39;AllEnzymes&#39;</span>, <span class="st">&#39;Analysis&#39;</span>, <span class="st">&#39;CommOnly&#39;</span>, <span class="st">&#39;NonComm&#39;</span>, <span class="st">&#39;PrintFormat&#39;</span>, <span class="st">&#39;RanaConfig&#39;</span>,
 <span class="co">&#39;Restriction&#39;</span>, <span class="st">&#39;RestrictionBatch&#39;</span>, <span class="st">&#39;Restriction_Dictionary&#39;</span>, <span class="st">&#39;__builtins__&#39;</span>,
 <span class="co">&#39;__doc__&#39;</span>, <span class="st">&#39;__name__&#39;</span>, <span class="st">&#39;dir&#39;</span>, <span class="st">&#39;sys&#39;</span>]
<span class="op">&gt;&gt;&gt;</span> <span class="co"># ok that&#39;s much better.</span>
<span class="op">&gt;&gt;&gt;</span> <span class="co"># The enzymes are still there</span>
<span class="op">&gt;&gt;&gt;</span> EcoRI.site
<span class="co">&#39;GAATTC&#39;</span></code></pre></div>
</body>
</html>