% This is the main LaTeX file which is used to produce the Biopython
% Tutorial documentation.
%
% If you just want to read the documentation, you can pick up ready-to-go
% copies in both pdf and html format from:
%
% http://biopython.org/DIST/docs/tutorial/Tutorial.html
% http://biopython.org/DIST/docs/tutorial/Tutorial.pdf
%
% If you want to typeset the documentation, you'll need a standard TeX/LaTeX
% distribution (I use teTeX, which works great for me on Unix platforms).
% Additionally, you need HeVeA (or at least hevea.sty), which can be
% found at:
%
% http://pauillac.inria.fr/~maranget/hevea/index.html
%
% You will also need the pictures included in the document, some of
% which are UMLish diagrams created by Dia
% (http://www.lysator.liu.se/~alla/dia/dia.html).
% These diagrams are available from Biopython git in the original dia
% format, which you can easily save as .png format using Dia itself.
% They are also checked in as the png files, so if you make
% modifications to the original dia files, the png files should also be
% changed.
%
% Once you're all set, you should be able to generate pdf by running:
%
% pdflatex Tutorial.tex (to generate the first draft)
% pdflatex Tutorial.tex (to get the cross references right)
% pdflatex Tutorial.tex (to get the table of contents right)
%
% To generate the html, you'll need HeVeA installed. You should be
% able to just run:
%
% hevea -fix Tutorial.tex
%
% However, on older versions of hevea you may first need to remove the
% Tutorial.aux file generated by LaTeX, then run hevea twice to get
% the references right.
%
% If you want to typeset this and have problems, please report them
% at biopython-dev@biopython.org, and we'll try to get things resolved. We
% always love to have people interested in the documentation!
% ----------------------------------------------------------------------
% Preamble: document class, packages and PDF-specific hyperlink set-up.
% This file is fed to both pdflatex and HeVeA (see the build notes in the
% header comment above).
% ----------------------------------------------------------------------
\documentclass{report}
% \url{...} for typesetting web addresses
\usepackage{url}
% smaller page margins
\usepackage{fullpage}
% hevea.sty supplies the htmlonly / latexonly / rawhtml environments
% used for the title page below
\usepackage{hevea}
% \includegraphics (used for the logo on the PDF title page)
\usepackage{graphicx}
% code listings -- presumably used by the included chapter files, which
% are not visible from this file
\usepackage{listings}
% Number sectioning units down to level 4, so that the deeper headings
% get section numbers as well
\setcounter{secnumdepth}{4}
% Make links between references; also provides \href (used in the
% bibliography) and \hypersetup (used below)
\usepackage{hyperref}
% Declare a boolean \ifpdf together with its setters \pdftrue / \pdffalse.
\newif\ifpdf
% If the engine does not define \pdfoutput, treat this as a non-PDF run.
\ifx\pdfoutput\undefined
\pdffalse
\else
% \pdfoutput is available: explicitly select PDF output and record it.
\pdfoutput=1
\pdftrue
\fi
% Only for PDF runs: colour the link text (citations red, URLs blue) and
% enable hyperref's index linking.
\ifpdf
\hypersetup{colorlinks=true, hyperindex=true, citecolor=red, urlcolor=blue}
\fi
\begin{document}
% Title page. The title is defined twice: once for the HTML build
% (htmlonly, plain text) and once for the PDF build (latexonly, with the
% logo placed above the title text).
\begin{htmlonly}
\title{Biopython Tutorial and Cookbook}
\end{htmlonly}
\begin{latexonly}
\title{
%Hack to get the logo on the PDF front page:
\includegraphics[width=\textwidth]{images/biopython.jpg}\\
%Hack to get some white space using a blank line:
~\\
Biopython Tutorial and Cookbook}
\end{latexonly}
% Author list, broken over two lines with \\
\author{Jeff Chang, Brad Chapman, Iddo Friedberg, Thomas Hamelryck, \\
Michiel de Hoon, Peter Cock, Tiago Antao, Eric Talevich, Bartek Wilczy\'{n}ski}
% The date field doubles as the "last update" stamp and names the
% Biopython release this text belongs to.
\date{Last Update -- 25 August 2016 (Biopython 1.68)}
%Hack to get the logo at the start of the HTML front page:
%(hopefully this isn't going to be too wide for most people)
% The rawhtml block holds literal HTML for the HeVeA build; the image
% size is hard-coded in its width/height attributes.
\begin{rawhtml}
<P ALIGN="center">
<IMG ALIGN="center" SRC="images/biopython.jpg" TITLE="Biopython Logo" ALT="[Biopython Logo]" width="1024" height="288" />
</p>
\end{rawhtml}
% Emit the title block defined above, followed by the table of contents.
\maketitle
\tableofcontents
%\chapter{Introduction}
%\label{chapter:introduction}
\include{Tutorial/chapter_introduction}
%\chapter{Quick Start -- What can you do with Biopython?}
%\label{chapter:quick-start}
\include{Tutorial/chapter_quick_start}
%\chapter{Sequence objects}
%\label{chapter:Bio.Seq}
\include{Tutorial/chapter_seq_objects}
%\chapter{Sequence annotation objects}
%\label{chapter:SeqRecord}
\include{Tutorial/chapter_seq_annot}
%\chapter{Sequence Input/Output}
%\label{chapter:Bio.SeqIO}
\include{Tutorial/chapter_seqio}
%\chapter{Multiple Sequence Alignment objects}
%\label{chapter:Bio.AlignIO}
\include{Tutorial/chapter_align}
%\chapter{BLAST}
%\label{chapter:blast}
\include{Tutorial/chapter_blast}
%\chapter{BLAST and other sequence search tools (\textit{experimental code})}
%\label{chapter:searchio}
\include{Tutorial/chapter_searchio}
%\chapter{Accessing NCBI's Entrez databases}
%\label{chapter:entrez}
\include{Tutorial/chapter_entrez}
%\chapter{Swiss-Prot and ExPASy}
%\label{chapter:swiss_prot}
\include{Tutorial/chapter_uniprot}
%\chapter{Going 3D: The PDB module}
\include{Tutorial/chapter_pdb}
%\chapter{Bio.PopGen: Population genetics}
\include{Tutorial/chapter_popgen}
%\chapter{Phylogenetics with Bio.Phylo}
%\label{sec:Phylo}
\include{Tutorial/chapter_phylo}
%\chapter{Sequence motif analysis using Bio.motifs}
\include{Tutorial/chapter_motifs}
%\chapter{Cluster analysis}
\include{Tutorial/chapter_cluster}
%\chapter{Supervised learning methods}
\include{Tutorial/chapter_learning}
%\chapter{Graphics including GenomeDiagram}
%\label{chapter:graphics}
\include{Tutorial/chapter_graphics}
%\chapter{KEGG}
%\label{chap:kegg}
\include{Tutorial/chapter_kegg}
%\chapter{Bio.phenotype: analyse phenotypic data}
%\label{chap:phenotype}
\include{Tutorial/chapter_phenotype}
%\chapter{Cookbook -- Cool things to do with it}
%\label{chapter:cookbook}
\include{Tutorial/chapter_cookbook}
%\chapter{The Biopython testing framework}
%\label{sec:regr_test}
\include{Tutorial/chapter_testing}
%\chapter{Advanced}
%\label{chapter:advanced}
\include{Tutorial/chapter_advanced}
%\chapter{Where to go from here -- contributing to Biopython}
\include{Tutorial/chapter_contributing}
%\chapter{Appendix: Useful stuff about Python}
%\label{sec:appendix}
\include{Tutorial/chapter_appendix}
\begin{thebibliography}{99}
\bibitem{cock2009}
Peter J. A. Cock, Tiago Antao, Jeffrey T. Chang, Brad A. Chapman, Cymon J. Cox, Andrew Dalke, Iddo Friedberg, Thomas Hamelryck, Frank Kauff, Bartek Wilczynski, Michiel J. L. de Hoon: ``Biopython: freely available Python tools for computational molecular biology and bioinformatics''. {\it Bioinformatics} {\bf 25} (11), 1422--1423 (2009). \href{http://dx.doi.org/10.1093/bioinformatics/btp163}{doi:10.1093/bioinformatics/btp163}.
\bibitem{pritchard2006}
Leighton Pritchard, Jennifer A. White, Paul R.J. Birch, Ian K. Toth: ``GenomeDiagram: a python package for the visualization of large-scale genomic data''. {\it Bioinformatics} {\bf 22} (5): 616--617 (2006).
\href{http://dx.doi.org/10.1093/bioinformatics/btk021}{doi:10.1093/bioinformatics/btk021}.
\bibitem{toth2006}
Ian K. Toth, Leighton Pritchard, Paul R. J. Birch: ``Comparative genomics reveals what makes an enterobacterial plant pathogen''. {\it Annual Review of Phytopathology} {\bf 44}: 305--336 (2006).
\href{http://dx.doi.org/10.1146/annurev.phyto.44.070505.143444}{doi:10.1146/annurev.phyto.44.070505.143444}.
\bibitem{vanderauwera2009}
G\'eraldine A. van der Auwera, Jaroslaw E. Kr\'ol, Haruo Suzuki, Brian Foster, Rob van Houdt, Celeste J. Brown, Max Mergeay, Eva M. Top: ``Plasmids captured in C. metallidurans CH34: defining the PromA family of broad-host-range plasmids''.
\textit{Antonie van Leeuwenhoek} {\bf 96} (2): 193--204 (2009).
\href{http://dx.doi.org/10.1007/s10482-009-9316-9}{doi:10.1007/s10482-009-9316-9}
\bibitem{proux2002}
Caroline Proux, Douwe van Sinderen, Juan Suarez, Pilar Garcia, Victor Ladero, Gerald F. Fitzgerald, Frank Desiere, Harald Br\"ussow:
``The dilemma of phage taxonomy illustrated by comparative genomics of Sfi21-Like Siphoviridae in lactic acid bacteria''. \textit{Journal of Bacteriology} {\bf 184} (21): 6026--6036 (2002).
\href{http://dx.doi.org/10.1128/JB.184.21.6026-6036.2002}{doi:10.1128/JB.184.21.6026-6036.2002}
\bibitem{jupe2012}
Florian Jupe, Leighton Pritchard, Graham J. Etherington, Katrin MacKenzie, Peter JA Cock, Frank Wright, Sanjeev Kumar Sharma, Dan Bolser, Glenn J Bryan, Jonathan DG Jones, Ingo Hein: ``Identification and localisation of the NB-LRR gene family within the potato genome''. \textit{BMC Genomics} {\bf 13}: 75 (2012).
\href{http://dx.doi.org/10.1186/1471-2164-13-75}{doi:10.1186/1471-2164-13-75}
\bibitem{cock2010}
Peter J. A. Cock, Christopher J. Fields, Naohisa Goto, Michael L. Heuer, Peter M. Rice: ``The Sanger FASTQ file format for sequences with quality scores, and the Solexa/Illumina FASTQ variants''. \textit{Nucleic Acids Research} {\bf 38} (6): 1767--1771 (2010). \href{http://dx.doi.org/10.1093/nar/gkp1137}{doi:10.1093/nar/gkp1137}
\bibitem{brown1999}
Patrick O. Brown, David Botstein: ``Exploring the new world of the genome with DNA microarrays''. \textit{Nature Genetics} {\bf 21} (Supplement 1), 33--37 (1999). \href{http://dx.doi.org/10.1038/4462}{doi:10.1038/4462}
\bibitem{talevich2012}
Eric Talevich, Brandon M. Invergo, Peter J.A. Cock, Brad A. Chapman: ``Bio.Phylo: A unified toolkit for processing, analyzing and visualizing phylogenetic trees in Biopython''. \textit{BMC Bioinformatics} {\bf 13}: 209 (2012). \href{http://dx.doi.org/10.1186/1471-2105-13-209}{doi:10.1186/1471-2105-13-209}
\bibitem{cornish1985}
Athel Cornish-Bowden: ``Nomenclature for incompletely specified bases in nucleic acid sequences: Recommendations 1984.'' \textit{Nucleic Acids Research} {\bf 13} (9): 3021--3030 (1985). \href{http://dx.doi.org/10.1093/nar/13.9.3021}{doi:10.1093/nar/13.9.3021}
\bibitem{cavener1987}
Douglas R. Cavener: ``Comparison of the consensus sequence flanking translational start sites in Drosophila and vertebrates.'' \textit{Nucleic Acids Research} {\bf 15} (4): 1353--1361 (1987). \href{http://dx.doi.org/10.1093/nar/15.4.1353}{doi:10.1093/nar/15.4.1353}
\bibitem{bailey1994}
Timothy L. Bailey and Charles Elkan: ``Fitting a mixture model by expectation maximization to discover motifs in biopolymers'', \textit{Proceedings of the Second International Conference on Intelligent Systems for Molecular Biology} 28--36. AAAI Press, Menlo Park, California (1994).
\bibitem{chapman2000}
Brad Chapman and Jeff Chang: ``Biopython: Python tools for computational biology''. \textit{ACM SIGBIO Newsletter} {\bf 20} (2): 15--19 (August 2000).
\bibitem{dehoon2004}
Michiel J. L. de Hoon, Seiya Imoto, John Nolan, Satoru Miyano: ``Open source clustering software''. \textit{Bioinformatics} {\bf 20} (9): 1453--1454 (2004). \href{http://dx.doi.org/10.1093/bioinformatics/bth078}{doi:10.1093/bioinformatics/bth078}
\bibitem{eisen1998}
Michael B. Eisen, Paul T. Spellman, Patrick O. Brown, David Botstein: ``Cluster analysis and display of genome-wide expression patterns''. \textit{Proceedings of the National Academy of Sciences USA} {\bf 95} (25): 14863--14868 (1998). \href{http://dx.doi.org/10.1073/pnas.95.25.14863}{doi:10.1073/pnas.95.25.14863}
\bibitem{golub1971}
Gene H. Golub, Christian Reinsch: ``Singular value decomposition and least squares solutions''. In \textit{Handbook for Automatic Computation}, {\bf 2}, (Linear Algebra) (J. H. Wilkinson and C. Reinsch, eds), 134--151. New York: Springer-Verlag (1971).
\bibitem{golub1989}
Gene H. Golub, Charles F. Van Loan: \textit{Matrix computations}, 2nd edition (1989).
\bibitem{hamelryck2003a}
Thomas Hamelryck and Bernard Manderick: ``PDB parser and structure class
implemented in Python''. \textit{Bioinformatics}, \textbf{19} (17): 2308--2310 (2003) \href{http://dx.doi.org/10.1093/bioinformatics/btg299}{doi:10.1093/bioinformatics/btg299}.
\bibitem{hamelryck2003b}
Thomas Hamelryck: ``Efficient identification of side-chain patterns using a multidimensional index tree''. \textit{Proteins} {\bf 51} (1): 96--108 (2003). \href{http://dx.doi.org/10.1002/prot.10338}{doi:10.1002/prot.10338}
\bibitem{hamelryck2005}
Thomas Hamelryck: ``An amino acid has two sides; A new 2D measure provides a different view of solvent exposure''. \textit{Proteins} {\bf 59} (1): 29--48 (2005). \href{http://dx.doi.org/10.1002/prot.20379}{doi:10.1002/prot.20379}.
\bibitem{hartigan1975}
John A. Hartigan: \textit{Clustering algorithms}. New York: Wiley (1975).
\bibitem{jain1988}
Anil K. Jain, Richard C. Dubes: \textit{Algorithms for clustering data}. Englewood Cliffs, N.J.: Prentice Hall (1988).
\bibitem{kachitvichyanukul1988}
Voratas Kachitvichyanukul, Bruce W. Schmeiser: ``Binomial Random Variate Generation''. \textit{Communications of the ACM} {\bf 31} (2): 216--222 (1988). \href{http://dx.doi.org/10.1145/42372.42381}{doi:10.1145/42372.42381}
\bibitem{kohonen1997}
Teuvo Kohonen: ``Self-organizing maps'', 2nd Edition. Berlin; New York: Springer-Verlag (1997).
\bibitem{lecuyer1988}
Pierre L'Ecuyer: ``Efficient and Portable Combined Random Number Generators.''
\textit{Communications of the ACM} {\bf 31} (6): 742--749,774 (1988). \href{http://dx.doi.org/10.1145/62959.62969}{doi:10.1145/62959.62969}
\bibitem{majumdar2005}
Indraneel Majumdar, S. Sri Krishna, Nick V. Grishin: ``PALSSE: A program to delineate linear secondary structural elements from protein structures.'' \textit{BMC Bioinformatics}, {\bf 6}: 202 (2005). \href{http://dx.doi.org/10.1186/1471-2105-6-202}{doi:10.1186/1471-2105-6-202}.
\bibitem{matys2003}
V. Matys, E. Fricke, R. Geffers, E. G\"ossling, M. Haubrock, R. Hehl, K. Hornischer, D. Karas, A.E. Kel, O.V. Kel-Margoulis, D.U. Kloos, S. Land, B. Lewicki-Potapov, H. Michael, R. M\"unch, I. Reuter, S. Rotert, H. Saxel, M. Scheer, S. Thiele, E. Wingender: ``TRANSFAC: transcriptional regulation, from patterns to profiles.'' \textit{Nucleic Acids Research} {\bf 31} (1): 374--378 (2003). \href{http://dx.doi.org/10.1093/nar/gkg108}{doi:10.1093/nar/gkg108}
\bibitem{sibson1973}
Robin Sibson: ``SLINK: An optimally efficient algorithm for the single-link cluster method''. \textit{The Computer Journal} {\bf 16} (1): 30--34 (1973). \href{http://dx.doi.org/10.1093/comjnl/16.1.30}{doi:10.1093/comjnl/16.1.30}
\bibitem{snedecor1989}
George W. Snedecor, William G. Cochran: \textit{Statistical methods}. Ames, Iowa: Iowa State University Press (1989).
\bibitem{tamayo1999}
Pablo Tamayo, Donna Slonim, Jill Mesirov, Qing Zhu, Sutisak Kitareewan, Ethan Dmitrovsky, Eric S. Lander, Todd R. Golub: ``Interpreting patterns of gene expression with self-organizing maps: Methods and application to hematopoietic differentiation''. \textit{Proceedings of the National Academy of Sciences USA} {\bf 96} (6): 2907--2912 (1999). \href{http://dx.doi.org/10.1073/pnas.96.6.2907}{doi:10.1073/pnas.96.6.2907}
\bibitem{tryon1970}
Robert C. Tryon, Daniel E. Bailey: \textit{Cluster analysis}. New York: McGraw-Hill (1970).
\bibitem{tukey1977}
John W. Tukey: ``Exploratory data analysis''. Reading, Mass.: Addison-Wesley Pub. Co. (1977).
\bibitem{yeung2001}
Ka Yee Yeung, Walter L. Ruzzo: ``Principal Component Analysis for clustering gene expression data''. \textit{Bioinformatics} {\bf 17} (9): 763--774 (2001). \href{http://dx.doi.org/10.1093/bioinformatics/17.9.763}{doi:10.1093/bioinformatics/17.9.763}
\bibitem{saldanha2004}
Alok Saldanha: ``Java Treeview---extensible visualization of microarray data''. \textit{Bioinformatics} {\bf 20} (17): 3246--3248 (2004).
\href{http://dx.doi.org/10.1093/bioinformatics/bth349}{doi:10.1093/bioinformatics/bth349}
\end{thebibliography}
\end{document}