File: seqsplit.dtx

package info (click to toggle)
texlive-extra 2012.20120611-2
links: PTS
area: main
in suites: wheezy
size: 1,508,552 kB
sloc: perl: 75,280; makefile: 12,192; sh: 9,570; python: 6,296; csh: 2,987; java: 1,367; ansic: 1,032; xml: 896; lisp: 541; lex: 358; ruby: 150; tcl: 142; sed: 28; pascal: 18; awk: 10
file content (424 lines) | stat: -rw-r--r-- 14,921 bytes
parent folder | download | duplicates (11)
% \iffalse
%<*gobble>
% $Id: seqsplit.dtx,v 1.3 2006/08/08 00:02:08 boris Exp $
%
% Copyright 2006, Boris Veytsman <borisv@lk.net>
% This work may be distributed and/or modified under the
% conditions of the LaTeX Project Public License, either
% version 1.3 of this license or (at your option) any 
% later version.
% The latest version of the license is in
%    http://www.latex-project.org/lppl.txt
% and version 1.3 or later is part of all distributions of
% LaTeX version 2003/06/01 or later.
%
% This work has the LPPL maintenance status `maintained'.
%
% The Current Maintainer of this work is Boris Veytsman
%
% This work consists of the file seqsplit.dtx and the
% derived files seqsplit.sty, seqsplit.dtx. 
%
% \fi 
% \CheckSum{50}
%
% \changes{v0.1}{2006/08/07}{The first released version}
%
%% \CharacterTable
%%  {Upper-case    \A\B\C\D\E\F\G\H\I\J\K\L\M\N\O\P\Q\R\S\T\U\V\W\X\Y\Z
%%   Lower-case    \a\b\c\d\e\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u\v\w\x\y\z
%%   Digits        \0\1\2\3\4\5\6\7\8\9
%%   Exclamation   \!     Double quote  \"     Hash (number) \#
%%   Dollar        \$     Percent       \%     Ampersand     \&
%%   Acute accent  \'     Left paren    \(     Right paren   \)
%%   Asterisk      \*     Plus          \+     Comma         \,
%%   Minus         \-     Point         \.     Solidus       \/
%%   Colon         \:     Semicolon     \;     Less than     \<
%%   Equals        \=     Greater than  \>     Question mark \?
%%   Commercial at \@     Left bracket  \[     Backslash     \\
%%   Right bracket \]     Circumflex    \^     Underscore    \_
%%   Grave accent  \`     Left brace    \{     Vertical bar  \|
%%   Right brace   \}     Tilde         \~} 
%
%\iffalse
%    \begin{macrocode}
% Driver: running LaTeX on seqsplit.dtx itself typesets the
% documentation with the ltxdoc class.
\documentclass{ltxdoc}
\usepackage{array}
\usepackage{url}
% The documentation contains live examples, so the package is loaded.
\usepackage{seqsplit}
% Commands left out of the index.  NOTE: when this file is read again
% through \DocInput, the driver is skipped as the body of a false
% conditional, and TeX keeps counting conditional tokens while it
% skips.  Each if-type token in the lists below is therefore followed
% by a closing token of its own -- do not merge or deduplicate the
% lists without keeping them balanced.
\DoNotIndex{\NeedsTeXFormat, \ProvidesPackage, \def, \hspace}
\DoNotIndex{\futurelet, \@gobble, \ifx, \else, \fi, \relax}
\DoNotIndex{\ifmmode, \fi, \allowbreak}
\PageIndex
\CodelineIndex
\RecordChanges
\EnableCrossrefs
\begin{document}
  % Re-read this very file, this time typesetting the commented text.
  \DocInput{seqsplit.dtx}
\end{document}
%    \end{macrocode}
%</gobble> 
% \fi
% \MakeShortVerb{|}
%
%\GetFileInfo{seqsplit.sty}
% \title{Splitting Long Sequences of Letters (DNA, RNA, Proteins,
%   Etc.)\thanks{\copyright Boris Veytsman, 2006}}
% \author{Boris Veytsman}
% \date{\filedate, \fileversion}
% \maketitle
%
% \begin{abstract}
%   Sometimes one needs to typeset long sequences of letters, which
%   should not have spaces between them (like letters in words), but
%   could be split between lines at any point, and without a
%   hyphenation character.  This package provides a command for such
%   sequences. 
% \end{abstract}
%
% \tableofcontents
%
% \clearpage
%
%\section{Introduction}
%\label{sec:intro}
%
% At a recent Practical\TeX{} conference (Practical\TeX-2006, Rutgers,
% New Jersey, USA, \url{http://www.tug.org/practicaltex2006}) Klaus
% H\"oppner asked how one typesets long sequences like the ones
% related to DNA code.  Usually there is no space between letters, but
% a sequence could be split at any point and continued on the next
% line.   The audience suggested several solutions to this problem.
% One solution, for example, was to define a new language, where
% hyphenation is possible at any point, and the hyphenation character
% is empty.  However, this would require regeneration of all \TeX{}
% formats, which might not be practical or even possible.  Another
% solution, suggested, if my memory is right, by Peter Flynn, was to
% scan the sequence and insert a breaking point after each letter.
% This latter approach is implemented in this package.
%
%
%
%\section{User Interface}
%\label{sec:interface}
%
%
%\subsection{Main Command}
%\label{sec:command}
%
% \DescribeMacro{\seqsplit}
% The main (and actually the only) command in this package is
% |\seqsplit|.  Its usage is very simple.  For example, to typeset the
% gene HBB, related to sickle cell anaemia (actually, the
% corresponding mRNA Reference Sequence), we use the following:
% \begin{verbatim}
% \seqsplit{%
% acatttgcttctgacacaactgtgttcactagcaacctcaaacagacaccatggtgcatc%
% tgactcctgaggagaagtctgccgttactgccctgtggggcaaggtgaacgtggatgaag%
% ttggtggtgaggccctgggcaggctgctggtggtctacccttggacccagaggttctttg%
% agtcctttggggatctgtccactcctgatgctgttatgggcaaccctaaggtgaaggctc%
% atggcaagaaagtgctcggtgcctttagtgatggcctggctcacctggacaacctcaagg%
% gcacctttgccacactgagtgagctgcactgtgacaagctgcacgtggatcctgagaact%
% tcaggctcctgggcaacgtgctggtctgtgtgctggcccatcactttggcaaagaattca%
% ccccaccagtgcaggctgcctatcagaaagtggtggctggtgtggctaatgccctggccc%
% acaagtatcactaagctcgctttcttgctgtccaatttctattaaaggttcctttgttcc%
% ctaagtccaactactaaactgggggatattatgaagggccttgagcatctggattctgcc%
% taataaaaaacatttattttcattgc}.
% \end{verbatim}
% which produces
% \begin{quote}
% \seqsplit{%
% acatttgcttctgacacaactgtgttcactagcaacctcaaacagacaccatggtgcatc%
% tgactcctgaggagaagtctgccgttactgccctgtggggcaaggtgaacgtggatgaag%
% ttggtggtgaggccctgggcaggctgctggtggtctacccttggacccagaggttctttg%
% agtcctttggggatctgtccactcctgatgctgttatgggcaaccctaaggtgaaggctc%
% atggcaagaaagtgctcggtgcctttagtgatggcctggctcacctggacaacctcaagg%
% gcacctttgccacactgagtgagctgcactgtgacaagctgcacgtggatcctgagaact%
% tcaggctcctgggcaacgtgctggtctgtgtgctggcccatcactttggcaaagaattca%
% ccccaccagtgcaggctgcctatcagaaagtggtggctggtgtggctaatgccctggccc%
% acaagtatcactaagctcgctttcttgctgtccaatttctattaaaggttcctttgttcc%
% ctaagtccaactactaaactgggggatattatgaagggccttgagcatctggattctgcc%
% taataaaaaacatttattttcattgc}.
% \end{quote}
% Note that the breaking points in the code (commented out by \%) have
% nothing to do with the breaking points in the typeset sequence and
% are introduced only for readability of the code.
%
% The corresponding protein sequence ($\beta$-globin) is shorter:
% \begin{verbatim}
% \seqsplit{%
% mvhltpeeksavtalwgkvnvdevggealgrllvvypwtqrffesfgdlstpdavmgnpk%
% vkahgkkvlgafsdglahldnlkgtfatlselhcdklhvdpenfrllgnvlvcvlahhfg%
% keftppvqaayqkvvagvanalahkyh}.
% \end{verbatim}
% \begin{quote}
% \seqsplit{%
% mvhltpeeksavtalwgkvnvdevggealgrllvvypwtqrffesfgdlstpdavmgnpk%
% vkahgkkvlgafsdglahldnlkgtfatlselhcdklhvdpenfrllgnvlvcvlahhfg%
% keftppvqaayqkvvagvanalahkyh}.
% \end{quote}
%
% The command works in math mode as well:
% \begin{verbatim}
% $\pi = \seqsplit{%
% 3.
% 1415926535 8979323846 2643383279 5028841971 6939937510
% 5820974944 5923078164 0628620899 8628034825 3421170679
% 8214808651 3282306647 0938446095 5058223172 5359408128
% 4811174502 8410270193 8521105559 6446229489 5493038196
% 4428810975 6659334461 2847564823 3786783165 2712019091
% 4564856692 3460348610 4543266482 1339360726 0249141273
% 7245870066 0631558817 4881520920 9628292540 9171536436
% 7892590360 0113305305 4882046652 1384146951 9415116094
% 3305727036 5759591953 0921861173 8193261179 3105118548
% 0744623799 6274956735 1885752724 8912279381 8301194912
% 9833673362 4406566430 8602139494 6395224737 1907021798
% 6094370277 0539217176 2931767523 8467481846 7669405132
% 0005681271 4526356082 7785771342 7577896091 7363717872
% 1468440901 2249534301 4654958537 1050792279 6892589235}
% \ldots$
% \end{verbatim}
% \begin{quote}
% $\pi = \seqsplit{%
% 3.
% 1415926535 8979323846 2643383279 5028841971 6939937510
% 5820974944 5923078164 0628620899 8628034825 3421170679
% 8214808651 3282306647 0938446095 5058223172 5359408128
% 4811174502 8410270193 8521105559 6446229489 5493038196
% 4428810975 6659334461 2847564823 3786783165 2712019091
% 4564856692 3460348610 4543266482 1339360726 0249141273
% 7245870066 0631558817 4881520920 9628292540 9171536436
% 7892590360 0113305305 4882046652 1384146951 9415116094
% 3305727036 5759591953 0921861173 8193261179 3105118548
% 0744623799 6274956735 1885752724 8912279381 8301194912
% 9833673362 4406566430 8602139494 6395224737 1907021798
% 6094370277 0539217176 2931767523 8467481846 7669405132
% 0005681271 4526356082 7785771342 7577896091 7363717872
% 1468440901 2249534301 4654958537 1050792279 6892589235}
% \ldots$
% \end{quote}
%
%\subsection{Customization}
%\label{sec:customization}
%
% \DescribeMacro{\seqinsert} The command |\seqsplit| can be customized
% by redefining the command |\seqinsert|, which is the macro that is
% inserted between the letters of the sequence.  By default it is
% defined as |\allowbreak| in math mode and |\hspace{0pt plus 0.02em}|
% in text mode: a slightly stretchable glue of zero length.  This
% definition gives \TeX{} a chance to justify the lines.  However,
% there might be other definitions.  For example, if we want hyphens
% at the breakpoints in text mode, we can use:
% \begin{quote}
%   |\renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi}|
% \end{quote}
% which produces for the $\beta$-globin protein from the previous
% section the following:
% \begin{quote}
%   \renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi}
% \seqsplit{%
% mvhltpeeksavtalwgkvnvdevggealgrllvvypwtqrffesfgdlstpdavmgnpk%
% vkahgkkvlgafsdglahldnlkgtfatlselhcdklhvdpenfrllgnvlvcvlahhfg%
% keftppvqaayqkvvagvanalahkyh}.
% \end{quote}
% Another redefinition,
% \begin{quote}
%   |\renewcommand{\seqinsert}{\ifmmode\allowbreak\else{} \fi}|,
% \end{quote}
% produces an output with spaces between letters.  Note that there is
% no space between the last letter and the dot: the package takes care
% of this:
% \begin{quote}
%   \renewcommand{\seqinsert}{\ifmmode\allowbreak\else{} \fi}
% \seqsplit{%
% mvhltpeeksavtalwgkvnvdevggealgrllvvypwtqrffesfgdlstpdavmgnpk%
% vkahgkkvlgafsdglahldnlkgtfatlselhcdklhvdpenfrllgnvlvcvlahhfg%
% keftppvqaayqkvvagvanalahkyh}.
% \end{quote}
% 
%
%
%\subsection{Grouping and Commands}
%\label{sec:grouping}
%
% The command |\seqsplit| does not insert breakpoints between the
% letters inside braces |{...}|.  Compare the typesetting of
% $\beta$-globin in Section~\ref{sec:command} and the following
% example:
% \begin{verbatim}
% \seqsplit{%
% mvhltpeeksavtalwgkvnvdevggealgrllvvypwtqrffesfgdlstpdavmgnpk%
% v{kahg}kkvlgafsdglahldnlkgtfatlselhcdklhvdpenfrllgnvlvcvlahhfg%
% keftppvqaayqkvvagvanalahkyh}.
% \end{verbatim}
% \begin{quote}
% \seqsplit{%
% mvhltpeeksavtalwgkvnvdevggealgrllvvypwtqrffesfgdlstpdavmgnpk%
% v{kahg}kkvlgafsdglahldnlkgtfatlselhcdklhvdpenfrllgnvlvcvlahhfg%
% keftppvqaayqkvvagvanalahkyh}.
% \end{quote}
% The braces around |{kahg}| prevent this group from being split.
% This effect can be used for typesetting special substrings inside
% sequences.
%
% The way |\seqsplit| works interferes with formatting commands like
% |\textit|.  Therefore the sequence |{kahg}| is \emph{not} italicized
% in the following example:
% \begin{verbatim}
% \seqsplit{%
% mvhltpeeksavtalwgkvnvdevggealgrllvvypwtqrffesfgdlstpdavmgnpk%
% v\textit{kahg}kkvlgafsdglahldnlkgtfatlselhcdklhvdpenfrllgnvl%
% vcvlahhfgkeftppvqaayqkvvagvanalahkyh}.
% \end{verbatim}
% \begin{quote}
% \seqsplit{%
% mvhltpeeksavtalwgkvnvdevggealgrllvvypwtqrffesfgdlstpdavmgnpk%
% v\textit{kahg}kkvlgafsdglahldnlkgtfatlselhcdklhvdpenfrllgnvl%
% vcvlahhfgkeftppvqaayqkvvagvanalahkyh}.
% \end{quote}
%
% Using grouping |{\textit{kahg}}| we can save the situation:
% \begin{verbatim}
% \seqsplit{%
% mvhltpeeksavtalwgkvnvdevggealgrllvvypwtqrffesfgdlstpdavmgnpk%
% v{\textit{kahg}}kkvlgafsdglahldnlkgtfatlselhcdklhvdpenfrllgnvl%
% vcvlahhfgkeftppvqaayqkvvagvanalahkyh}.
% \end{verbatim}
% \begin{quote}
% \seqsplit{%
% mvhltpeeksavtalwgkvnvdevggealgrllvvypwtqrffesfgdlstpdavmgnpk%
% v{\textit{kahg}}kkvlgafsdglahldnlkgtfatlselhcdklhvdpenfrllgnvl%
% vcvlahhfgkeftppvqaayqkvvagvanalahkyh}.
% \end{quote}
%
% If we want the italicized sequence to be splittable as well, we can
% use \emph{nested} |\seqsplit|:
% \begin{verbatim}
% \seqsplit{%
% mvhltpeeksavtalwgkvnvdevggealgrllvvypwtqrffesfgdlstpdavmgnpk%
% v{\textit{\seqsplit{kahg}}}kkvlgafsdglahldnlkgtfatlselhcdklhvdpenfrllgnvl%
% vcvlahhfgkeftppvqaayqkvvagvanalahkyh}.
% \end{verbatim}
% \begin{quote}
% \seqsplit{%
% mvhltpeeksavtalwgkvnvdevggealgrllvvypwtqrffesfgdlstpdavmgnpk%
% v{\textit{\seqsplit{kahg}}}kkvlgafsdglahldnlkgtfatlselhcdklhvdpenfrllgnvl%
% vcvlahhfgkeftppvqaayqkvvagvanalahkyh}.
% \end{quote}
%
% These tricks allow one to produce splittable sequences with a rather
% complex formatting.
%
%
%\StopEventually{}
%
% \clearpage
% 
% \section{Implementation}
% \label{sec:implementation}
% 
%
%\subsection{Declarations}
%\label{sec:decl}
% 
%  We start by declaring who we are: the format we need and the
%  name, date and version of the package.
%
%
%    \begin{macrocode}
%<*style>
\NeedsTeXFormat{LaTeX2e}
\ProvidesPackage{seqsplit}%
  [2006/08/07 v0.1 Splitting long sequences (DNA, RNA, proteins, etc.) ]
%    \end{macrocode}
%
%
%
%
%\subsection{Inserted Text}
%\label{sec:insertion}
%
% 
% \begin{macro}{\seqinsert}
%   The material placed between two consecutive items of a sequence.
%   In math mode it is a plain |\allowbreak|; in text mode it is a
%   glue of zero natural width with 0.02em of stretchability.
%    \begin{macrocode}
\def\seqinsert{%
  \ifmmode
    \allowbreak
  \else
    \hspace{0pt plus 0.02em}%
  \fi}
%    \end{macrocode}
% \end{macro}
% 
% 
%
%\subsection{Scanner}
%\label{sec:scanner}
%
% The scanner code is not entirely trivial, so we describe it in detail.
%
% \begin{macro}{\seqsplit}
%   The main (actually, the only) user-level macro.  It puts the end
%   marker |\SQSPL@end| after the sequence and starts the scanner.
%
%   The command is declared robust.  The scanner works by means of
%   assignments (|\futurelet|, |\def|), which must not be carried out
%   while a moving argument, such as a caption or a section title, is
%   being written to an auxiliary file; a robust command is written
%   there unexpanded and is executed only when the text is typeset.
%   Like the |\def| it replaces, the starred declaration does not
%   allow paragraph breaks in the argument.
%    \begin{macrocode}
\DeclareRobustCommand*{\seqsplit}[1]{\SQSPL@scan#1\SQSPL@end}
%    \end{macrocode}
% \end{macro}
%
% The macro |\SQSPL@end| is never meant to be expanded; it is just a
% marker for the end of the sequence.
% \begin{macro}{\SQSPL@scan}
%   One step of the scanner.  |\futurelet| makes |\SQSPL@next| an
%   alias of the token that follows, without removing this token from
%   the input, and then passes control to |\SQSPL@scani|:
%    \begin{macrocode}
\def\SQSPL@scan{%
  \futurelet\SQSPL@next
  \SQSPL@scani}
%    \end{macrocode}
% \end{macro}
% \begin{macro}{\SQSPL@scani}
%   |\SQSPL@scani| takes the next item of the sequence (a single token
%   or a braced group) as its argument.  If this item is the end
%   marker, it is gobbled and the scanning stops; otherwise the item
%   is handed to |\SQSPL@doprocess|.
%
%   The decision is based on the item itself and not on the token
%   |\SQSPL@next| peeked by |\futurelet|, because the two may differ:
%   \TeX{} skips spaces in front of an undelimited argument, while
%   |\futurelet| does not.  For a sequence that ends with a space or
%   with a line end, |\SQSPL@next| is a space token while the argument
%   is already |\SQSPL@end|.  A test on |\SQSPL@next| would miss the
%   end in this case, execute the undefined |\SQSPL@end| and go on
%   scanning the text that follows the sequence.  Moreover,
%   |\SQSPL@end| is undefined, so an |\ifx| test against
%   |\SQSPL@next| would also succeed for any other undefined control
%   sequence.
%
%   To compare the item safely, we store it in the macro |\SQSPL@arg|
%   and compare this macro with |\SQSPL@endarg|, whose replacement
%   text is the end marker alone.  The item is stored through
%   |\toks@| and |\edef|, so that parameter characters inside a group
%   do not disturb the definition.  The item itself is used only
%   after the conditional is closed.
% ^^A Maintainer note: this block now holds 22 backslashes instead of
% ^^A 14; adjust \CheckSum accordingly (50 -> 58 if nothing else changed).
%    \begin{macrocode}
\def\SQSPL@endarg{\SQSPL@end}
\def\SQSPL@scani#1{%
  \toks@{#1}%
  \edef\SQSPL@arg{\the\toks@}%
  \ifx \SQSPL@arg \SQSPL@endarg
    \def\SQSPL@process{\@gobble}%
  \else
    \def\SQSPL@process{\SQSPL@doprocess}%
  \fi
  \SQSPL@process{#1}}
%    \end{macrocode}
% \end{macro}
% \begin{macro}{\SQSPL@doprocess}
%   Typesets one item of the sequence and then calls |\SQSPL@insert|.
%   What is put after the item depends on what follows it.  Nothing
%   should be inserted after the last item: we do not want a line
%   break between the sequence and, say, a comma.  This is why the
%   decision is left to a separate macro:
%    \begin{macrocode}
\def\SQSPL@doprocess#1{%
  #1\SQSPL@insert}
%    \end{macrocode}
% \end{macro}
% \begin{macro}{\SQSPL@insert}
%   |\SQSPL@insert| uses |\futurelet| to look at the token that
%   follows the item just processed, so that |\SQSPL@doinsert| can
%   check whether this item was the last one in the sequence:
%    \begin{macrocode}
\def\SQSPL@insert{%
  \futurelet\SQSPL@next
  \SQSPL@doinsert}
%    \end{macrocode}
% \end{macro}
% \begin{macro}{\SQSPL@doinsert}
%   Puts |\seqinsert| into the output unless the token recorded in
%   |\SQSPL@next| is the end marker, and then resumes scanning:
%    \begin{macrocode}
\def\SQSPL@doinsert{%
  \ifx \SQSPL@next \SQSPL@end
    \relax
  \else
    \seqinsert
  \fi
  \SQSPL@scan}
%    \end{macrocode}
% \end{macro}
%
%
%\subsection{The Last Words}
%\label{sec:last}
%
%
%
%    \begin{macrocode}
%</style>
%    \end{macrocode}
%\Finale
%\clearpage
%
%\PrintChanges
%\clearpage
%\PrintIndex
%
\endinput