Author: Andreas Tille <tille@debian.org>
Description: Several patches for ARB that enable new formats (LinAll, Vienna)
             and contain other needed fixes

diff -ubrN readseq-1.orig/readseq.c readseq-1/readseq.c
--- readseq-1.orig/readseq.c	1993-02-01 01:00:00.000000000 +0100
+++ readseq-1/readseq.c	2007-11-14 12:14:36.000000000 +0100
@@ -93,6 +93,10 @@
 	      = fix bug for possible memory overrun when truncating seqs for
 		Phylip or Paup formats (thanks Anthony Persechini)
 
+  13 Sep 96   GSt + RL (Steger@biophys.uni-duesseldorf.de)
+  	      * real time in MSF format (Main); #include <time.h>
+              + added VIE multi sequence file format
+              + added LinAll sequence file format
  */
 
 
@@ -169,8 +173,11 @@
 
 
 #include <stdio.h>
+#include <stdlib.h>	/* MSch */
 #include <string.h>
 #include <ctype.h>
+#include <time.h>	/* RL */
+#include <ncbi.h>
 
 #include "ureadseq.h"
 
@@ -199,9 +206,11 @@
     "16. ASN.1",
     "17. PAUP/NEXUS",
     "18. Pretty (out-only)",
+    "19. LinAll",
+    "20. Vienna",
     "" };
 
-#define kFormCount  30
+#define kFormCount  32
 #define kMaxFormName 15
 
 const  struct formatTable {
@@ -238,6 +247,8 @@
     {"paup", kPAUP},
     {"nexus", kPAUP},
     {"pretty", kPretty},
+    {"linall", kLINALL},
+    {"vie", kVIE},
   };
 
 const char *kASN1headline = "Bioseq-set ::= {\nseq-set {\n";
@@ -415,7 +426,7 @@
         fprintf( stderr, "        %-20s      %-20s\n",
                         formats[i], formats[midi+i]);
       fprintf(stderr,"\nChoose an output format (name or #): \n");
-      gets(sform);
+      fgets(sform, 127, stdin);
       outform = parseformat(sform);
       if (outform == kNoformat) outform = kPearson;
       return outform;
@@ -708,8 +719,12 @@
 #else
 #define Exit(a)   exit(a)
 
+#ifdef NCBI
+Nlm_Int2 Nlm_Main(void)
+#else
 main( int argc, char *argv[])
 #endif
+#endif
 {
 boolean   closein = false;
 short     ifile, nseq, atseq, format, err = 0, seqtype = kDNA,
@@ -721,6 +736,14 @@
 char      stempstore[256], *stemp = stempstore;
 FILE      *ftmp, *fin, *fout;
 long      outindexmax= 0, noutindex= 0, *outindex = NULL;
+time_t	  time_val;	    /* GSt + RL */
+size_t	  size_timestr = 50;/* GSt + RL */
+char	  timestr[50];	    /* GSt + RL */
+
+#ifdef NCBI
+int argc;
+char** argv;
+#endif
 
 #define exit_main(err) {        \
   if (closeout) fclose(fout);   \
@@ -739,6 +762,10 @@
 
 
   resetGlobals();
+#ifdef NCBI	/* same test as the Nlm_Main and argc/argv guards earlier in this file */
+  argc = Nlm_GetArgc();	/* Nlm_Main(void) receives no arguments, so fetch them here */
+  argv = Nlm_GetArgv();
+#endif
   foo = stdout;
   progname = argv[0];
   *oname = 0;
@@ -764,7 +791,7 @@
 
   quietly = (dopipe || (gotinputfile && (listonly || whichSeq != 0)));
 
-  if (verbose || (!quietly && !gotinputfile)) fprintf( stderr, title);
+  //if (verbose || (!quietly && !gotinputfile)) fprintf( stderr, "%s\n", title);
   ifile = 1;
 
                             /* UI: Choose output */
@@ -1003,6 +1030,13 @@
           else if (dolower)
             for (i = 0; i<seqlen; i++) seq[i] = to_lower(seq[i]);
 
+/* Undocumented patch taken over from ARB: for Phylip output every '.' in the sequence becomes '?'; hopefully does no harm.  A. Tille */
+#ifdef ARB
+	  if (outform==kPhylip){
+            for (i = 0; i<seqlen; i++) if (seq[i] == '.') seq[i] = '?';
+	  }
+#endif
+
           if (doreverse) {
             long  j, k;
             char  ctemp;
@@ -1014,8 +1048,15 @@
             }
 
           if ((gPretty.isactive || outform==kPAUP) && gPretty.domatch && firstseq != NULL) {
+#ifdef ARB
+            for (i=0; i<seqlen; i++){
+		if (seq[i] == gPretty.matchchar) seq[i] = 'o';
+              if (seq[i]==firstseq[i]) seq[i]= gPretty.matchchar;
+	      }
+#else
             for (i=0; i<seqlen; i++)
               if (seq[i]==firstseq[i]) seq[i]= gPretty.matchchar;
+#endif
             }
 
 
@@ -1070,9 +1111,15 @@
     }
 
   if (outform == kMSF) {
+    time(&time_val);								/* GSt + RL */
+    strftime(timestr, size_timestr, "%B %e, %Y  %H:%M", localtime(&time_val));	/* GSt + RL */
     if (*oname) cp= oname; else cp= inputfile;
+    fprintf(foo,"\n %s  MSF: %d  Type: N  %s  Check: %d ..\n\n",		/* GSt + RL */
+                  cp, seqlen, timestr, checkall);
+/*    
     fprintf(foo,"\n %s  MSF: %d  Type: N  January 01, 1776  12:00  Check: %d ..\n\n",
                   cp, seqlen, checkall);
+*/
     }
 
   if (outform == kPAUP) {
diff -ubrN readseq-1.orig/ureadseq.c readseq-1/ureadseq.c
--- readseq-1.orig/ureadseq.c	1998-09-03 02:00:00.000000000 +0200
+++ readseq-1/ureadseq.c	2007-11-14 12:14:36.000000000 +0100
@@ -18,11 +18,14 @@
 
 
 #include <stdio.h>
+#define __NO_CTYPE
+#include <stdlib.h>	/* MSch */
 #include <ctype.h>
 #include <string.h>
 
 #define UREADSEQ_G
 #include "ureadseq.h"
+/* changed according to original which is the same with the changed header (at) */
 
 #pragma segment ureadseq
 
@@ -66,7 +69,7 @@
 # define Local      static    /* local functions */
 #endif
 
-#define kStartLength  500
+#define kStartLength  500000   /* 20Apr93 temp. bug fix */
 
 const char *aminos      = "ABCDEFGHIKLMNPQRSTVWXYZ*";
 const char *primenuc    = "ACGTU";
@@ -101,6 +104,9 @@
   long  linestart;
   char  s[256], *sp;
 
+#ifdef ARB
+  int (*isseqcharfirst8)(); /* Patch by o. strunk (ARB) to allow numbers in genbank sequences*/
+#endif
   int (*isseqchar)();
   /* int  (*isseqchar)(int c);  << sgi cc hates (int c) */
 };
@@ -150,9 +156,23 @@
 Local void addseq(char *s, struct ReadSeqVars *V)
 {
   char  *ptr;
+#ifdef ARB
+  /* Patch by o. strunk (ARB) to allow numbers in genbank sequences */
+	int             count = 0;
+#endif
 
+#ifdef ARB
+  if (V->addit){
+		for  (;*s != 0;s++,count++) {
+			if (count < 9 && V->isseqcharfirst8) {
+				if (!(V->isseqcharfirst8) (*s)) continue;
+			}else{
+				if (!(V->isseqchar) (*s)) continue;
+			}
+#else
   if (V->addit) while (*s != 0) {
     if ((V->isseqchar)(*s)) {
+#endif
       if (V->seqlen >= V->maxseq) {
         V->maxseq += kStartLength;
         ptr = (char*) realloc(V->seq, V->maxseq+1);
@@ -164,7 +184,9 @@
         }
       V->seq[(V->seqlen)++] = *s;
       }
+#ifndef ARB
     s++;
+#endif
     }
 }
 
@@ -324,6 +346,11 @@
 Local void readGenBank(struct ReadSeqVars *V)
 { /*GenBank -- many seqs/file */
 
+#ifdef ARB
+  /* Patch by o. strunk (ARB) to allow numbers in genbank sequences */
+	V->isseqchar = isSeqNumChar;
+	V->isseqcharfirst8 = isSeqChar;
+#endif
   while (!V->allDone) {
     strcpy(V->seqid, (V->s)+12);
     while (! (feof(V->f) || strstr(V->s,"ORIGIN") == V->s))
@@ -337,9 +364,44 @@
       }
     if (feof(V->f)) V->allDone = true;
   }
+#ifdef ARB
+       V->isseqchar = isSeqChar;
+       V->isseqcharfirst8 = 0;
+#endif
 }
 
 
+/* readLoop end test for Vienna input; reports true when V->s starts the next record */
+Local boolean endVIE( boolean *addend, boolean *ungetend, struct ReadSeqVars *V)	/* GSt + RL */
+{
+  if (V->s[0] != '>')
+    return(false);
+  /* '>' is the start of the next seq: addend off, ungetend on */
+  *addend = false;
+  *ungetend = true;
+  return(true);
+}
+
+
+/*
+ * Read a Vienna ("vie") multi-sequence file.  (GSt + RL)
+ * Each record is a "> label" header line followed by sequence lines;
+ * V->s is expected to hold the current header line at the top of each pass.
+ */
+Local void readVIE(struct ReadSeqVars *V)	/* GSt + RL */
+{
+  while (!V->allDone) {
+    /* the label starts after the leading "> " (two characters) */
+    strcpy(V->seqid, (V->s)+2);
+    /* collect lines until endVIE reports the next '>' header */
+    readLoop(0, false, endVIE, V);
+    /* end of input terminates the loop */
+    if (feof(V->f)) V->allDone = true;
+  }
+  /* NOTE(review): strcpy is unbounded; assumes V->seqid is at least as large
+     as the line buffer V->s -- confirm against struct ReadSeqVars. */
+}
+
 Local boolean endNBRF( boolean *addend, boolean *ungetend, struct ReadSeqVars *V)
 {
   char  *a;
@@ -449,6 +511,46 @@
   }
 }
 
+/*
+ * Read a LinAll sequence file (single sequence).	GSt
+ *
+ * Layout, matching what the kLINALL output case writes:
+ *   line 1     : SeqLen[I4] ' ' Label[Char*60]
+ *   following  : Seq[Char*70 per line]
+ *
+ * V->s is expected to hold the first line on entry.  Lines are appended with
+ * addseq() until more than `laenge` residues were collected or input ends;
+ * the result is then cut back to the declared length.
+ */
+Local void readLINALL(struct ReadSeqVars *V)	/* GSt */
+{
+  int laenge = 0;		/* declared sequence length ("Laenge" = length) */
+
+  V->nseq++;			/* but there is only a single sequence ? */
+
+  /* seqlen is in 1st 4 chars of 1st line; a missing or negative number is
+     treated as length 0 instead of leaving laenge uninitialised */
+  if (sscanf(V->s, "%4d", &laenge) != 1 || laenge < 0) laenge = 0;
+
+  /* label starts after 5th char of 1st line; do not read past the end of
+     a header line that is shorter than that */
+  if (strlen(V->s) > 5)
+    strcpy(V->seqid, (V->s)+5);
+  else
+    V->seqid[0] = '\0';
+
+  do {
+    V->done = feof(V->f);
+    getline(V);			/* project helper called with V (not POSIX getline) */
+    if (!V->done) addseq((V->s), V);
+  } while ( !(V->done) && (V->seqlen)<=laenge );
+
+  /* only laenge chars are relevant for V->seq, but never report more
+     residues than were actually read (truncated input) */
+  if (V->seqlen > laenge) V->seqlen = laenge;
+  V->allDone = true;
+}
+
 
 
 Local boolean endFitch( boolean *addend, boolean *ungetend, struct ReadSeqVars *V)
@@ -956,6 +1058,8 @@
       case kZuker : readZuker(V); break;
       case kOlsen : readOlsen(V); break;
       case kMSF   : readMSF(V); break;
+      case kLINALL: readLINALL(V); break;
+      case kVIE   : readVIE(V); break;
 
       case kPAUP    : {
         boolean done= false;
@@ -1049,6 +1153,9 @@
   V.err = 0;
   V.nseq = 0;
   V.isseqchar = isSeqChar;
+#ifdef ARB
+  V.isseqcharfirst8 = 0;
+#endif
   if (V.choice == kListSequences) ; /* leave as is */
   else if (V.choice <= 0) V.choice = 1; /* default ?? */
   V.addit = (V.choice > 0);
@@ -1092,6 +1199,9 @@
   V.err = 0;
   V.nseq = 0;
   V.isseqchar = isSeqChar;
+#ifdef ARB
+  V.isseqcharfirst8 = 0;
+#endif
   if (V.choice == kListSequences) ; /* leave as is */
   else if (V.choice <= 0) V.choice = 1; /* default ?? */
   V.addit = (V.choice > 0);
@@ -1152,6 +1262,7 @@
   boolean   foundDNA= false, foundIG= false, foundStrider= false,
             foundGB= false, foundPIR= false, foundEMBL= false, foundNBRF= false,
             foundPearson= false, foundFitch= false, foundPhylip= false, foundZuker= false,
+            foundLINALL= false, foundVIE= false,
             gotolsen= false, gotpaup = false, gotasn1 = false, gotuw= false, gotMSF= false,
             isfitch= false,  isphylip= false, done= false;
   short     format= kUnknown;
@@ -1159,6 +1270,8 @@
   char      sp[256];
   long      linestart=0;
   int     maxlines2check=500;
+  int       linallSeqLen;
+  char      linallHeader[60];
 
 #define ReadOneLine(sp)   \
   { done |= (feof(fseq)); \
@@ -1225,8 +1338,9 @@
       foundPIR= true;
 
     else if (*sp == '>') {
-      if (sp[3] == ';') foundNBRF= true;
-      else foundPearson= true;
+      if      (sp[3] == ';') foundNBRF= true;	 /* {foundNBRF= true;    printf("foundNBRF\n");}    */
+      else if (sp[1] == ' ') foundVIE= true;	 /* {foundVIE= true;	  printf("foundVIE\n");}     */
+      else                   foundPearson= true; /* {foundPearson= true; printf("foundPearson\n");} */
       }
 
     else if (strstr(sp,"ID   ") == sp)
@@ -1239,9 +1353,16 @@
 
     else {
       if (nlines - *skiplines == 1) {
-        int ispp= 0, ilen= 0;
-        sscanf( sp, "%d%d", &ispp, &ilen);
-        if (ispp > 0 && ilen > 0) isphylip= true;
+        int ispp= 0, ilen= 0, icnt=0;
+        char junkstr[120];
+	memset(junkstr,0,120);
+        icnt= sscanf( sp, "%d%d%c", &ispp, &ilen, junkstr);
+        if (icnt == 2 && ispp > 0 && ilen > 0) {
+        	isphylip= true;
+	  }
+	else if (icnt==3 && ispp > 0 && ilen > 0 && *junkstr == ' ') {
+		isphylip= true;
+	  }
         }
       else if (isphylip && nlines - *skiplines == 2) {
         int  tseq;
@@ -1257,6 +1378,65 @@
         }
       if (isfitch & (splen > 20)) foundFitch= true;
 
+#ifdef DEBUG_LINALL
+      dprintf(("Check for LINALL\n"));
+      dprintf(("\tstrtol(sp,NULL,0) = %d\n",strtol(sp,NULL,0)));
+      dprintf(("\tnlines      = %d\n",nlines));
+      dprintf(("\tisphylip    = %d\n",isphylip));
+      dprintf(("\tfoundPhylip = %d\n",foundPhylip));
+      dprintf(("\tisfitch     = %d\n",isfitch));
+      dprintf(("\tfoundFitch  = %d\n",foundFitch));
+#endif
+
+      /*
+       * This format detection was highly bogus ...
+       * Lesson 1: always initialize variables (in case the conversion fails...)
+       * Lesson 2: strings are passed to sscanf _without_ & (string variable is a pointer already)
+       * Lesson 3: forget to check return codes from syscalls: you lose.
+       */
+
+      if (nlines==1) {
+         int rv;
+#ifdef DEBUG_LINALL
+         int i, sane=1;
+	 char *spp;
+
+	 /*
+	  * possible sanity check, for losers (see above)
+	  */
+	 for (spp=sp, i=0; i<4; i++, spp++)
+	   if (!(isspace(*spp) || isdigit(*spp))) {
+	      dprintf(("bogus linall format header: %s\n", sp));
+	      sane=0;
+	      break;
+	   } 
+#endif
+
+	 linallSeqLen = 0;
+	 *linallHeader = '\0';	/* char linallHeader[60]: the scan below is limited to 59 chars + NUL */
+         rv = sscanf( sp, "%d %59s", &linallSeqLen, linallHeader);
+
+#ifdef DEBUG_LINALL
+         dprintf(("\tsscanf rval   = %d\n",rv));
+         dprintf(("\tlinallSeqLen  = %d\n",linallSeqLen));
+         dprintf(("\tlinallHeader  = %s\n",linallHeader));
+         dprintf(("\tlinallHeader  = %d\n",strlen(linallHeader)));
+#endif
+
+         if (rv > 0 && linallSeqLen>0  &&
+             strlen(linallHeader)>0  &&
+             !(isphylip || foundPhylip)) {
+            /* !(isphylip || foundPhylip || isfitch || foundFitch)) {	*/
+           foundLINALL= true;	/* The 1st line contains the seqlength (4 digits), a blank, and a label (up to 60 char). */
+				/* The following lines contain the sequence with 70 chars per line. */
+#ifdef DEBUG_LINALL
+           dprintf(("debug: foundLINALL: %ld<\n",strtol(sp,NULL,0)));
+           dprintf(("debug: sp:%s<\n\n",sp));
+#endif
+         }
+
+      }
+        
       /* kRNA && kDNA are fairly certain...*/
       switch (getseqtype( sp, splen)) {
         case kOtherSeq: otherlines++; break;
@@ -1302,6 +1482,16 @@
       done= true;
       }
 
+    else if (foundLINALL) {
+       format= kLINALL;
+       done= true;
+       }
+       
+    else if (foundVIE) {
+    	format= kVIE;
+    	done= true;
+    	}
+
     else if ((dnalines > 1) || done || (nlines > maxlines2check)) {
           /* decide on most likely format */
           /* multichar idents: */
@@ -1785,6 +1975,27 @@
       linesout += 2;
       break;
 
+    case kLINALL:									/* GSt */
+	fprintf(outf,"%4d %-60s\n",seqlen,seqname);
+	strcpy(endstr,"\n");
+	linesout++;
+	width     = 70;
+	tab       =  0;
+    	spacer    = 0;
+	nameleft  = false;
+	nameright = false;
+	numleft   = false;
+	numright  = false;
+	break;
+	
+    case kVIE:										/* GSt + RL */
+	if ( strchr(seqname,' ') != NULL ) seqname[strchr(seqname,' ')-seqname] = '\0';	/* no blanks in label line */
+	fprintf(outf,"> %-s\n", seqname);
+    	linesout++;
+    	fprintf(outf,"%s\n\n",seq);	/* complete sequence in one line; additional blank line before next sequence */
+    	return linesout;		/* thus, do nothing else */
+    	break;
+    		
     default :
     case kZuker: /* don't attempt Zuker's ftn format */
     case kPearson:
@@ -1841,7 +2052,8 @@
         s[l++] = ' ';
       if (!baseonlynum) ibase++;
       else if (0==strchr(nocountsymbols,seq[i])) ibase++;
-      s[l++] = seq[i++];
+      if (outform==kLINALL) { s[l++] = to_upper(seq[i]); i++; }	/* GSt */
+      else		      s[l++] =          seq[i++] ;
       }
 
     if (l1 == width || i == seqlen) {
diff -ubrN readseq-1.orig/ureadseq.h readseq-1/ureadseq.h
--- readseq-1.orig/ureadseq.h	1992-12-30 01:00:00.000000000 +0100
+++ readseq-1/ureadseq.h	2007-11-14 12:14:36.000000000 +0100
@@ -66,8 +66,10 @@
 #define kASN1           16
 #define kPAUP           17
 #define kPretty         18
+#define kLINALL		19
+#define kVIE		20
 
-#define kMaxFormat      18
+#define kMaxFormat      20
 #define kMinFormat      1
 #define kNoformat       -1    /* format not tested */
 #define kUnknown        0     /* format not determinable */
@@ -100,7 +102,7 @@
   p.noleaves= p.domatch= p.degap= false;\
   p.matchchar='.';\
   p.gapchar='-';\
-  p.namewidth=8;\
+  p.namewidth=10;\
   p.numwidth=5;\
   p.interline=1;\
   p.spacer=10;\
