File: getseq.c

package info (click to toggle)
blimps 3.9%2Bds-1
  • links: PTS, VCS
  • area: non-free
  • in suites: bookworm, bullseye, buster
  • size: 6,812 kB
  • sloc: ansic: 43,271; csh: 553; perl: 116; makefile: 99; cs: 27; cobol: 23
file content (129 lines) | stat: -rw-r--r-- 4,000 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
/*=====================================================================*/
/*(C) Copyright 1996 by Fred Hutchinson Cancer Research Center         */
/*        getseq.c   Un-indexed extract of one sequence from dbs       */
/*     USE: getseq seqname dbname [outname]                            */
/*        seqname = name of sequence
	      GENBANK:  LOCUS name (FASTA type 1)
	      EMBL:     ID name (FASTA type 3)
	      PIR:	ENTRY name (NBRF/CODATA format, FASTA type 2)
	      VMS:	name immediately following the ; on the first line.
			(NBRF/VMS format, FASTA type 5)
	      UNI:      name immediately following the > on the first line.
			(FASTA type 0)
          dbname = name of database file, program will determine its type

     * One output file is created for each entry in "universal" format
	(>title $, then sequence, then *). The name of the file
	is the 1st 8 characters of the entry name followed by .dna for
	GENBANK and by .pro for EMBL and UNI. EG: "MTB1$BAC.pro".
  KNOWN PROBLEMS:  *If input database is not sorted by ID, may miss some
		    requested entries.
		   *If input list file has extension .lst, overwrites it.
		   *VMS format does not have fragment information.
--------------------------------------------------------------------------
 2/25/96 J.Henikoff
 1/23/99 exit(-2) if requested sequence not found.
=========================================================================*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "motifj.h"

/*  Old-style (empty parenthesis) declarations: the compiler does not
    check argument counts or types for calls to any of these routines. */
/*  The next three routines are declared but not called in this file.  */
int get_ids();
int lst_list();
int lis_list();
/*--------------  Routines from motmisc.obj --------------------------*/
/*  main() calls only init_dbs(), makedbid() and extract_seqs();
    type_dbs(), dir_unix() and split_names() are declared but unused
    here.                                                              */
void init_dbs();
int type_dbs();
struct db_id *makedbid();
int extract_seqs();
char *dir_unix();
struct split_name *split_names();

/*  Passed to extract_seqs() as its fifth argument; main() sets it to
    the empty string before the call.                                  */
char Pros[FNAMELEN];

/*======================================================================*/
/*  copy_arg: copy a command-line argument into a fixed-size buffer.
      dst, size = destination buffer and its capacity in bytes
      src       = NUL-terminated argument text
      what      = short description used in the error message
    Prints a message and exits with status -1 when src does not fit,
    instead of writing past the end of dst.                             */
static void copy_arg(char *dst, size_t size, const char *src, const char *what)
{
   if (strlen(src) >= size)
   {
      printf("\n%s is too long (maximum %lu characters)\n",
             what, (unsigned long) (size - 1));
      exit(-1);
   }
   strcpy(dst, src);			/* safe: length checked above */
}

/*  prompt_line: print prompt on stdout and read one line from stdin.
      prompt    = text to display (written verbatim, no formatting)
      buf, size = destination buffer and its capacity in bytes (> 0)
    The trailing newline is removed.  Input beyond size-1 characters is
    discarded.  On end-of-file or read error buf becomes the empty
    string.  Bounded replacement for gets().                            */
static void prompt_line(const char *prompt, char *buf, size_t size)
{
   size_t len;
   int c;

   fputs(prompt, stdout);
   fflush(stdout);			/* prompt has no trailing newline */

   if (fgets(buf, (int) size, stdin) == NULL)
   {
      buf[0] = '\0';
      return;
   }
   len = strlen(buf);
   if (len > 0 && buf[len - 1] == '\n')
      buf[len - 1] = '\0';
   else
   {
      /* Line was longer than the buffer (or input ended without a
         newline): drop whatever is left of it.                      */
      while ((c = getchar()) != EOF && c != '\n')
         ;
   }
}

/*  main: extract one named sequence from a database file.
      argv[1] = name of sequence to extract
      argv[2] = database file to read
      argv[3] = output file name
    Any argument that is missing is prompted for on stdin; an empty
    output file name sends the sequence to stdout.
    Exit status:  0  if extract_seqs() reports at least one sequence
                 -1  if an argument is too long, a file cannot be
                     opened, or makedbid() returns NULL
                 -2  if extract_seqs() reports no sequences            */
int main(int argc, char *argv[])
{
   char infile[FNAMELEN], seqname[FNAMELEN];
   char foutname[FNAMELEN];
   struct db_info *dbs[MAXDB];
   int totseqs, nids, frag;
   FILE *fin, *fout;
   struct db_id *ids, *id;

   if (argc < 3)
   {
      printf("\nGETSEQ: (C) Copyright 1996 by Fred Hutchinson");
      printf(" Cancer Research Center\n");
      printf("USAGE: getseq <seqname> <dbfile> [<outfile>]\n");
      printf("       <seqname> = name of sequence to extract\n");
      printf("       <dbfile>  = sequence database\n");
      printf("       <outfile> = output file name\n");
   }
/*-------------  arg 1.  Name of sequence to extract ---------------------*/
   if (argc > 1)
      copy_arg(seqname, sizeof seqname, argv[1], "Sequence name");
   else
      prompt_line("\nEnter name of sequence to extract: ",
                  seqname, sizeof seqname);

   /* Two-node list: ids is the first node, id is linked after it and
      carries the requested name.                                      */
   ids = makedbid();
   id = makedbid();
   if (ids == NULL || id == NULL)
   {
      printf("\nCannot allocate memory for sequence id\n");
      exit(-1);
   }
   ids->next = id;
   /* NOTE(review): the size of id->entry is declared in motifj.h and is
      not visible here; confirm it can hold FNAMELEN-1 characters.     */
   strcpy(id->entry, seqname);
   nids = 1;

/*----------------- arg 2 database name --------------------------------*/
   if (argc > 2)
      copy_arg(infile, sizeof infile, argv[2], "Database file name");
   else
      prompt_line("\nEnter name of database file to extract sequence from: ",
                  infile, sizeof infile);

   if ( (fin=fopen(infile, "r")) == NULL)
   {
      printf("\nCannot open file %s\n", infile);
      exit(-1);
   }

/*-------------  arg 3.  Output file name---------------------------------*/
   if (argc > 3)
      copy_arg(foutname, sizeof foutname, argv[3], "Output file name");
   else
      prompt_line("\nEnter name of output file: ",
                  foutname, sizeof foutname);

   if (foutname[0] != '\0')
   {
      /* The 't' in the mode string is a non-standard text-mode flag,
         kept unchanged from the original.                             */
      if ( (fout=fopen(foutname, "w+t")) == NULL)
      {
         printf("\nCannot open file %s\n", foutname);
         exit(-1);
      }
      printf("\nExtracting sequence to %s", foutname);
   }
   else fout = stdout;			/* empty name: write to stdout */

/*------------------- Extract the sequence ---------------------------*/
   Pros[0] = '\0';
   frag = YES;
   init_dbs(dbs);		       /* load database information */
   totseqs = extract_seqs(nids, dbs, fin, ids, Pros, fout, frag);

   fclose(fin);
   /* fout is not closed explicitly; exit() flushes and closes every
      stream that is still open.                                       */

   if (totseqs > 0) exit(0);
   else             exit(-2);
}  /*  end of main */