/*=====================================================================*/
/*(C) Copyright 1996 by Fred Hutchinson Cancer Research Center */
/* getseq.c Un-indexed extract of one sequence from dbs */
/* USE: getseq seqname dbname [outfile] */
/* seqname = name of sequence
GENBANK: LOCUS name (FASTA type 1)
EMBL: ID name (FASTA type 3)
PIR: ENTRY name (NBRF/CODATA format, FASTA type 2)
VMS: name immediately following the ; on the first line.
(NBRF/VMS format, FASTA type 5)
UNI: name immediately following the > on the first line.
(FASTA type 0)
dbname = name of database file, program will determine its type
outfile = name of output file; if omitted the program prompts for it,
and an empty answer sends the output to stdout
* One output file is created for each entry in "universal" format
(>title $, then sequence, then *). The name of the file
is the 1st 8 characters of the entry name followed by .dna for
GENBANK and by .pro for EMBL and UNI. EG: "MTB1$BAC.pro".
KNOWN PROBLEMS: *If input database is not sorted by ID, may miss some
requested entries.
*If input list file has extension .lst, overwrites it.
*VMS format does not have fragment information.
--------------------------------------------------------------------------
2/25/96 J.Henikoff
1/23/99 exit(-2) if requested sequence not found.
=========================================================================*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "motifj.h"
/* Forward declarations without parameter lists (pre-ANSI style).
   get_ids, lst_list and lis_list are declared here but are not called
   anywhere in the visible part of this file. */
int get_ids();
int lst_list();
int lis_list();
/*-------------- Routines from motmisc.obj --------------------------*/
/* Of the routines below, main() calls init_dbs, makedbid and extract_seqs;
   type_dbs, dir_unix and split_names are not referenced in the visible
   part of this file. */
void init_dbs();
int type_dbs();
struct db_id *makedbid();
int extract_seqs();
char *dir_unix();
struct split_name *split_names();
/* File-scope buffer handed to extract_seqs(); main() sets it to the empty
   string before the call.  NOTE(review): its meaning inside extract_seqs
   is not visible from this file -- confirm against motmisc. */
char Pros[FNAMELEN];
/*======================================================================*/
/*
 * prompt_line: print prompt, then read one line from stdin into buf
 * (capacity size bytes) with the line terminator removed.
 * Used instead of gets(), which cannot bound its writes.
 * On end-of-file or read error buf becomes the empty string.  Input that
 * does not fit is discarded so that it is not taken as the answer to the
 * next prompt.
 */
static void prompt_line(const char *prompt, char *buf, size_t size)
{
   size_t len;
   int ch;

   printf("%s", prompt);
   fflush(stdout);               /* make sure the prompt is visible */

   if (fgets(buf, (int) size, stdin) == NULL)
   {
      buf[0] = '\0';
      return;
   }
   len = strcspn(buf, "\r\n");
   if (buf[len] == '\0')
   {
      /* No newline was stored: line too long for buf, or input ended */
      while ((ch = getchar()) != '\n' && ch != EOF)
         ;
   }
   buf[len] = '\0';
}

/*
 * copy_arg: copy the command line argument src into dst (capacity size
 * bytes).  An argument that does not fit is reported (what names it in
 * the message) and the program exits with status -1 instead of writing
 * past the end of dst.
 */
static void copy_arg(char *dst, size_t size, const char *src, const char *what)
{
   if (strlen(src) >= size)
   {
      printf("\n%s is too long (limit is %lu characters)\n",
             what, (unsigned long) (size - 1));
      exit(-1);
   }
   strcpy(dst, src);
}

/*
 * main: extract one named sequence from a database file.
 *   argv[1] = name of the sequence to extract
 *   argv[2] = database file to read
 *   argv[3] = output file (optional)
 * Any argument missing from the command line is prompted for on stdin;
 * an empty output file name sends the output to stdout.
 * Exit status: 0 if extract_seqs() reports at least one sequence,
 * -2 if it reports none, -1 if a file cannot be opened, an argument is
 * too long, or a request record cannot be allocated.
 */
int main(int argc, char *argv[])
{
   char infile[FNAMELEN], seqname[FNAMELEN];
   char foutname[FNAMELEN];
   struct db_info *dbs[MAXDB];
   int totseqs, nids, frag;
   FILE *fin, *fout;
   struct db_id *ids, *id;

   /* Show the usage text, then fall through to the interactive prompts */
   if (argc < 3)
   {
      printf("\nGETSEQ: (C) Copyright 1996 by Fred Hutchinson");
      printf(" Cancer Research Center\n");
      printf("USAGE: getseq <seqname> <dbfile> [<outfile>]\n");
      printf(" <seqname> = name of sequence to extract\n");
      printf(" <dbfile> = sequence database\n");
      printf(" <outfile> = output file name\n");
   }

   /*------------- arg 1. Name of sequence to extract ---------------------*/
   if (argc > 1)
      copy_arg(seqname, sizeof seqname, argv[1], "Sequence name");
   else
      prompt_line("\nEnter name of sequence to extract: ",
                  seqname, sizeof seqname);

   /* Build the request list: ids is the first node and the single
      requested entry is linked after it. */
   ids = makedbid();
   id = makedbid();
   if (ids == NULL || id == NULL)
   {
      printf("\nCannot allocate sequence id\n");
      exit(-1);
   }
   ids->next = id;
   /* NOTE(review): the capacity of id->entry is declared in motifj.h and is
      not visible here -- confirm it can hold FNAMELEN-1 characters. */
   strcpy(id->entry, seqname);
   nids = 1;

   /*----------------- arg 2 database name --------------------------------*/
   if (argc > 2)
      copy_arg(infile, sizeof infile, argv[2], "Database file name");
   else
      prompt_line("\nEnter name of database file to extract sequence from: ",
                  infile, sizeof infile);
   if ( (fin=fopen(infile, "r")) == NULL)
   {
      printf("\nCannot open file %s\n", infile);
      exit(-1);
   }

   /*------------- arg 3. Output file name---------------------------------*/
   foutname[0] = '\0';
   if (argc > 3)
      copy_arg(foutname, sizeof foutname, argv[3], "Output file name");
   else
      prompt_line("\nEnter name of output file: ",
                  foutname, sizeof foutname);
   if (strlen(foutname))
   {
      if ( (fout=fopen(foutname, "w+t")) == NULL)
      {
         printf("\nCannot open file %s\n", foutname);
         exit(-1);
      }
      else
         printf("\nExtracting sequence to %s", foutname);
   }
   else fout = stdout;           /* no name given: write to stdout */

   /*------------------- Extract the sequence ---------------------------*/
   Pros[0] = '\0';
   frag = YES;
   if (nids > 0)
   {
      init_dbs(dbs);             /* load database information */
      totseqs = extract_seqs(nids, dbs, fin, ids, Pros, fout, frag);
   }
   else totseqs = 0;

   fclose(fin);
   /* fout is not closed explicitly; exit() flushes and closes every
      stream that is still open. */
   if (totseqs > 0) exit(0);
   else exit(-2);
}  /* end of main */