1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243
|
/* sqnutils.h
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
* National Center for Biotechnology Information (NCBI)
*
* This software/database is a "United States Government Work" under the
* terms of the United States Copyright Act. It was written as part of
* the author's official duties as a United States Government employee and
* thus cannot be copyrighted. This software/database is freely available
* to the public for use. The National Library of Medicine and the U.S.
* Government do not place any restriction on its use or reproduction.
* We would, however, appreciate having the NCBI and the author cited in
* any work or product based on this material
*
* Although all reasonable efforts have been taken to ensure the accuracy
* and reliability of the software and data, the NLM and the U.S.
* Government do not and cannot warrant the performance or results that
* may be obtained by using this software or data. The NLM and the U.S.
* Government disclaim all warranties, express or implied, including
* warranties of performance, merchantability or fitness for any particular
* purpose.
*
* ===========================================================================
*
* File Name: sqnutils.h
*
* Author: Jonathan Kans
*
* Version Creation Date: 9/2/97
*
* $Revision: 6.25 $
*
* File Description:
*
* Modifications:
* --------------------------------------------------------------------------
* Date Name Description of modification
* ------- ---------- -----------------------------------------------------
*
*
* ==========================================================================
*/
#ifndef _SQNUTILS_
#define _SQNUTILS_
#include <ncbi.h>
#include <sequtil.h>
#undef NLM_EXTERN
#ifdef NLM_IMPORT
#define NLM_EXTERN NLM_IMPORT
#else
#define NLM_EXTERN extern
#endif
#ifdef __cplusplus
extern "C" {
#endif
NLM_EXTERN SeqEntryPtr LIBCALL GetTopSeqEntryForEntityID (Uint2 entityID);
NLM_EXTERN SeqEntryPtr LIBCALL GetBestTopParentForData (Uint2 entityID, BioseqPtr bsp);
NLM_EXTERN SeqEntryPtr LIBCALL GetBestTopParentForItemID (Uint2 entityID, Uint2 itemID, Uint2 itemtype);
NLM_EXTERN SeqEntryPtr LIBCALL GetBestTopParentForDataEx (Uint2 entityID, BioseqPtr bsp, Boolean skipGenProdSet);
NLM_EXTERN SeqEntryPtr LIBCALL GetBestTopParentForItemIDEx (Uint2 entityID, Uint2 itemID, Uint2 itemtype, Boolean skipGenProdSet);
NLM_EXTERN SeqIdPtr SeqIdFindWorst (SeqIdPtr sip);
NLM_EXTERN SeqIdPtr MakeSeqID (CharPtr str);
NLM_EXTERN SeqIdPtr MakeUniqueSeqID (CharPtr prefix);
NLM_EXTERN DatePtr DateAdvance (DatePtr dp, Uint1 monthsToAdd);
NLM_EXTERN SeqEntryPtr LIBCALL FindNthSeqEntry (SeqEntryPtr sep, Int2 seq);
NLM_EXTERN SeqEntryPtr LIBCALL FindNthBioseq (SeqEntryPtr sep, Int2 seq);
NLM_EXTERN SeqEntryPtr LIBCALL FindNthSequinEntry (SeqEntryPtr sep, Int2 seq);
NLM_EXTERN SeqEntryPtr LIBCALL FindNucSeqEntry (SeqEntryPtr sep);
NLM_EXTERN Boolean LIBCALL SeqEntryHasNucs (SeqEntryPtr sep);
NLM_EXTERN Boolean LIBCALL SeqEntryHasProts (SeqEntryPtr sep);
NLM_EXTERN Boolean LIBCALL SeqEntryHasAligns (Uint2 entityID, SeqEntryPtr sep);
NLM_EXTERN Boolean LIBCALL PowerBLASTASN1Detected (SeqEntryPtr sep);
NLM_EXTERN Int2 EntityIDToGeneticCode (Uint2 entityID, BoolPtr mito, CharPtr taxname, size_t maxsize);
NLM_EXTERN Int2 SeqEntryToGeneticCode (SeqEntryPtr sep, BoolPtr mito, CharPtr taxname, size_t maxsize);
NLM_EXTERN Int2 SeqEntryToBioSource (SeqEntryPtr sep, BoolPtr mito, CharPtr taxname, size_t maxsize, BioSourcePtr PNTR biopp);
NLM_EXTERN SeqLocPtr CreateWholeInterval (SeqEntryPtr sep);
NLM_EXTERN SeqFeatPtr CreateNewFeature (SeqEntryPtr sep, SeqEntryPtr placeHere, Uint1 choice, SeqFeatPtr useThis);
NLM_EXTERN ValNodePtr CreateNewDescriptor (SeqEntryPtr sep, Uint1 choice);
NLM_EXTERN void UpdateLocalId (BioseqPtr bsp, CharPtr localId);
NLM_EXTERN void UpdateTitle (BioseqPtr bsp, CharPtr title);
NLM_EXTERN GeneRefPtr CreateNewGeneRef (CharPtr locus, CharPtr allele,
CharPtr desc, Boolean pseudo);
NLM_EXTERN ProtRefPtr CreateNewProtRef (CharPtr name, CharPtr desc,
CharPtr ec, CharPtr activity);
NLM_EXTERN CdRegionPtr CreateNewCdRgn (Int2 frame, Boolean orf, Int2 genCode);
NLM_EXTERN void SetSeqFeatData (SeqFeatPtr sfp, Pointer data);
NLM_EXTERN void SetSeqFeatProduct (SeqFeatPtr sfp, BioseqPtr bsp);
NLM_EXTERN void ResetSeqFeatInterval (SeqFeatPtr sfp);
NLM_EXTERN void AddSeqFeatInterval (SeqFeatPtr sfp, BioseqPtr bsp, Int4 from, Int4 to,
Boolean partial5, Boolean partial3);
NLM_EXTERN void AddSeqFeatPoint (SeqFeatPtr sfp, BioseqPtr bsp, Int4 location, Boolean fuzz_before, Boolean fuzz_after, Int2 strand);
/* AddSeqEntryToSeqEntry and ReplaceSeqEntryWithSeqEntry leave
the original target sep pointing to the new structure. */
NLM_EXTERN void AddSeqEntryToSeqEntry (SeqEntryPtr target, SeqEntryPtr insert, Boolean relink);
NLM_EXTERN void ReplaceSeqEntryWithSeqEntry (SeqEntryPtr target, SeqEntryPtr replaceWith, Boolean relink);
NLM_EXTERN void RemoveSeqEntryFromSeqEntry (SeqEntryPtr top, SeqEntryPtr del, Boolean relink);
NLM_EXTERN void RenormalizeNucProtSets (SeqEntryPtr sep, Boolean relink);
/* The following functions are called by the above when relink is TRUE. Examine the
code of ReplaceSeqEntryWithSeqEntry (in dlgutil2.c) to see how relink is treated. */
NLM_EXTERN void GetSeqEntryParent (SeqEntryPtr target, Pointer PNTR parentptr, Uint2Ptr parenttype);
NLM_EXTERN void SaveSeqEntryObjMgrData (SeqEntryPtr target, ObjMgrDataPtr PNTR omdptopptr, ObjMgrData PNTR omdataptr);
NLM_EXTERN void RestoreSeqEntryObjMgrData (SeqEntryPtr target, ObjMgrDataPtr omdptop, ObjMgrData PNTR omdataptr);
/* If relink FALSE, call SeqMgrLinkSeqEntry (target, parenttype, parentptr)
with original parent after all sequences have been added to the target. */
/* If relink FALSE, call SaveSeqEntryObjMgrData with the address of temporary
ObjMgrDataPtr and ObjMgrData variables, and after calling SeqMgrLinkSeqEntry to
update the link table, call RestoreSeqEntryObjMgrData with the value of the
temporary ObjMgrDataPtr and the address of the ObjMgrData variable. */
/* ExtractBioSourceAndPubs and ReplaceBioSourceAndPubs can be called before and
after AddSeqEntryToSeqEntry to propagate source and pub descriptors to top level. */
NLM_EXTERN ValNodePtr ExtractBioSourceAndPubs (SeqEntryPtr sep);
NLM_EXTERN void ReplaceBioSourceAndPubs (SeqEntryPtr sep, ValNodePtr descr);
/* SeqLocMerge combines feature intervals. It can be used to extend the gene feature
intervals, and (eventually) to fuse mutliple features into one. */
NLM_EXTERN SeqLocPtr SeqLocMerge (BioseqPtr target,
SeqLocPtr to, SeqLocPtr from,
Boolean single_interval, Boolean fuse_joints,
Boolean add_null);
NLM_EXTERN Boolean CheckSeqLocForPartial (SeqLocPtr location, BoolPtr p5ptr, BoolPtr p3ptr);
NLM_EXTERN void SetSeqLocPartial (SeqLocPtr location, Boolean partial5, Boolean partial3);
NLM_EXTERN void FreeAllFuzz (SeqLocPtr location);
NLM_EXTERN Boolean LocationHasNullsBetween (SeqLocPtr location);
NLM_EXTERN Boolean SeqLocBadSortOrder (BioseqPtr bsp, SeqLocPtr slp);
/* GetBioseqGivenSeqLoc returns a segmented bioseq if the SeqLoc is to the parts */
NLM_EXTERN BioseqPtr GetBioseqGivenSeqLoc (SeqLocPtr slp, Uint2 entityID);
NLM_EXTERN BioseqPtr GetBioseqGivenIDs (Uint2 entityID, Uint2 itemID, Uint2 itemtype);
NLM_EXTERN Uint2 GetItemIDGivenPointer (Uint2 entityID, Uint2 itemtype, Pointer lookfor);
NLM_EXTERN Uint2 FindFeatFromFeatDefType (Uint2 subtype);
/* UseLocalAsnloadDataAndErrMsg transiently sets paths to asnload, data, and errmsg
if they are packaged in the same directory as the executing program. */
NLM_EXTERN Boolean UseLocalAsnloadDataAndErrMsg (void);
/* GetRidOfLocusInSeqIds strips locus from all feature location and product seqIds */
NLM_EXTERN void GetRidOfLocusInSeqIds (Uint2 entityID, SeqEntryPtr sep);
NLM_EXTERN SeqLocPtr StripLocusFromSeqLoc (SeqLocPtr location);
NLM_EXTERN SeqIdPtr SeqIdStripLocus (SeqIdPtr sip);
/* ConvertPubSrcComDescsToFeats is useful when merging records */
NLM_EXTERN Boolean ConvertPubSrcComDescsToFeats (SeqEntryPtr sep, Boolean pub, Boolean src, Boolean com, Boolean toProts);
NLM_EXTERN void DeleteMultipleTitles (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent);
/* from Colombe */
NLM_EXTERN SeqLocPtr StringSearchInBioseq (SeqIdPtr sip, CharPtr sub);
/*****************************************************************************
*
* SequinEntryList (sep, mydata, mycallback, index, indent)
* traverses all Seq-entry nodes beginning with sep
* calls mycallback () at each node
* Does enter BioseqSets of _class "parts", but ignores the
* parts set itself
*
*****************************************************************************/
NLM_EXTERN Int4 SequinEntryList (SeqEntryPtr sep, Pointer mydata, SeqEntryFunc mycallback, Int4 index, Int2 indent);
#define SequinEntryCount( a ) SequinEntryList( a ,NULL,NULL,0,0)
#define SequinEntryExplore(a,b,c) SequinEntryList(a, b, c, 0L, 0)
/* Phrap reading function, based on sample code supplied by C. Magness, returns a SeqEntry list
of Bioseqs containing SeqGraphs, with individual reads removed and only contigs remaining */
NLM_EXTERN SeqEntryPtr ReadPhrapFile (FILE *fp);
/* SetPhrapContigOrder takes the results of ReadPhrapFile and a string indicating the order
of contigs, and returns a SeqEntryList in the desired order, with all other contigs removed */
NLM_EXTERN SeqEntryPtr SetPhrapContigOrder (SeqEntryPtr head, CharPtr contigs);
/* ReadContigList builds a far segmented bioseq from a table of accessions, starts, stops,
lengths, and (optional) strands. Gaps of a given length (with 0 start and stop) are also
allowed. */
NLM_EXTERN SeqEntryPtr ReadContigList (FILE *fp, Boolean coordinatesOnMaster);
/* ReadAsnFastaOrFlatFile reads object manager-registered ASN.1, FASTA, GenBank, EMBL, GenPept,
Feature table, Restriction table, Contig table, Message response, or saved UID list, with the
option of saving FASTA results as OBJ_FASTA (SimpleSeq) to avoid ID collisions */
NLM_EXTERN Pointer ReadAsnFastaOrFlatFile (FILE *fp, Uint2Ptr datatypeptr, Uint2Ptr entityIDptr,
Boolean forceNuc, Boolean forceProt,
Boolean parseFastaSeqId, Boolean fastaAsSimpleSeq);
/* BasicSeqEntryCleanup cleans up strings, moves gbquals to the appropriate field, and
does several other conversions, all without changing the itemID structure (which would
require reindexing) */
NLM_EXTERN void BasicSeqEntryCleanup (SeqEntryPtr sep);
#ifdef __cplusplus
}
#endif
#undef NLM_EXTERN
#ifdef NLM_EXPORT
#define NLM_EXTERN NLM_EXPORT
#else
#define NLM_EXTERN
#endif
#endif /* ndef _SQNUTILS_ */
|