1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185
|
/**********************************************************************
* File: cluster.h
* Author: Kevin Howe
* Copyright (C) Genome Research Limited, 2002-
*-------------------------------------------------------------------
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*-------------------------------------------------------------------
* NOTES:
* A DistanceMatrix should always be part of a Cluster
* It makes no sense to have a set of pairwise distances without the
* associated sequences (even if we just store their names)
**********************************************************************/
#ifndef _CLUSTER
#define _CLUSTER
#include "sequence.h"
#include "distancemat.h"
/******************* structure definitions ****************************/
/*
 * A Cluster ties a set of Sequences to a consensus Sequence and a
 * DistanceMatrix. Per the file header notes, a DistanceMatrix is
 * always meant to live inside a Cluster so that the pairwise
 * distances stay associated with their sequences.
 */
struct Cluster {
/* presumably the number of entries in members -- TODO confirm in cluster.c */
unsigned int clustersize;
/* array of pointers to the Sequences held by this Cluster */
struct Sequence **members;
/* consensus Sequence for this Cluster; how it is derived is not
   visible from this header */
struct Sequence *consensus;
/* DistanceMatrix associated with this Cluster.
   NOTE(review): the file's commentary describes the matrix field as
   not currently used -- confirm whether it may be NULL */
struct DistanceMatrix *matrix;
};
/*
Clusters contain groups of identical sequences. I intend to investigate
methods where clusters contain groups of similar (not necessarily
identical) sequences. The DistanceMatrix field, although not currently
used, will allow for the building of trees from these clusters.
*/
/*
 * A ClusterGroup is a collection of Clusters together with a
 * DistanceMatrix.
 */
struct ClusterGroup {
/* presumably the number of entries in clusters -- TODO confirm in cluster.c */
unsigned int numclusters;
/* array of pointers to the Clusters in this group */
struct Cluster **clusters;
/* DistanceMatrix associated with the group.
   NOTE(review): presumably holds distances between the Clusters
   (e.g. between their consensus sequences) -- confirm against the
   implementation */
struct DistanceMatrix *matrix;
};
/********************** function prototypes ***************************/
/*********************************************************************
 FUNCTION: alignment_to_ClusterGroup
 DESCRIPTION:
   Builds a ClusterGroup from the given Alignment. If the second
   argument is true, identical sequences in the alignment are merged.
   If bootstrapping is required, the consensus alignment can be
   extracted from the resulting ClusterGroup using
   consensus_aln_from_ClusterGroup.
 RETURNS: struct ClusterGroup *
 ARGS:
   1. A source Alignment pointer
   2. A boolean specifying whether duplicate sequences should be
      merged.
 NOTES:
   An earlier version of this comment referred to
   get_consensus_from_ClusterGroup; this header declares
   consensus_aln_from_ClusterGroup, which is presumably the function
   that was meant.
*********************************************************************/
struct ClusterGroup *alignment_to_ClusterGroup( struct Alignment *,unsigned int);
/*********************************************************************
 FUNCTION: clone_Cluster
 DESCRIPTION:
   Makes a complete copy of the given Cluster and returns it.
 RETURNS: struct Cluster * (the copy)
 ARGS:
   struct Cluster * - the Cluster to copy
 NOTES:
   NOTE(review): how deep the copy goes (members, consensus, matrix)
   is not visible from this header -- confirm in the implementation.
*********************************************************************/
struct Cluster *clone_Cluster( struct Cluster *);
/*********************************************************************
 FUNCTION: consensus_aln_from_ClusterGroup
 DESCRIPTION:
   Creates an alignment by taking the consensus sequence from each
   Cluster in the given ClusterGroup.
 RETURNS: struct Alignment *
 ARGS:
   struct ClusterGroup * - the ClusterGroup whose consensus sequences
   are used
 NOTES:
*********************************************************************/
struct Alignment *consensus_aln_from_ClusterGroup( struct ClusterGroup *);
/*********************************************************************
 FUNCTION: empty_Cluster
 DESCRIPTION:
   Handles the simple task of allocating the space for a new Cluster.
 RETURNS: struct Cluster *
 ARGS:
   None
 NOTES:
   NOTE(review): the initial values of the new Cluster's fields are
   not visible from this header -- confirm in the implementation.
*********************************************************************/
struct Cluster *empty_Cluster( void );
/*********************************************************************
 FUNCTION: empty_ClusterGroup
 DESCRIPTION:
   Handles the simple task of allocating the space for a new
   ClusterGroup.
 RETURNS: struct ClusterGroup *
 ARGS:
   None
 NOTES:
   NOTE(review): the initial values of the new ClusterGroup's fields
   are not visible from this header -- confirm in the implementation.
*********************************************************************/
struct ClusterGroup *empty_ClusterGroup( void );
/*********************************************************************
 FUNCTION: free_Cluster
 DESCRIPTION:
   Releases the memory used by this Cluster and all of its members.
 RETURNS: A null pointer (declared as void *)
 ARGS:
   struct Cluster * - the Cluster to release
 NOTES:
   The Cluster must not be used after this call.
   NOTE(review): the null return presumably exists so that callers
   can reset their own pointer with the result -- confirm against
   call sites.
*********************************************************************/
void *free_Cluster( struct Cluster *);
/*********************************************************************
 FUNCTION: free_ClusterGroup
 DESCRIPTION:
   Releases the memory used by this ClusterGroup and all of its
   members.
 RETURNS: A null pointer (declared as void *)
 ARGS:
   struct ClusterGroup * - the ClusterGroup to release
 NOTES:
   The ClusterGroup must not be used after this call.
   NOTE(review): the null return presumably exists so that callers
   can reset their own pointer with the result -- confirm against
   call sites.
*********************************************************************/
void *free_ClusterGroup( struct ClusterGroup *);
/*********************************************************************
 FUNCTION: merge_Cluster
 DESCRIPTION:
   Adds the sequences in the second argument to the first argument,
   then frees the second argument and returns the result of that
   freeing (hopefully NULL).
 RETURNS: The result of freeing the second cluster (NULL if all is well)
 ARGS:
   1. Destination Cluster *
   2. Source Cluster *
 NOTES:
   The source Cluster is freed by this call, so the caller must not
   use it afterwards.
*********************************************************************/
void *merge_Cluster( struct Cluster *, struct Cluster *);
/*********************************************************************
 FUNCTION: single_Sequence_Cluster
 DESCRIPTION:
   Handles the simple task of allocating the space for a new Cluster
   that holds the single given Sequence.
 RETURNS: struct Cluster *
 ARGS:
   A pointer to a Sequence, or NULL for an empty Cluster
 NOTES:
   NOTE(review): whether the Cluster stores the given pointer or a
   copy of the Sequence is not visible from this header -- confirm
   in the implementation.
*********************************************************************/
struct Cluster *single_Sequence_Cluster( struct Sequence *);
/*********************************************************************
 FUNCTION: single_Cluster_ClusterGroup
 DESCRIPTION:
   Takes the given Cluster and very simply makes a single-Cluster
   ClusterGroup from it.
 RETURNS: struct ClusterGroup *
 ARGS:
   A pointer to a Cluster
 NOTES:
   NOTE(review): whether the ClusterGroup stores the given pointer or
   a copy of the Cluster is not visible from this header -- confirm
   in the implementation.
*********************************************************************/
struct ClusterGroup *single_Cluster_ClusterGroup( struct Cluster *);
#endif
|