/**********************************************************************
* File: distancemat.h
* Author: Kevin Howe
* Copyright (C) Genome Research Limited, 2002-
*-------------------------------------------------------------------
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*-------------------------------------------------------------------
* NOTES:
* Functions and types for the manipulation of Distance Matrices
**********************************************************************/
#ifndef _DISTANCEMAT
#define _DISTANCEMAT
#include <math.h>
#include "util.h"
#include "align.h"
#include "time.h"
/* NOTE(review): presumably the maximum length of a sequence name handled
   when reading/writing phylip-format files -- confirm against the users
   of this constant in the implementation. */
#define MAX_PHYLIP_NAME_LEN 100
/******************* structure definitions ****************************/
/* The scalar type in which every pairwise distance is stored. */
typedef float Distance;
/* A matrix of pairwise distances. The function documentation in this
   header describes the matrices it produces as bottom-left triangular. */
struct DistanceMatrix {
/* The distance values, addressed through two levels of indirection.
   NOTE(review): presumably one array per row -- confirm against the
   allocator in the implementation. */
Distance **data;
/* NOTE(review): presumably the order of the matrix (the number of
   sequences it covers) -- confirm against the implementation. */
int size;
};
/********************** function prototypes ***************************/
/*********************************************************************
  FUNCTION: calc_DistanceMatrix
  DESCRIPTION:
    Fills in the given distance matrix from the given multiple alignment
  RETURNS: nothing (the result is written into the given DistanceMatrix)
  ARGS:
    mat            The DistanceMatrix to fill in
    aln            A multiple alignment
    use_rand_cols  A boolean indicating whether or not random columns
                   should be used for purposes of bootstrapping
    use_kimura     A boolean indicating whether the Kimura distance
                   adjustment is to be used or not
  NOTES:
    0. The given DistanceMatrix and Alignment should be of the same order.
    1. The matrix produced is in bottom-left triangular format; do not
       access the top-right section.
    2. At the moment, the function calculates distance based on sequence
       identity, using Kimura's function if that option is raised.
    3. If use_rand_cols is true, then the matrix is constructed using
       random sampling of columns, for the purposes of bootstrapping. At
       the moment, the native function 'rand' is used to do this, suitably
       seeded by time (by the caller). This may prove unsatisfactory...
    4. Where no information is available to determine the distance
       between two sequences, a value of twice the maximum observed
       distance is assigned (inspiration from ISMB99 poster by Huson,
       Smith and Warnow).
 *********************************************************************/
void calc_DistanceMatrix( struct DistanceMatrix *mat,
                          struct Alignment *aln,
                          unsigned int use_rand_cols,
                          unsigned int use_kimura );
/*********************************************************************
  FUNCTION: clone_DistanceMatrix
  DESCRIPTION:
    Produces a brand new DistanceMatrix, identical to the source
  RETURNS: struct DistanceMatrix * (the new matrix)
  ARGS:
    source  The source distance matrix
  NOTES:
    1. The matrix produced is in bottom-left triangular format; do not
       access the top-right section.
 *********************************************************************/
struct DistanceMatrix *clone_DistanceMatrix( struct DistanceMatrix *source );
/*********************************************************************
  FUNCTION: empty_DistanceMatrix
  DESCRIPTION:
    Produces an empty distance matrix of the given size, uninitialised
  RETURNS: struct DistanceMatrix * (the new matrix)
  ARGS:
    size  The size of the matrix to be created
  NOTES:
    1. The matrix produced is in bottom-left triangular format; do not
       access the top-right section.
 *********************************************************************/
struct DistanceMatrix *empty_DistanceMatrix( unsigned int size );
/*********************************************************************
  FUNCTION: free_DistanceMatrix
  DESCRIPTION:
    Frees the memory for the given distance matrix
  RETURNS: void *
    NOTE(review): the returned value is not visible from this header;
    presumably NULL, so that callers can reset their pointer with
    mat = free_DistanceMatrix( mat ) -- confirm against the implementation.
  ARGS:
    mat  The distance matrix to free
  NOTES:
 *********************************************************************/
void *free_DistanceMatrix( struct DistanceMatrix *mat );
/**********************************************************************
  FUNCTION: index_DistanceMatrix
  DESCRIPTION:
    Indexes the given distance matrix with the given indices,
    returning the appropriate distance.
  RETURNS: Distance (a float)
  ARGS:
    mat  The distance matrix to look up
    row  The row index
    col  The column index
  NOTES:
    This function is necessary to account for the fact that the distance
    matrix may be implemented as a symmetrical or triangular matrix.
    It therefore abstracts the internals of the distance matrix, at the
    cost of a function call for each lookup (is this wise...?)
 **********************************************************************/
Distance index_DistanceMatrix( struct DistanceMatrix *mat, unsigned int row, unsigned int col );
/*********************************************************************
  FUNCTION: print_DistanceMatrix
  DESCRIPTION:
    Prints the given distance matrix to the given file handle.
  RETURNS: nothing
  ARGS:
    handle  The file handle to print to
    mat     The distance matrix to print
  NOTES:
    A DistanceMatrix does not exist in isolation in practice but as
    part of a Cluster (this is to maintain the tight coupling between
    the matrix and the sequences for which it is expressing the distances).
    Therefore, to read or write a useful distance matrix (for
    compatibility with the phylip package, for example), use
    write_phylip_Cluster.
 *********************************************************************/
void print_DistanceMatrix( FILE *handle, struct DistanceMatrix *mat );
/*********************************************************************
  FUNCTION: read_phylip_DistanceMatrix
  DESCRIPTION:
    This function creates a DistanceMatrix from the given input file.
    It also creates a dummy alignment (sequences with just names) and
    puts it in the given Alignment pointer.
  RETURNS: struct DistanceMatrix * (the matrix read from the file)
  ARGS:
    handle   A file handle to read from
    aln_loc  A pointer to an Alignment pointer, through which the dummy
             alignment is handed back
  NOTES:
    The file is assumed to be in the distance matrix file format used
    by the phylip package:
        4
        Name_1 0.0000 0.6776 0.6786 0.2342
        Name_2 0.6776 0.0000 0.1111 0.9999
        Name_3 0.6786 0.1111 0.0000 0.4444
        Name_4 0.2342 0.9999 0.4444 0.0000
 *********************************************************************/
struct DistanceMatrix *read_phylip_DistanceMatrix( FILE *handle, struct Alignment **aln_loc );
/*********************************************************************
  FUNCTION: write_phylip_DistanceMatrix
  DESCRIPTION:
    This function takes the given DistanceMatrix and writes it to the
    given file handle in phylip format. The alignment is needed for the
    sequence names.
  RETURNS: nothing
  ARGS:
    handle  A file handle to write to
    mat     A DistanceMatrix pointer
    aln     An Alignment pointer (source of the sequence names)
  NOTES:
    The file is written in the distance matrix file format used
    by the phylip package:
        4
        Name_1 0.0000 0.6776 0.6786 0.2342
        Name_2 0.6776 0.0000 0.1111 0.9999
        Name_3 0.6786 0.1111 0.0000 0.4444
        Name_4 0.2342 0.9999 0.4444 0.0000
 *********************************************************************/
void write_phylip_DistanceMatrix( FILE *handle, struct DistanceMatrix *mat, struct Alignment *aln );
#endif