File: af_d2.h

package info (click to toggle)
seqan2 2.5.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 228,748 kB
  • sloc: cpp: 257,602; ansic: 91,967; python: 8,326; sh: 1,056; xml: 570; makefile: 229; awk: 51; javascript: 21
file content (124 lines) | stat: -rw-r--r-- 5,140 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
// ==========================================================================
//                 SeqAn - The Library for Sequence Analysis
// ==========================================================================
// Copyright (c) 2006-2026, Knut Reinert, FU Berlin
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above copyright
//       notice, this list of conditions and the following disclaimer in the
//       documentation and/or other materials provided with the distribution.
//     * Neither the name of Knut Reinert or the FU Berlin nor the names of
//       its contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
// DAMAGE.
//
// ==========================================================================
// Author: Jonathan Goeke <goeke@molgen.mpg.de>
// ==========================================================================
// This header contains the implementation of the D2 score for alignment free
// sequence comparison (inner product of kmer counts).
//
// These functions can be called with alignmentFreeComparison().
// ==========================================================================

#ifndef SEQAN_INCLUDE_SEQAN_ALIGNMENT_FREE_AF_D2_H_
#define SEQAN_INCLUDE_SEQAN_ALIGNMENT_FREE_AF_D2_H_

namespace seqan2 {

/*
 * _alignmentFreeComparison is called by alignmentFreeComparison() (see alignment_free_comparison.h)
 */
template <typename TStringSet, typename TValue>
void _alignmentFreeComparison(Matrix<TValue, 2> & scoreMatrix,
                              TStringSet const & sequenceSet,
                              AFScore<D2> const & score)
{
    //typedef typename Value<TStringSet>::Type                        TString;
    //typedef typename Value<TString>::Type                           TAlphabet;
    typedef typename Iterator<TStringSet const>::Type               TIteratorSet;
    typedef typename Iterator<StringSet<String<unsigned> > >::Type  TIteratorSetInt;
    //typedef Matrix<TValue, 2>                                       TMatrix;

    unsigned seqNumber = length(sequenceSet);

    // Resize the scoreMatrix
    setLength(scoreMatrix, 0, seqNumber);
    setLength(scoreMatrix, 1, seqNumber);
    resize(scoreMatrix, (TValue) 0);

    StringSet<String<unsigned> > kmerCounts;
    resize(kmerCounts, seqNumber);

    // Count all kmers
    TIteratorSetInt itKmerCounts = begin(kmerCounts);
    TIteratorSet itSeqSet = begin(sequenceSet);

    for (; itSeqSet < end(sequenceSet); ++itSeqSet)
    {
        countKmers(value(itKmerCounts), value(itSeqSet), score.kmerSize);
        ++itKmerCounts;
    }
    if(score.verbose)
    {
      std::cout << "\ncounted words";
    }

    // Calculate all pairwise scores and store them in scoreMatrix
    for (unsigned rowIndex = 0; rowIndex < seqNumber; ++rowIndex)
    {
        if(score.verbose)
        {
            std::cout << "\nSequence number " << rowIndex;
        }
        for (unsigned colIndex = rowIndex; colIndex < seqNumber; ++colIndex)
        {
            _alignmentFreeCompareCounts(value(scoreMatrix, rowIndex, colIndex), kmerCounts[rowIndex], kmerCounts[colIndex], score);
            value(scoreMatrix, colIndex, rowIndex) = value(scoreMatrix, rowIndex, colIndex);  // Copy symmetric entries
        }
    }

}

/*
 * Calculate pairwise score given the counts of all kmers
 */
template <typename TValue>
void
_alignmentFreeCompareCounts(TValue & result,
                            String<unsigned> const & kmerCounts1,
                            String<unsigned> const & kmerCounts2,
                            AFScore<D2> const & /*score*/)
{
    typedef typename Iterator<String<unsigned> const>::Type TIteratorInt;

    TIteratorInt it1 = begin(kmerCounts1);
    TIteratorInt it2 = begin(kmerCounts2);

    result = 0;
    for (; it1 < end(kmerCounts1); ++it1)
    {
        result += value(it1) * value(it2);
        ++it2;
    }
}

}  // namespace seqan2

#endif  // SEQAN_INCLUDE_SEQAN_ALIGNMENT_FREE_AF_D2_H_