File: usort.cpp

package info (click to toggle)
mothur 1.48.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 13,692 kB
  • sloc: cpp: 161,866; makefile: 122; sh: 31
file content (86 lines) | stat: -rwxr-xr-x 1,818 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#if	UCHIMES

#include "myutils.h"
#include "seqdb.h"
#include "seq.h"
#include "alpha.h"

// Not defined in this file. USort() below calls it with the per-target score
// vector as Values and its own output vector as Order.
void SortDescending(const vector<float> &Values, vector<unsigned> &Order);

// Table indexed by word value. SetQuery() zeroes it and then stores 1 at the
// index of every word found in the current query. It is allocated on the first
// SetQuery() call and reused by later calls.
static byte *g_QueryHasWord;
// Number of entries in g_QueryHasWord. SetQuery() computes it when it
// allocates the table: it starts at 4 and is multiplied by 4 a further
// (opt_w - 1) times, i.e. 4^opt_w for opt_w >= 1.
static unsigned g_WordCount;

// Encodes the opt_w characters starting at Seq as one unsigned integer.
// Each character is translated through g_CharToLetterNucleo and the codes are
// accumulated base 4, first character most significant.
// NOTE(review): assumes every table entry is in 0..3 so the result stays below
// 4^opt_w -- confirm against the definition of g_CharToLetterNucleo.
// The caller must guarantee that at least opt_w bytes are readable at Seq.
unsigned GetWord(const byte *Seq)
	{
	unsigned Packed = 0;
	unsigned Pos = 0;
	while (Pos < opt_w)
		{
		const unsigned Code = g_CharToLetterNucleo[Seq[Pos]];
		Packed = 4*Packed + Code;
		++Pos;
		}
	return Packed;
	}

static void SetQuery(const SeqData &Query)
	{
	if (g_QueryHasWord == 0)
		{
		g_WordCount = 4;
		for (unsigned i = 1; i < opt_w; ++i)
			g_WordCount *= 4;

		g_QueryHasWord = myalloc(byte, g_WordCount);
		}

	memset(g_QueryHasWord, 0, g_WordCount);

	if (Query.L <= opt_w)
		return;

	const unsigned L = Query.L - opt_w + 1;
	const byte *Seq = Query.Seq;
	for (unsigned i = 0; i < L; ++i)
		{
		unsigned Word = GetWord(Seq++);
		g_QueryHasWord[Word] = 1;
		}
	}

static unsigned GetUniqueWordsInCommon(const SeqData &Target)
	{
	if (Target.L <= opt_w)
		return 0;

	unsigned Count = 0;
	const unsigned L = Target.L - opt_w + 1;
	const byte *Seq = Target.Seq;
	for (unsigned i = 0; i < L; ++i)
		{
		unsigned Word = GetWord(Seq++);
		if (g_QueryHasWord[Word])
			++Count;
		}
	return Count;
	}

void USort(const SeqData &Query, const SeqDB &DB, vector<float> &WordCounts, 
  vector<unsigned> &Order)
	{
	WordCounts.clear();
	Order.clear();

	SetQuery(Query);

	const unsigned SeqCount = DB.GetSeqCount();
	for (unsigned SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)
		{
		SeqData Target;
		DB.GetSeqData(SeqIndex, Target);
		float WordCount = (float) GetUniqueWordsInCommon(Target);
		WordCounts.push_back(WordCount);
		}
	SortDescending(WordCounts, Order);
	}

#endif // UCHIMES