File: find_palindromes.c

package info (click to toggle)
r-bioc-biostrings 2.42.1-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 14,652 kB
  • ctags: 721
  • sloc: ansic: 10,262; sh: 11; makefile: 2
file content (122 lines) | stat: -rw-r--r-- 2,855 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#include "Biostrings.h"
#include "XVector_interface.h"
#include "IRanges_interface.h"

#include <stdio.h>


static int is_match(char c1, char c2, const int *lkup, int lkup_len)
{
	int key, val;

	if (lkup != NULL) {
		key = (unsigned char) c1;
		if (key >= lkup_len || (val = lkup[key]) == NA_INTEGER)
			return 0;
		c1 = (char) val;
	}
	return c1 == c2;
}

static void get_find_palindromes_at(const char *x, int x_len,
	int i1, int i2, int max_loop_len1, int min_arm_len, int max_nmis,
	const int *lkup, int lkup_len)
{
	int arm_len, valid_indices;
	char c1, c2;

	arm_len = 0;
	while (((valid_indices = i1 >= 0 && i2 < x_len) &&
                i2 - i1 <= max_loop_len1) || arm_len != 0)
	{
		if (valid_indices) {
			c1 = x[i1];
			c2 = x[i2];
			if (is_match(c1, c2, lkup, lkup_len) ||
			    max_nmis-- > 0) {
				arm_len++;
				goto next;
			}
		}
		if (arm_len >= min_arm_len)
			_report_match(i1 + 2, i2 - i1 - 1);
		arm_len = 0;
	next:
		i1--;
		i2++;
	}
	return;
}

static int get_palindrome_arm_length(const char *x, int x_len, int max_nmis,
	const int *lkup, int lkup_len)
{
	int i1, i2;
	char c1, c2;

	for (i1 = 0, i2 = x_len - 1; i1 < i2; i1++, i2--) {
		c1 = x[i1];
		c2 = x[i2];
		if (!(is_match(c1, c2, lkup, lkup_len) ||
		      max_nmis-- > 0))
			break;
	}
	return i1;
}

/* --- .Call ENTRY POINT --- */
SEXP find_palindromes(SEXP x, SEXP min_armlength, SEXP max_looplength,
		      SEXP max_mismatch, SEXP L2R_lkup)
{
	Chars_holder x_holder;
	int x_len, min_arm_len, max_loop_len1, max_nmis, lkup_len, n;
	const int *lkup;

	x_holder = hold_XRaw(x);
	x_len = x_holder.length;
	min_arm_len = INTEGER(min_armlength)[0];
	max_loop_len1 = INTEGER(max_looplength)[0] + 1;
	max_nmis = INTEGER(max_mismatch)[0];
	if (L2R_lkup == R_NilValue) {
		lkup = NULL;
		lkup_len = 0;
	} else {
		lkup = INTEGER(L2R_lkup);
		lkup_len = LENGTH(L2R_lkup);
	}
	_init_match_reporting("MATCHES_AS_RANGES", 1);
	for (n = 0; n < x_len; n++) {
		/* Find palindromes centered on n. */
		get_find_palindromes_at(x_holder.ptr, x_len, n - 1, n + 1,
					max_loop_len1, min_arm_len, max_nmis,
					lkup, lkup_len);
		/* Find palindromes centered on n + 0.5. */
		get_find_palindromes_at(x_holder.ptr, x_len, n, n + 1,
					max_loop_len1, min_arm_len, max_nmis,
					lkup, lkup_len);
	}
	return _reported_matches_asSEXP();
}

/* --- .Call ENTRY POINT --- */
SEXP palindrome_arm_length(SEXP x, SEXP max_mismatch, SEXP L2R_lkup)
{
	Chars_holder x_holder;
	int x_len, max_nmis, lkup_len, arm_len;
	const int *lkup;

	x_holder = hold_XRaw(x);
	x_len = x_holder.length;
	max_nmis = INTEGER(max_mismatch)[0];
	if (L2R_lkup == R_NilValue) {
		lkup = NULL;
		lkup_len = 0;
	} else {
		lkup = INTEGER(L2R_lkup);
		lkup_len = LENGTH(L2R_lkup);
	}
	arm_len = get_palindrome_arm_length(x_holder.ptr, x_len, max_nmis,
					    lkup, lkup_len);
	return ScalarInteger(arm_len);
}