File: seqcmp_sse2.c

package info (click to toggle)
phylonium 1.7-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 44,340 kB
  • sloc: cpp: 15,701; ansic: 782; makefile: 139; python: 66; sh: 41
file content (59 lines) | stat: -rw-r--r-- 1,411 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
/**
 * SPDX-License-Identifier: GPL-3.0-or-later
 * Copyright 2018 (C) Fabian Klötzl
 */

#include "seqcmp.h"

#include <assert.h>
#include <emmintrin.h>
#include <string.h>

typedef __m128i vec_type;

/**
 * Count how many of the bytes in one vector-sized window are identical in
 * both inputs. Both pointers must have at least sizeof(vec_type) readable
 * bytes. The data is loaded via memcpy, so no particular alignment is
 * required of either pointer.
 */
static inline size_t count_equal_lanes(const char *lhs, const char *rhs)
{
	vec_type lhs_vec;
	vec_type rhs_vec;
	memcpy(&lhs_vec, lhs, sizeof(lhs_vec));
	memcpy(&rhs_vec, rhs, sizeof(rhs_vec));

	// Equal bytes become 0xFF lanes; movemask keeps one bit per lane, so
	// the number of set bits is the number of equal bytes.
	const vec_type same = _mm_cmpeq_epi8(lhs_vec, rhs_vec);
	const unsigned int lane_bits = _mm_movemask_epi8(same);

	return (size_t)__builtin_popcount(lane_bits);
}

/**
 * Count the positions at which two byte sequences differ.
 *
 * @param begin  First sequence; must not be NULL.
 * @param other  Second sequence; must not be NULL.
 * @param length Number of bytes to compare in each sequence.
 * @return The number of indices i < length with begin[i] != other[i].
 */
size_t seqcmp_sse2(const char *begin, const char *other, size_t length)
{
	assert(begin != NULL);
	assert(other != NULL);

	const size_t lane_bytes = sizeof(vec_type);
	// The vector loop handles two vectors per round, so it only covers the
	// largest prefix whose size is a multiple of two vectors.
	const size_t pair_bytes = 2 * lane_bytes;
	const size_t simd_bytes = length - length % pair_bytes;

	size_t matches = 0;
	size_t pos = 0;
	while (pos < simd_bytes) {
		matches += count_equal_lanes(begin + pos, other + pos);
		matches += count_equal_lanes(begin + pos + lane_bytes,
									 other + pos + lane_bytes);
		pos += pair_bytes;
	}

	// Every byte seen so far that did not match is a substitution.
	size_t mismatches = pos - matches;

	// Scalar comparison for whatever the vector loop left over.
	while (pos < length) {
		mismatches += (begin[pos] != other[pos]);
		pos++;
	}

	return mismatches;
}