File: TextPatternTest.java

package info (click to toggle)
libdsiutils-java 2.7.3%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 4,012 kB
  • sloc: java: 34,128; xml: 531; makefile: 51; sh: 47
file content (95 lines) | stat: -rw-r--r-- 3,358 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
/*
 * DSI utilities
 *
 * Copyright (C) 2010-2023 Sebastiano Vigna
 *
 * This program and the accompanying materials are made available under the
 * terms of the GNU Lesser General Public License v2.1 or later,
 * which is available at
 * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
 * or the Apache Software License 2.0, which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
 */

package it.unimi.dsi.util;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.nio.charset.StandardCharsets;

import org.junit.Test;

import it.unimi.dsi.fastutil.chars.CharArrayList;

/**
 * Unit tests for {@link TextPattern}.
 *
 * <p>Each scenario runs the same pattern against four representations of the same text
 * (a byte array, a {@link String}, a {@code char} array and a {@link CharArrayList})
 * and expects identical results from all of them.
 */
public class TextPatternTest {
	/**
	 * Searches for the one-character pattern {@code "A"}, which sits at index 1 of every input.
	 *
	 * <p>The three-argument calls with {@code (0, 1)} are expected to return -1; presumably
	 * those arguments bound the search to a prefix that ends before the match (confirm
	 * against {@code TextPattern.search}). The unbounded calls are expected to return 1.
	 */
	@Test
	public void testSingleCharacterSearch() {
		final byte[] b = new byte[] { 1, (byte)'A', 2 };
		final String s = " A ";
		final TextPattern pattern = new TextPattern("A");

		assertEquals(-1, pattern.search(b, 0, 1));
		assertEquals(-1, pattern.search(s, 0, 1));
		assertEquals(-1, pattern.search(s.toCharArray(), 0, 1));
		assertEquals(-1, pattern.search(CharArrayList.wrap(s.toCharArray()), 0, 1));

		assertEquals(1, pattern.search(b));
		assertEquals(1, pattern.search(s));
		assertEquals(1, pattern.search(s.toCharArray()));
		assertEquals(1, pattern.search(CharArrayList.wrap(s.toCharArray())));
	}

	/**
	 * Searches for the two-character pattern {@code "AB"} (starting at index 1 of every input),
	 * then checks that a pattern built with {@link TextPattern#CASE_INSENSITIVE} finds the
	 * {@code <meta} tag of the HTML fixture whether the pattern is written in lower or upper case.
	 */
	@Test
	public void testSearch() {
		final byte[] b = new byte[] { 1, (byte)'A', (byte)'B', 2 };
		final String s = " AB ";
		final TextPattern pattern = new TextPattern("AB");

		// With arguments (0, 2) the match is expected to be missed in every representation.
		assertEquals(-1, pattern.search(b, 0, 2));
		assertEquals(-1, pattern.search(s, 0, 2));
		assertEquals(-1, pattern.search(s.toCharArray(), 0, 2));
		assertEquals(-1, pattern.search(CharArrayList.wrap(s.toCharArray()), 0, 2));

		assertEquals(1, pattern.search(b));
		assertEquals(1, pattern.search(s));
		assertEquals(1, pattern.search(s.toCharArray()));
		assertEquals(1, pattern.search(CharArrayList.wrap(s.toCharArray())));

		// The fixture contains only ASCII characters. Encoding it with an explicit single-byte
		// charset keeps every byte equal to its character's code, instead of depending on the
		// platform default encoding as a bare getBytes() would.
		final byte[] document = documentMetaIsutf_8.getBytes(StandardCharsets.ISO_8859_1);

		final TextPattern lowerCaseMeta = new TextPattern("<meta", TextPattern.CASE_INSENSITIVE);
		assertTrue(lowerCaseMeta.search(document) != -1);

		final TextPattern upperCaseMeta = new TextPattern("<META", TextPattern.CASE_INSENSITIVE);
		assertTrue(upperCaseMeta.search(document) != -1);
	}

	/**
	 * HTML fixture with mixed-case tag and attribute names. Its only {@code <meta} tag is
	 * written in lower case and declares {@code charset=utf-8}.
	 */
	private static final String documentMetaIsutf_8 =
		"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Strict//EN\" \"http://www.w3.org/TR/REC-html40/strict.dtd\">\n" +
		"\n" +
		"<html>\n" +
		"<head>\n" +
		"<style type=\"text/css\">\n" +
		"@import \"/css/content.php\";\n" +
		"@import \"/css/layout.php\";\n" +
		"</style>" +
		"<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\" >" +
		"<title id=\"mamma\" special-type=\"li turchi\">Sebastiano Vigna</title>\n" +
		"</HEAD>\n" +
		"<boDY>\n" +
		"<div id=header>:::Sebastiano Vigna</div>" +
		"<div id=left>\n" +
		"<ul id=\"left-nav\">" +
		"<br>Bye bye baby\n" +
		"<img SRc=\"but I'm ignoring this one\"> and not this one\n" +
		"\n\n even whitespace counts \n\n" +
		"<frame SRC=\"http://www.GOOGLE.com/\">The frame source counts</frame>\n" +
		"<iframe SRC=\"http://www.GOOGLE.com/\">And so does the iframe source</iframe>\n" +
		"</body>\n" +
		"</html>";
}