File: match.m

package info (click to toggle)
mercury 0.9-1
  • links: PTS
  • area: main
  • in suites: potato
  • size: 18,488 kB
  • ctags: 9,800
  • sloc: objc: 146,680; ansic: 51,418; sh: 6,436; lisp: 1,567; cpp: 1,040; perl: 854; makefile: 450; asm: 232; awk: 203; exp: 32; fortran: 3; csh: 1
file content (152 lines) | stat: -rw-r--r-- 4,875 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
%-----------------------------------------------------------------------------%
% Copyright (C) 1998 The University of Melbourne.
% This file may only be copied under the terms of the GNU General
% Public License - see the file COPYING in the Mercury distribution.
%-----------------------------------------------------------------------------%

% Main author: bromage

% This module contains code to match common lines before diffing, based on
% the command-line options presented.  The important command-line options
% are --ignore-case, --ignore-all-space and --ignore-space-change.

% The output of build_matches is two arrays of integers, where any two
% lines are assigned the same integer iff they are identical (modulo case,
% space and/or space change depending on the command line options).  An
% added benefit of doing this here is that the diff algorithm (myers.m)
% only has to compare integers instead of strings.

% TO DO: We should collapse sequences of lines which only appear in one
%        file and pretend the whole sequence is just one line.  (GNU
%        diff does the same thing a slightly different way, but this
%        approach seems a bit more Mercury-esque.)  Since Myers'
%	 algorithm runs in O(ND) time, and performing this pre-filtering
%	 here would reduce the value of D (by quite a lot in real-world
%	 cases), things should speed up.

%-----------------------------------------------------------------------------%

:- module match.

:- interface.
:- import_module file, io, array.

:- pred build_matches(file :: in, file :: in,
		array(int) :: out, array(int) :: out,
		io__state :: di, io__state :: uo) is det.

%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%

:- implementation.
:- import_module globals, options.
:- import_module bool, list, int, std_util, string, char, map, require.

	% The line-matching options in effect, bundled into a single value
	% so that they can be handed around as one argument.  The first
	% field is derived from the other three: build_matches sets it to
	% the negation of their disjunction, and build_matches_for_file
	% uses it to skip normalisation of the line altogether.
:- type match_options
	--->	match_options(
			bool,		% yes iff none of the options below is set
			bool,		% --ignore-case
			bool,		% --ignore-all-space
			bool		% --ignore-space-change
		).

build_matches(File1, File2, FileX, FileY) -->
	globals__io_lookup_bool_option(ignore_case, IgnCase),
	globals__io_lookup_bool_option(ignore_all_space, IgnAllSpc),
	globals__io_lookup_bool_option(ignore_space_change, IgnSpcChg),
	{
		bool__or_list([IgnCase, IgnAllSpc, IgnSpcChg], AnyOpts),
		bool__not(AnyOpts, NoOpts),
		Opts = match_options(NoOpts, IgnCase, IgnAllSpc, IgnSpcChg),
		map__init(MatchMap0),
		file__get_numlines(File1, SizeX),
		array__init(SizeX, -1, FileX0),
		build_matches_for_file(Opts, File1, SizeX - 1, MatchMap0,
			MatchMap1, 0, ID1, FileX0, FileX),
		file__get_numlines(File2, SizeY),
		array__init(SizeY, -1, FileY0),
		build_matches_for_file(Opts, File2, SizeY - 1, MatchMap1, _,
			ID1, _, FileY0, FileY)
	}.

:- pred build_matches_for_file(match_options, file, int,
	map(string, int), map(string, int), int, int, array(int), array(int)).
:- mode build_matches_for_file(in, in, in, in, out, in, out,
	array_di, array_uo) is det.

build_matches_for_file(Opts, OrigFile, I, MatchMap0, MatchMap, ID0, ID,
		File0, File) :-
	( I < 0 ->
		MatchMap = MatchMap0,
		ID = ID0,
		File = File0
	;
		( file__get_line(OrigFile, I, Line0) ->
			Line1 = Line0
		;
			error("build_matches_for_file")
		),
		Opts = match_options(NoOpts, IgnCase, IgnAllSpc, IgnSpcChg),
		( NoOpts = yes ->
			Line = Line1
		;
			string__to_char_list(Line1, Chars0),
			normalise_line(no, IgnCase, IgnAllSpc, IgnSpcChg,
				Chars0, Chars1),
			string__from_char_list(Chars1, Line)
		),
		( map__search(MatchMap0, Line, MaybeID) ->
			array__set(File0, I, MaybeID, File1),
			MatchMap1 = MatchMap0,
			ID1 = ID0
		;
			array__set(File0, I, ID0, File1),
			map__det_insert(MatchMap0, Line, ID0, MatchMap1),
			ID1 is ID0 + 1
		),
		build_matches_for_file(Opts, OrigFile, I - 1, MatchMap1,
			MatchMap, ID1, ID, File1, File)
	).

:- pred normalise_line(bool, bool, bool, bool, list(char), list(char)).
:- mode normalise_line(in, in, in, in, in, out) is det.

normalise_line(_, _, _, _, [], []).
normalise_line(LastSpace, IgnCase, IgnAllSpc, IgnSpcChg, [C0 | Cs0], Cs) :-
	( IgnCase = yes ->
		char__to_lower(C0, C)
	;
		C = C0
	),
	(
		char__is_whitespace(C),
		(
			IgnAllSpc = yes
		->
			normalise_line(LastSpace, IgnCase, IgnAllSpc, IgnSpcChg,
					Cs0, CsX)
		;
			IgnSpcChg = yes
		->
			( LastSpace = yes ->
				normalise_line(yes, IgnCase, IgnAllSpc,
						IgnSpcChg, Cs0, CsX)
			;
				normalise_line(yes, IgnCase, IgnAllSpc,
						IgnSpcChg, Cs0, Cs1),
				CsX = [' ' | Cs1]
				
			)
		;
			fail
		)
	->
		Cs = CsX
	;
		normalise_line(no, IgnCase, IgnAllSpc, IgnSpcChg,
				Cs0, Cs1),
		Cs = [C | Cs1]
	).

%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%