File: namefreq.ll

package info (click to toggle)
lifelines 3.0.50-2
  • links: PTS
  • area: main
  • in suites: etch-m68k
  • size: 11,140 kB
  • ctags: 6,517
  • sloc: ansic: 57,468; xml: 8,014; sh: 4,255; makefile: 848; yacc: 601; perl: 170; sed: 16
file content (135 lines) | stat: -rw-r--r-- 3,550 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
/*
 * @progname       namefreq.ll
 * @version        3.0
 * @author         Chandler
 * @category       
 * @output         Text
 * @description    

This report counts occurrences of all first (given) names in the
database.  Individuals with only surnames are not counted.  If the
surname is listed first, the next word is taken as the given name.

namefreq

Tabulate frequency of first names in database.

Version 1 - 1993 Jun 16 - John F. Chandler
Version 2 - 1993 Jun 18 (sort output by frequency)
Version 3 - 1995 Mar 8  (requires LL 3.0 or higher)
			(Uses Jim Eggert's Quicksort routine)

The output file is normally sorted in order of decreasing frequency,
but the sort order can be altered by changing func "compare", e.g.,
have it simply return strcmp(astring,bstring) to sort the names in
alphabetical order.

This program works only with LifeLines.

*/
global(name_counts)	/* table: given name -> number of occurrences; filled by main, read by compare when sorting by frequency */

/*
   compare: ordering function used by the sort routines
   Input:  astring, bstring - two names, looked up in global "name_counts"
   Output: negative, zero, or positive number (same convention as strcmp);
	   the name with the larger count sorts first, and names with
	   equal counts are ordered by strcmp
   For a purely alphabetical listing, reduce the body to
	return(strcmp(astring,bstring))
*/
func compare(astring,bstring) {
	set(acount,lookup(name_counts,astring))
	set(bcount,lookup(name_counts,bstring))
	/* positive when bstring is the more frequent name */
	set(delta,sub(bcount,acount))
	if(ne(delta,0)) {
		return(delta)
	}
	/* same frequency: break the tie alphabetically */
	return(strcmp(astring,bstring))
}

/*
   quicksort: build a sorted index over a list without moving its elements
   Input:  alist  - list whose elements are to be put in order
   Output: ilist  - filled with the positions 1..length(alist), then
		    permuted so that walking it visits "alist" in sorted order
   Needed: compare- external function of two arguments returning a
		    negative, zero, or positive number (strcmp convention)
*/
proc quicksort(alist,ilist) {
    set(count,length(alist))
    /* start from the identity permutation, filling from the top down */
    set(slot,count)
    while(gt(slot,0)) {
	setel(ilist,slot,slot)
	set(slot,sub(slot,1))
    }
    call qsort(alist,ilist,1,count)
}

/*
   qsort: recursive core of quicksort
   Sorts the slice left..right of "ilist" by the "alist" elements it
   points to.  getpivot returning 0 means there is nothing left to do
   for this slice, which ends the recursion.
*/
proc qsort(alist,ilist,left,right) {
    set(pslot,getpivot(alist,ilist,left,right))
    if(ne(pslot,0)) {
	/* the pivot is passed on by value, not by position */
	set(pkey,getel(alist,getel(ilist,pslot)))
	set(split,partition(alist,ilist,left,right,pkey))
	call qsort(alist,ilist,left,sub(split,1))
	call qsort(alist,ilist,split,right)
    }
}

/*
   partition: rearrange the slice left..right of "ilist" around a pivot
   Input:  alist  - list of values being sorted (only read here)
	   ilist  - index list into "alist"; its entries are swapped in place
	   left, right - bounds of the slice of "ilist" to work on
	   pivot  - value (not a position) that elements are compared against
   Output: the final value of "left"; qsort recurses on the slots before
	   it and on the slots from it onward
   Needed: compare- external comparison function (strcmp convention)
*/
func partition(alist,ilist,left,right,pivot) {
    while(1) {
	/* exchange the index entries under the two cursors; on the first
	   pass this swaps the two end entries before any scanning */
	set(tmp,getel(ilist,left))
	setel(ilist,left,getel(ilist,right))
	setel(ilist,right,tmp)
	/* advance left past entries that sort before the pivot */
	while(lt(compare(getel(alist,getel(ilist,left)),pivot),0)) {
	    incr(left)
	}
	/* retreat right past entries that sort at or after the pivot */
	while(ge(compare(getel(alist,getel(ilist,right)),pivot),0)) {
	    decr(right)
	}
	/* the cursors have crossed: the slice is split */
	if(gt(left,right)) { break() }
    }
    return(left)
}

/*
   getpivot: choose a pivot position for the slice left..right of "ilist"
   Scans forward for the first element that differs from the one at
   "left" and returns the position of whichever of the two sorts later.
   Output: a position in "ilist", or 0 when every element in the slice
	   compares equal to the first (including slices of fewer than
	   two entries)
   Needed: compare- external comparison function (strcmp convention)
*/
func getpivot(alist,ilist,left,right) {
    set(firstkey,getel(alist,getel(ilist,left)))
    set(probe,add(left,1))
    while(le(probe,right)) {
	set(order,compare(getel(alist,getel(ilist,probe)),firstkey))
	if(ne(order,0)) {
	    /* found a differing element: the later-sorting one is the pivot */
	    if(gt(order,0)) { return(probe) }
	    return(left)
	}
	set(probe,add(probe,1))
    }
    return(0)
}

/*
   main: tally the given name of every individual and write the report
   Pass 1 walks all individuals, splits each name with extractnames and
   counts the chosen piece in global "name_counts"; each distinct name is
   also queued once in "names".  Pass 2 sorts "names" through an index
   list and writes one line per name with its count.
*/
proc main ()
{
	table(name_counts)
	list(pieces)
	list(names)
	list(order)

	forindi (person, seq) {
		/* emit a "." via print() for every 20th individual */
		if(eq(mod(seq,20),0)) { print(".") }
		extractnames(inode(person), pieces, npieces, surpos)
		/* surname in first position: take the piece after it */
		if(eq(surpos,1)) {
			set(gpos,2)
		} else {
			set(gpos,1)
		}
		set(given, save(getel(pieces, gpos)))
		/* count only when some piece other than the surname exists */
		if(or(gt(surpos,1), gt(npieces,surpos))) {
			set(tally, lookup(name_counts, given))
			if(tally) {
				set(tally, add(tally, 1))
			} else {
				/* first sighting: remember the name for sorting */
				enqueue(names, given)
				set(tally, 1)
			}
			insert(name_counts, given, tally)
		}
	}
	"Frequency of given names (first only) in the database\n\n"
	"Name              Occurrences\n\n"

	call quicksort(names,order)
	forlist(order, slot, rank) {
		set(given, getel(names,slot))
		set(tally, lookup(name_counts, given))
		set(digits, d(tally))
		given
		/* position so that the count ends in column 24 */
		col(sub(25, strlen(digits)))
		digits "\n"
	}
}