File: make_test.awk

Package: eprover 3.2.5+ds-1

#!/opt/local/bin/gawk -f
#
# Usage: make_test.awk <file>
#
# Copyright 1998 Stephan Schulz, schulz@informatik.tu-muenchen.de
#
# Read a file, split it randomly into 10 subsets, and write the ten
# 90/10 training/test files for cross-validation.
#
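# Example (the input file name below is only an illustration; any plain
# text file works, and lines starting with '#' are ignored):
#
#    gawk -f make_test.awk terms.txt
#
# This creates crossval01 ... crossval10 in the current directory, each
# with a "Training:" section (9 of the subsets) and a "Test:" section
# (the remaining subset), both terminated by a line containing '.'.
# The temporary files __tmprand__ and __tmpsort__ are created and then
# removed in the current directory while the script runs.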

BEGIN{
   srand();                # seed the RNG so each run gives a different split
   i=0;
   file1 = "__tmprand__";  # temp file: input lines prefixed with random keys
   file2 = "__tmpsort__";  # temp file: the same lines in shuffled order
}

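# Skip comment lines in the input.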
/^#/{
  next;
}

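# Tag every data line with a random sort key and buffer the tagged
# lines in file1; sorting on the keys later shuffles the input.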
{
   i++;
   printf("%5.4f :%s\n", rand(), $0) > file1;
}

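# After all input has been read: shuffle the buffered lines and write
# the ten cross-validation files.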
END{
   close(file1);
   # Shuffle: sort on the random keys, then strip the "key :" prefix.
   system("sort " file1 " | cut -d: -f2- > " file2 "; rm " file1);

   total = i;      # total number of data lines read
   i=0;
   # Deal the shuffled lines round-robin into 10 subsets.
   while ((getline tmp < file2) > 0)
   {
      count[i]++;
      set[i] = set[i] tmp "\n";
      i++;
      if(i==10)
      {
         i=0;
      }
   }
   close(file2);
   system("rm " file2);

   # Write the 10 cross-validation files: for split i, the other nine
   # subsets form the training section and subset i the test section.
   for(i=0; i<10; i++)
   {
      if(i<9)
      {
         file = "crossval0" (i+1);
      }
      else
      {
         file = "crossval10";
      }
      print "# Split " (i+1) " - " (total-count[i]) "/" (0+count[i]) " Terms" > file;
      print "Training: \n" > file;
      for(j=0; j<10; j++)
      {
         if(i!=j)
         {
            printf "%s", set[j] > file;
         }
      }
      print ".\n" > file;
      print "Test: \n" > file;
      printf "%s", set[i] > file;
      print "." > file;
      close(file);
   }
}