# split.awk --- do split in awk
#
# Requires getopt() library function.
#
# Arnold Robbins, arnold@skeeve.com, Public Domain
# May 1993
# Revised slightly, May 2014
# Rewritten September 2020
# usage --- print the two-line synopsis on standard error, then exit
#
# Never returns: the program exits with status 1.
# `operands' and `errout' are locals (extra parameters, per awk convention).
function usage(    operands, errout)
{
    errout = "/dev/stderr"

    # The operand part is identical for both invocation forms
    operands = "[-a suffix-len] [file [outname]]"

    printf("usage: split [-l count] %s\n", operands) > errout
    printf(" split [-b N[k|m]] %s\n", operands) > errout

    exit 1
}
# Start-up: establish defaults, apply the command line, and choose the
# name of the first output file.
BEGIN {
    # Defaults, in effect unless parse_arguments() overrides them
    Outfile = "x"           # output file name prefix
    Suffix_length = 2       # letters appended to the prefix
    Byte_count = 0          # 0: not splitting by size
    Line_count = 1000       # lines per output file

    parse_arguments()

    # Must follow argument parsing: it depends on the final Suffix_length
    init_suffix_data()

    Output = Outfile compute_suffix()
}
# parse_arguments --- process options and operands from the command line
#
# Options are read with getopt(), which is not defined in this file; this
# code relies on it setting Optarg (the current option's argument) and
# Optind (the index of the first ARGV element that is not an option).
#
#   -a N        number of suffix letters            -> Suffix_length
#   -b N[k|m]   split by size; `k' multiplies N by 1024, `m' by 1024 * 1024
#                                                   -> Byte_count
#   -l N        split by line count                 -> Line_count
#
# -b and -l are mutually exclusive; each one zeroes the other's counter,
# so the last one given wins.  Every N must be at least 1, otherwise
# usage() is called (which exits): a zero or non-numeric count would
# leave both splitting rules disabled and silently discard the input.
#
# Operands are `[file [outname]]'.  The file name is left in ARGV so that
# awk reads it as input; the options and the output prefix are blanked so
# that awk does not try to open them as input files.  Any operand beyond
# `outname' is an error.
#
# i, c, l and modifier are locals.
function parse_arguments(    i, c, l, modifier)
{
    while ((c = getopt(ARGC, ARGV, "a:b:l:")) != -1) {
        if (c == "a") {
            Suffix_length = Optarg + 0
            if (Suffix_length < 1)
                usage()
        } else if (c == "b") {
            Byte_count = Optarg + 0
            Line_count = 0

            # An optional trailing letter scales the count
            l = length(Optarg)
            modifier = substr(Optarg, l, 1)
            if (modifier == "k")
                Byte_count *= 1024
            else if (modifier == "m")
                Byte_count *= 1024 * 1024

            if (Byte_count < 1)
                usage()
        } else if (c == "l") {
            Line_count = Optarg + 0
            Byte_count = 0

            if (Line_count < 1)
                usage()
        } else
            usage()
    }

    # Clear out options
    for (i = 1; i < Optind; i++)
        ARGV[i] = ""

    # Check for filename.  Compare with "" instead of testing the value's
    # truth: an operand such as "0" looks numeric and would test false.
    if (Optind < ARGC && ARGV[Optind] != "") {
        Optind++

        # Check for different prefix
        if (Optind < ARGC && ARGV[Optind] != "") {
            Outfile = ARGV[Optind]
            ARGV[Optind] = ""

            # Nothing may follow the prefix
            if (++Optind < ARGC)
                usage()
        }
    }
}
# compute_suffix --- return the current suffix and advance to the next one
#
# Suffix_ind[1..Suffix_length] holds, for each position, a 1-based index
# into the alphabet.  The suffix is built from the current indices, then
# the indices are incremented like an odometer (rightmost position first).
#
# If the previous call already handed out the final suffix (all indices
# at 26), an error is printed on standard error and the program exits
# with status 1.
#
# i, result and letters are locals.
function compute_suffix(    i, result, letters)
{
    # Refuse to wrap around once the last suffix has been used
    if (Reached_last) {
        printf("split: too many files!\n") > "/dev/stderr"
        exit 1
    }

    # Remember that the suffix returned now is the final one,
    # so that the next call fails instead of wrapping (e.g. after 'zzz')
    if (on_last_file())
        Reached_last = 1

    # Spell out the suffix for the current indices
    letters = "abcdefghijklmnopqrstuvwxyz"
    result = ""
    for (i = 1; i <= Suffix_length; i++)
        result = result substr(letters, Suffix_ind[i], 1)

    # Advance: bump the rightmost index, carrying leftward on overflow
    i = Suffix_length
    while (i >= 1) {
        if (++Suffix_ind[i] <= 26)
            break               # no carry needed

        Suffix_ind[i] = 1       # roll over and carry into position i - 1
        i--
    }

    return result
}
# init_suffix_data --- reset the suffix generator to its first value
#
# Clears the Reached_last flag and sets each of the Suffix_length
# entries of Suffix_ind to 1.  i is a local.
function init_suffix_data(    i)
{
    Reached_last = 0

    i = 1
    while (i <= Suffix_length) {
        Suffix_ind[i] = 1
        i++
    }
}
# on_last_file --- report whether the suffix indices are at their maximum
#
# Returns 1 when every one of the first Suffix_length entries of
# Suffix_ind equals 26, and 0 otherwise.
# i and on_last are locals (on_last is kept only for the signature).
function on_last_file(    i, on_last)
{
    for (i = 1; i <= Suffix_length; i++) {
        # A single position below 26 means more suffixes remain
        if (Suffix_ind[i] != 26)
            return 0
    }

    return 1
}
# Splitting by line count: copy each record to the current output file,
# moving to a new file once Line_count records have been written.
# tcount is the number of records in the current file, including this one.
Line_count > 0 {
    tcount++

    if (tcount > Line_count) {
        # Current file is full; this record starts the next one
        close(Output)
        Output = Outfile compute_suffix()
        tcount = 1
    }

    print $0 > Output
}
# Splitting by size: copy each record to the output, cutting it wherever
# the current file reaches Byte_count.
#
# tcount is the amount already written to the current output file.
# Sizes are measured with length(), plus 1 for the newline that print
# appends.
# NOTE(review): length() counts characters, so in a multibyte locale the
# files may not be exactly Byte_count bytes -- confirm if that matters.
Byte_count > 0 {
    # `+ 1' is for the final newline.
    # This is a loop rather than a single test: a record longer than
    # Byte_count spans several output files, and each of them must be
    # filled exactly instead of dumping the whole remainder into one.
    while (tcount + length($0) + 1 > Byte_count) { # would overflow
        # compute leading bytes: what still fits in the current file
        leading_bytes = Byte_count - tcount

        # write leading bytes, without a newline
        printf("%s", substr($0, 1, leading_bytes)) > Output

        # close old file, open new file
        close(Output)
        Output = (Outfile compute_suffix())

        # set up first bytes for new file
        $0 = substr($0, leading_bytes + 1)  # trailing bytes
        tcount = 0
    }

    # write full record or trailing bytes
    tcount += length($0) + 1
    print > Output
}
# Clean-up: close whichever output file is current when input ends.
END {
close(Output)
}
# end of split.awk