File: sort.cpp

package info (click to toggle)
tippecanoe 2.53.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 148,236 kB
  • sloc: cpp: 44,069; ansic: 2,057; makefile: 454; perl: 129; python: 62; sh: 4
file content (127 lines) | stat: -rw-r--r-- 2,916 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <vector>
#include <string>

#define MAX_MEMORY (1024 * 1024 * 1024)	 // 1 GB

void fqsort(std::vector<FILE *> &inputs, size_t width, int (*cmp)(const void *, const void *), FILE *out, size_t mem) {
	std::string pivot;
	FILE *fp1, *fp2;

	if (mem > MAX_MEMORY) {
		mem = MAX_MEMORY;
	}

	{
		// read some elements into memory to choose a pivot from
		//
		// this is in its own scope so `buf` can go out of scope
		// before trying to do any sub-sorts.

		std::string buf;

		bool read_everything = false;
		for (size_t i = 0; i < inputs.size(); i++) {
			if (buf.size() > mem) {
				break;
			}

			while (true) {
				std::string element;
				element.resize(width);

				size_t n = fread((void *) element.c_str(), width, 1, inputs[i]);
				if (n == 0) {
					if (i + 1 == inputs.size()) {
						read_everything = true;
					}
					break;
				}

				buf.append(element);

				if (buf.size() > mem) {
					break;
				}
			}
		}

		qsort((void *) buf.c_str(), buf.size() / width, width, cmp);

		// If that was everything we have to sort, we are done.

		if (read_everything) {
			fwrite((void *) buf.c_str(), buf.size() / width, width, out);
			return;
		}

		// Otherwise, choose a pivot from it, make some temporary files,
		// write what we have to those files, and then partition the rest
		// of the input into them.

		// This would be unstable if the pivot is one of several elements
		// that compare equal. Does it matter?

		size_t pivot_off = width * (buf.size() / width / 2);
		pivot = std::string(buf, pivot_off, width);

		std::string t1 = "/tmp/sort1.XXXXXX";
		std::string t2 = "/tmp/sort2.XXXXXX";

		int fd1 = mkstemp((char *) t1.c_str());
		unlink(t1.c_str());
		int fd2 = mkstemp((char *) t2.c_str());
		unlink(t2.c_str());

		fp1 = fdopen(fd1, "w+b");
		if (fp1 == NULL) {
			perror(t1.c_str());
			exit(EXIT_FAILURE);
		}
		fp2 = fdopen(fd2, "w+b");
		if (fp2 == NULL) {
			perror(t2.c_str());
			exit(EXIT_FAILURE);
		}

		fwrite((void *) buf.c_str(), sizeof(char), pivot_off, fp1);
		fwrite((void *) ((char *) buf.c_str() + pivot_off), sizeof(char), buf.size() - pivot_off, fp2);
	}

	// read the remaining input into the temporary files

	for (size_t i = 0; i < inputs.size(); i++) {
		while (true) {
			std::string element;
			element.resize(width);

			size_t n = fread((void *) element.c_str(), width, 1, inputs[i]);
			if (n == 0) {
				break;
			}

			if (cmp((void *) element.c_str(), (void *) pivot.c_str()) < 0) {
				fwrite((void *) element.c_str(), width, 1, fp1);
			} else {
				fwrite((void *) element.c_str(), width, 1, fp2);
			}
		}
	}

	// Now sort the sub-ranges into the output.

	rewind(fp1);
	rewind(fp2);

	std::vector<FILE *> v1;
	v1.emplace_back(fp1);
	fqsort(v1, width, cmp, out, mem);
	fclose(fp1);

	std::vector<FILE *> v2;
	v2.emplace_back(fp2);
	fqsort(v2, width, cmp, out, mem);
	fclose(fp2);
}