1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357
|
// SPDX-License-Identifier: MIT
/* Check available memory and swap in a loop and start killing
* processes if they get too low */
#include <dirent.h>
#include <errno.h>
#include <getopt.h>
#include <regex.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <unistd.h>
#include "kill.h"
#include "meminfo.h"
#include "msg.h"
/* Values returned by getopt_long() for the options that exist only in
 * long form (--prefer, --avoid). The numbers themselves are arbitrary;
 * they merely must not collide with any short option character handled
 * in the same switch. */
enum {
    LONG_OPT_PREFER = 513,
    LONG_OPT_AVOID = 514,
};
/* Forward declarations of the file-local helpers that main() uses and
 * that are defined further down in this file. */
static int set_oom_score_adj(int);
static void print_mem_stats(bool lowmem, const meminfo_t m);
static void poll_loop(const poll_loop_args_t args);
/* Debug switch: set to 1 by the -d command line option; when non-zero,
 * poll_loop() prints the adaptive sleep time on every iteration. */
int enable_debug = 0;
/* System page size, filled in by main() from sysconf(_SC_PAGESIZE).
 * Not read in the code visible here - presumably consumed by other
 * translation units (TODO confirm). */
long page_size = 0;
/* Program entry point.
 *
 * Sequence of operations:
 *  1. Change into /proc and keep a DIR handle to it in args.procdir
 *     (fatal() with code 4 or 5 on failure). Later relative paths such as
 *     "<pid>/oom_score_adj" rely on this working directory.
 *  2. Read the memory totals once via parse_meminfo(); they are needed to
 *     convert the absolute -M / -S sizes into percentages.
 *  3. Parse the command line. Usage errors call fatal() with code 13,
 *     an invalid -r with code 14, conflicting -m/-M or -s/-S with code 2,
 *     an uncompilable --prefer/--avoid regex with code 6. The trailing
 *     15/16 arguments of parse_term_kill_tuple() are presumably the codes
 *     it uses for invalid -m/-M and -s/-S values (defined elsewhere).
 *  4. Optionally raise our own priority (-p), print the limits, do a
 *     dry-run kill, lock memory and enter poll_loop(), which never returns.
 */
int main(int argc, char* argv[])
{
    poll_loop_args_t args = {
        .mem_term_percent = 10,
        .swap_term_percent = 10,
        .mem_kill_percent = 5,
        .swap_kill_percent = 5,
        .report_interval_ms = 1000,
        /* omitted fields are set to zero */
    };
    int set_my_priority = 0;
    char* prefer_cmds = NULL;
    char* avoid_cmds = NULL;
    /* Storage for the compiled regexes; args only holds pointers to them */
    regex_t _prefer_regex;
    regex_t _avoid_regex;

    page_size = sysconf(_SC_PAGESIZE);

    /* request line buffering for stdout - otherwise the output
     * may lag behind stderr */
    setlinebuf(stdout);

    fprintf(stderr, "earlyoom " VERSION "\n");

    if (chdir("/proc") != 0) {
        fatal(4, "Could not cd to /proc: %s", strerror(errno));
    }

    args.procdir = opendir(".");
    if (args.procdir == NULL) {
        fatal(5, "Could not open /proc: %s", strerror(errno));
    }

    meminfo_t m = parse_meminfo();

    int c;
    const char* short_opt = "m:s:M:S:kinN:dvr:ph";
    struct option long_opt[] = {
        { "prefer", required_argument, NULL, LONG_OPT_PREFER },
        { "avoid", required_argument, NULL, LONG_OPT_AVOID },
        { "help", no_argument, NULL, 'h' },
        { 0, 0, NULL, 0 } /* end-of-array marker */
    };
    bool have_m = 0, have_M = 0, have_s = 0, have_S = 0;

    while ((c = getopt_long(argc, argv, short_opt, long_opt, NULL)) != -1) {
        float report_interval_f = 0;
        char* interval_end = NULL;
        term_kill_tuple_t tuple;

        switch (c) {
        case -1: /* no more arguments */
        case 0: /* long option toggles */
            break;
        case 'm':
            // Use 99 as upper limit. Passing "-m 100" makes no sense.
            tuple = parse_term_kill_tuple("-m", optarg, 99, 15);
            args.mem_term_percent = tuple.term;
            args.mem_kill_percent = tuple.kill;
            have_m = 1;
            break;
        case 's':
            // Using "-s 100" is a valid way to ignore swap usage
            tuple = parse_term_kill_tuple("-s", optarg, 100, 16);
            args.swap_term_percent = tuple.term;
            args.swap_kill_percent = tuple.kill;
            have_s = 1;
            break;
        case 'M':
            // Absolute sizes in KiB, converted to a percentage of total memory
            tuple = parse_term_kill_tuple("-M", optarg, m.MemTotalKiB * 100 / 99, 15);
            args.mem_term_percent = 100 * tuple.term / m.MemTotalKiB;
            args.mem_kill_percent = 100 * tuple.kill / m.MemTotalKiB;
            have_M = 1;
            break;
        case 'S':
            // Absolute sizes in KiB, converted to a percentage of total swap
            tuple = parse_term_kill_tuple("-S", optarg, m.SwapTotalKiB * 100 / 99, 16);
            have_S = 1;
            if (m.SwapTotalKiB == 0) {
                // A machine without swap has SwapTotalKiB == 0; converting
                // to a percentage would be an integer division by zero.
                warn("-S: total swap is zero, leaving the swap percentages unchanged\n");
                break;
            }
            args.swap_term_percent = 100 * tuple.term / m.SwapTotalKiB;
            args.swap_kill_percent = 100 * tuple.kill / m.SwapTotalKiB;
            break;
        case 'k':
            fprintf(stderr, "Option -k is ignored since earlyoom v1.2\n");
            break;
        case 'i':
            args.ignore_oom_score_adj = 1;
            fprintf(stderr, "Ignoring oom_score_adj\n");
            break;
        case 'n':
            args.notif_command = "notify-send";
            fprintf(stderr, "Notifying using '%s'\n", args.notif_command);
            break;
        case 'N':
            args.notif_command = optarg;
            fprintf(stderr, "Notifying using '%s'\n", args.notif_command);
            break;
        case 'd':
            enable_debug = 1;
            break;
        case 'v':
            // The version has already been printed above
            exit(0);
        case 'r':
            report_interval_f = strtof(optarg, &interval_end);
            // strtof() consumes nothing when the argument is not a number
            // and then returns 0, which would silently disable reporting.
            // Reject that case together with negative intervals.
            if (interval_end == optarg || report_interval_f < 0) {
                fatal(14, "-r: invalid interval '%s'\n", optarg);
            }
            args.report_interval_ms = report_interval_f * 1000;
            break;
        case 'p':
            set_my_priority = 1;
            break;
        case LONG_OPT_PREFER:
            prefer_cmds = optarg;
            break;
        case LONG_OPT_AVOID:
            avoid_cmds = optarg;
            break;
        case 'h':
            fprintf(stderr,
                "Usage: earlyoom [OPTION]...\n"
                "\n"
                " -m PERCENT[,KILL_PERCENT] set available memory minimum to PERCENT of total (default 10 %%).\n"
                " earlyoom sends SIGTERM once below PERCENT, then SIGKILL once below\n"
                " KILL_PERCENT (default PERCENT/2).\n"
                " -s PERCENT[,KILL_PERCENT] set free swap minimum to PERCENT of total (default 10 %%)\n"
                " -M SIZE[,KILL_SIZE] set available memory minimum to SIZE KiB\n"
                " -S SIZE[,KILL_SIZE] set free swap minimum to SIZE KiB\n"
                " -i user-space oom killer should ignore positive oom_score_adj values\n"
                " -n enable notifications using \"notify-send\"\n"
                " -N COMMAND enable notifications using COMMAND\n"
                " -d enable debugging messages\n"
                " -v print version information and exit\n"
                " -r INTERVAL memory report interval in seconds (default 1), set to 0 to\n"
                " disable completely\n"
                " -p set niceness of earlyoom to -20 and oom_score_adj to -1000\n"
                " --prefer REGEX prefer killing processes matching REGEX\n"
                " --avoid REGEX avoid killing processes matching REGEX\n"
                " -h, --help this help text\n");
            exit(0);
        case '?':
            fprintf(stderr, "Try 'earlyoom --help' for more information.\n");
            exit(13);
        }
    } /* while getopt */

    if (optind < argc) {
        fatal(13, "extra argument not understood: '%s'\n", argv[optind]);
    }
    // The percentage and the absolute-size form set the same fields, so
    // giving both is ambiguous.
    if (have_m && have_M) {
        fatal(2, "can't use both -m and -M\n");
    }
    if (have_s && have_S) {
        fatal(2, "can't use both -s and -S\n");
    }

    if (prefer_cmds) {
        args.prefer_regex = &_prefer_regex;
        if (regcomp(args.prefer_regex, prefer_cmds, REG_EXTENDED | REG_NOSUB) != 0) {
            fatal(6, "could not compile regexp '%s'\n", prefer_cmds);
        }
        fprintf(stderr, "Prefering to kill process names that match regex '%s'\n", prefer_cmds);
    }
    if (avoid_cmds) {
        args.avoid_regex = &_avoid_regex;
        if (regcomp(args.avoid_regex, avoid_cmds, REG_EXTENDED | REG_NOSUB) != 0) {
            fatal(6, "could not compile regexp '%s'\n", avoid_cmds);
        }
        fprintf(stderr, "Avoiding to kill process names that match regex '%s'\n", avoid_cmds);
    }

    if (set_my_priority) {
        // Both steps are best effort: a failure is reported but not fatal
        bool fail = 0;
        if (setpriority(PRIO_PROCESS, 0, -20) != 0) {
            warn("Could not set priority: %s. Continuing anyway\n", strerror(errno));
            fail = 1;
        }
        int ret = set_oom_score_adj(-1000);
        if (ret != 0) {
            warn("Could not set oom_score_adj: %s. Continuing anyway\n", strerror(ret));
            fail = 1;
        }
        if (!fail) {
            fprintf(stderr, "Priority was raised successfully\n");
        }
    }

    // Print memory limits
    fprintf(stderr, "mem total: %4d MiB, sending SIGTERM at %2d %%, SIGKILL at %2d %%\n",
        m.MemTotalMiB, args.mem_term_percent, args.mem_kill_percent);
    fprintf(stderr, "swap total: %4d MiB, sending SIGTERM at %2d %%, SIGKILL at %2d %%\n",
        m.SwapTotalMiB, args.swap_term_percent, args.swap_kill_percent);

    /* Dry-run oom kill to make sure stack grows to maximum size before
     * calling mlockall()
     */
    userspace_kill(args, 0);

    if (mlockall(MCL_CURRENT | MCL_FUTURE) != 0) {
        perror("Could not lock memory - continuing anyway");
    }

    // Jump into main poll loop
    poll_loop(args);
    return 0;
}
/* Emit one status line of the form
 *   mem avail: 5259 of 7848 MiB (67 %), swap free:  100 of 2048 MiB ( 4 %)
 * built from the values in `m`.
 *
 * lowmem selects the output channel: when set, the line is passed to
 * warn(); otherwise it is printed with printf() (i.e. to stdout).
 */
static void print_mem_stats(bool lowmem, const meminfo_t m)
{
    static const char status_fmt[] = "mem avail: %4d of %4d MiB (%2d %%), swap free: %4d of %4d MiB (%2d %%)\n";

    if (lowmem) {
        warn(status_fmt,
            m.MemAvailableMiB, m.MemTotalMiB, m.MemAvailablePercent,
            m.SwapFreeMiB, m.SwapTotalMiB, m.SwapFreePercent);
        return;
    }
    printf(status_fmt,
        m.MemAvailableMiB, m.MemTotalMiB, m.MemAvailablePercent,
        m.SwapFreeMiB, m.SwapTotalMiB, m.SwapFreePercent);
}
/* Write `oom_score_adj` into this process's "<pid>/oom_score_adj" file.
 *
 * The path is relative, so the current working directory has to be /proc
 * when this is called (main() does chdir("/proc") first).
 *
 * Returns 0 on success, otherwise a positive errno value that can be
 * passed straight to strerror().
 */
static int set_oom_score_adj(int oom_score_adj)
{
    char buf[256];
    snprintf(buf, sizeof(buf), "%d/oom_score_adj", (int)getpid());

    FILE* f = fopen(buf, "w");
    if (f == NULL) {
        // EIO is only a fallback for a libc that fails without setting errno
        return errno ? errno : EIO;
    }

    // fprintf() signals failure with a negative return value; the reason is
    // in errno, not in the return value. Save it right away because
    // fclose() below may overwrite errno.
    int write_errno = 0;
    errno = 0;
    if (fprintf(f, "%d", oom_score_adj) < 0) {
        write_errno = errno ? errno : EIO;
    }

    // The stream is buffered, so a rejected write is typically only noticed
    // when fclose() flushes the data. Always close, even after a failed
    // fprintf(), so that the FILE is not leaked.
    int close_errno = 0;
    errno = 0;
    if (fclose(f) != 0) {
        close_errno = errno ? errno : EIO;
    }

    // Report the earliest failure
    return write_errno ? write_errno : close_errno;
}
/* Calculate the time we should sleep based upon how far away from the memory and swap
 * limits we are (headroom). Returns a millisecond value between 100 and 1000 (inclusive).
 * The idea is simple: if memory and swap can only fill up so fast, we know how long we can
 * sleep without risking to miss a low memory event.
 *
 * args: supplies the SIGTERM thresholds (mem_term_percent, swap_term_percent).
 * m:    current memory statistics (percentages and totals in MiB).
 */
static int sleep_time_ms(const poll_loop_args_t* args, const meminfo_t* m)
{
    // Maximum expected memory/swap fill rate. In kiB per millisecond ==~ MiB per second.
    const int mem_fill_rate = 6000; // 6000MiB/s seen with "stress -m 4 --vm-bytes 4G"
    const int swap_fill_rate = 800; // 800MiB/s seen with membomb on ZRAM
    // Clamp calculated value to this range (milliseconds)
    const int min_sleep = 100;
    const int max_sleep = 1000;

    // percent * 10 * MiB approximates the headroom in KiB (1 % of 1 MiB is
    // 10.24 KiB). The product is evaluated in long long: with an int-sized
    // total it would overflow (undefined behaviour) on machines with a few
    // TiB of memory or swap.
    long long mem_headroom_kib = (long long)(m->MemAvailablePercent - args->mem_term_percent) * 10 * m->MemTotalMiB;
    if (mem_headroom_kib < 0) {
        // Already below the limit: no headroom left
        mem_headroom_kib = 0;
    }
    long long swap_headroom_kib = (long long)(m->SwapFreePercent - args->swap_term_percent) * 10 * m->SwapTotalMiB;
    if (swap_headroom_kib < 0) {
        swap_headroom_kib = 0;
    }

    // Time needed to eat up both headrooms at the maximum fill rates
    long long ms = mem_headroom_kib / mem_fill_rate + swap_headroom_kib / swap_fill_rate;
    if (ms < min_sleep) {
        return min_sleep;
    }
    if (ms > max_sleep) {
        return max_sleep;
    }
    // Within [min_sleep, max_sleep], so the narrowing conversion is safe
    return (int)ms;
}
/* Main monitoring loop; never returns.
 *
 * Each round re-reads the memory statistics with parse_meminfo(). When
 * BOTH available memory and free swap are at or below their SIGTERM
 * thresholds, userspace_kill() is invoked - with SIGKILL if both values
 * are also at or below the SIGKILL thresholds, with SIGTERM otherwise.
 * When not low on memory, a status line is printed once the report
 * countdown has expired (reports are off if args.report_interval_ms is 0).
 * The loop then sleeps for the interval chosen by sleep_time_ms().
 */
static void poll_loop(const poll_loop_args_t args)
{
    // extra time to sleep after a kill
    const int post_kill_pause_ms = 200;
    // milliseconds left until the next periodic status line is due
    int until_report_ms = 0;
    meminfo_t mi = { 0 };

    for (;;) {
        mi = parse_meminfo();

        const int below_term = mi.MemAvailablePercent <= args.mem_term_percent
            && mi.SwapFreePercent <= args.swap_term_percent;
        const int below_kill = mi.MemAvailablePercent <= args.mem_kill_percent
            && mi.SwapFreePercent <= args.swap_kill_percent;

        if (below_term) {
            if (below_kill) {
                warn("Low memory! At or below sigkill limits (mem: %d %%, swap: %d %%)\n",
                    args.mem_kill_percent, args.swap_kill_percent);
            } else {
                warn("Low Memory! At or below sigterm limits (mem: %d %%, swap: %d %%)\n",
                    args.mem_term_percent, args.swap_term_percent);
            }
            const int sig = below_kill ? SIGKILL : SIGTERM;

            print_mem_stats(1, mi);
            userspace_kill(args, sig);

            // With swap enabled, the kernel seems to need more than 100ms to free the
            // memory of the killed process, so earlyoom would immediately kill another
            // process. Pause a little longer to give the kernel time to free the memory.
            // (The pause also happens when the kill has failed. That does no harm and
            // keeps the code simple.)
            if (mi.SwapTotalMiB > 0) {
                usleep(post_kill_pause_ms * 1000);
                until_report_ms -= post_kill_pause_ms;
            }
        } else if (args.report_interval_ms && until_report_ms <= 0) {
            print_mem_stats(0, mi);
            until_report_ms = args.report_interval_ms;
        }

        const int nap_ms = sleep_time_ms(&args, &mi);
        if (enable_debug) {
            printf("adaptive sleep time: %d ms\n", nap_ms);
        }
        usleep(nap_ms * 1000);
        until_report_ms -= nap_ms;
    }
}
|