File: create-multibin-corefile.cpp

package info (click to toggle)
llvm-toolchain-19 1%3A19.1.7-3~deb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 1,998,492 kB
  • sloc: cpp: 6,951,680; ansic: 1,486,157; asm: 913,598; python: 232,024; f90: 80,126; objc: 75,281; lisp: 37,276; pascal: 16,990; sh: 10,009; ml: 5,058; perl: 4,724; awk: 3,523; makefile: 3,167; javascript: 2,504; xml: 892; fortran: 664; cs: 573
file content (484 lines) | stat: -rw-r--r-- 15,900 bytes parent folder | download | duplicates (11)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <mach-o/loader.h>
#include <mach/thread_status.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string>
#include <sys/errno.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <unistd.h>
#include <uuid/uuid.h>
#include <vector>

// Given a list of binaries, and optional slides to be applied,
// create a corefile whose memory is those binaries laid down at
// their slid addresses.
//
// Add a 'main bin spec' LC_NOTE for the first binary, and
// 'load binary' LC_NOTEs for any additional binaries, and
// these LC_NOTEs will ONLY have the vmaddr of the binary - no
// UUID, no slide, no filename.
//
// Test that lldb can use the load addresses, find the UUIDs,
// and load the binaries/dSYMs and put them at the correct load
// address.

// In-memory picture of the "main bin spec" LC_NOTE payload.  The field
// order matches the order in which add_lc_note_main_bin_spec_load_command()
// appends values to the corefile payload.
struct main_bin_spec_payload {
  uint32_t version;       // payload format version (the writer emits 2)
  uint32_t type;          // binary type (the writer emits 3: firmware,
                          // standalone, etc)
  uint64_t address;       // load address of the main binary
  uint64_t slide;         // slide of the main binary
  uuid_t uuid;            // UUID of the main binary
  uint32_t log2_pagesize; // the writer emits 0 for "unspecified"
  uint32_t platform;      // the writer emits 0 for "unspecified"
};

// In-memory picture of the "load binary" LC_NOTE payload.  The field order
// matches the order in which add_lc_note_load_binary_load_command() appends
// values to the corefile payload.
//
// NOTE(review): this struct is not declared packed, so on ABIs that align
// uint64_t to 8 bytes sizeof() will likely include padding after `uuid` and
// after `name` that the field-by-field writer does not emit -- confirm
// before relying on sizeof() as the on-disk size.
struct load_binary_payload {
  uint32_t version;   // payload format version (the writer emits 1)
  uuid_t uuid;        // UUID of the binary
  uint64_t address;   // load address of the binary
  uint64_t slide;     // slide of the binary
  const char name[4]; // name field; the writer emits four zero bytes
};

// Overlays a 32-bit value with an array of its four bytes, so the value can
// be addressed one byte at a time in the host's own byte order.
union uint32_buf {
  uint8_t bytebuf[4]; // byte-wise view
  uint32_t val;       // value view
};

// Overlays a 64-bit value with an array of its eight bytes, so the value can
// be addressed one byte at a time in the host's own byte order.
union uint64_buf {
  uint8_t bytebuf[8]; // byte-wise view
  uint64_t val;       // value view
};

// Append the eight bytes of `val` to `buf`, in host byte order.
//
// The bytes are read from the object representation of `val` through a
// uint8_t pointer, which is well-defined in C++.  (Writing one member of a
// union and reading another is not.)
void add_uint64(std::vector<uint8_t> &buf, uint64_t val) {
  const uint8_t *bytes = reinterpret_cast<const uint8_t *>(&val);
  buf.insert(buf.end(), bytes, bytes + sizeof(val));
}

// Append the four bytes of `val` to `buf`, in host byte order.
//
// The bytes are read from the object representation of `val` through a
// uint8_t pointer, which is well-defined in C++.  (Writing one member of a
// union and reading another is not.)
void add_uint32(std::vector<uint8_t> &buf, uint32_t val) {
  const uint8_t *bytes = reinterpret_cast<const uint8_t *>(&val);
  buf.insert(buf.end(), bytes, bytes + sizeof(val));
}

// Build the raw bytes of an LC_THREAD load command for `cputype`.
//
// The register context is entirely zero: the tests that consume these
// corefiles never look at register values, but lldb needs to see at least
// one LC_THREAD in the corefile.  A command is only produced when `cputype`
// is the architecture this tool itself was compiled for; in every other
// case the returned vector is empty.
std::vector<uint8_t> lc_thread_load_command(cpu_type_t cputype) {
  std::vector<uint8_t> out;
#if defined(__x86_64__)
  if (cputype == CPU_TYPE_X86_64) {
    const uint32_t reg_words = x86_THREAD_STATE64_COUNT;
    add_uint32(out, LC_THREAD);            // thread_command.cmd
    add_uint32(out, 16 + (reg_words * 4)); // thread_command.cmdsize
    add_uint32(out, x86_THREAD_STATE64);   // thread_command.flavor
    add_uint32(out, reg_words);            // thread_command.count
    // One zeroed 32-bit word per register slot.
    out.insert(out.end(), static_cast<size_t>(reg_words) * 4, uint8_t{0});
  }
#endif
#if defined(__arm64__) || defined(__aarch64__)
  if (cputype == CPU_TYPE_ARM64) {
    const uint32_t reg_words = ARM_THREAD_STATE64_COUNT;
    add_uint32(out, LC_THREAD);            // thread_command.cmd
    add_uint32(out, 16 + (reg_words * 4)); // thread_command.cmdsize
    add_uint32(out, ARM_THREAD_STATE64);   // thread_command.flavor
    add_uint32(out, reg_words);            // thread_command.count
    // One zeroed 32-bit word per register slot.
    out.insert(out.end(), static_cast<size_t>(reg_words) * 4, uint8_t{0});
  }
#endif
  return out;
}

// Append a "main bin spec" LC_NOTE load command to `loadcmds` and its
// payload to `payload`.
//
//   loadcmds            - list of load commands being built; one entry (the
//                         note_command bytes) is appended.
//   payload             - corefile payload bytes; the note's payload is
//                         appended at the current end.
//   payload_file_offset - file offset at which `payload` will start, i.e.
//                         the room reserved for the header & load commands.
//   uuidstr             - textual UUID to record; the process exits with an
//                         error if it cannot be parsed.
//   address, slide      - load address and slide to record.
void add_lc_note_main_bin_spec_load_command(
    std::vector<std::vector<uint8_t>> &loadcmds, std::vector<uint8_t> &payload,
    int payload_file_offset, std::string uuidstr, uint64_t address,
    uint64_t slide) {
  // Parse the UUID before touching any output so that a malformed string is
  // reported instead of emitting uninitialized bytes into the corefile.
  uuid_t uuid;
  if (uuid_parse(uuidstr.c_str(), uuid) != 0) {
    fprintf(stderr, "Unable to parse UUID string '%s'\n", uuidstr.c_str());
    exit(1);
  }

  std::vector<uint8_t> loadcmd_data;

  add_uint32(loadcmd_data, LC_NOTE); // note_command.cmd
  add_uint32(loadcmd_data, 40);      // note_command.cmdsize

  // note_command.data_owner: 16 bytes, NUL padded (the remaining array
  // elements are zero-initialized).
  const char lc_note_name[16] = "main bin spec";
  loadcmd_data.insert(loadcmd_data.end(), lc_note_name,
                      lc_note_name + sizeof(lc_note_name));

  // we start writing the payload at payload_file_offset to leave
  // room at the start for the header & the load commands.
  uint64_t current_payload_offset = payload.size() + payload_file_offset;

  add_uint64(loadcmd_data, current_payload_offset); // note_command.offset
  add_uint64(loadcmd_data,
             sizeof(struct main_bin_spec_payload)); // note_command.size

  loadcmds.push_back(loadcmd_data);

  // Now write the "main bin spec" payload.
  add_uint32(payload, 2);       // version
  add_uint32(payload, 3);       // type == 3 [ firmware, standalone, etc ]
  add_uint64(payload, address); // load address
  add_uint64(payload, slide);   // slide
  payload.insert(payload.end(), uuid, uuid + sizeof(uuid_t)); // uuid
  add_uint32(payload, 0); // log2_pagesize unspecified
  add_uint32(payload, 0); // platform unspecified
}

// Append a "load binary" LC_NOTE load command to `loadcmds` and its payload
// to `payload`.
//
//   loadcmds            - list of load commands being built; one entry (the
//                         note_command bytes) is appended.
//   payload             - corefile payload bytes; the note's payload is
//                         appended at the current end.
//   payload_file_offset - file offset at which `payload` will start, i.e.
//                         the room reserved for the header & load commands.
//   uuidstr             - textual UUID to record; the process exits with an
//                         error if it cannot be parsed.
//   address, slide      - load address and slide to record.
//
// note_command.size is the number of payload bytes actually written here.
// sizeof(struct load_binary_payload) is not used because that struct is not
// packed, so its size can include alignment padding that is never emitted.
void add_lc_note_load_binary_load_command(
    std::vector<std::vector<uint8_t>> &loadcmds, std::vector<uint8_t> &payload,
    int payload_file_offset, std::string uuidstr, uint64_t address,
    uint64_t slide) {
  // Parse the UUID before touching any output so that a malformed string is
  // reported instead of emitting uninitialized bytes into the corefile.
  uuid_t uuid;
  if (uuid_parse(uuidstr.c_str(), uuid) != 0) {
    fprintf(stderr, "Unable to parse UUID string '%s'\n", uuidstr.c_str());
    exit(1);
  }

  // we start writing the payload at payload_file_offset to leave
  // room at the start for the header & the load commands.
  const size_t payload_start = payload.size();
  const uint64_t current_payload_offset = payload_start + payload_file_offset;

  // Write the "load binary" payload first so its exact size is known.
  add_uint32(payload, 1);                                     // version
  payload.insert(payload.end(), uuid, uuid + sizeof(uuid_t)); // uuid
  add_uint64(payload, address);                               // load address
  add_uint64(payload, slide);                                 // slide
  add_uint32(payload, 0);                                     // name
  const uint64_t payload_size = payload.size() - payload_start;

  std::vector<uint8_t> loadcmd_data;

  add_uint32(loadcmd_data, LC_NOTE); // note_command.cmd
  add_uint32(loadcmd_data, 40);      // note_command.cmdsize

  // note_command.data_owner: 16 bytes, NUL padded (the remaining array
  // elements are zero-initialized).
  const char lc_note_name[16] = "load binary";
  loadcmd_data.insert(loadcmd_data.end(), lc_note_name,
                      lc_note_name + sizeof(lc_note_name));

  add_uint64(loadcmd_data, current_payload_offset); // note_command.offset
  add_uint64(loadcmd_data, payload_size);           // note_command.size

  loadcmds.push_back(loadcmd_data);
}

void add_lc_segment(std::vector<std::vector<uint8_t>> &loadcmds,
                    std::vector<uint8_t> &payload, int payload_file_offset,
                    uint64_t vmaddr, uint64_t size) {
  std::vector<uint8_t> loadcmd_data;
  struct segment_command_64 seg;
  seg.cmd = LC_SEGMENT_64;
  seg.cmdsize = sizeof(struct segment_command_64); // no sections
  memset(seg.segname, 0, 16);
  seg.vmaddr = vmaddr;
  seg.vmsize = size;
  seg.fileoff = payload.size() + payload_file_offset;
  seg.filesize = size;
  seg.maxprot = 1;
  seg.initprot = 1;
  seg.nsects = 0;
  seg.flags = 0;

  uint8_t *p = (uint8_t *)&seg;
  for (int i = 0; i < sizeof(struct segment_command_64); i++) {
    loadcmd_data.push_back(*(p + i));
  }
  loadcmds.push_back(loadcmd_data);
}

std::string scan_binary(const char *fn, uint64_t &vmaddr, cpu_type_t &cputype,
                        cpu_subtype_t &cpusubtype) {
  FILE *f = fopen(fn, "r");
  if (f == nullptr) {
    fprintf(stderr, "Unable to open binary '%s' to get uuid\n", fn);
    exit(1);
  }
  uint32_t num_of_load_cmds = 0;
  uint32_t size_of_load_cmds = 0;
  std::string uuid;
  off_t file_offset = 0;
  vmaddr = UINT64_MAX;

  uint8_t magic[4];
  if (::fread(magic, 1, 4, f) != 4) {
    fprintf(stderr, "Failed to read magic number from input file %s\n", fn);
    exit(1);
  }
  uint8_t magic_32_be[] = {0xfe, 0xed, 0xfa, 0xce};
  uint8_t magic_32_le[] = {0xce, 0xfa, 0xed, 0xfe};
  uint8_t magic_64_be[] = {0xfe, 0xed, 0xfa, 0xcf};
  uint8_t magic_64_le[] = {0xcf, 0xfa, 0xed, 0xfe};

  if (memcmp(magic, magic_32_be, 4) == 0 ||
      memcmp(magic, magic_64_be, 4) == 0) {
    fprintf(stderr, "big endian corefiles not supported\n");
    exit(1);
  }

  ::fseeko(f, 0, SEEK_SET);
  if (memcmp(magic, magic_32_le, 4) == 0) {
    struct mach_header mh;
    if (::fread(&mh, 1, sizeof(mh), f) != sizeof(mh)) {
      fprintf(stderr, "error reading mach header from input file\n");
      exit(1);
    }
    if (mh.cputype != CPU_TYPE_X86_64 && mh.cputype != CPU_TYPE_ARM64) {
      fprintf(stderr,
              "This tool creates an x86_64/arm64 corefile but "
              "the supplied binary '%s' is cputype 0x%x\n",
              fn, (uint32_t)mh.cputype);
      exit(1);
    }
    num_of_load_cmds = mh.ncmds;
    size_of_load_cmds = mh.sizeofcmds;
    file_offset += sizeof(struct mach_header);
    cputype = mh.cputype;
    cpusubtype = mh.cpusubtype;
  } else {
    struct mach_header_64 mh;
    if (::fread(&mh, 1, sizeof(mh), f) != sizeof(mh)) {
      fprintf(stderr, "error reading mach header from input file\n");
      exit(1);
    }
    if (mh.cputype != CPU_TYPE_X86_64 && mh.cputype != CPU_TYPE_ARM64) {
      fprintf(stderr,
              "This tool creates an x86_64/arm64 corefile but "
              "the supplied binary '%s' is cputype 0x%x\n",
              fn, (uint32_t)mh.cputype);
      exit(1);
    }
    num_of_load_cmds = mh.ncmds;
    size_of_load_cmds = mh.sizeofcmds;
    file_offset += sizeof(struct mach_header_64);
    cputype = mh.cputype;
    cpusubtype = mh.cpusubtype;
  }

  off_t load_cmds_offset = file_offset;

  for (int i = 0; i < num_of_load_cmds &&
                  (file_offset - load_cmds_offset) < size_of_load_cmds;
       i++) {
    ::fseeko(f, file_offset, SEEK_SET);
    uint32_t cmd;
    uint32_t cmdsize;
    ::fread(&cmd, sizeof(uint32_t), 1, f);
    ::fread(&cmdsize, sizeof(uint32_t), 1, f);
    if (vmaddr == UINT64_MAX && cmd == LC_SEGMENT_64) {
      struct segment_command_64 segcmd;
      ::fseeko(f, file_offset, SEEK_SET);
      if (::fread(&segcmd, 1, sizeof(segcmd), f) != sizeof(segcmd)) {
        fprintf(stderr, "Unable to read LC_SEGMENT_64 load command.\n");
        exit(1);
      }
      if (strcmp("__TEXT", segcmd.segname) == 0)
        vmaddr = segcmd.vmaddr;
    }
    if (cmd == LC_UUID) {
      struct uuid_command uuidcmd;
      ::fseeko(f, file_offset, SEEK_SET);
      if (::fread(&uuidcmd, 1, sizeof(uuidcmd), f) != sizeof(uuidcmd)) {
        fprintf(stderr, "Unable to read LC_UUID load command.\n");
        exit(1);
      }
      uuid_string_t uuidstr;
      uuid_unparse(uuidcmd.uuid, uuidstr);
      uuid = uuidstr;
    }
    file_offset += cmdsize;
  }
  return uuid;
}

// Apply `slide` in place to the addresses recorded in a Mach-O image.
//
// `image` is treated as starting with a mach_header_64 followed directly by
// its load commands.  For every LC_SEGMENT_64 whose maxprot is non-zero and
// which has at least one section, the segment's vmaddr and the addr of each
// of its sections are increased by `slide`.  All other load commands, and
// segments not meeting that condition, are left untouched.
void slide_macho_binary(std::vector<uint8_t> &image, uint64_t slide) {
  uint8_t *cursor = image.data();
  struct mach_header_64 *header = (struct mach_header_64 *)cursor;
  cursor += sizeof(struct mach_header_64);

  for (uint32_t cmd_no = 0; cmd_no < header->ncmds; ++cmd_no) {
    struct load_command *cmd = (struct load_command *)cursor;
    if (cmd->cmd == LC_SEGMENT_64) {
      struct segment_command_64 *segment = (struct segment_command_64 *)cursor;
      const bool has_sections_to_slide =
          segment->maxprot != 0 && segment->nsects > 0;
      if (has_sections_to_slide) {
        segment->vmaddr += slide;
        // The section_64 records sit immediately after the segment command.
        struct section_64 *sections =
            (struct section_64 *)(cursor + sizeof(segment_command_64));
        for (uint32_t s = 0; s < segment->nsects; ++s)
          sections[s].addr += slide;
      }
    }
    cursor += cmd->cmdsize;
  }
}

int main(int argc, char **argv) {
  if (argc < 3) {
    fprintf(stderr,
            "usage: output-corefile binary1[@optional-slide] "
            "[binary2[@optional-slide] [binary3[@optional-slide] ...]]\n");
    exit(1);
  }

  // An array of load commands (in the form of byte arrays)
  std::vector<std::vector<uint8_t>> load_commands;

  // An array of corefile contents (page data, lc_note data, etc)
  std::vector<uint8_t> payload;

  std::vector<std::string> input_filenames;
  std::vector<uint64_t> input_slides;
  std::vector<uint64_t> input_filesizes;
  std::vector<uint64_t> input_filevmaddrs;
  uint64_t main_binary_cputype = CPU_TYPE_ARM64;
  uint64_t vmaddr = UINT64_MAX;
  cpu_type_t cputype;
  cpu_subtype_t cpusubtype;
  for (int i = 2; i < argc; i++) {
    std::string filename;
    std::string filename_and_opt_hex(argv[i]);
    uint64_t slide = 0;
    auto at_pos = filename_and_opt_hex.find_last_of('@');
    if (at_pos == std::string::npos) {
      filename = filename_and_opt_hex;
    } else {
      filename = filename_and_opt_hex.substr(0, at_pos);
      std::string hexstr = filename_and_opt_hex.substr(at_pos + 1);
      errno = 0;
      slide = (uint64_t)strtoull(hexstr.c_str(), nullptr, 16);
      if (errno != 0) {
        fprintf(stderr, "Unable to parse hex slide value in %s\n", argv[i]);
        exit(1);
      }
    }
    struct stat stbuf;
    if (stat(filename.c_str(), &stbuf) == -1) {
      fprintf(stderr, "Unable to stat '%s', exiting.\n", filename.c_str());
      exit(1);
    }
    input_filenames.push_back(filename);
    input_slides.push_back(slide);
    input_filesizes.push_back(stbuf.st_size);
    scan_binary(filename.c_str(), vmaddr, cputype, cpusubtype);
    input_filevmaddrs.push_back(vmaddr + slide);
    if (i == 2) {
      main_binary_cputype = cputype;
    }
  }

  const char *output_corefile_name = argv[1];
  std::string empty_uuidstr = "00000000-0000-0000-0000-000000000000";

  // First add all the load commands / payload so we can figure out how large
  // the load commands will actually be.
  load_commands.push_back(lc_thread_load_command(cputype));

  add_lc_note_main_bin_spec_load_command(load_commands, payload, 0,
                                         empty_uuidstr, 0, UINT64_MAX);
  for (int i = 1; i < input_filenames.size(); i++) {
    add_lc_note_load_binary_load_command(load_commands, payload, 0,
                                         empty_uuidstr, 0, UINT64_MAX);
  }

  for (int i = 0; i < input_filenames.size(); i++) {
    add_lc_segment(load_commands, payload, 0, 0, 0);
  }

  int size_of_load_commands = 0;
  for (const auto &lc : load_commands)
    size_of_load_commands += lc.size();

  int size_of_header_and_load_cmds =
      sizeof(struct mach_header_64) + size_of_load_commands;

  // Erase the load commands / payload now that we know how much space is
  // needed, redo it.
  load_commands.clear();
  payload.clear();

  // Push the LC_THREAD load command.
  load_commands.push_back(lc_thread_load_command(main_binary_cputype));

  const off_t payload_offset = size_of_header_and_load_cmds;

  add_lc_note_main_bin_spec_load_command(load_commands, payload, payload_offset,
                                         empty_uuidstr, input_filevmaddrs[0],
                                         UINT64_MAX);

  for (int i = 1; i < input_filenames.size(); i++) {
    add_lc_note_load_binary_load_command(load_commands, payload, payload_offset,
                                         empty_uuidstr, input_filevmaddrs[i],
                                         UINT64_MAX);
  }

  for (int i = 0; i < input_filenames.size(); i++) {
    add_lc_segment(load_commands, payload, payload_offset, input_filevmaddrs[i],
                   input_filesizes[i]);

    // Copy the contents of the binary into payload.
    int fd = open(input_filenames[i].c_str(), O_RDONLY);
    if (fd == -1) {
      fprintf(stderr, "Unable to open %s for reading\n",
              input_filenames[i].c_str());
      exit(1);
    }
    std::vector<uint8_t> binary_contents;
    for (int j = 0; j < input_filesizes[i]; j++) {
      uint8_t byte;
      read(fd, &byte, 1);
      binary_contents.push_back(byte);
    }
    close(fd);

    size_t cur_payload_size = payload.size();
    payload.resize(cur_payload_size + binary_contents.size());
    slide_macho_binary(binary_contents, input_slides[i]);
    memcpy(payload.data() + cur_payload_size, binary_contents.data(),
           binary_contents.size());
  }

  struct mach_header_64 mh;
  mh.magic = MH_MAGIC_64;
  mh.cputype = cputype;

  mh.cpusubtype = cpusubtype;
  mh.filetype = MH_CORE;
  mh.ncmds = load_commands.size();
  mh.sizeofcmds = size_of_load_commands;
  mh.flags = 0;
  mh.reserved = 0;

  FILE *f = fopen(output_corefile_name, "w");

  if (f == nullptr) {
    fprintf(stderr, "Unable to open file %s for writing\n",
            output_corefile_name);
    exit(1);
  }

  fwrite(&mh, sizeof(mh), 1, f);

  for (const auto &lc : load_commands)
    fwrite(lc.data(), lc.size(), 1, f);

  fwrite(payload.data(), payload.size(), 1, f);

  fclose(f);
}