/* FOREMOST
*
* By Jesse Kornblum
*
* This is a work of the US Government. In accordance with 17 USC 105,
* copyright protection is not available for any work of the US Government.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
*/
//#define DEBUG 1
#ifndef __FOREMOST_H
#define __FOREMOST_H
/* Version information is defined in the Makefile */
/* Author credit string. */
#define AUTHOR "Jesse Kornblum, Kris Kendall, and Nick Mikus"
/* Copyright notice. The adjacent string literals below are joined by the
compiler into a single string.
We use \r\n for newlines as this has to work on Win32. It's redundant for
everybody else, but shouldn't cause any harm. */
#define COPYRIGHT "This program is a work of the US Government. "\
"In accordance with 17 USC 105,\r\n"\
"copyright protection is not available for any work of the US Government.\r\n"\
"This is free software; see the source for copying conditions. There is NO\r\n"\
"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\r\n"
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <dirent.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <time.h>
#include <math.h>
#include <ctype.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <signal.h>
/* For va_arg */
#include <stdarg.h>
/* Platform-specific headers and byte-order setup.
The __LINUX, __SOLARIS, __WIN32 and __MACOSX macros are not defined anywhere
in this header; presumably the build system supplies them -- confirm in the
Makefile. */
#ifdef __LINUX
#include <sys/ioctl.h>
#include <sys/mount.h>
/* u_int64_t is provided as a macro (plain text substitution), not a typedef */
#define u_int64_t unsigned long long
#endif
/* Select the header that describes byte order on each platform */
#ifdef __LINUX
#ifndef __USE_BSD
#define __USE_BSD
#endif
#include <endian.h>
#elif defined (__SOLARIS)
/* Solaris: define the BYTE_ORDER / BIG_ENDIAN / LITTLE_ENDIAN names by hand,
choosing BYTE_ORDER according to whether _BIG_ENDIAN is defined once
<sys/isa_defs.h> has been included */
#define BIG_ENDIAN 4321
#define LITTLE_ENDIAN 1234
#include <sys/isa_defs.h>
#ifdef _BIG_ENDIAN
#define BYTE_ORDER BIG_ENDIAN
#else
#define BYTE_ORDER LITTLE_ENDIAN
#endif
#elif defined (__WIN32)
#include <sys/param.h>
#elif defined (__MACOSX)
#include <machine/endian.h>
#define __U16_TYPE unsigned short
#endif
/* Boolean constants */
#define TRUE 1
#define FALSE 0
/* Number of bytes in one megabyte (1024 * 1024) */
#define ONE_MEGABYTE 1048576
/* RBF - Do we need these type definitions? */
/* Solaris only: supply u_int32_t and u_int64_t as macros */
#ifdef __SOLARIS
#define u_int32_t unsigned int
#define u_int64_t unsigned long long
#endif
/* The only time we're *not* on a UNIX system is when we're on Windows */
#ifndef __WIN32
#ifndef __UNIX
#define __UNIX
#endif /* ifndef __UNIX */
#endif /* ifndef __WIN32 */
/* Settings shared by every non-Windows build */
#ifdef __UNIX
/* Fallback for platforms whose branch above did not define __U16_TYPE */
#ifndef __U16_TYPE
#define __U16_TYPE unsigned short
#endif
#include <libgen.h>
/* If none of the platform headers included so far defined BYTE_ORDER, the
build silently assumes a little-endian machine.
NOTE(review): this will be wrong on an unrecognized big-endian platform. */
#ifndef BYTE_ORDER
#define BIG_ENDIAN 4321
#define LITTLE_ENDIAN 1234
#define BYTE_ORDER LITTLE_ENDIAN
#endif
/* This avoids compiler warnings on older systems */
int fseeko(FILE *stream, off_t offset, int whence);
off_t ftello(FILE *stream);
/* Console presentation constants for UNIX */
#define CMD_PROMPT "$"
#define DIR_SEPARATOR '/'
#define NEWLINE "\n"
#define LINE_LENGTH 74
/* NOTE(review): BLANK_LINE is a single space here. Its name, together with
LINE_LENGTH, suggests a full line of spaces; the whitespace may have been
collapsed at some point -- verify against the upstream source. */
#define BLANK_LINE \
" "
#endif /* #ifdef __UNIX */
/* This allows us to open standard input in binary mode by default
See http://gnuwin32.sourceforge.net/compile.html for more */
#include <fcntl.h>
/* Code specific to Microsoft Windows */
#ifdef __WIN32
/* By default, Windows uses long for off_t. This won't do. We
need an unsigned number at minimum. Windows doesn't have 64 bit
numbers though.
NOTE(review): u_int64_t is nevertheless defined as unsigned __int64 further
down in this block, so the "no 64 bit numbers" remark looks outdated. */
/* The #ifdef/#undef pair only has an effect if off_t has been #defined as a
macro; a typedef named off_t is not affected by it. */
#ifdef off_t
#undef off_t
#endif
#define off_t unsigned long
/* Console presentation constants for Windows */
#define CMD_PROMPT "c:\\>"
#define DIR_SEPARATOR '\\'
#define NEWLINE "\r\n"
#define LINE_LENGTH 72
/* NOTE(review): BLANK_LINE is a single space here. Its name, together with
LINE_LENGTH, suggests a full line of spaces; the whitespace may have been
collapsed at some point -- verify against the upstream source. */
#define BLANK_LINE \
" "
/* It would be nice to use 64-bit file lengths in Windows */
#define ftello ftell
#define fseeko fseek
#ifndef __CYGWIN
#define snprintf _snprintf
#endif
#define u_int32_t unsigned long
/* We create macros for the Windows equivalent UNIX functions.
No worries about lstat to stat; Windows doesn't have symbolic links */
#define lstat(A,B) stat(A,B)
#define u_int64_t unsigned __int64
#ifndef __CYGWIN
/* Note the swapped argument order: realpath(A,B) becomes _fullpath(B,A,...) */
#define realpath(A,B) _fullpath(B,A,PATH_MAX)
#endif
/* Not used in md5deep anymore, but left in here in case I
ever need it again. Win32 documentation searches are evil.
int asprintf(char **strp, const char *fmt, ...);
*/
/* Declarations for basename() and getopt() and its globals, given here by
hand for the Windows build */
char *basename(char *a);
extern char *optarg;
extern int optind;
int getopt(int argc, char *const argv[], const char *optstring);
#endif /* ifdef __WIN32 */
/* On non-glibc systems we have to manually set the __progname variable.
With glibc the variable is only declared here (extern); on every other
system this header itself defines it.
NOTE(review): because the non-glibc branch is a definition without `extern`,
each source file that includes this header gets its own tentative definition.
That only links when the toolchain merges common symbols -- confirm the build
flags, or move the definition into a single .c file. */
#ifdef __GLIBC__
extern char *__progname;
#else
char *__progname;
#endif /* ifdef __GLIBC__ */
/* -----------------------------------------------------------------
Program Defaults
----------------------------------------------------------------- */
/* General-purpose string buffer size. The s_spec.comment array declared
later in this header is sized with MAX_STRING_LENGTH. */
#define MAX_STRING_LENGTH 1024
/* NOTE(review): COMMENT_LENGTH is not referenced anywhere in this header;
check whether any source file still uses it. */
#define COMMENT_LENGTH 64
/* Modes refer to options that can be set by the user.
Each mode is a distinct single bit, so several modes can be OR-ed together
into one value. Every replacement list is parenthesized so that the flag
keeps its value inside a larger expression: without the parentheses,
`mode_verbose + 1` would expand to `1<<1 + 1`, i.e. `1 << 2`. */
#define mode_none 0
#define mode_verbose (1<<1)
#define mode_quiet (1<<2)
#define mode_ind_blk (1<<3)
#define mode_quick (1<<4)
#define mode_write_all (1<<5)
#define mode_write_audit (1<<6)
#define mode_multi_file (1<<7)
/* Fixed limits */
#define MAX_NEEDLES 254
#define NUM_SEARCH_SPEC_ELEMENTS 6
#define MAX_SUFFIX_LENGTH 8
/* NOTE(review): MAX_FILE_TYPES is 100, but the search_spec array defined in
this header has only 50 elements -- confirm which bound the code enforces. */
#define MAX_FILE_TYPES 100
#define FOREMOST_NOEXTENSION_SUFFIX "NONE"
/* The mode_* flags defined above occupy bits 1 through 7; the remaining bits
up to 31 are free for future use. We shouldn't use
bits higher than 31 as Win32 can't go that high. */
/* Default settings and file names */
#define DEFAULT_MODE mode_none
#define DEFAULT_CONFIG_FILE "foremost.conf"
#define DEFAULT_OUTPUT_DIRECTORY "output"
#define AUDIT_FILE_NAME "audit.txt"
/* Horizontal rule made of dashes */
#define FOREMOST_DIVIDER "------------------------------------------------------------------"
/* Numeric identifiers for file types, numbered contiguously from 0 to 28.
NOTE(review): presumably these are the values stored in s_spec.type --
confirm against the code that fills in search_spec. */
#define JPEG 0
#define GIF 1
#define BMP 2
#define MPG 3
#define PDF 4
#define DOC 5
#define AVI 6
#define WMV 7
#define HTM 8
#define ZIP 9
#define MOV 10
#define XLS 11
#define PPT 12
#define WPD 13
#define CPP 14
#define OLE 15
#define GZIP 16
#define RIFF 17
#define WAV 18
#define VJPEG 19
#define SXW 20
#define SXC 21
#define SXI 22
#define CONF 23
#define PNG 24
#define RAR 25
#define EXE 26
#define ELF 27
#define REG 28
/* Byte-size multipliers, each 1024 times the previous one.
Every replacement list is parenthesized so the macro behaves as one value
in any expression (unparenthesized, `x / MEGABYTE` would expand to
`x / 1024 * 1024`).
KILOBYTE, MEGABYTE and GIGABYTE fit in an int and keep type int. TERABYTE
and above do not fit in a 32-bit int, so they are computed in
unsigned long long to avoid signed overflow. */
#define KILOBYTE 1024
#define MEGABYTE (1024 * KILOBYTE)
#define GIGABYTE (1024 * MEGABYTE)
#define TERABYTE (1024ULL * GIGABYTE)
#define PETABYTE (1024ULL * TERABYTE)
#define EXABYTE (1024ULL * PETABYTE)
/* Unit indices, in increasing order of magnitude from bytes (0) to
exabytes (6).
NOTE(review): presumably the argument accepted by units() -- confirm. */
#define UNITS_BYTES 0
#define UNITS_KILOB 1
#define UNITS_MEGAB 2
#define UNITS_GIGAB 3
#define UNITS_TERAB 4
#define UNITS_PETAB 5
#define UNITS_EXAB 6
/* Search strategy identifiers.
NOTE(review): presumably the values of s_spec.searchtype and of the
searchtype parameter of init_bm_table()/bm_search() -- confirm. */
#define SEARCHTYPE_FORWARD 0
#define SEARCHTYPE_REVERSE 1
#define SEARCHTYPE_FORWARD_NEXT 2
#define SEARCHTYPE_ASCII 3
/* Byte-order selectors.
NOTE(review): presumably the `endian` argument of htos()/htoi()/htoll() --
confirm. */
#define FOREMOST_BIG_ENDIAN 0
#define FOREMOST_LITTLE_ENDIAN 1
/* Default chunk size, in megabytes */
#define CHUNK_SIZE 100
/* -----------------------------------------------------------------
State Variable and Global Variables
----------------------------------------------------------------- */
/* Wildcard is a global variable because it's used by very simple
functions that don't need the whole state passed to them.
NOTE(review): this is a definition (no `extern`) placed in a header, so
every source file that includes it gets its own tentative definition. That
only links when the toolchain merges common symbols (GCC 10 and later
default to -fno-common) -- confirm the build flags, or declare it `extern`
here and define it in exactly one .c file. */
char wildcard;
/* Program-wide state. Nearly every function declared in this header
receives it as `f_state *s`. Several fields have matching set_/get_
accessors declared further down. */
typedef struct f_state
{
off_t mode; /* same type as the argument of set_mode()/get_mode(); presumably a mask of mode_* flags -- confirm */
char *config_file; /* see set_config_file()/get_config_file() */
char *input_file; /* see set_input_file()/get_input_file() */
char *output_directory; /* see set_output_directory()/get_output_directory() */
char *start_time; /* see get_start_time() */
char *invocation; /* see get_invocation() */
char *audit_file_name;
FILE *audit_file;
int audit_file_open; /* see set_audit_file_open()/get_audit_file_open() */
int num_builtin;
int chunk_size; /*IN MB*/
int fileswritten;
int block_size; /* see set_block() */
int skip; /* see set_skip() */
int time_stamp;
} f_state;
/* A byte pattern (`value`, `len` bytes long) together with a lookup table
holding one entry per possible unsigned char value (UCHAR_MAX+1 entries).
NOTE(review): the table has the same shape as the `table` parameter of
init_bm_table()/bm_search(); presumably it is filled by init_bm_table() --
confirm. */
typedef struct marker
{
unsigned char* value;
int len;
size_t marker_bm_table[UCHAR_MAX+1];
}marker;
/* Search specification for one file type: its suffix, a header pattern and
a footer pattern (each with a length and a UCHAR_MAX+1 entry lookup table),
up to five additional markers, and bookkeeping fields. */
typedef struct s_spec
{
char* suffix;
int type; /* presumably one of the file-type ids (JPEG, GIF, ...) -- confirm */
u_int64_t max_len;
unsigned char* header;
unsigned int header_len;
size_t header_bm_table[UCHAR_MAX+1];
unsigned char* footer;
unsigned int footer_len;
size_t footer_bm_table[UCHAR_MAX+1];
marker markerlist[5]; /* fixed capacity of five markers */
int num_markers;
int searchtype; /* presumably a SEARCHTYPE_* value -- confirm */
int case_sen;
int found;
char comment[MAX_STRING_LENGTH];/*Used for audit*/
int written; /*used for -a mode*/
}s_spec;
/* NOTE(review): the array holds 50 entries while MAX_FILE_TYPES is 100;
confirm which limit the loading code checks against. Like the other globals
in this header, this is a definition without `extern`, so every including
source file gets its own tentative definition. */
s_spec search_spec[50]; /*ARRAY OF BUILTIN SEARCH TYPES*/
/* Bookkeeping for one input: its name, size and progress counters, the
stdio handle, and a flag for standard input. Within this header only
displayPosition() takes an f_info. */
typedef struct f_info {
char *file_name;
off_t total_bytes;
/* We never use the total number of bytes in a file,
only the number of megabytes when we display a time estimate */
off_t total_megs;
off_t bytes_read;
#ifdef __WIN32
/* Win32 is a 32-bit operating system and can't handle file sizes
larger than 4GB. We use this to keep track of overflows */
off_t last_read;
off_t overflow_count;
#endif
FILE *handle;
int is_stdin;
} f_info;
/* Set if the user hits ctrl-c.
NOTE(review): if this flag is written from a signal handler, the C standard
only guarantees that for objects of type `volatile sig_atomic_t` -- confirm
how it is set. It is also a definition without `extern` in a header, so
every including source file gets its own tentative definition. */
int signal_caught;
/* -----------------------------------------------------------------
Function declarations
----------------------------------------------------------------- */
/* State functions.
All of these take the shared state by pointer, with two exceptions:
get_search_def() receives an f_state by value (a copy), and
init_bm_table() takes no state at all.
get_input_file() returns void despite its name. */
int initialize_state(f_state *s, int argc, char **argv);
void free_state(f_state *s);
char *get_invocation(f_state *s);
char *get_start_time(f_state *s);
int set_config_file(f_state *s, char *fn);
char* get_config_file(f_state *s);
int set_output_directory(f_state *s, char *fn);
char* get_output_directory(f_state *s);
void set_audit_file_open(f_state *s);
int get_audit_file_open(f_state *s);
void set_mode(f_state *s, off_t new_mode);
int get_mode(f_state *s, off_t check_mode);
int set_search_def(f_state *s,char* ft,u_int64_t max_file_size);
void get_search_def(f_state s);
void set_input_file(f_state *s,char* filename);
void get_input_file(f_state *s);
void set_chunk(f_state *s, int size);
/* `table` has one entry per possible byte value (UCHAR_MAX + 1 entries) */
void init_bm_table(unsigned char *needle, size_t table[UCHAR_MAX + 1], size_t len, int casesensitive,int searchtype);
void set_skip(f_state *s, int size);
void set_block(f_state *s, int size);
/* NOTE(review): this guard tests __DEBUG, while the commented-out switch at
the top of this header is spelled DEBUG -- confirm which macro the build
actually defines. */
#ifdef __DEBUG
void dump_state(f_state *s);
#endif
/* The audit file */
int open_audit_file(f_state *s);
/* Takes a format string followed by a variable argument list */
void audit_msg(f_state *s, char *format, ...);
int close_audit_file(f_state *s);
/* Set up our output directory */
int create_output_directory(f_state *s);
/* NOTE(review): presumably writes `len` bytes from `buf` for the file type
described by `needle`; the meaning of t_offset is not visible here --
confirm against the definition. */
int write_to_disk(f_state *s,s_spec * needle,u_int64_t len,unsigned char* buf, u_int64_t t_offset);
int create_sub_dirs(f_state *s);
void cleanup_output(f_state *s);
/* Configuration Files */
int load_config_file(f_state *s);
/* Helper functions */
char *current_time(void);
off_t find_file_size(FILE *f);
/* The caller supplies `buffer`; its required size is not visible here */
char *human_readable(off_t size, char *buffer);
char *units(unsigned int c);
unsigned int chop(char *buf);
void print_search_specs(f_state *s);
int memwildcardcmp(const void *s1, const void *s2,size_t n,int caseSensitive);
int charactersMatch(char a, char b, int caseSensitive);
void printx(unsigned char* buf,int start, int end);
/* Byte-array to integer conversions returning 16-, 32- and 64-bit results.
NOTE(review): `endian` is presumably FOREMOST_BIG_ENDIAN or
FOREMOST_LITTLE_ENDIAN -- confirm. */
unsigned short htos(unsigned char s[],int endian);
unsigned int htoi(unsigned char s[],int endian);
u_int64_t htoll(unsigned char s[],int endian);
int displayPosition(f_state* s,f_info* i,u_int64_t pos);
/* Interface functions
These functions stay the same regardless if we're using a
command line interface or a GUI */
void fatal_error(f_state *s, char *msg);
void print_error(f_state *s, char *fn, char *msg);
/* Takes an already-started va_list rather than `...` */
void print_message(f_state *s, char *format, va_list argp);
void print_stats(f_state *s);
/* Engine */
int process_file(f_state *s);
int process_stdin(f_state *s);
/* Pattern search of `needle` within `haystack`, returning a pointer.
`table` has one entry per possible byte value (UCHAR_MAX + 1 entries), the
same shape init_bm_table() takes.
NOTE(review): the return value on "not found" is not visible here --
confirm against the definition. */
unsigned char *bm_search(unsigned char *needle, size_t needle_len,unsigned char *haystack, size_t haystack_len,
size_t table[UCHAR_MAX + 1], int case_sen,int searchtype);
/* Same parameters as bm_search() plus `start_pos` */
unsigned char *bm_search_skipn(unsigned char *needle, size_t needle_len,unsigned char *haystack, size_t haystack_len,
size_t table[UCHAR_MAX + 1], int casesensitive,int searchtype, int start_pos) ;
/* BUILTIN */
/* Declared inside the include guard, like every other prototype in this
header, so that it is only seen once per translation unit and always after
the f_state and s_spec types it depends on.
NOTE(review): presumably c_offset and f_offset are offsets within the
current chunk and within the input file, and `foundat` points into a buffer
of `buflen` bytes -- confirm against the definition. */
unsigned char* extract_file(f_state *s, u_int64_t c_offset,unsigned char *foundat, u_int64_t buflen, s_spec * needle, u_int64_t f_offset);
#endif /* __FOREMOST_H */