File: snowdrop.c

package info (click to toggle)
snowdrop 0.02b-17
links: PTS, VCS
area: main
in suites: forky, sid
size: 312 kB
sloc: ansic: 2,251; makefile: 288
file content (847 lines) | stat: -rw-r--r-- 24,636 bytes
/*

   snowdrop - text watermarking and watermark recovery
   ---------------------------------------------------

   Copyright (C) 2002 by Michal Zalewski <lcamtuf@coredump.cx>

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   As a special exception, this program may be linked with the
   OpenSSL library, despite that library's more restrictive license.

   This file implements language-independent watermark injection and
   recovery.

*/

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <ctype.h>
#include <assert.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <time.h>
#ifdef USE_OPENSSL
#include <openssl/md5.h>
#else
#include <md5global.h>
#include <md5.h>
#define MD5_Init   MD5Init
#define MD5_Final  MD5Final
#define MD5_Update MD5Update
#endif /* USE_OPENSSL */

#include <string.h>

#include "language.h"


// Upper bound on the per-document atom counter kept by add_sig; it also
// sizes the capseq[] table.
#define MAXATOMS        100000  // Max number of words in a document.

// During resynchronization in extract_sig, an unmatched atom whose four
// channel sizes add up to more than this is skipped.
#define EXPENSELIMIT    4	// Atoms with higher storage capacity are
			        // too risky and expensive for resyncs.

// extract_sig reports the files as synchronized once more than this many
// atoms in a row have matched.
#define MINSYNCLEN 	10	// Minimum number of matching atoms to
				// establish synchronization.

/*
void dump_binstream(const char* str,int blen) {
  int off=0;
  for (off=0;off<blen;off++) {
    int byte,bit;
    byte=off / 8;
    bit=off % 8;
    if (!(off % 32)) debug(" ");
    if (str[byte] & (1<<(7-bit))) debug("1"); else debug("o");
  }
  debug("\n");
}
*/

// Ugly, sure.
/*
 * Rotate the low `bits` bits of `x` to the right by `cnt` positions.
 *
 * The rotation is done one bit at a time: the bit shifted out at position 0
 * is re-inserted at position bits-1. A non-positive `cnt` returns `x`
 * unchanged; `cnt == bits` is a full rotation and also returns `x`.
 *
 * The plain prototype below is deliberate. Under C99/C11 rules a function
 * whose every file-scope declaration says `inline` (without `extern`) is
 * only an inline definition and emits no external symbol, so any call the
 * compiler chooses not to inline fails at link time. Declaring the function
 * once without `inline` makes this definition the external one while
 * keeping the inlining hint and the external linkage.
 */
unsigned long long ror(unsigned long long x,int cnt,int bits);

inline unsigned long long ror(unsigned long long x,int cnt,int bits) {
  int i;
  for (i=0;i<cnt;i++) {
    int carry=(int)(x & 1);
    x >>= 1;
    if (carry) x |= (((unsigned long long)1) << (bits-1));
  }
  return x;
}

/*
 * Rotate the low `bits` bits of `x` to the left by `cnt` positions,
 * expressed as a right rotation by bits-cnt.
 *
 * The plain prototype is deliberate: without a non-inline declaration a
 * C99/C11 compiler treats this as an inline-only definition and emits no
 * external symbol, which breaks linking whenever a call is not inlined.
 */
unsigned long long rol(unsigned long long x,int cnt,int bits);

inline unsigned long long rol(unsigned long long x,int cnt,int bits) {
  return ror(x,bits-cnt,bits);
}


// Program name as invoked (argv[0]); stored by main() and printed by usage().
static char* argv0;

// Set to 1 by main() when the -6 option is given (64-bit watermark mode).
int   use_64bit;
// Watermark width in bits: 32 by default, 64 after -6.
int   nb=32;
// Same value as nb, kept as a float for the redundancy ratios that are printed.
float nbf=32;

/*
 * Print the command-line synopsis, a description of the three operating
 * modes and of the -6 switch, then the module's own help text, and
 * terminate the process with exit status 0.
 */
static void usage(void) {
  const char* prog=argv0;

  /* Synopsis: one line per calling convention. */
  debug("Usage: %s [ -6 ] -e origfile newfile\n"
        "       %s [ -6 ] -l\n"
        "       %s [ -6 ] -i origfile newfile \"Recipient\" [ \"Comment\" ] \n\n",
        prog,
        prog,
        prog);

  /* Long description of each mode. */
  debug("First method of calling the program (with -e option) enables watermark\n"
        "extraction mode. In this mode, file passed as a first parameter must be\n"
        "the original document used to generate second file (or its portions).\n\n"

        "Second method (-l) simply lists the contents of the watermark database\n"
        "for the module you're now running.\n\n"

        "Third method (-i) enables watermark injection mode. File 'origfile' is\n"
        "modified and saved as 'newfile'. Mandatory parameter is the recipient\n"
        "identifier. Optional comment can be added for your reference.\n\n"

        "Additional parameter -6 enables strong, 64-bit watermarking that is suitable\n"
        "for providing public documentation of watermarked document abuse.\n\n");

  /* Module-specific help comes last, followed by a blank line. */
  module_help();
  debug("\n");

  exit(0);
}

// Receives the 16-byte MD5 digest of the original file (as four 32-bit
// words); pairs of words are XOR-ed to form the two-part file signature.
unsigned int result[4];
// Receives the MD5 digest from which add_sig derives the watermark value.
unsigned int wmark[4];
// MD5 context shared by every digest computed in this file.
MD5_CTX kuku;




/*
 * Open /dev/urandom, read one 4-byte word from it and close it again.
 * Any failure to open or read is fatal.
 */
static int urandom_word(void) {
  int word;
  int fd=open("/dev/urandom",O_RDONLY);
  if (fd<0) fatal("cannot open /dev/urandom");
  if (read(fd,&word,4)!=4) fatal("cannot read from /dev/urandom");
  close(fd);
  return word;
}

/*
 * Return a random 32-bit value taken from /dev/urandom.
 *
 * Two words are fetched through two separate open/read/close cycles; if
 * both come back identical the device is declared deterministic and the
 * program aborts. The second word is the one returned.
 */
static unsigned int get_random(void) {
  int first=urandom_word();
  int second=urandom_word();
  if (first == second) fatal("/dev/urandom is deterministic");
  return second;
}

/* One record of the watermark database, as kept in memory. */
struct dbent {
  char* fn;               /* source file name                         */
  char* rcpt;             /* recipient identifier                     */
  char* cmt;              /* free-form comment                        */
  int f1;                 /* source file signature, first half        */
  int f2;                 /* source file signature, second half       */
  int m1;                 /* magic value, first half                  */
  int m2;                 /* magic value, second half                 */
  unsigned long long wm;  /* the watermark itself                     */
  int tim;                /* creation time, seconds as given by time() */
};

// Capacity of the in-memory database table below.
#define MAXDB 10240

// Entries loaded by load_database(); the first dbtop slots are valid.
struct dbent db[MAXDB];
int dbtop;

// Nonzero once load_database() has skipped at least one entry because its
// conf MD5 differs from the one reported by md5_importantstuff().
static int gotothermd5;

static void load_database(void) {
  int line=0;
  FILE* foo;
  char buf[MAXBUF];
  sprintf(buf,"%s/.snowdrop/",getenv("HOME"));
  strcat(buf,"database");
  foo=fopen(buf,"r");
  if (!foo) fatal("cannot open database file %s",buf);
  debug("[*] Loading database for module %s, conf %08x...\n",TARGETLANG,md5_importantstuff());
  while (fgets(buf,MAXBUF,foo)) {
    char modname[MAXBUF],cmt[MAXBUF],rcpt[MAXBUF],fn[MAXBUF];
    int tim,dict,f1,f2,m1,m2;
    char type;
    unsigned long long wmark;
    if (sscanf(buf,"%c:[%[a-z]] %u\xad%x\xad%[ -~]\xad%x\xad%x\xad%[ -~]\xad%x\xad%x\xad%Lx"
                   "\xad%[ -~]",&type,modname,&tim,&dict,fn,&f1,&f2,rcpt,&m1,&m2,&wmark,cmt)!=12) fatal("malformed database line %d",line+1);
    line++;
    if (type=='3' && use_64bit) continue;
    if (type=='6' && !use_64bit) continue;
    if (strcmp(TARGETLANG,modname)) continue;
    if (dict != md5_importantstuff()) { gotothermd5=1; continue; }
    db[dbtop].fn=strdup(fn);
    if (!db[dbtop].fn) fatal("not enough memory");
    db[dbtop].rcpt=strdup(rcpt);
    if (!db[dbtop].rcpt) fatal("not enough memory");
    db[dbtop].cmt=strdup(cmt); 
    if (!db[dbtop].cmt) fatal("not enough memory");
    db[dbtop].tim=tim;
    db[dbtop].f1=f1;
    db[dbtop].f2=f2;
    db[dbtop].m1=m1;
    db[dbtop].m2=m2;
    db[dbtop].wm=wmark;
    dbtop++;    
  }
  if (dbtop) {
    debug("[+] Watermarks database: %d lines, loaded %d entries.\n",line,dbtop);
    if (gotothermd5) debug("[!] Skipped %d entries created from other conf files.\n",gotothermd5);
  } else {
    if (!gotothermd5) debug("[-] The database for this module is empty.\n");
    else debug("[-] No entries matching current conf file. Try passing different\n"
               "    configuration files as parameters to this module.\n");
    fatal("no data loaded");
  }

}


/*
 * Load the database and print every entry that was kept, then exit(0).
 *
 * The watermark is shown with 16 hex digits in 64-bit mode and 8 digits
 * otherwise; everything else is printed identically in both modes.
 */
static void list_database(void) {
  int i;
  load_database();
  debug("\n");
  for (i=0;i<dbtop;i++) {
    char ctim[100];
    char* nl;
    // ctime() needs a real time_t object; the stored value is a 32-bit
    // count written with %u, so widen it as unsigned.
    time_t when=(time_t)(unsigned int)db[i].tim;
    const char* stamp=ctime(&when);

    snprintf(ctim,sizeof(ctim),"%s",stamp?stamp:"<unknown>");
    nl=strchr(ctim,'\n');
    if (nl) *nl=0;

    debug("-- Entry %d --\n"
          "  Source file : %s\n"
          "  Time        : %s\n"
          "  Recipient   : %s\n"
          "  Comment     : %s\n"
          "  Source MD5  : %08x-%08x\n"
          "  Magic value : %08x-%08x\n"
          "  Watermark   : %0*llx\n\n",
          i,db[i].fn,ctim,db[i].rcpt,db[i].cmt,
          db[i].f1,db[i].f2,db[i].m1,db[i].m2,
          use_64bit?16:8,db[i].wm);

  }

  debug("[+] Database dump completed.\n");
  exit(0);

}


/*
 * OR the first `blen` bits of `what` into the bit stream `ptr`, starting
 * at bit offset `boff`.
 *
 * Bits are numbered least-significant first within each byte, for both
 * source and destination. Only set bits are transferred: a zero bit in
 * `what` leaves the corresponding destination bit as it was.
 */
void append_binary(unsigned char* ptr,int boff,unsigned char* what,int blen) {
  int src;
  for (src=0;src<blen;src++) {
    int dst=boff+src;
    if (!(what[src/8] & (1<<(src % 8)))) continue;
    ptr[dst/8] |= (1<<(dst % 8));
  }
}


/*
 * Read 64 consecutive bits from the bit stream `ptr`, starting at bit
 * offset `boff`, and return them packed into an unsigned long long.
 *
 * Stream bits are numbered least-significant first within each byte.
 * Bit i of the window lands in byte i/8, bit i%8 of an 8-byte buffer whose
 * bytes are then copied into the result, so the numeric value depends on
 * the host byte order exactly as before. The copy goes through memcpy()
 * because reading a char array through an unsigned long long pointer is
 * undefined behaviour (aliasing and alignment).
 */
unsigned long long get_binary(unsigned char* ptr,int boff) {
  unsigned char ret[8]={0};
  unsigned long long val;
  int i;
  for (i=0;i<64;i++) {
    int pbit=boff+i;
    if (ptr[pbit/8] & (1<<(pbit % 8))) ret[i/8] |= (unsigned char)(1<<(i % 8));
  }
  memcpy(&val,ret,sizeof(val));
  return val;
}

// Synchronization flag maintained by extract_sig(): set to 1 once more than
// MINSYNCLEN atoms in a row have matched, cleared to 0 when an atom fails to
// match. It is not read anywhere in this file.
// NOTE(review): presumably consumed by the language module - confirm.
int synced=1;


/*
 * Move `pos` back to the nearest stream position whose offset within a
 * `width`-bit block equals `phase`. When no such position exists at or
 * below `pos` (pos is smaller than phase), return `phase` itself, i.e. the
 * first valid position, instead of walking below zero forever.
 */
static int align_stream_pos(int pos,int phase,int width) {
  while (pos>0 && (pos % width) != phase) pos--;
  if ((pos % width) != phase) pos=phase;
  return pos;
}

/*
 * Render a database timestamp as text without the trailing newline that
 * ctime() appends. The stored value is a 32-bit count written with %u, so
 * it is widened as unsigned into a real time_t before the call.
 */
static void entry_time_text(int tim,char* out,size_t outlen) {
  time_t when=(time_t)(unsigned int)tim;
  const char* txt=ctime(&when);
  char* nl;
  snprintf(out,outlen,"%s",txt?txt:"<unknown>");
  nl=strchr(out,'\n');
  if (nl) *nl=0;
}

/*
 * Watermark extraction (-e).
 *
 *   orig - path of the original, unmarked document
 *   mod  - path of the (possibly damaged) watermarked document
 *
 * Steps:
 *   1. Load both files and the database, and compute the MD5 signature of
 *      the original.
 *   2. Walk the original once to measure the capacity of each of the four
 *      channels, making the same set_value(foo,rand() % q,...) calls as
 *      add_sig does in its first pass.
 *   3. Walk original and watermarked atoms side by side, collecting the
 *      bits each atom carries into one buffer per channel. When an atom
 *      fails to match, scan the watermarked file for a place to resume,
 *      skipping over original atoms (but still advancing the channel
 *      positions) until one is found.
 *   4. Merge the four channels into one stream, each aligned to its
 *      channel offset.
 *   5. Compare every 64-bit (or 32-bit) window of the stream with every
 *      database watermark, rotated by the window position ("block walk").
 *      If that fails, look for each watermark byte separately at the
 *      matching byte position ("byte assembly").
 *
 * The function never returns: it exits with 0 on a match or a possible
 * match, with 1 when nothing matched, and through fatal() on errors.
 */
static void extract_sig(char* orig,char* mod) {
  int oncesynced=0;
  int i,siz,siz2,got=0,j;
  char* x, *y;
  int atomcnt=0;
  int bigtop=0;
  char* foo;
  char gotsomething=0;
  int cap[4]={0,0,0,0};
  int good_cnt=0;
  int off[4];
  int dict;
  int tots=0;
  int rd[4]={0,0,0,0},trd=0;

  // Read channel data.
  unsigned char* chan[4];
  int chpos[4]={0,0,0,0};
  unsigned char* bigchan;
  int bigpos=0;

  i=open(orig,O_RDONLY);
  if (i<0) fatal("cannot open input file %s",orig);
  siz=lseek(i,0,SEEK_END);
  lseek(i,0,SEEK_SET);
  if (siz<1) fatal("input file of zero length");
  x=malloc(siz+1);
  if (!x) fatal("not enough memory to load input file");
  if (read(i,x,siz)!=siz) fatal("cannot read input file %s",orig);
  x[siz]=0;
  close(i);

  i=open(mod,O_RDONLY);
  if (i<0) fatal("cannot open watermarked file %s",mod);
  siz2=lseek(i,0,SEEK_END);
  lseek(i,0,SEEK_SET);
  if (siz2<1) fatal("watermarked file of zero length");
  y=malloc(siz2+1);
  if (!y) fatal("not enough memory to load watermarked file");
  if (read(i,y,siz2)!=siz2) fatal("cannot read watermarked file %s",mod);
  y[siz2]=0;
  close(i);

  set_original(x);
  set_watermarked(y);
  load_database();

  MD5_Init(&kuku);
  MD5_Update(&kuku,x,siz);
  MD5_Final((char*)result,&kuku);
  dict=md5_importantstuff();

  debug("[+] Input file loaded successfully.\n");
  debug("  Target    : %s [%s]\n",get_langdesc(),TARGETLANG);
  debug("  Location  : %s\n",orig);
  debug("  Size      : %d bytes\n",siz);
  debug("  Signature : %08x-%08x\n",result[0] ^ result[1],result[2] ^ result[3]);
  debug("  Conf MD5  : %08x\n",dict);

  // First pass over the original: measure per-channel capacity.
  while ((foo=get_orig_atom())) {
    int j;
    for (j=0;j<4;j++) {
      int q=get_storage(foo,3-j);
      // Commit to our choices.
      if (q>0) set_value(foo,rand() % q,3-j);
      cap[3-j]+=q; tots+=q;
    }
  }

  debug("[+] Calculated input storage redundancy / size:\n");

  debug("  Overall redundancy   : %.02f (%d bits)\n",((float)tots)/nbf,tots);
  debug("  Whitespace channel   : %.02f (%d bits)\n",((float)cap[0])/nbf,cap[0]);
  debug("  Grammar channel      : %.02f (%d bits)\n",((float)cap[1])/nbf,cap[1]);
  debug("  Formatting channel   : %.02f (%d bits)\n",((float)cap[2])/nbf,cap[2]);
  debug("  Substitution channel : %.02f (%d bits)\n",((float)cap[3])/nbf,cap[3]);

  // Create per-channel storage space. append_binary() only ever ORs bits
  // in, so the buffers have to start out zeroed.
  chan[0]=calloc(1,cap[0]/8+8);
  if (!chan[0]) fatal("not enough memory");
  chan[1]=calloc(1,cap[1]/8+8);
  if (!chan[1]) fatal("not enough memory");
  chan[2]=calloc(1,cap[2]/8+8);
  if (!chan[2]) fatal("not enough memory");
  chan[3]=calloc(1,cap[3]/8+8);
  if (!chan[3]) fatal("not enough memory");

  // The merged stream may start up to nb-1 bits late (see
  // align_stream_pos) and the block walk reads a 64-bit window starting at
  // its last position, hence the extra slack.
  bigchan=calloc(1,tots/8+24);
  if (!bigchan) fatal("not enough memory");

  off[0]=0;
  off[1]=cap[0]>=nb?(nb/4):cap[0];
  off[2]=cap[1]>=nb?(nb/2):((off[1]+cap[1])%nb);
  off[3]=cap[2]>=nb?(3*nb/4):((off[2]+cap[2])%nb);
  debug("  Channel offsets are  : %d, %d, %d, %d\n",off[0],off[1],off[2],off[3]);

  for (i=0;i<dbtop;i++) {
    if (db[i].f1 == (result[0] ^ result[1]))
      if (db[i].f2 == (result[2] ^ result[3])) got++;
  }

  if (!got) fatal("No entries for this file / config in the database");
  debug("[+] Found %d record(s) for this file / config in the database.\n",got);

  set_original(x);

  // This really should be done in Prolog. Duh. Duh.

  // Second pass: walk both files in step and collect channel bits.
  while ((foo=get_orig_atom())) {
    int S=0;
    char* zoo=get_water_atom();
    int z,si[4],va[4];
    atomcnt++;
    if (good_cnt > MINSYNCLEN) {
      // Remember where we are in the watermarked file; a later resync
      // starts scanning from here.
      S=get_water_pos();
      synced=1;
      if (atomcnt <= MINSYNCLEN+2) {
        if (good_cnt==MINSYNCLEN+1) debug("[+] Files synchronized from the beginning, we're lucky.\n");
        oncesynced=1;
      } else {
        if (good_cnt==MINSYNCLEN+1) debug(" done!\n[+] Files synchronized near atom %d [%d:%d:%d:%d]...\n",atomcnt,chpos[0],chpos[1],chpos[2],chpos[3]);
        oncesynced=1;
      }
    }
    if (!zoo) {
      debug("[!] Watermarked file truncated.\n");
      break;
    }

    z=get_value(foo,zoo,&si[0],&va[0],0);

    if (z) good_cnt++; else {
      synced=0;
      if (good_cnt>MINSYNCLEN)
        debug("[!] Sync lost with the watermarked file near atom %d [%d:%d:%d:%d]: ",atomcnt,chpos[0],chpos[1],chpos[2],chpos[3]);
      else if (!oncesynced) {
        oncesynced=1;
        debug("[!] Files not identical, trying to synchronize...\n");
        debug("[+] Processing: ");
      }
      good_cnt=0;
      // Resync: for each original atom in turn, rescan the watermarked
      // file from position S looking for an atom it matches.
      while (foo) {
        int rskip;
        char* text=foo;
        while (isspace((unsigned char)*text)) text++;
        set_water_pos(S);
        rskip=0;
        while ((zoo=get_water_atom())) {
          // Once this original atom has been ruled out, just drain the
          // remaining watermarked atoms.
          if (rskip) continue;
          z=get_value(foo,zoo,&si[0],&va[0],1);
          if ((!z) && (si[0] + si[1] + si[2] + si[3] > EXPENSELIMIT)) {
            // Item considered too expensive for resync, skipping.
            rskip=1;
            continue;
          }

          if ((!z) && si[3]) {
            // Resyncing at a synonym is way too expensive in most cases.
            rskip=1;
            continue;
          }


          if (z && strlen(text)>1) {
            z=get_value(foo,zoo,&si[0],&va[0],0);
            goto gotsync;
          } else if (z) break;
        }

        // Failed to find any matching water atom. Fake storing
        // some value (just skip some space), and proceed to the next
        // input atom.

        // Let's fake something...
        z=get_value(foo,".SKIPME-PLEASE.",&si[0],&va[0],0);

        for (z=0;z<4;z++) if (si[z]>0) chpos[z]+=si[z];
        foo=get_orig_atom();
        if (!foo || strchr(foo,'\n')) debug(".");

        atomcnt++;
      }

      // Whoopsie! We shouldn't be here that long.
      if (!foo) {
        debug(" EOF\n[!] Failed to resync before the end of the original file.\n");
        goto bailout;
      }
      // Not reached: the loop above only ends with foo == NULL or by
      // jumping to gotsync.
      break;

    }

gotsync:

    // Store the bits this atom carries, channel by channel.
    for (z=0;z<4;z++)
      if (si[z]>0) {
        append_binary(chan[z],chpos[z],(unsigned char*)&va[z],si[z]);
        chpos[z]+=si[z];
        rd[z]+=si[z];
        trd+=si[z];
      }

  }

bailout:

  if (get_water_atom()) debug("[!] Trailing garbage in the watermarked file.\n");


  // Merge the channels into one stream. Each channel is placed at the
  // last position not beyond the current end whose offset within an
  // nb-bit block equals that channel's offset, so consecutive channels may
  // overlap by up to nb-1 bits.
  append_binary(bigchan,0,chan[0],chpos[0]);
  bigtop=bigpos=chpos[0];
  bigpos=align_stream_pos(bigpos,off[1],nb);

  append_binary(bigchan,bigpos,chan[1],chpos[1]);
  bigpos+=chpos[1];
  if (bigpos>bigtop) bigtop=bigpos; else bigpos=bigtop;
  bigpos=align_stream_pos(bigpos,off[2],nb);

  append_binary(bigchan,bigpos,chan[2],chpos[2]);
  bigpos+=chpos[2];
  if (bigpos>bigtop) bigtop=bigpos; else bigpos=bigtop;
  bigpos=align_stream_pos(bigpos,off[3],nb);

  append_binary(bigchan,bigpos,chan[3],chpos[3]);
  bigpos+=chpos[3];
  if (bigpos>bigtop) bigtop=bigpos; else bigpos=bigtop;

  debug("[+] Successfully read %d bits of data of %d expected.\n",trd,tots);
  debug("[+] Constructed an uniform stream of %d bits.\n",bigpos);

  if (trd)
    debug("  Overall retrieved    : %d bits (%0.02f%%)\n",trd,((float)(trd))*100.0/((float)tots));
  else
    debug("  Overall retrieved    : 0 bits (n/a)\n");

  if (cap[0])
    debug("  Whitespace channel   : %d bits (%0.02f%%)\n",rd[0],((float)(rd[0]))*100.0/((float)cap[0]));
  else
    debug("  Whitespace channel   : 0 bits (n/a)\n");

  if (cap[1])
    debug("  Grammar channel      : %d bits (%0.02f%%)\n",rd[1],((float)(rd[1]))*100.0/((float)cap[1]));
  else
    debug("  Grammar channel      : 0 bits (n/a)\n");

  if (cap[2])
    debug("  Formatting channel   : %d bits (%0.02f%%)\n",rd[2],((float)(rd[2]))*100.0/((float)cap[2]));
  else
    debug("  Formatting channel   : 0 bits (n/a)\n");

  if (cap[3])
    debug("  Substitution channel : %d bits (%0.02f%%)\n",rd[3],((float)(rd[3]))*100.0/((float)cap[3]));
  else
    debug("  Substitution channel : 0 bits (n/a)\n");

  if (trd<nb) {
    debug("This is not enough to recover the %d-bit watermark.\n",nb);
    fatal("not enough data collected");
  }

  debug("[*] Attempting block walk recovery...\n");

  // Try every bit position: the window at position i should equal the
  // watermark rotated right by i modulo nb.
  i=0;
  while (i<=bigpos) {
    unsigned long long wmark;
    int j;
    wmark=get_binary(bigchan,i);
    if (!use_64bit) wmark &= 0xffffffff;
    for (j=0;j<dbtop;j++) {
      if ( wmark==ror(db[j].wm,i % nb,nb) ) {
        char ctim[100];
        entry_time_text(db[j].tim,ctim,sizeof(ctim));
        debug("\n[+] This document matches entry %d (channel offset %d):\n"
              "  Source file : %s\n"
              "  Time        : %s\n"
              "  Recipient   : %s\n"
              "  Comment     : %s\n"
              "  Source MD5  : %08x-%08x\n"
              "  Magic value : %08x-%08x\n"
              "  Watermark   : %016llx\n\n",
            j,i,db[j].fn,ctim,db[j].rcpt,db[j].cmt,
            db[j].f1,db[j].f2,db[j].m1,db[j].m2,db[j].wm);
        debug("Matching document found, exiting.\n");
        exit(0);
      }
    }
    i++;
  }

  debug("[*] Attempting byte assembly recovery...\n");

  // For each signature
  for (j=0;j<dbtop;j++) {
    int f,gotpiece=0;
    i=0;
    // For every byte of the signature
    for (f=0;f<nb/8;f++) {
      // Check if we can find this byte anywhere at a proper offset...
      // Don't go back - we do not reset i.
      while (i<(bigpos)/8) {
        unsigned long long wmark;
        wmark=bigchan[i+f];
        if (wmark == ((db[j].wm >> 8*f) & 0xff)) { gotpiece++; break; }
        i+=nb/8;
      }
    }
    // Got all pieces of the puzzle?
    if (gotpiece==nb/8) {
        char ctim[100];
        entry_time_text(db[j].tim,ctim,sizeof(ctim));
        debug("\n[+] This document possibly matches fragmented entry %d:\n"
              "  Source file : %s\n"
              "  Time        : %s\n"
              "  Recipient   : %s\n"
              "  Comment     : %s\n"
              "  Source MD5  : %08x-%08x\n"
              "  Magic value : %08x-%08x\n"
              "  Watermark   : %016llx\n",
            j,db[j].fn,ctim,db[j].rcpt,db[j].cmt,
            db[j].f1,db[j].f2,db[j].m1,db[j].m2,db[j].wm);
        gotsomething=1;
    }
  }

  if (gotsomething) {
    debug("\nPossibly matching document found, exiting.\n");
    exit(0);
  }

  debug("[-] I am sorry. Unable to find any matching document.\n");
  exit(1);

}



/*
 * Return a heap copy of `s` that is safe to store as one database field.
 *
 * load_database() parses text fields with %[ -~], which accepts only
 * printable ASCII and needs at least one character. So: a NULL or empty
 * string becomes "<none>", a newline becomes a space, and every other
 * byte outside ' '..'~' (including the 0xad field separator) becomes '-'.
 * The caller frees the result.
 */
static char* clean_db_field(const char* s) {
  char* copy;
  unsigned char* p;
  if (!s || !*s) s="<none>";
  copy=strdup(s);
  if (!copy) fatal("not enough memory");
  for (p=(unsigned char*)copy;*p;p++) {
    if (*p=='\n') *p=' ';
    else if (*p<' ' || *p>'~') *p='-';
  }
  return copy;
}

/*
 * Append one record to $HOME/.snowdrop/database, creating the directory
 * if needed.
 *
 *   dict   - conf MD5 of the module configuration
 *   fn     - source file name
 *   f1,f2  - two halves of the source file signature
 *   rcpt   - recipient identifier
 *   cmt    - comment, may be NULL
 *   m1,m2  - the two magic values
 *   wmark  - the watermark
 *
 * The record type is '6' in 64-bit mode and '3' otherwise; the current
 * time is stored with it. Text fields are written from sanitized copies,
 * so the caller's strings are left untouched. Fatal when HOME is unset,
 * the path is too long, or the file cannot be opened or written.
 */
static void add_database(int dict,char* fn,int f1,int f2,char* rcpt,char* cmt,int m1,int m2,long long wmark) {
  FILE* foo;
  char buf[1024];
  char *cfn,*crcpt,*ccmt;
  const char* home=getenv("HOME");
  int n;

  if (!home) fatal("HOME is not set, cannot locate the database");
  n=snprintf(buf,sizeof(buf),"%s/.snowdrop/",home);
  // Leave room for the "database" suffix appended below.
  if (n<0 || (size_t)n+sizeof("database")>sizeof(buf)) fatal("database path too long");
  mkdir(buf,0700);   // an already existing directory is fine
  strcat(buf,"database");
  foo=fopen(buf,"a");
  if (!foo) fatal("cannot open database file %s",buf);

  // Some chars are obviously not OK...
  cfn=clean_db_field(fn);
  crcpt=clean_db_field(rcpt);
  ccmt=clean_db_field(cmt);

  fprintf(foo,"%c:[%s] %u\xad%x\xad%s\xad%x\xad%x\xad%s\xad%x\xad%x\xad%llx\xad%s\n",use_64bit?'6':'3',TARGETLANG,(unsigned int)time(0),(unsigned int)dict,cfn,(unsigned int)f1,(unsigned int)f2,crcpt,(unsigned int)m1,(unsigned int)m2,(unsigned long long)wmark,ccmt);
  if (fclose(foo)) fatal("cannot write to database file %s",buf);

  free(cfn);
  free(crcpt);
  free(ccmt);
}


// Storage capacity recorded by add_sig() for every atom and each of the
// four domains during its first pass; the second pass checks that
// get_storage() still reports the same values. Indexed [atom][domain].
static char capseq[MAXATOMS][4];
// Running atom counter used by add_sig() to index capseq[].
static int vc;

/*
 * Watermark injection (-i).
 *
 *   orig - path of the document to watermark
 *   mod  - path of the output file (created or truncated, mode 0600)
 *   to   - recipient identifier
 *   comm - optional comment, may be NULL
 *
 * Steps:
 *   1. Load the original and compute its MD5 signature.
 *   2. Draw two random magic values and derive the watermark from the MD5
 *      of both values followed by the recipient string; the digest is
 *      folded to 32 bits, or to 64 bits in -6 mode.
 *   3. Record the watermark in the database.
 *   4. First pass over the atoms: measure per-channel capacity and
 *      remember each atom's capacity in capseq[].
 *   5. Second pass: for each atom and domain, store the next q bits of
 *      the watermark (rotated to the channel's current offset) through
 *      set_value(), and write the resulting atom to the output file.
 *
 * The function never returns: exit(0) on success, fatal() otherwise. When
 * fewer than nb bits could be embedded, the output file is removed first.
 */
static void add_sig(char* orig,char* mod,char* to,char* comm) {
  char* x;
  char* foo;
  int cap[4]={0,0,0,0},tots=0;
  long long wm;
  unsigned int dict;
  unsigned int totstor=0;
  int writ=0;
  int i,siz,m1,m2;
  int off[4];
  i=open(orig,O_RDONLY);
  if (i<0) fatal("cannot open input file %s",orig);
  siz=lseek(i,0,SEEK_END);
  lseek(i,0,SEEK_SET);
  if (siz<1) fatal("input file of zero length");
  x=malloc(siz+1);
  if (!x) fatal("not enough memory to load input file");
  if (read(i,x,siz)!=siz) fatal("cannot read input file %s",orig);
  x[siz]=0;
  close(i);
  i=open(mod,O_RDWR|O_TRUNC|O_CREAT,0600);
  if (i<0) fatal("cannot open output file %s",mod);

  set_original(x);

  MD5_Init(&kuku);
  MD5_Update(&kuku,x,siz);
  MD5_Final((char*)result,&kuku);
  m1=get_random();
  m2=get_random();

  MD5_Init(&kuku);
  MD5_Update(&kuku,&m1,sizeof(int));
  MD5_Update(&kuku,&m2,sizeof(int));
  // Hash the whole recipient string. sizeof(strlen(to)) would be the size
  // of a size_t, not the length of the string.
  MD5_Update(&kuku,to,strlen(to));
  MD5_Final((char*)wmark,&kuku);

  if (!use_64bit)
    wm=wmark[0]^wmark[1]^wmark[2]^wmark[3];
  else
    // Assemble in an unsigned type: shifting a set bit into the sign
    // position of a signed long long is undefined.
    wm=(long long)((((unsigned long long)(wmark[0]^wmark[1])) << 32) | (wmark[2]^wmark[3]));

  dict=md5_importantstuff();

  debug("[+] Input file loaded successfully.\n");
  debug("  Location  : %s\n",orig);
  debug("  Size      : %d bytes\n",siz);
  debug("  Signature : %08x-%08x\n",result[0] ^ result[1],result[2] ^ result[3]);
  debug("  Recipient : %s\n",to);
  debug("  Comment   : %s\n",comm?comm:"<none>");
  debug("  Magic     : %08x-%08x\n",m1,m2);
  if (!use_64bit)
    debug("  Watermark : %08llx\n",(unsigned long long)wm);
  else
    debug("  Watermark : %016llx\n",(unsigned long long)wm);

  debug("  Conf MD5  : %08x\n",dict);

  add_database(dict,orig,result[0]^result[1],result[2]^result[3],to,comm,m1,m2,wm);

  // First pass: measure capacity and remember it per atom.
  vc=0;
  while ((foo=get_orig_atom())) {
    int j;
    vc++;
    if (vc>=MAXATOMS) fatal("file too big - MAXATOMS exceeded");
    for (j=0;j<4;j++) {
      int q=get_storage(foo,3-j);
      cap[3-j]+=q; tots+=q;
      // Commit to our choices.
      if (q>0) set_value(foo,rand() % q,3-j);
      capseq[vc][3-j]=q;
    }

  }

  debug("[*] Computed capacity: %d bits overall.\n",tots);
  debug("  Target specification : %s [%s]\n",get_langdesc(),TARGETLANG);

  if (tots < nb) fatal("File capacity too low to carry the watermark (need %d bits)",nb);

  if (tots >= nb*2) debug("  Overall redundancy   : %.02f (%d bits)\n",((float)tots)/nbf,tots);
  else debug("  WARNING: capacity too low, watermark will not be redundant!\n");

  debug("  Whitespace channel   : %.02f (%d bits)",((float)cap[0])/nbf,cap[0]);
  if (tots >= nb*2 && cap[0] < nb*2)  debug(" - NOT redundant!");
  debug("\n");

  debug("  Grammar channel      : %.02f (%d bits)",((float)cap[1])/nbf,cap[1]);
  if (tots >= nb*2 && cap[1] < nb*2)  debug(" - NOT redundant!");
  debug("\n");

  debug("  Formatting channel   : %.02f (%d bits)",((float)cap[2])/nbf,cap[2]);
  if (tots >= nb*2 && cap[2] < nb*2)  debug(" - NOT redundant!");
  debug("\n");

  debug("  Substitution channel : %.02f (%d bits)",((float)cap[3])/nbf,cap[3]);
  if (tots >= nb*2 && cap[3] < nb*2)  debug(" - NOT redundant!");
  debug("\n");

  off[0]=0;
  off[1]=cap[0]>=nb?(nb/4):cap[0];
  off[2]=cap[1]>=nb?(nb/2):((off[1]+cap[1])%nb);
  off[3]=cap[2]>=nb?(3*nb/4):((off[2]+cap[2])%nb);
  debug("  Channel offsets are  : %d, %d, %d, %d\n",off[0],off[1],off[2],off[3]);

  set_original(x);

  debug("[*] Embedding the watermark...\n");

  // Second pass: embed the watermark and write the atoms out.
  vc=0;
  while ((foo=get_orig_atom())) {
    int j;
    size_t len;
    vc++;
    if (vc>=MAXATOMS) fatal("strange things happening - MAXATOMS exceeded");
    for (j=0;j<4;j++) {
      int q=get_storage(foo,3-j);
      int fox;
      if (q!=capseq[vc][3-j]) {
        debug("[!] Internal module bug: failed expectation in domain %d.\n"
              "    Expected storage %d, got %d for atom %d [%s].\n",3-j,
              capseq[vc][3-j],q,vc,foo);
        fatal("internal bug");
      }
      if (q>0) {
        totstor+=q;
        // Take the next q bits of the watermark at this channel's offset.
        fox=ror(wm,off[3-j],nb);
        fox=fox & ((1<<q)-1);
        // Stupid! fixme!
        // NOTE(review): every copy made here is leaked; it is not freed
        // because it is unclear from this file whether the module keeps
        // the pointer it is handed - confirm before adding a free().
        foo=strdup(set_value(foo,fox,3-j));
        if (!foo) fatal("out of memory");
        off[3-j]+=q;
        if (off[3-j]>=nb) off[3-j]-=nb;
      }

    }
    // A short write is treated like a failed one.
    len=strlen(foo);
    if (write(i,foo,len)!=(ssize_t)len) fatal("cannot write to file");
    writ+=len;
  }
  close(i);

  if (totstor<nb) {
    debug("Failed to embed at least one copy of the watermark (%d bits).\n",nb);
    // Remove the useless output file (not a file named after the recipient).
    unlink(mod);
    fatal("insufficient watermark storage");
  }

  debug("[+] Done. Written %d bytes to file '%s', %d bits embedded.\n",writ,mod,totstor);

  exit(0);

}




/*
 * Entry point: check the type-size assumptions the rest of the file makes,
 * tighten the umask, print the banner, handle the optional leading -6
 * switch and then dispatch on the mode option (-l, -e or -i). Every mode
 * handler terminates the process itself, so reaching the end is an error.
 */
int main(int argc,char** argv) {
  const char* mode;

  assert(sizeof(long long) == 8);
  assert(sizeof(int) == 4);
  assert(sizeof(char) == 1);

  umask(0077);

  argv0=argv[0];

  debug("snowdrop " VER ": text watermarking / watermark recovery by lcamtuf@coredump.cx\n\n");

  /* An optional leading -6 selects 64-bit watermarks and is then dropped. */
  if (argc>1 && strcmp(argv[1],"-6")==0) {
    use_64bit=1;
    nb=64;
    nbf=64;
    argv++;
    argc--;
    debug("[*] Strong 64-bit watermarking mode enabled.\n");
  } else {
    debug("[*] Weak 32-bit watermarking used (use -6 to change it).\n");
  }

  /* Listing takes no further arguments. */
  if (argc==2 && strcmp(argv[1],"-l")==0) list_database();

  /* Both remaining modes need at least two file names. */
  if (argc<4) usage();

  mode=argv[1];
  if (strcmp(mode,"-e")==0) {
    if (argc!=4) usage();
    extract_sig(argv[2],argv[3]);
  } else if (strcmp(mode,"-i")==0) {
    /* Recipient is mandatory, comment optional (argv[5] is NULL if absent). */
    if (argc!=5 && argc!=6) usage();
    add_sig(argv[2],argv[3],argv[4],argv[5]);
  } else {
    usage();
  }

  fatal("Broken Turing machines all over the place");
  return 0;

}


// A joke string. Nothing in this file refers to it.
static const char spell[] = "\n\n\n\n"
"`How many Prolog programmers does it take to change a lightbulb?'\n"
"`No.'\n\n\n\n";