// Cache access-log parsing: reads log lines into lf_entry records.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include "logparse.h"
// Return the next space-delimited field at or after *cursor, NUL-terminate it
// in place and advance *cursor past it. Returns NULL when no field is left.
// Splits exactly like strtok(s, " ") but keeps no hidden static state, so
// parsing here cannot disturb (or be disturbed by) other strtok() users.
static char *lf_next_field(char **cursor)
{
    char *field = *cursor + strspn(*cursor, " ");
    if (*field == '\0') {
        *cursor = field;
        return NULL;
    }
    char *end = field + strcspn(field, " ");
    if (*end != '\0')
        *end++ = '\0';
    *cursor = end;
    return field;
}

// Decide whether a "<tag>/<status>" field names a result that must be
// discarded. The first 4 characters of the tag (the "TCP_" prefix) are skipped
// without being inspected; the remainder must equal one of the rejected names
// exactly. A tag shorter than the prefix cannot match and is not filtered.
static bool lf_is_filtered_tag(const char *ret_code)
{
    static const char *const rejected[] = {
        "MISS", "CLIENT_REFRESH_MISS", "IMS_MISS", "DENIED"
    };
    const size_t prefix_len = 4; // strlen("TCP_")

    const char *tag = ret_code + strspn(ret_code, "/");
    size_t tag_len = strcspn(tag, "/");
    if (tag_len < prefix_len)
        return false;
    tag += prefix_len;
    tag_len -= prefix_len;

    for (size_t i = 0; i < sizeof rejected / sizeof rejected[0]; i++) {
        if (strlen(rejected[i]) == tag_len &&
            memcmp(tag, rejected[i], tag_len) == 0)
            return true;
    }
    return false;
}

// Read the next line from fp and parse it into ne.
//
// Expected space-separated fields, in order:
//   <timestamp> <elapsed ms> <client address> <tag>/<status> <size>
//   <method> <URL> ...            (anything after the URL is ignored)
//
// Fields written on success:
//   ne.rt   - timestamp minus elapsed time (elapsed is divided by 1000)
//   ne.cid  - client address as returned by inet_addr()
//   ne.size - page size
//   ne.url  - new[]-allocated copy of the URL
//   ne.sid  - new[]-allocated copy of the server name taken from the URL
// Nothing in this function releases ne.url / ne.sid after a successful
// return. They are only assigned when 0 is returned; on every other return
// value they are left untouched.
//
// Returns:
//    0  entry parsed and stored in ne
//    1  nothing usable was read: fgets() failed, or the stream is at EOF or
//       in error afterwards (so a final line without a terminating newline
//       is reported as end of input, not parsed)
//   -1  entry must be discarded: too few fields, method is not GET, URL
//       contains '?', URL does not start with "http:", or the tag is one of
//       the filtered *_MISS / DENIED results. ne.rt, ne.cid and ne.size may
//       already have been overwritten.
// Calls abort() when the tag field, the URL field, or the server part of the
// URL is missing.
int lf_get_next_entry(FILE *fp, lf_entry &ne)
{
    // Must be large enough for a cache log line; fgets() stores at most
    // MAXBUF-1 characters, so a longer line would be split across calls.
    char buf[MAXBUF];
    if ((fgets(buf, MAXBUF, fp) == NULL) || feof(fp) || ferror(fp)) {
        return 1;
    }

    char *cursor = buf;

    // <TimeStamp>, <Elapsed Time> and client address
    char *stamp = lf_next_field(&cursor);
    char *elapsed = lf_next_field(&cursor);
    char *client = lf_next_field(&cursor);
    if (stamp == NULL || elapsed == NULL || client == NULL)
        return -1; // truncated line: nothing sensible to parse

    ne.rt = strtod(stamp, NULL);
    u_int32_t lapse = strtoul(elapsed, NULL, 10);
    ne.rt -= (double)lapse/1000.0;
    ne.cid = (u_int32_t)inet_addr(client);

    // Log tag: not stored, only used to filter entries. The filter is applied
    // last so that the abort() checks below still fire first.
    char *ret_code = lf_next_field(&cursor);
    if (ret_code == NULL)
        abort();

    // Page size
    char *size_field = lf_next_field(&cursor);
    if (size_field == NULL)
        return -1;
    ne.size = strtoul(size_field, NULL, 10);

    // Request method, GET only
    char *method = lf_next_field(&cursor);
    if (method == NULL || strcmp(method, "GET") != 0)
        return -1;

    // URL; any URL containing '?' is rejected
    char *url = lf_next_field(&cursor);
    if (url == NULL)
        abort();
    if (strchr(url, '?') != NULL)
        return -1;

    // Locate "<scheme>/.../<server>" without modifying the URL text: skip any
    // leading '/', take the run up to the next '/', and require "http:".
    const char *scheme = url + strspn(url, "/");
    size_t scheme_len = strcspn(scheme, "/");
    if (scheme_len != 5 || memcmp(scheme, "http:", 5) != 0)
        return -1; // not an http request

    const char *host = scheme + scheme_len;
    host += strspn(host, "/");
    size_t host_len = strcspn(host, "/");
    if (host_len == 0)
        abort();

    // Now check the return code
    if (lf_is_filtered_tag(ret_code))
        return -1; // Return negative to discard this entry

    // Entry accepted: only now allocate the copies handed back through ne.
    size_t url_len = strlen(url);
    ne.url = new char[url_len + 1];
    memcpy(ne.url, url, url_len + 1);

    ne.sid = new char[host_len + 1];
    memcpy(ne.sid, host, host_len);
    ne.sid[host_len] = '\0';

    // All the rest are useless, do not parse them
    return 0;
}
|