/* Copyright: Matt Mahoney and Phil Chan

This software can be distributed for non-profit academic and research
purposes.  Incorporating part or all of this software for commercial
purposes without the consent of the authors is prohibited.

Aug 3, 2001 (pkc)  - incorporate info on captured (offset 8) and
                     original/on-wire (offset 12) packet length
Jul 31, 2001 (pkc) - suppress printing the model

ta7.cpp - tcpdump anomaly detection for DARPA IDS evaluation.
Like ta1, but uses clusters instead of a hash function.

UNIX:  ta7 1123200 in3* in4* in5* |sort +0.45 -r >ta7.sim
MSDOS: ta7 14400 in23 |sort /+46 /r >ta7.sim

Training time is in seconds (e.g. 13 days or 4 hours).  Files are tcpdump
files in chronological order.  Output is in unsorted .sim format for
eval.cpp, e.g. ID (0), date, time, victim IP address, and a score from
0 to 1.

 0 04/06/1999 08:59:16 172.016.112.194 0.631169 #

The Ethernet, IP, TCP, UDP, and ICMP packet headers are divided into fields
of 1-4 bytes.  During training, the set of possible values for each field
is recorded in a set of up to K contiguous clusters.  During testing, an
anomaly is detected if a field has a value not in one of the clusters.
The score of a field is tn/r, where the field was seen n times in training,
there were r anomalies in training, and it was t seconds since the last
anomaly was seen in this field.  The score output is
-0.6 + 0.1 log10 SUM tn/r, summed over the anomalous fields.
*/

// NOTE(review): the copy of this file that was reviewed had lost every
// "<identifier ... >" span (include targets, most loop bounds and several
// conditions in main) and had "&ethernet" mangled.  Passages that had to be
// reconstructed are marked NOTE(review) below; confirm them against a
// pristine copy of ta7.cpp.

// NOTE(review): the six original include targets were lost; these cover
// every library call made in this file.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <time.h>
#include <math.h>

// Convert 2-4 bytes to an unsigned integer, MSB first.  Each byte is widened
// before shifting so a top byte >= 0x80 cannot be shifted into the sign bit
// of an int and then sign-extended into a 64-bit unsigned long.
inline unsigned int i2(const unsigned char* p) {
  return ((unsigned int)p[0]<<8)|p[1];
}

inline unsigned long i3(const unsigned char* p) {
  return ((unsigned long)p[0]<<16)|((unsigned long)p[1]<<8)|p[2];
}

inline unsigned long i4(const unsigned char* p) {
  return ((unsigned long)p[0]<<24)|((unsigned long)p[1]<<16)
        |((unsigned long)p[2]<<8)|p[3];
}

// Print the time (seconds since 1970) as MM/DD/YYYY HH:MM:SS in local time,
// or "?" if it cannot be converted.
void print_time(unsigned long seconds) {
  char s[30];
  time_t t=(time_t)seconds;
  tm* local=localtime(&t);
  if (local) {
    strftime(s, sizeof(s), "%m/%d/%Y %H:%M:%S", local);
    printf("%s", s);
  }
  else
    printf("?");
}

// Parsed data packet
const int MAX_PACKET=1600;  // Ethernet + tcpdump header max size
static unsigned char data[MAX_PACKET];  // tcpdump header and packet

// Pointers to the start of each header in data[]; 0 if the header is absent
const unsigned char *base=data, *ethernet=0, *ip=0, *icmp=0, *udp=0, *tcp=0,
  *tcp_option=0, *appl=0, *null=0;

enum Format {DECIMAL, HEX, TIME, IP};  // For printing a field
const int K=32;  // Max number of clusters

// Print x in an appropriate format and length (in bytes) for a network
// packet field.
void print(unsigned long x, Format format, int length) {
  switch (format) {
    case DECIMAL:
      printf("%lu", x);
      break;
    case HEX:
      putchar('x');
      for (int i=length*8-8; i>=0; i-=8)
        printf("%02X", (unsigned int)((x>>i)&255));
      break;
    case TIME:
      print_time(x);
      break;
    case IP:
      printf("%03lu.%03lu.%03lu.%03lu",
             (x>>24)&255, (x>>16)&255, (x>>8)&255, x&255);
      break;
    default:
      break;
  }
}

// One entry of the table of packet header fields, with its trained model
struct Field {
  const char* name;  // Field name
  const unsigned char** p;  // Value is at (*p)[offset]
  int offset;
  int length;  // Number of bytes in field, 1 to 4
  Format format;  // How it should be printed
  unsigned long n;  // Number of observations
  unsigned long r;  // Max number of different observed values
  unsigned long t;  // Time of last anomaly
  unsigned long vmin[K+1], vmax[K+1];  // Cluster bounds, sorted ascending
  int k;  // Number of clusters, 0 to K

  // Current value of this field, or 0 if its header is absent from the
  // current packet (or length is not 1..4)
  unsigned long value() const {
    if (!*p)
      return 0;
    const unsigned char* dp = *p+offset;
    switch (length) {
      case 1: return *dp;
      case 2: return i2(dp);
      case 3: return i3(dp);
      case 4: return i4(dp);
      default: return 0;
    }
  }
};

// The model fields (n, r, t, vmin, vmax, k) are zero-initialized.
Field field[] = {
  {"Time", &base, 0, 4, TIME},
  //{"Ether Size", &base, 8, 4, DECIMAL},
  {"Ether Size", &base, 12, 4, DECIMAL}, // offset 12 is original length -pkc
  {"Ether Dest Hi", &ethernet, 0, 3, HEX},
  {"Ether Dest Lo", &ethernet, 3, 3, HEX},
  {"Ether Src Hi", &ethernet, 6, 3, HEX},
  {"Ether Src Lo", &ethernet, 9, 3, HEX},
  {"Ether Protocol", &ethernet, 12, 2, HEX},
  {"IP Header Len", &ip, 0, 1, HEX},
  {"IP TOS", &ip, 1, 1, HEX},
  {"IP Length", &ip, 2, 2, DECIMAL},
  {"IP Frag ID", &ip, 4, 2, DECIMAL},
  {"IP Frag Ptr", &ip, 6, 2, HEX},
  {"IP TTL", &ip, 8, 1, DECIMAL},
  {"IP Protocol", &ip, 9, 1, DECIMAL},
  {"IP Checksum", &ip, 10, 2, HEX},
  {"IP Src", &ip, 12, 4, IP},
  {"IP Dest", &ip, 16, 4, IP},  // 16
  {"TCP Src Port", &tcp, 0, 2, DECIMAL},
  {"TCP Dest Port", &tcp, 2, 2, DECIMAL},
  {"TCP Seq", &tcp, 4, 4, DECIMAL},
  {"TCP Ack", &tcp, 8, 4, DECIMAL},
  {"TCP Header Len", &tcp, 12, 1, HEX},
  {"TCP Flg UAPRSF", &tcp, 13, 1, HEX},
  {"TCP Window Sz", &tcp, 14, 2, DECIMAL},
  {"TCP Checksum", &tcp, 16, 2, HEX},
  {"TCP URG Ptr", &tcp, 18, 2, DECIMAL},
  {"TCP Option", &tcp_option, 0, 4, HEX},
  {"UDP Src Port", &udp, 0, 2, DECIMAL},
  {"UDP Dest Port", &udp, 2, 2, DECIMAL},
  {"UDP Len", &udp, 4, 2, DECIMAL},
  {"UDP Checksum", &udp, 6, 2, HEX},
  {"ICMP Type", &icmp, 0, 1, DECIMAL},
  {"ICMP Code", &icmp, 1, 1, DECIMAL},
  {"ICMP Checksum", &icmp, 2, 2, HEX}
};

const int NFIELDS=sizeof(field)/sizeof(field[0]);
const int IP_DEST_FIELD=16;  // Index of "IP Dest", reported as the victim

// Sum the len bytes at p as 16-bit words, MSB first.  An odd trailing byte is
// padded with a zero low byte.  Never reads at or beyond end, so a length
// field larger than the captured data cannot run off the packet.
static unsigned long sum16(const unsigned char* p, long len,
                           const unsigned char* end) {
  if (len>end-p)
    len=end-p;
  unsigned long sum=0;
  long i=0;
  for (; i+1<len; i+=2)
    sum+=i2(p+i);
  if (i<len)
    sum+=(unsigned long)p[i]<<8;
  return sum;
}

// Fold a sum of 16-bit words to 16 bits with end-around carry.
static unsigned int fold16(unsigned long sum) {
  while (sum>>16)
    sum=(sum&0xffff)+(sum>>16);
  return (unsigned int)sum;
}

// Overwrite the 2-byte field at where (a pointer into data[]), MSB first.
static void store2(const unsigned char* where, unsigned int v) {
  data[where-data]=(unsigned char)(v>>8);
  data[where-data+1]=(unsigned char)(v&255);
}

// Set the header pointers for the packet in data[] whose captured bytes stop
// at end.  A header is marked present only if its fixed part lies entirely
// within the captured bytes, so fields are never read from bytes left over
// from an earlier, longer packet.
static void parse_headers(const unsigned char* end) {
  ethernet=ip=tcp=tcp_option=udp=icmp=appl=0;
  if (end-(data+16)<14)
    return;
  ethernet=data+16;
  appl=ethernet+14;
  if (end-appl>=20 && i2(ethernet+12)==0x800
      && (ethernet[14]&0xf0)==0x40)  // IPv4?
    ip=ethernet+14;
  if (!ip || (i2(ip+6)&0x1fff)!=0)  // Only the first IP fragment is parsed
    return;

  const int ipheader=(ip[0]&15)*4;
  const unsigned char* upper=ip+ipheader;
  appl=upper;
  if (ip[9]==6) {  // Upper layer protocol
    if (end-upper>=20) {
      tcp=upper;
      appl=tcp+20;
      if (tcp[12]>=0x60 && end-tcp>=24) {  // Header size
        tcp_option=tcp+20;
        appl=tcp+24;
      }
    }
  }
  else if (ip[9]==17) {
    if (end-upper>=8) {
      udp=upper;
      appl=udp+8;
    }
  }
  else if (ip[9]==1) {
    if (end-upper>=4) {
      icmp=upper;
      appl=icmp+4;
    }
  }
}

// Replace the IP, UDP, TCP and ICMP checksum fields with their computed
// values.  The sum includes the transmitted checksum itself, so a packet
// whose checksum is valid ends up with 0xFFFF in the field.
// NOTE(review): the summing loops were lost from the reviewed copy; only the
// pseudo-header expressions and the trailing "checksum>>16" folds survived.
// This version folds all carries, which may differ from the original when
// the first fold itself carries.
static void replace_checksums(const unsigned char* end) {
  if (!ip)
    return;
  const int ipheader=(ip[0]&15)*4;
  store2(ip+10, fold16(sum16(ip, ipheader, end)));

  // UDP checksum is optional (0 if not computed)
  if (udp && i2(udp+6)) {
    int udplen=i2(udp+4);  // Length of UDP header and payload
    unsigned long checksum=17+udplen+i2(ip+12)+i2(ip+14)+i2(ip+16)
      +i2(ip+18);  // Pseudo header (protocol, length, source, dest)
    checksum+=sum16(udp, udplen, end);
    store2(udp+6, fold16(checksum));
  }

  // TCP checksum
  if (tcp) {
    int tcplen=i2(ip+2)-4*(ip[0]&15);
    unsigned long checksum=6+tcplen+i2(ip+12)+i2(ip+14)+i2(ip+16)
      +i2(ip+18);  // Checksum of pseudo header as in UDP
    checksum+=sum16(tcp, tcplen, end);
    store2(tcp+16, fold16(checksum));
  }

  // ICMP checksum (no pseudo header)
  if (icmp) {
    int icmplen=i2(ip+2)-4*(ip[0]&15);
    store2(icmp+2, fold16(sum16(icmp, icmplen, end)));
  }
}

// Insert a new one-value cluster for v at position lo (training anomaly at
// time now), then merge clusters: adjacent ones always, and the two closest
// ones for as long as the table is full.
// NOTE(review): the second adjacency test, the loop bounds and the merge
// itself were lost from the reviewed copy and are reconstructed from the
// surviving fragments and comments.  Ties pick the lowest cluster pair.
static void add_cluster(Field& fi, int lo, unsigned long v,
                        unsigned long now) {
  for (int j=fi.k; j>lo; --j) {
    fi.vmin[j]=fi.vmin[j-1];
    fi.vmax[j]=fi.vmax[j-1];
  }
  fi.vmin[lo]=fi.vmax[lo]=v;
  ++fi.k;
  ++fi.r;
  fi.t=now;

  // Merge adjacent clusters.  If full, merge the two closest.
  if (fi.k>=K || (lo>0 && v==fi.vmax[lo-1]+1)
      || (lo<fi.k-1 && v==fi.vmin[lo+1]-1)) {
    while (fi.k>1) {
      int bj=0;  // First of pair to merge
      unsigned long bd=fi.vmin[1]-fi.vmax[0];  // Least dist found
      for (int j=1; j<fi.k-1; ++j) {
        const unsigned long d=fi.vmin[j+1]-fi.vmax[j];
        if (d<bd) {
          bd=d;
          bj=j;
        }
      }
      if (fi.k<K && bd>1)
        break;  // Room left and nothing adjacent: done
      fi.vmax[bj]=fi.vmax[bj+1];
      for (int j=bj+1; j<fi.k-1; ++j) {
        fi.vmin[j]=fi.vmin[j+1];
        fi.vmax[j]=fi.vmax[j+1];
      }
      --fi.k;
    }
  }
}

// Process the current value of a field whose header is present.  While
// training, count the observation and add its value to the clusters.  While
// testing, return the anomaly score t*n/r if the value lies outside every
// cluster, else 0.
// NOTE(review): the position of ++fi.n and one extra guard of the form
// "X>0 &&" in front of the testing condition were lost from the reviewed
// copy.  n is counted for every training observation (per the file header),
// and the lost guard is assumed to have been fi.k>0, which has no effect
// here because an untrained field has n==0.
static double observe(Field& fi, unsigned long now, bool training) {
  const unsigned long v=fi.value();

  // Binary search for the first cluster with vmax >= v
  int lo=0, hi=fi.k;
  while (lo<hi) {
    const int mid=(lo+hi)/2;
    if (v>fi.vmax[mid])
      lo=mid+1;
    else
      hi=mid;
  }
  const bool known = lo<fi.k && v>=fi.vmin[lo];

  if (training) {
    ++fi.n;
    if (!known)
      add_cluster(fi, lo, v, now);
    return 0;
  }
  if (known)
    return 0;

  double sc=0;
  if (fi.n>0 && fi.r>0)
    sc=double(now-fi.t)*double(fi.n)/double(fi.r);
  fi.t=now;
  return sc;
}

// Usage: ta7 training_time tcpDumpFiles...
// Trains on packets timestamped within training_time seconds of the first
// packet, then prints one line for each later packet whose summed field
// score exceeds the reporting threshold.  Returns 0, or 1 on a usage error.
int main(int argc, char** argv) {

  // Check for args: a training time and at least one file are needed
  if (argc<3) {
    printf("Usage: ta7 training_time tcpDumpFiles...\n\n");
    return 1;
  }

  // Training and test times, start of window
  unsigned long start_time=0, now=0, test_time=0;  // Seconds since 1/1/1970
  const double LOG10 = log(10);

  // Read the tcpdump files
  // NOTE(review): the code that opens each file and reads the record header
  // was lost from the reviewed copy.  It is reconstructed from the tcpdump
  // layout implied elsewhere in the file: 16-byte record headers read MSB
  // first, captured length at offset 8.  The 24-byte file header is assumed.
  for (int argi=2; argi<argc; ++argi) {
    FILE* f=fopen(argv[argi], "rb");
    if (!f) {
      perror(argv[argi]);
      continue;
    }
    if (fread(data, 1, 24, f)!=24) {  // Skip the tcpdump file header
      fclose(f);
      continue;
    }

    while (fread(data, 1, 16, f)==16) {  // tcpdump record header
      const unsigned long length=i4(data+8);  // Captured length
      if (length>(unsigned long)(MAX_PACKET-16)) {
        fprintf(stderr,
          "Packet length %lu exceeds max %d in file %s at byte %ld\n",
          length, MAX_PACKET-16, argv[argi], ftell(f));
        break;
      }

      // Read the rest of the data
      if (fread(data+16, 1, length, f)!=length)
        break;  // End of file
      const unsigned char* end=data+16+length;

      // Get start time
      now=i4(data);
      if (start_time==0) {
        start_time=now;
        test_time=now+atol(argv[1]);
      }

      // Parse the data, set pointers to each header
      parse_headers(end);
      replace_checksums(end);

      // Process the fields
      double score=0;  // Anomaly score
      double bscore=0;  // Highest anomaly score of any field
      int bi=0;  // Field with highest score
      for (int i=0; i<NFIELDS; ++i) {
        if (!*field[i].p)
          continue;  // Header not present in this packet
        const double sc=observe(field[i], now, now<test_time);
        score+=sc;
        if (sc>bscore) {
          bscore=sc;
          bi=i;
        }
      }

      // Print anomaly
      double percent=0;  // Share of the score due to the top field
      if (score>0)
        percent=100*bscore/score;
      score*=1e-6;
      if (score>1) {
        score=0.1*log(score)/LOG10;
        printf(" 0 ");
        print(now, TIME, 4);
        printf(" ");
        print(field[IP_DEST_FIELD].value(), IP, 4);
        printf(" %8.6f # %s=", score, field[bi].name);
        print(field[bi].value(), field[bi].format, field[bi].length);
        printf(" %1.0f%%\n", percent);
      }
    }
    fclose(f);
  }

#if 0
  // pkc - suppress printing out the probability table
  // NOTE(review): partly lost from the reviewed copy; the per-field summary
  // format is a guess, the cluster printing follows the surviving text.
  // Print stats
  for (int i=0; i<NFIELDS; ++i) {
    const Field& fi=field[i];
    printf("%-15s r/n=%lu/%lu", fi.name, fi.r, fi.n);
    for (int j=0; j<fi.k; ++j) {
      if (j==3 && fi.k>4)
        printf("...(%d)", fi.k);
      else if (j<3 || j==fi.k-1) {
        putchar(' ');
        print(fi.vmin[j], fi.format, fi.length);
        if (fi.vmax[j]>fi.vmin[j]) {
          putchar('-');
          print(fi.vmax[j], fi.format, fi.length);
        }
      }
    }
    printf("\n");
  }
#endif

  return 0;
}