#!/usr/bin/perl
#
# Copyright (C) 2003, Matt Mahoney.  This program is distributed
# without warranty under terms of the GNU general public license
# See http://www.gnu.org/licenses/gpl.txt
#
# Convert 1999 IDS evaluation data from
# http://www.ll.mit.edu/IST/ideval/docs/1999/master-listfile-condensed.txt
# (as of Jan. 2, 2003) into initialization data for eval.cpp
#
# The output format is the body of two structs.  The first lists all
# attack segments
#
#   {n,ip,start,end},
#
# and the second lists whole attacks
#
#   {n,flags,"id name"},
#
# where n is 1-201 (attack number), ip is the matched address packed
# into 32 bits (printed in hex), start and end are seconds from
# 0000 Feb. 28, 1999 local time, and "id name" is the 8 digit ID number
# and name, e.g. "41.084031 ps".  Flags are the sum of the following:
#      1 in    - Insider attack
#      2 man   - Attack was carried out manually
#      4 cons  - Console attack
#      8 succ  - Attack was successful
#     16 aDmp  - Evidence in file system dumps
#     32 oDmp  - Evidence in outside sniffer traffic
#     64 iDmp  - Evidence in inside sniffer traffic
#    128 BSM   - Evidence in BSM data
#    256 SysLg - Evidence in system logs
#    512 FSLst - Evidence in file system information
#   1024 Stlth - Attack is stealthy
#   2048 New   - New in weeks 4-5
#   4096 Probe
#   8192 DOS
#  16384 R2L
#  32768 U2R
#  65536 Data
#
# Input is read from the files named on the command line, or from
# standard input when none are given.
#
# NOTE(review): every pattern below is whitespace sensitive because it is
# matched against fixed-format text.  Verify the spacing inside the
# patterns against the actual list file before relying on the output.

use strict;
use warnings;

# Pattern -> flag bit, checked against the (already corrected) input line.
# No flags for OS, the data is corrupted.
my @FLAG_PATTERNS = (
    [ qr/ in /,        1 ],
    [ qr/ man /,       2 ],
    [ qr/ cons /,      4 ],
    [ qr/ succ /,      8 ],
    [ qr/ aDmp /,     16 ],
    [ qr/ oDmp /,     32 ],
    [ qr/ iDmp /,     64 ],
    [ qr/ BSM /,     128 ],
    [ qr/ SysLg /,   256 ],
    [ qr/ FSLst /,   512 ],
    [ qr/ Stlth /,  1024 ],
    [ qr/ New /,    2048 ],
    [ qr/llPROBE/,  4096 ],
    [ qr/llDOS/,    8192 ],
    [ qr/llR2L/,   16384 ],
    [ qr/llU2R/,   32768 ],
    [ qr/llDATA/,  65536 ],
);

# Current segment, filled in by main() and consumed by put().
my ($id, $name, $ip, $start, $end, $flags) = (0, '', 0, 0, 0, 0);

# Previous (possibly merged) segment that has not been printed yet.
my ($previd, $previp, $prevstart, $prevend) = (0, 0, 0, 0);

my $nid = 0;    # Number of distinct attacks seen so far
my @list2;      # Formatted attack entries, printed after all segments

# Run the conversion only when executed as a script, so the helper
# subroutines can be loaded on their own without consuming any input.
main() unless caller;

# Read the list file, print the segment list, a blank line, and then the
# attack list.
sub main {
    while (<>) {

        # Fix labels where the victim and attacker are swapped
        if (/^ 5(2|4)\.120600/) {s/196\.037\.075\.158/172.016.112.050/;}
        if (/^ 55\.080105/) {s/206\.048\.044\.050/172.016.112.050/;}

        # Fix misspelled or inconsistent attack names
        s/syslog /syslogd /;
        s/cassen /casesen /;
        s/arppoision/arppoison /;
        s/crashii /crashiis /;
        s/ntis /ntinfoscan /;
        s/0telnet /0guesstelnet /;
        if (/43\.111111/) {s/warez /warezmaster /;}
        else {s/warez /warezclient /};

        # Find lines describing attack segments
        if (my ($seg_id, $month, $day, $hour, $min, $sec,
                $dhour, $dmin, $dsec,
                $ip3, $ip2, $ip1, $ip0, $seg_name) =
            /^ (\d\d\.\d\d\d\d\d\d)(\d\d)\/(\d\d)\/1999 (\d\d):(\d\d):(\d\d) +(\d\d):(\d\d):(\d\d) +(172|192)\.(0*1|0*16|168)\.(\d+)\.(\*|\d+) *([a-z]*\w+)/) {

            ($id, $name)   = ($seg_id, $seg_name);
            ($start, $end) = segment_times($month, $day, $hour, $min, $sec,
                                           $dhour, $dmin, $dsec);
            $ip            = pack_ip($ip3, $ip2, $ip1, $ip0);
            $flags         = compute_flags($_);

            # Compare last two segments
            put();
        }
    }

    # An ID of 0 never equals a real attack ID, so this forces the last
    # pending segment to be printed without starting a new attack.
    $id = 0;
    put();

    print "\n";
    print @list2;    # List of attacks
    return;
}

# Return the sum of the flag bits whose pattern occurs in $line.
sub compute_flags {
    my ($line) = @_;
    my $sum = 0;
    for my $entry (@FLAG_PATTERNS) {
        my ($pattern, $bit) = @{$entry};
        $sum += $bit if $line =~ $pattern;
    }
    return $sum;
}

# Convert a start date/time and a duration into (start, end) in seconds.
# Every month is counted as 31 days starting from February, so the values
# are monotonic timestamps rather than exact calendar offsets.
# NOTE(review): presumably eval.cpp uses the same convention -- confirm
# before changing this formula.
sub segment_times {
    my ($month, $day, $hour, $min, $sec, $dhour, $dmin, $dsec) = @_;
    my $days     = ($month - 2) * 31 + $day;
    my $begin    = (($days * 24 + $hour) * 60 + $min) * 60 + $sec;
    my $duration = ($dhour * 60 + $dmin) * 60 + $dsec;
    return ($begin, $begin + $duration);
}

# Pack four dotted-quad fields (most significant first) into one integer.
# Fields may be zero padded; a wildcard "*" field counts as 0.
sub pack_ip {
    my @octets = map { $_ eq '*' ? 0 : $_ } @_;
    my $packed = 0;
    $packed = $packed * 256 + $_ for @octets;
    return $packed;
}

# Compare the current and previous attack segments.  If they are the
# same attack and target separated by less than 120 seconds, then merge
# them, otherwise print the previous segment.  When a new attack starts,
# push it on @list2 to be printed later.
sub put {
    my $continues_previous =
           $id == $previd
        && $ip == $previp
        && $prevend + 120 >= $start
        && $start >= $prevstart;

    if ($continues_previous) {
        $prevend = $end;
        return;
    }

    # Nothing is pending until the first attack has been registered.
    if ($nid) {
        printf("{%3d,0x%08X,%d,%d},\n", $nid, $previp, $prevstart, $prevend);
    }

    if ($id && $id ne $previd) {
        ++$nid;
        push(@list2,
             sprintf("{%3d,0x%05X,\"%s %s\"},\n", $nid, $flags, $id, $name));
    }

    ($previd, $prevstart, $prevend, $previp) = ($id, $start, $end, $ip);
    return;
}

1;