#!/usr/bin/perl
# Copyright (C) 2002, Matt Mahoney.  This program is distributed
# without warranty under terms of the GNU general public license.
# See http://www.gnu.org/licenses/gpl.txt

# alad.pl does anomaly detection in the application layer
# Input is 2 files, train and test, given as command line arguments
# Format is date, time, dest IP:port source IP:port duration flags length
# and first 1000 bytes with ^C encoding C-64 and "^ " encoding linefeed.
# For example:
#   03/15/1999 08:00:18 172.016.113.105:25 196.037.075.158:1024 +0 S/AP/AF 1263 ^@EHLO jupiter.cherry.org^M^ HELO jupiter.cherry.org^M^ MAIL From:...
# (the example record is truncated here)
#
# How it works, as implemented below:
#   * Every record is stamped with $now, a pseudo-timestamp in seconds built
#     from the month, day, hour, minute and second (the year is ignored and
#     every month is counted as 31 days).
#   * Records with $now < $TRAIN_END only update the model; later records
#     are only scored.
#   * For each line of the decoded payload five (key, value) attributes are
#     checked.  A value never seen in training for its key adds
#         (now - t{key}) * n{key} / r{key}
#     to the score of that payload line.
#   * A payload line whose score exceeds 30000 is reported on stdout together
#     with log10(score)/10 and the list of offending key=value pairs.

use strict;
use warnings;

# Training/test boundary on the $now scale.  10300000 s is 119 days plus
# 5:06:40, i.e. month*31+day == 119, which corresponds to 03/26 05:06:40.
my $TRAIN_END = 10300000;

my %n;            # $n{key}     = number of training observations of key
my %r;            # $r{key}     = number of distinct values seen for key
my %v;            # $v{key.val} = training count of this value for this key
my %t;            # $t{key}     = $now of the last novel value or anomaly for key
my $now = 0;      # pseudo-timestamp of the current record
my $score;        # anomaly score of the current payload line
my $comment;      # " key=val" list explaining the current score

# NOTE(review): the loop header was lost from the text this file was
# recovered from; it is reconstructed as `while (<>)`, which matches the
# "files given as command line arguments" description above -- confirm.
while (defined(my $record = <>)) {

    # Split the record.  Duration and length are captured by the pattern
    # but never used, hence the undef placeholders.
    my ($time, $to, $port, $from, undef, $flags, undef, $text) =
        $record =~ /(.{19}) (.{15}):(\d+) (.{15}):\d+ \+(\d+) (\S+) (\d+) (.*)/
        or next;

    # Decode the payload: drop carriage returns, turn "^ " into newlines.
    $text =~ s/\^M//g;
    $text =~ s/\^ /\n/g;

    # For ports 25 and 80 keep only the text before the first blank line
    # (plus one terminating newline).
    if ($port == 25 || $port == 80) {
        $text =~ s/\n\n.*/\n/s;
    }

    # Strip a leading encoded NUL.
    $text =~ s/^\^@//;

    # Update the clock; a record with an unparsable date keeps the
    # previous value of $now.
    if (my ($mon, $day, $hour, $min, $sec) =
            $time =~ /(\d\d)\/(\d\d)\/\d\d\d\d (\d\d):(\d\d):(\d\d)/) {
        $now = (((($mon * 31 + $day) * 24 + $hour) * 60 + $min) * 60) + $sec;
    }

    # Only newline-terminated payload lines are examined.
    foreach my $text_line ($text =~ /.*\n/g) {

        # Keyword = leading run of non-blank characters, at most 30 long.
        my ($k) = $text_line =~ /(\S*)/;
        $k = substr($k, 0, 30);

        $score   = 0;
        $comment = "";

        # The sub must be called with a leading '&': a plain alarm(...)
        # would resolve to Perl's builtin alarm().
        &alarm($port, $k);
        &alarm($to, $flags);
        &alarm("To", "$to:$port");
        &alarm($to, $from);
        &alarm("$to:$port", $from);

        if ($score > 30000) {
            # $time and $to are passed as arguments rather than interpolated
            # into the format, so a '%' in the input cannot be misread as a
            # conversion specifier.
            printf(" 0 %s %s %8.6f #%s\n",
                   $time, $to,
                   log($score) / (10 * log(10)),
                   substr($comment, 0, 300));
        }
    }
}

# alarm(key, val): train on, or score, one attribute observation.
#
# While $now is before $TRAIN_END the observation is counted: n{key} is
# incremented, and the first time val is seen for key, r{key} is incremented
# and t{key} is set to $now.
#
# Afterwards nothing is learned.  If key was seen in training but val was
# not, ($now - t{key}) * n{key} / r{key} is added to $score, " key=val" is
# appended to $comment, and t{key} is reset to $now.
#
# Reads $now; updates %n, %r, %v, %t, $score and $comment.  Returns nothing.
sub alarm {
    my ($key, $val) = @_;

    if ($now < $TRAIN_END) {
        ++$n{$key};
        if (++$v{$key.$val} == 1) {
            ++$r{$key};
            $t{$key} = $now;
        }
    }
    elsif ($n{$key} && !$v{$key.$val}) {
        # $n{key} > 0 implies $r{key} >= 1 and a defined $t{key}, because
        # all three are set together on the first training observation.
        $score   += ($now - $t{$key}) * $n{$key} / $r{$key};
        $comment .= " $key=$val";
        $t{$key}  = $now;
    }
    return;
}

exit;

# debug code below
# Unreachable because of the exit above.  It lists every key with more than
# 100 observations and n/r > 100, ordered by decreasing n/r, followed by the
# values recorded under that key.
my @kl;
foreach my $key (keys %n) {
    if ($n{$key} > 100) {
        my $nr = $n{$key} / $r{$key};
        if ($nr > 100) {
            push(@kl, $key);
        }
    }
}

# Cross-multiplied comparison: orders by n/r descending without dividing.
@kl = sort { $n{$b} * $r{$a} <=> $n{$a} * $r{$b} } @kl;

foreach my $key (@kl) {
    if ($n{$key} > 1) {
        my $nr = $n{$key} / $r{$key};
        if ($nr > 100) {
            printf("%5d = %6d/%2d %s\n", $nr, $n{$key}, $r{$key}, $key);
            foreach my $kv (sort(keys(%v))) {
                # %v is keyed by key.val, so a matching prefix marks an
                # entry that (textually) belongs to this key.
                if (index($kv, $key) == 0) {
                    print " ".substr($kv, length($key))."\n";
                }
            }
            print "\n";
        }
    }
}