/* tf.cpp - tcpdump filter.

Copyright (C) 2002, Matt Mahoney.  This program is distributed
without warranty under terms of the GNU general public license.
See http://www.gnu.org/licenses/gpl.txt

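Filter to extract the start of TCP server requests (packets whose sequence
number is within 100 bytes of the one stored from the SYN), and to rate
limit every packet that remains, including UDP and ICMP, to 16 packets
per 60 seconds per destination address, protocol and port.  UDP to
ports above 1023 is dropped.  Only packets addressed to 172.16.x.x,
192.168.x.x or 163.118.135.1 (the DARPA IDS evaluation set or
www.cs.fit.edu) are kept.
Reads stdin if filename is -.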

Usage: tf tcpdump_files...

Output is written to tf.out as a tcpdump file containing a subset
of the original packets.
*/

#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <cmath>
#include <algorithm>
using namespace std;

// 8-32 bit unsigned int types, named for the width they are used for.
// NOTE(review): unsigned long is only guaranteed to be at least 32 bits
// wide (it is 64 bits on LP64 platforms), so arithmetic on U32 does not
// necessarily wrap at 2^32.  The "%lu" format strings in main() rely on
// U32 being unsigned long.
typedef unsigned char U8;
typedef unsigned short U16;
typedef unsigned long U32;

// Max output packet size: written as the snaplen field of a new tf.out
// header, and used to truncate the data of every packet that is written.
const U32 SNAPLEN=256;

// Function object that assembles a 32 bit int from the 4 bytes at p.
// The byte order is selected at run time through msb_first:
//   true  - p[0] is the most significant byte (the default)
//   false - p[3] is the most significant byte
class I4 {
public:
  bool msb_first;
  I4(): msb_first(true) {}
  U32 operator()(U8* p) const {
    // Pick the bytes from most to least significant for the current order
    const U8 top=msb_first ? p[0] : p[3];
    const U8 upper=msb_first ? p[1] : p[2];
    const U8 lower=msb_first ? p[2] : p[1];
    const U8 bottom=msb_first ? p[3] : p[0];
    return (U32(top)<<24)|(U32(upper)<<16)|(lower<<8)|bottom;
  }
} i4;  // single global instance, called like a function

// Emit the low 32 bits of x to f as 4 bytes, most significant byte first.
// Each putc() keeps only the low 8 bits of the shifted value.
void out4(FILE* f, U32 x) {
  for (int shift=24; shift>=0; shift-=8)
    putc(x>>shift, f);
}

int main(int argc, char** argv) {

  // Check program args
  if (argc<2) {
    fprintf(stderr, "To filter tcpdump files to tf.out: tf tcpdump_files...\n");
    fprintf(stderr, "To read stdin: tf -\n");
    return 1;
  }

  // Append to tf.out and write header if new
  bool newfile=true;
  FILE* out=fopen("tf.out", "rb");
  if (out) {
    newfile=false;
    fclose(out);
  }
  out=fopen("tf.out", "ab");
  if (!out) {
    perror("tf.out");
    return 1;
  }
  if (newfile) {
    fwrite("\xa1\xb2\xc3\xd4\x00\x02\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00",
      1, 16, out);
    out4(out, SNAPLEN);
    fwrite("\x00\x00\x00\x01", 1, 4, out);
  }

  // Open each file, skip if not found
  for (int i=1; i<argc; ++i) {
    FILE* in;
    if (argv[i][0]=='-' && argv[i][1]==0)
      in=stdin;
    else
      in=fopen(argv[i], "rb");
    if (!in) {
      perror(argv[i]);
      continue;
    }

    // Read TCPDUMP header, skip if bad
    const int MAX_BUF=1532;
    static U8 buf[MAX_BUF];  // Input buffer
    if (fread(buf, 1, 24, in)!=24) {
      fprintf(stderr, "%s: file too small\n", argv[i]);
      continue;
    }

    // Determine if input is MSB or LSB first
    i4.msb_first=true;
    if (i4(buf)!=0xa1b2c3d4)
      i4.msb_first=false;
    if (i4(buf)!=0xa1b2c3d4) {
      fprintf(stderr, "%s: not in tcpdump format\n", argv[i]);
      continue;
    }

    // Read packets
    U32 packets_in=0, packets_out=0;  // counts
    while (true) {
      if (fread(buf, 1, 16, in)!=16)
        break;  // EOF
      U32 seconds=i4(buf);  // Time
      U32 microseconds=i4(buf+4);
      U32 len1=i4(buf+8);  // Captured packet length
      U32 len2=i4(buf+12); // Original packet length
      if (len1>len2 || len2>MAX_BUF-16) {
        fprintf(stderr, "%s: corrupted: len1=%lu len2=%lu at %ld\n",
          argv[i], len1, len2, ftell(in));
        break;
      }
      if (fread(buf+16, 1, len1, in)!=len1)
        break;  // EOF
      ++packets_in;

      // Remove non-IP
      if (buf[28]!=8 || buf[29]!=0 || (buf[30]&0xf0)!=0x40)
        continue;

      // Remove outgoing IP packets not to 172.16.x, 192.168.x, 163.118.135.1
      if (!((buf[46]==172 && buf[47]==16) || (buf[46]==192 && buf[47]==168)
          || (buf[46]==163 && buf[47]==118 && buf[48]==135 && buf[49]==1)))
        continue;

      // Remove UDP to high ports
      if (buf[39]==17 && buf[30]==0x45 && buf[52]>3)
        continue;

      // Remove TCP data packets except near start
      U8 *tcp=buf+30+4*(buf[30]&15);  // start of TCP/UDP/ICMP header
      if (buf[39]==6) {  // TCP?
        U32 seqno=(tcp[4]<<24)|(tcp[5]<<16)|(tcp[6]<<8)|tcp[7];  // seq. no.
        const int HSIZE=4096;   // Hash table size, a power of 2
        static U32 seq[HSIZE];  // Hash table of sequence numbers
        U32 h=(((buf[45]*256+buf[44])*256+buf[43])*256+buf[42])*3 // src IP
          +(((buf[49]*256+buf[48])*256+buf[47])*256+buf[46])*5;   // dest IP
        h+=(((tcp[3]*256+tcp[2])*256+tcp[1])*256+tcp[0])*7;      // ports
        h^=(h>>10)^(h>>21);
        h&=(HSIZE-1);
        if (tcp[13]==2)  // SYN, no ACK?  Store seq. no.
          seq[h]=seqno;  // no collision check, may rarely drop packets
        else if ((tcp[13]==16 || tcp[13]==17 || tcp[13]==18 || tcp[13]==24)
             && seqno-seq[h]>100)
          continue;  // ACK, maybe SYN, FIN, PUSH, and not near start
      }

      // Rate limit packets by destination address/port to QSIZE per 1 min.
      {
        const int HSIZE=4096;   // Hash table size, a power of 2
        const int QSIZE=16;     // Number of packets times in queue
        static U32 t[HSIZE][QSIZE];  // Hash table of packet time queues
        static U8 q[HSIZE];  // Front of queues
        U32 h=(((buf[49]*256+buf[48])*256+buf[47])*256+buf[46])*3
          +buf[39]*61933;  // Hash Dest IP, protocol
        if (buf[39]==6 || buf[39]==17)
          h+=(tcp[3]*256+tcp[2])*61559;  // Hash TCP/UDP port number
        h^=(h>>10)^(h>>21);
        h&=(HSIZE-1);
        if (seconds-t[h][q[h]]>60) {
          t[h][q[h]]=seconds;
          q[h]=(q[h]+1)%QSIZE;
        }
        else
          continue;
      }

      // Write packets, MSB first
      len1=min(len1, SNAPLEN);
      out4(out, seconds);
      out4(out, microseconds);
      out4(out, len1);
      out4(out, len2);
      if (fwrite(buf+16, 1, len1, out)!=len1) {
        perror("tf.out");
        return 1;
      }
      ++packets_out;
    }
    fclose(in);
    fprintf(stderr, "%s: %lu packets in, %lu packets out\n",
      argv[i], packets_in, packets_out);
  }
  fclose(out);
  return 0;
}

