/* te - tcpdump extract utility.  Matt Mahoney, mmahoney@cs.fit.edu

Copyright (C) 2002, Matt Mahoney.  This program is distributed
without warranty under terms of the GNU general public license.
See http://www.gnu.org/licenses/gpl.txt

Usage: te tcpdump_files...

Extracts all incoming TCP streams to 172.16.112.0/21 on ports 0-1023
up to the first 1000 bytes of the application layer.  Format is
date, time, destination IP:port, source IP:port, +duration in seconds
flags/flags/flags (first, next to last, and last packets), original
length, and 1000 bytes of text all on 1 line.  Characters 0-31 and ^
are shown as "^C" where C is the ASCII code + 64.  Linefeed is shown
as "^ "

TCP is reassembled by discarding non-IP, IP with bad checksums
or fragmented, and reassembling with a window size of 100,000,
discarding packets outside the window.  Streams are printed after
being closed by FIN or RST or by end of file.  Overlap is resolved
in favor of newer packets.  Gaps are 0 filled.

INPUT FORMAT

Input files have a 24 byte header starting with A1 B2 C3 D4 hex.
This is followed by a series of records of 76-1530 bytes as follows.
The last record may be incomplete.

Bytes   Contents (numbers are most significant byte (MSB) first)
0-3     Time in seconds since 0000 Jan 1 1970 UCT
4-7     Time in microseconds, 0-999999
8-11    Record length - 16 (60-1514)
12-15   Second copy of length, must be identical

This is followed by the Ethernet packet (minus preamble and trailing checksum)
0-5     Destination MAC address
6-11    Source MAC address
12-13   Protocol, 0x800 = IP
14+     Network level payload

If the network payload is IPv4, it has the format:
0:7-4   Version number (bits 7-4), always 4 (0100)
0:3-0   Header length (4 bits) in 32 bit words, usually 5 (0101)
1       TOS
2-3     IP packet length (includes IP header), 20-65535
4-5     ID for fragment reassembly
6:6     Don't fragment bit (bit 6 of byte 6)
6:5     Fragment follows bit (bit 5 of byte 6)
6:4-7:0 Fragmentation offset in multiples of 8 bytes, 13 bits
8       TTL
9       Transport protocol, 1=ICMP, 6=TCP, 17=UDP
10-11   Checksum
12-15   Source IP address
16-19   Destination IP address
20+     IP options if header length > 5 (up to 40 bytes if 15)
20+     Transport payload after IP options

If the protocol is TCP, then:
0-1     Source port
2-3     Destination port
4-7     Sequence number
8-11    Acknowledgement number
12:7-4  TCP header length in 32 bit words, usually 5 (4 bits)
13:5-0  Flags URG, ACK, PSH, RST, SYN, FIN
14-15   Receiver window size
16-17   Checksum
18-19   Pointer to urgent data
20+     TCP options if TCP header > 5
20+     Application layer payload

If the protocol is UDP, then:
0-1     Source port
2-3     Destination port
4-5     Length
6-7     Checksum
8+      Application layer payload

If the protocol is ICMP, then:
0       Type (0=echo reply, 8=echo request, 3=error, etc.)
1       Code (if type=3, then 0=network unreachable, 1=host unreachable, etc)
2-3     Checksum
4+      Other data
*/

#include <stdint.h>
#include <time.h>
#include <algorithm>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
#include <vector>
using namespace std;

// 8, 16, and 32 bit numbers.  These come from <stdint.h> so that the
// widths are exact on every platform: "unsigned long" and "long" are
// 64 bits wide on LP64 systems, which made 32 bit values read from a
// packet sign-extend and sequence number arithmetic stop wrapping at 2^32.
typedef uint8_t U8;
typedef uint16_t U16;
typedef uint32_t U32;
typedef int32_t I32;  // signed 32 bits

// Read 4 byte integer, MSB first, return as U32.
// Throws "End of file" (a const char*) if fewer than 4 bytes can be read.
// Every byte is checked as it is read: eof() only becomes true after a
// read has already failed, so testing it up front lets the first call at
// end of file build a value out of four EOF markers.
U32 get4(istream& in) {
  U32 value=0;
  for (int i=0; i<4; ++i) {
    const int c=in.get();
    if (c==istream::traits_type::eof())
      throw "End of file";
    // Widen before combining so a byte >= 0x80 in the top position is
    // never shifted as a signed int.
    value=(value<<8)|U32(c);
  }
  return value;
}

// Write one payload byte to out in printable form:
//   linefeed             -> "^ "
//   0-31 (other) and '^' -> '^' followed by the character with code c+64
//   anything else        -> the byte itself
ostream& print_text(ostream& out, U8 c) {
  if (c=='\n')
    return out << "^ ";
  if (c>=' ' && c!='^')
    return out << char(c);
  return out << '^' << char(c+64);
}

// Print time in format MM/DD/YYYY HH:MM:SS, t = seconds since 1970 (UTC).
// The value is shifted to EST (or EDT) by hand and then broken down with
// gmtime(), so the result does not depend on the time zone of the machine
// running the program.  (localtime() would apply the host's own offset on
// top of the manual one.)  Fractional seconds are not printed.
ostream& print_time(ostream& out, double t) {
  time_t ts=static_cast<time_t>(t);  // Whole seconds
  ts-=18000;  // Convert to EST (UTC - 5 hours)
  // NOTE(review): 923205600 falls at 06:00:00 on 4/4/1999 on the shifted
  // clock, not at the 02:00 changeover - confirm the intended threshold.
  if (ts>=923205600)
    ts+=3600;  // Convert to EDT
  const struct tm* tp=gmtime(&ts);
  if (!tp)  // Time not representable; print a recognizable placeholder
    return out << "00/00/0000 00:00:00";
  char buf[50];
  strftime(buf, sizeof buf, "%m/%d/%Y %H:%M:%S", tp);
  return out << buf;
}

// Print IP address in format xxx.xxx.xxx.xxx, each byte zero padded to 3
// digits, most significant byte first.  Only the low 32 bits of addr are
// used.  The stream's fill character is restored before returning so the
// '0' padding does not leak into later output on the same stream.
ostream& print_ip(ostream& out, U32 addr) {
  const char old_fill=out.fill('0');
  for (int shift=24; shift>=0; shift-=8) {
    if (shift<24)
      out << '.';
    out << setw(3) << ((addr>>shift)&255);
  }
  out.fill(old_fill);
  return out;
}

// Print TCP flags: one character per set bit of f, highest bit first.
// Bits 128 and 64 print as '1' and '0'; the remaining six print as
// U, A, P, R, S, F.  Clear bits print nothing.
ostream& print_flags(ostream& out, U8 f) {
  static const char symbols[]="10UAPRSF";  // symbols[0] belongs to bit 128
  for (int bit=0; bit<8; ++bit) {
    if (f&(128>>bit))
      out << symbols[bit];
  }
  return out;
}

// Packet - TCP/UDP/ICMP/IP/Ethernet packet with parsed fields.
// The header members are offsets into data; 0 means "not present", and
// the named accessors return 0 for a header that is not present.
struct Packet {
  double time;  // Seconds since 1970
  vector<U8> data;  // Starting with the Ethernet header
  int ip, tcp, udp, icmp, appl, end;  // Points to start of header/data, or 0

  // Init to 0
  Packet(): time(0), ip(0), tcp(0), udp(0), icmp(0), appl(0), end(0) {}

  // Get numeric fields of 1, 2, or 4 bytes, MSB first.
  // Offsets come from length fields inside the capture, which cannot be
  // trusted, so a byte outside data reads as 0 instead of indexing past
  // the end of the vector.
  U16 get1(int offset) const {
    return (offset>=0 && vector<U8>::size_type(offset)<data.size())
        ? data[offset] : 0;
  }
  U16 get2(int offset) const {return (get1(offset)<<8)+get1(offset+1);}
  // NOTE: the shift is done in int, so where U32 is wider than 32 bits a
  // value with the top bit set comes back sign-extended.  Mask with
  // 0xFFFFFFFF when the plain 32 bit value is needed.
  U32 get4(int offset) const {return (get2(offset)<<16)+get2(offset+2);}

  // Get IP fields by name
  U16 fragoffset() const {return ip ? get2(ip+6)&0x1fff : 0;}
  bool fragfollows() const {return ip ? (get1(ip+6)>>5)&1 : 0;}
  U16 fragid() const {return ip ? get2(ip+4) : 0;}
  U16 iplen() const {return ip ? get2(ip+2) : 0;}  // Includes IP header
  U16 ipheaderlen() const {return ip ? (get1(ip)&15)*4 : 0;}  // In bytes
  U16 ipdatalen() const {return iplen()-ipheaderlen();}
  U32 src() const {return ip ? get4(ip+12) : 0;}
  U32 dest() const {return ip ? get4(ip+16) : 0;}
  U8 protocol() const {return ip ? get1(ip+9) : 0;}

  // TCP/UDP fields
  U16 src_port() const {return tcp ? get2(tcp) : udp ? get2(udp) : 0;}
  U16 dest_port() const {return tcp ? get2(tcp+2) : udp ? get2(udp+2) : 0;}
  U8 tcpheaderlen() const {return tcp ? (get1(tcp+12)>>4)*4 : 0;}  // In bytes
  U32 tcpseq() const {return tcp ? get4(tcp+4) : 0;}
  U32 tcpack() const {return tcp ? get4(tcp+8) : 0;}
  U8 tcpflags() const {return tcp ? get1(tcp+13) : 0;}
  bool ack() const {return tcp && (get1(tcp+13)&16);}
  bool rst() const {return tcp && (get1(tcp+13)&4);}
  bool syn() const {return tcp && (get1(tcp+13)&2);}
  bool fin() const {return tcp && (get1(tcp+13)&1);}
};

// Read one tcpdump record from in into p and locate the IPv4 header.
// Throws a const char* message if the two length fields disagree, if the
// length exceeds 1514, or if the input ends inside the record (get4()
// throws the same way while the record header is being read).
istream& operator >> (istream& in, Packet& p) {

  // Erase old packet
  p.ip=p.tcp=p.udp=p.icmp=p.appl=0;

  // Read the tcpdump header
  p.time=get4(in);  // Seconds
  p.time+=get4(in)*0.000001;  // Microseconds
  const U32 len1=get4(in);  // 2 copies of length
  const U32 len2=get4(in);
  if (len1!=len2 || len1>1514) {
    // Diagnostics go to cerr like every other message in this program,
    // keeping cout for the extracted streams only.
    cerr << "len1=" << len1 << " len2=" << len2 << endl;
    throw "Lengths not equal or too big";
  }

  // Read the raw Ethernet packet in one call
  p.data.resize(len1);
  if (len1>0) {
    in.read(reinterpret_cast<char*>(&p.data[0]), streamsize(len1));
    if (in.gcount()!=streamsize(len1))
      throw "End of file";  // Last record is incomplete
  }
  p.end=int(p.data.size());

  // IPv4?  (Ethernet type 0x800 and IP version 4)
  if (p.data.size()>34 && p.get2(12)==0x800 && (p.get1(14)&0xf0)==0x40)
    p.ip=14;
  return in;
}

// Consume the (up to) 24 byte tcpdump file header from in and verify that
// it begins with the bytes A1 B2 C3 D4.  Returns normally when it does;
// otherwise throws a string made of "Bad header: " plus the bytes read.
void test_header(ifstream& in) {
  string header;
  for (char ch; header.size()<24 && in.get(ch); )
    header+=ch;

  static const int magic[4]={0xa1, 0xb2, 0xc3, 0xd4};
  bool ok=header.size()>=4;
  for (int i=0; ok && i<4; ++i)
    ok=(header[i]&255)==magic[i];
  if (!ok)
    throw "Bad header: "+header;
}

// Test IP checksum, return true if OK or not IP
bool ipcheck(const Packet& p) {
  if (!p.ip)
    return true;
  U32 t=0;
  for (int i=p.ipheaderlen()-2; i>=0; i-=2)
    t+=p.get2(p.ip+i);
  t=(t>>16)+(t&0xffff);
  return t==0xffff;
}

// Report whether p can be used as it stands, i.e. is not an IP fragment.
// Reassembly is not implemented: a packet with a nonzero fragment offset
// or with the "fragment follows" bit set yields false.
bool defrag(Packet& p) {
  const bool fragmented=p.fragoffset()!=0 || p.fragfollows();
  return !fragmented;
}

// Parse the transport layer of a packet: set p.end to the end of the IP
// packet and fill in p.icmp, or p.tcp/p.udp and p.appl.  Non-IP packets
// are left untouched.
// The IP length field is clamped to the bytes actually captured, and a
// TCP or UDP header is only accepted when its fixed part (20 or 8 bytes)
// lies inside the packet, so later field reads and payload copies stay
// within p.data even for truncated or malformed packets.
void parse_transport(Packet& p) {
  if (!p.ip)
    return;
  const int captured=int(p.data.size());
  p.end=min(p.ip+int(p.iplen()), captured);
  const int transport=p.ip+p.ipheaderlen();
  const int protocol=p.protocol();
  if (protocol==1) {
    if (transport<=p.end)
      p.icmp=transport;
  }
  else if (protocol==6) {
    if (transport+20<=p.end) {
      p.tcp=transport;
      p.appl=p.tcp+p.tcpheaderlen();
    }
  }
  else if (protocol==17) {
    if (transport+8<=p.end) {
      p.udp=transport;
      p.appl=p.udp+8;
    }
  }
}

// TCPkey - identifies one direction of a TCP connection by its source
// and destination address and port.
struct TCPkey {
  U32 sa;  // Source IP address
  U32 da;  // Destination IP address
  U16 sp;  // Source port
  U16 dp;  // Destination port

  // All fields zero
  TCPkey(): sa(0), da(0), sp(0), dp(0) {}

  // From explicit values, in the order source addr, source port,
  // destination addr, destination port
  TCPkey(U32 a, U16 b, U32 c, U16 d): sa(a), da(c), sp(b), dp(d) {}

  // From the addresses and ports of a parsed packet
  TCPkey(const Packet& p):
    sa(p.src()), da(p.dest()), sp(p.src_port()), dp(p.dest_port()) {}
};

// Strict ordering of TCPkeys for use as a map key: compares source
// address, then destination address, then source port, then destination
// port, and is decided by the first field that differs.
bool operator < (const TCPkey& a, const TCPkey& b) {
  if (a.sa<b.sa) return true;
  if (b.sa<a.sa) return false;
  if (a.da<b.da) return true;
  if (b.da<a.da) return false;
  if (a.sp<b.sp) return true;
  if (b.sp<a.sp) return false;
  return a.dp<b.dp;
}

// TCPval - what has been collected so far for a one-sided TCP stream
struct TCPval {
  vector<U8> data;  // Received data
  U32 seq;          // Initial sequence number
  double start;     // Time of first packet
  double finish;    // Time of last packet
  U8 sf;            // TCP flags of first packet
  U8 f1;            // TCP flags of next to last packet
  U8 f0;            // TCP flags of last packet

  // Empty stream, everything zero
  TCPval(): data(), seq(0), start(0), finish(0), sf(0), f1(0), f0(0) {}
};

// Streams currently being reassembled, one entry per TCPkey.  tcp_insert()
// adds to it, and entries are erased once their stream has been printed.
map<TCPkey, TCPval> tcp;  // Reassembles TCP streams

// Print TCP stream
ostream& operator << (ostream& out, const TCPkey& k) {
  const TCPval& v=tcp[k];
  print_time(out, v.start);
  out << " ";
  print_ip(out, k.da);
  out << ':' << k.dp << " ";
  print_ip(out, k.sa);
  out << ':' << k.sp;
  out << " +" << int(v.finish-v.start) << ' ';
  print_flags(out, v.sf);
  out << '/';
  print_flags(out, v.f1);
  out << '/';
  print_flags(out, v.f0);
  out << " " << v.data.size() << " ";
  for (int i=0; i<v.data.size() && i<1000; ++i)
    print_text(out, v.data[i]);
  out << "\n";
  return out;
};

// Insert a TCP packet into the appropriate stream
void tcp_insert(const Packet& p) {
  if (!p.tcp)
    return;
  I32 len=p.end-p.appl;
  if (len==0 && !p.syn() && !p.fin() && !p.rst())  // Discard ACKs
    return;
  TCPkey k=TCPkey(p);
  TCPval& v=tcp[k];
  if (v.start==0) {  // new stream
    v.start=p.time;
    v.sf=p.tcpflags();
    v.seq=p.tcpseq();
  }
  v.finish=p.time;
  v.f1=v.f0;
  v.f0=p.tcpflags();

  // Insert data
  if (len>0) {
    I32 pos=p.tcpseq()-v.seq;  // Where to insert into data
    if (pos>=0 && pos<v.data.size()+100000) {
      if (pos+len>v.data.size())  // Make room
        v.data.resize(pos+len);
      copy(p.data.begin()+p.appl, p.data.begin()+p.end, v.data.begin()+pos);
    }
  }

  // When connection is closed, print the stream and remove from tcp
  if (p.fin() || p.rst()) {
    cout << k;
    map<TCPkey, TCPval>::iterator iter=tcp.find(k);
    if (iter != tcp.end())
      tcp.erase(iter);
  }
}

// Read the tcpdump files
int main(int argc, char** argv) {
  for (int i=1; i<argc; ++i) {
    try {
      cerr << "\nFile: " << argv[i] << endl;
      ifstream in(argv[i], ios::in | ios::binary);
      if (!in)
        throw "File not found";
      test_header(in);
      Packet p;
      while (in >> p) {
        if (!ipcheck(p))
          continue;
        if (!defrag(p))
          continue;
        parse_transport(p);
        U32 dest=p.dest();
        if (dest>=((172*256+16)*256+112)*256
            && dest<((172*256+16)*256+119)*256
            && p.dest_port() < 1024)
          tcp_insert(p);
      }

    }
    catch (const char* msg) {
      cerr << msg << endl;
    }
    catch (const string& msg) {
      cerr << msg << endl;
    }

    // Print and remove TCP streams open at EOF in the order they were opened
    cerr << '\n' << tcp.size() << " TCP streams still open\n\n";
    while (!tcp.empty()) {
      double bt=0;  // Earliest time
      map<TCPkey, TCPval>::iterator p, bp;
      for (p=tcp.begin(); p!=tcp.end(); ++p) {
        if (p==tcp.begin() || p->second.start < bt) {
          bp=p;
          bt=p->second.start;
        }
      }
      cout << bp->first;
      tcp.erase(bp);
    }
  }
  return 0;
}

