[flow-tools] added code to flow-tag for alternate file format

Craig A. Finseth fin@finseth.com
Fri, 31 Jan 2003 16:40:44 -0600 (CST)


Here is the code for anyone who wants it.  As mentioned, it may make
more sense to put the time and energy into improving the documentation
on flow-tag and the tag rather than incorporating this code.

The code has not been formatted or named to existing styles as I
intended it for internal use only.

============================================================
new flow-tag.c
============================================================
/*
 * Copyright (c) 2001 Mark Fullmer and The Ohio State University
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      $Id: flow-tag.c,v 1.12 2002/08/26 02:24:38 maf Exp $
 */

#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <ftlib.h>

#if HAVE_STRINGS_H
 #include <strings.h>
#endif

#if HAVE_STRING_H
  #include <string.h>
#endif

#include "ftbuild.h"

/*--finseth, 2003-01-20: added declarations--*/
/* Fallback definition of NULL in case no included header provided one. */
#ifndef NULL
#define NULL		((void *) 0)
#endif
/* The string terminator character (distinct from the NULL pointer). */
#define NUL		'\0'
/* Alternate tag file support; both are defined at the end of this file. */
void do_alternate_tag(struct fts3rec_v1005 *rec);
void load_alternate_tag(char *alttag);
/*--end change--*/

/* Debug level, set from the -d option. */
int debug;
void usage(void);

int main(int argc, char **argv)
{
  struct ftio ftio_in, ftio_out;
  struct ftprof ftp;
  struct ftver ftv_in, ftv_out;
  struct ftset ftset;
  struct fts3rec_v1005 xl_rec;
  struct fts3rec_offsets fo;
  struct fttag fttag;
  struct fttag_def *ftd;
  int i, keep_input_time;
  u_int32 total_flows, cap_start, cap_end;
  u_int32 time_start, time_end;
  char *tag_defs, *tag_active;
  char *rec_in, *rec_out;
/*--finseth, 2003-01-20: added "alttag"--*/
  char *alttag;
  alttag = "";
/*--end change--*/

  /* init fterr */
  fterr_setid(argv[0]);

  bzero(&ftv_in, sizeof ftv_in);
  bzero(&ftv_out, sizeof ftv_out);
  bzero(&fttag, sizeof fttag);
  tag_defs = FT_PATH_CFG_TAG;
  tag_active = "";

  total_flows = 0;

  keep_input_time = 0;

  /* profile */
  ftprof_start (&ftp);

  /* defaults + no compression */
  ftset_init(&ftset, 0);

/*--finseth, 2003-01-20: added "a:"--*/
  while ((i = getopt(argc, argv, "a:b:C:d:fh?klm:s:t:T:V:z:")) != -1)
/*--end change--*/

    switch (i) {

/*--finseth, 2003-01-20: added "a:"--*/
    case 'a': /* alternate tag file */
      alttag = optarg;
      break;
/*--end change--*/

    case 'b': /* output byte order */
      if (!strcasecmp(optarg, "little"))
        ftset.byte_order = FT_HEADER_LITTLE_ENDIAN;
      else if (!strcasecmp(optarg, "big"))
        ftset.byte_order = FT_HEADER_BIG_ENDIAN;
      else
        fterr_errx(1, "expecting \"big\" or \"little\"");
      break;

    case 'C': /* comment field */
      ftset.comments = optarg;
      break;

    case 'd': /* debug */
      debug = atoi(optarg);
      break;

    case 'k': /* keep the start/end time from the input */
      keep_input_time = 1;
      break;

    case 't': /* tag filename */
      tag_defs = optarg;
      break;

    case 'T': /* active tags */
      tag_active = optarg;
      break;

    case 'z': /* compress level */
      ftset.z_level = atoi(optarg);
      if ((ftset.z_level < 0) || (ftset.z_level > 9))
        fterr_errx(1, "Compression level must be between 0 and 9");
      break;

    case 'h': /* help */
    case '?': 
    default:
      usage();
      exit (1);
      break;

    } /* switch */

  i = optind;

/*--finseth, 2003-01-20: added alttag option--*/
  if (*alttag != 0) {
    load_alternate_tag(alttag);
  } else {
    /* read config file */
    if (fttag_load(&fttag, tag_defs) < 0)
      fterr_errx(1, "fttag_load(): failed");

    if (!(ftd = fttag_def_find(&fttag, tag_active))) 
      fterr_errx(1, "fttag_def_find(): failed");
  }
/*--end change--*/

  /* input is stdin */
  if (ftio_init(&ftio_in, 0, FT_IO_FLAG_READ) < 0)
    fterr_errx(1, "ftio_init(): failed");

  ftio_get_ver(&ftio_in, &ftv_in);

  /* need at least the v5 fields plus tags */
  if (ftio_check_xfield(&ftio_in, FT_XFIELD_TAGGING_MASK))
    fterr_errx(1, "Flow record missing required field for tagging.");

  /* output is stdout */
  if (ftio_init(&ftio_out, 1, FT_IO_FLAG_WRITE |
    ((ftset.z_level) ? FT_IO_FLAG_ZINIT : 0) ) < 0)
    fterr_errx(1, "ftio_init(): failed");

  /* preserve start/end time from input stream? */
  if (keep_input_time) {  
      
    time_start = ftio_get_cap_start(&ftio_in);
    time_end = ftio_get_cap_end(&ftio_in);
      
    if (time_start && time_end) {
        
      ftio_set_preloaded(&ftio_out, 1);
      ftio_set_cap_time(&ftio_out, time_start, time_end);
    
    }

  } /* keep_input_time */

  ftv_in.s_version = FT_IO_SVERSION;

  if (!ftv_out.set)
    bcopy(&ftv_in, &ftv_out, sizeof ftv_in);

  ftv_out.d_version = 1005;

  /* set the version information in the io stream */
  if (ftio_set_ver(&ftio_out, &ftv_out) < 0)
    fterr_errx(1, "ftio_set_ver(): failed");

  ftio_set_comment(&ftio_out, ftset.comments);
  ftio_set_byte_order(&ftio_out, ftset.byte_order);
  ftio_set_z_level(&ftio_out, ftset.z_level);
  ftio_set_streaming(&ftio_out, 1);
  ftio_set_debug(&ftio_out, debug);
  ftio_set_cap_time(&ftio_out, cap_start, cap_end);
  ftio_set_flows_count(&ftio_out, total_flows);

  if (ftio_write_header(&ftio_out) < 0)
    fterr_errx(1, "ftio_write_header(): failed");

  fts3rec_compute_offsets(&fo, &ftv_in);

  rec_out = (void*)&xl_rec;

  while ((rec_in = ftio_read(&ftio_in))) {

    ++total_flows;

    /* make a v1005 rec */
    if (ftv_in.d_version != 1005)
      ftrec_xlate(rec_in, &ftv_in, &xl_rec, &ftv_out);
    else
      rec_out = rec_in;

/*--finseth, 2003-01-20: added alttag check--*/
    if (*alttag != NUL) {
      /* code at end of this file */
      do_alternate_tag((struct fts3rec_v1005*)rec_out);
    } else {
      /* tag evaluation */
      fttag_def_eval(ftd, (struct fts3rec_v1005*)rec_out);
    }
/*--end change--*/

    if (ftio_write(&ftio_out, rec_out) < 0)
      fterr_errx(1, "ftio_write(): failed");

  } /* while */

  if (ftio_close(&ftio_out) < 0)
    fterr_errx(1, "ftio_close(): failed");

  if (ftio_close(&ftio_in) < 0)
    fterr_errx(1, "ftio_close(): failed");

  if (debug > 0) {
    ftprof_end (&ftp, total_flows);
    ftprof_print(&ftp, argv[0], stderr);
  }

  fttag_free(&fttag);

  return 0;

} /* main */

/*
 * Print the command-line synopsis and the build identification to stderr.
 * The synopsis lists the -a alternate tag file option accepted by main().
 */
void usage(void)
{
  fprintf(stderr, "Usage: flow-tag [-hk] [-a alt_tag_fname] [-b big|little] [-C comment]\n");
  fprintf(stderr, "       [-d debug_level] [-t tag_fname ] [-T tag_active ] [-z z_level]\n");
  fprintf(stderr, "\n%s version %s: built by %s\n", PACKAGE, VERSION, FT_PROG_BUILD);

} /* usage */


/* ============================================================ */
/* This section has all new code for handling the alternate tag */
/* file.  The code is placed here to minimize the number of files */
/* that are changed. */

/* Uncomment to have load_alternate_tag() dump the parsed tables to
   /tmp/debug.out. */
/* #define DEBUG	1 */
#if defined(DEBUG)
/* Stream the debug dump is written to. */
FILE *dfptr;
#endif

/* Boolean result type used by the helper routines below. */
#define FLAG		char
/* Holds an IPv4 address or netmask as a single integer. */
#define INET_T		unsigned long

/* Character constants written in octal: tab, line feed, carriage return. */
#define TAB		'\11'
#define LF		'\12'
#define CR		'\15'
#define SP		' '

#define TRUE		1
#define FALSE		0

/* Size of the line buffers used while reading the alternate tag file. */
#define BUFFSIZE	1024
#define INPMAX		256		/* size of user input buffer */

/* Bounded copy into the array D (D must be a real array, since sizeof(d)
   is used); the result is always NUL terminated. */
#define xstrncpy(d,s)		{ (void)strncpy((d),(s),sizeof(d)); (d)[sizeof(d)-1]=NUL; }

/* One network entry of an alternate tag block. */
struct alttagrec {
	INET_T	ipaddr;		/* network number */
	int	masklen;	/* number of bits in the network portion */
	INET_T	mask;		/* netmask generated from masklen */
	int	sval;		/* value or-ed into src_tag */
	int	dval;		/* value or-ed into dst_tag */
	};

/* Per-record matching. */
void do_alternate_block(struct alttagrec altptr[], int blockcount, FLAG in,
	 struct fts3rec_v1005 *rec);
/* File loading. */
int load_block(FILE *fptr, char *alttag, int blockcount, int line);
int load_compare_alttagrec(const void *a, const void *b);
int load_line(FILE *fptr, char *alttag, int blockcount, int line, int which);

/* String, address and character helpers. */
void InetToS(char *str, INET_T inet);
FLAG is_inside(INET_T innerip, INET_T outerip, INET_T outermask);
char *sfindnotin(char *str, char *any);
char *sindex(char *str, char chr);
FLAG SToInet(char *str, INET_T *inetptr);
FLAG SToN(char *str, int *n, int base);
FLAG xisalpha(int c);
FLAG xisdigit(int c);
FLAG xislower(int c);
FLAG xisupper(int c);
FLAG xiswhite(char c);
int xtoupper(int c);

/* Tables built by load_alternate_tag(); all three arrays have altcount
   elements and are indexed by block number (0 based). */
struct alttagrec **aptr;	/* sorted entries of each block */
int *bptr;		/* number of entries in each block */
FLAG *inptr;		/* true if matches are within the block */
int altcount;		/* number of blocks */

/* ------------------------------------------------------------ */
/* Alternate-format counterpart of fttag_def_eval: clear both tags of REC,
then let every loaded block or its values into them in turn. */

void do_alternate_tag(struct fts3rec_v1005 *rec) {
	int blk = 0;

	/* blocks only ever OR bits in, so start from zero */
	rec->dst_tag = 0;
	rec->src_tag = 0;

	while (blk < altcount) {
		do_alternate_block(aptr[blk], bptr[blk], inptr[blk], rec);
		blk++;
		}
	}

/* ------------------------------------------------------------ */

/* Work out which value one block contributes for the address ADDR.

ALTPTR holds BLOCKCOUNT entries sorted by ascending network number.  The
entries are walked from the highest network downwards until ADDR either
falls inside an entry or lies above one (i.e. in the gap after it).

For a "within" block (IN true) the matching entry's value is returned and
everything else yields 0.  For a "not within" block the sense is reversed:
a match yields 0, an address below the first network yields the first
entry's value, and an address in a gap yields the value of the entry just
below it.  USE_DVAL selects dval instead of sval. */

static int alt_block_bits(struct alttagrec altptr[], int blockcount, FLAG in,
	 unsigned long addr, int use_dval) {
	struct alttagrec *ent;
	int idx;

	if (addr < altptr[0].ipaddr) {
		/* below the lowest network, so it cannot be inside any */
		if (in) return(0);
		return(use_dval ? altptr[0].dval : altptr[0].sval);
		}

	for (idx = blockcount - 1; idx >= 0; idx--) {
		ent = &altptr[idx];
		if ((addr & ent->mask) == ent->ipaddr) {
			/* inside this network */
			if (!in) return(0);
			return(use_dval ? ent->dval : ent->sval);
			}
		if (addr > ent->ipaddr) {
			/* in gap between entries */
			if (in) return(0);
			return(use_dval ? ent->dval : ent->sval);
			}
		}
	return(0);
	}

/* Apply a single block to REC: or the block's contribution for the source
address into src_tag and the one for the destination address into
dst_tag.  IN is true for a "within" block and false for a reversed
("not within") block. */

void do_alternate_block(struct alttagrec altptr[], int blockcount, FLAG in,
	 struct fts3rec_v1005 *rec) {
	rec->src_tag |= alt_block_bits(altptr, blockcount, in,
		rec->srcaddr, 0);
	rec->dst_tag |= alt_block_bits(altptr, blockcount, in,
		rec->dstaddr, 1);
	}


/* ------------------------------------------------------------ */
/* Load the alternate tag file ALTTAG into the global tables aptr, bptr,
inptr and altcount.

The first line of the file must be the number of blocks, a decimal
integer of at least 1; the blocks themselves are read by load_block().
Any error is reported on stderr and terminates the program with exit
status 1.  The tables are never freed; they are used for the rest of the
run. */

void load_alternate_tag(char *alttag) {
#if defined(DEBUG)
	struct alttagrec *atrptr;
	char b1[BUFFSIZE];
	char b2[BUFFSIZE];
	int j;
#endif
	FILE *fptr;
	char buf[BUFFSIZE];
	char *begptr, *endptr;
	int i;
	int line;

	if ((fptr = fopen(alttag, "r")) == NULL) {
		fprintf(stderr, "Unable to access file '%s'.\n", alttag);
		exit(1);
		}
	line = 1;
	/* fgets returns a pointer, so failure is NULL rather than <= 0 */
	if (fgets(buf, BUFFSIZE, fptr) == NULL) {
		fprintf(stderr, "File %s: read error on line 1.\n", alttag);
		fclose(fptr);
		exit(1);
		}
	begptr = buf;
	endptr = sfindnotin(begptr, "0123456789");
	/* SToN turns an empty string into 0, so a line without leading
	   digits has to be rejected here */
	if (endptr == begptr) {
		fprintf(stderr, "Alternate tag file '%s' does not begin with a block count.\n",
			alttag);
		fclose(fptr);
		exit(1);
		}
	*endptr = NUL;
	if (!SToN(begptr, &altcount, 10)) {
		fprintf(stderr, "Alternate tag file '%s' does not begin with a block count.\n",
			alttag);
		fclose(fptr);
		exit(1);
		}
	if (altcount < 1) {
		fprintf(stderr, "Alternate tag file '%s': block count must be at least 1.\n",
			alttag);
		fclose(fptr);
		exit(1);
		}
	aptr = (struct alttagrec **)malloc(sizeof(struct alttagrec *) *
		altcount);
	if (aptr == NULL) {
		fprintf(stderr, "Unable to allocate memory for alt tag array.\n");
		fclose(fptr);
		exit(1);
		}
	inptr = (FLAG *)malloc(sizeof(FLAG) * altcount);
	if (inptr == NULL) {
		fprintf(stderr, "Unable to allocate memory for alt flag array.\n");
		fclose(fptr);
		exit(1);
		}
	bptr = (int *)malloc(sizeof(int) * altcount);
	if (bptr == NULL) {
		fprintf(stderr, "Unable to allocate memory for alt count array.\n");
		fclose(fptr);
		exit(1);
		}
#if defined(DEBUG)
	dfptr = fopen("/tmp/debug.out", "w");
	if (dfptr == NULL) {
		fprintf(stderr, "Unable to open debug output file.\n");
		fclose(fptr);
		exit(1);
		}
	fprintf(dfptr, "file '%s' opened, altcount=%d\n", alttag, altcount);
#endif
	/* blocks are numbered from 1 for the benefit of error messages */
	for (i = 1; i <= altcount; i++) {
		line = load_block(fptr, alttag, i, line);
		}
	fclose(fptr);

#if defined(DEBUG)
	for (i = 0; i < altcount; i++) {
		fprintf(dfptr, "block #%d, size %d, in %s:\n", i + 1, bptr[i],
			inptr[i] ? "in" : "out");
		/* walk by count: a 0.0.0.0 entry would look like the
		   end-of-block sentinel */
		for (j = 0; j < bptr[i]; j++) {
			atrptr = &aptr[i][j];
			InetToS(b1, atrptr->ipaddr);
			InetToS(b2, atrptr->mask);
			fprintf(dfptr, "%s\t%d\t%s\t%08x\t%08x\n",
				b1,
				atrptr->masklen,
				b2,
				atrptr->sval,
				atrptr->dval);
			}
		}
	fprintf(dfptr, "file loaded\n");
	fclose(dfptr);
#endif
	}


/* ------------------------------------------------------------ */
/* Load a block. Return the new line number. */

int load_block(FILE *fptr, char *alttag, int blockcount, int line) {
	char buf[BUFFSIZE];
	char *begptr, *endptr;
	int cnt;
	int i;

	line++;
	if (fgets(buf, BUFFSIZE, fptr) <= 0) {
		fprintf(stderr, "File %s: read error on line %d.\n", alttag,
			line);
		fclose(fptr);
		exit(1);
		}
	begptr = buf;
	endptr = sfindnotin(begptr, "-0123456789");
	*endptr = NUL;
	if (!SToN(begptr, &cnt, 10)) {
		fprintf(stderr, "File %s line %d block %d does not begin with a block count.\n",
			alttag, line, blockcount);
		fclose(fptr);
		exit(1);
		}
	inptr[blockcount - 1] = cnt > 0;
	if (cnt < 0) cnt = -cnt;
	aptr[blockcount - 1] = (struct alttagrec *)
		malloc(sizeof(struct alttagrec) * (cnt + 1));
	if (aptr[blockcount - 1] == NULL) {
		fprintf(stderr, "File %s line %d block %d: unable to allocate memory.\n",
				alttag, line, blockcount);
		fclose(fptr);
		exit(1);
		}
	for (i = 0; i < cnt; i++) {
		line = load_line(fptr, alttag, blockcount, line, i);
		}
	aptr[blockcount - 1][cnt].ipaddr = 0;
	bptr[blockcount - 1] = cnt;

	/**sort*/
	qsort((aptr[blockcount - 1]),
		cnt,
		sizeof(struct alttagrec),
		load_compare_alttagrec);

	/**verify*/
	for (i = cnt - 1; i > 0; i--) {
		if (is_inside((aptr[blockcount - 1])[i].ipaddr,
			 (aptr[blockcount - 1])[i - 1].ipaddr,
			 (aptr[blockcount - 1])[i - 1].mask)) {
			fprintf(stderr, "File %s block %d: overlapping networks.\n",
				alttag, blockcount);
			fclose(fptr);
			exit(1);
			}
		}
	return(line);
	}


/* ------------------------------------------------------------ */
/* Compare two records for qsort. */

int load_compare_alttagrec(const void *av, const void *bv) {
	struct alttagrec *a = (struct alttagrec *)av;
	struct alttagrec *b = (struct alttagrec *)bv;

	if (a->ipaddr > b->ipaddr) {
		return(1);
		}
	else if (a->ipaddr < b->ipaddr) {
		return(-1);
		}
	
	if (a->masklen > b->masklen) {
		return(1);
		}
	else if (a->masklen < b->masklen) {
		return(-1);
		}
	return(0);
	}


/* ------------------------------------------------------------ */
/* Load a line for a block. Return the new line number.

Lines that do not start with a digit are skipped.  The first line that
does is parsed as

	IP  MASKLEN  SVAL  DVAL

(dotted decimal address, decimal mask length from 1 to 32, two
hexadecimal values) and stored as entry WHICH of block BLOCKCOUNT
(1 based) in aptr.  ALTTAG is the file name and LINE the number of the
last line read; both are used for error messages.  Any error is reported
on stderr and terminates the program with exit status 1. */

int load_line(FILE *fptr, char *alttag, int blockcount, int line, int which) {
	char buf[BUFFSIZE];
	char *begptr, *endptr;
	int i;
	struct alttagrec a;

	do	{
		line++;
		/* fgets returns a pointer, so failure is NULL rather
		   than <= 0 */
		if (fgets(buf, BUFFSIZE, fptr) == NULL) {
			fprintf(stderr, "File %s: read error on line %d.\n",
				alttag, line);
			fclose(fptr);
			exit(1);
			}
		} while (!xisdigit(buf[0]));

	/* parse IP addr */
	begptr = buf;
	endptr = sfindnotin(begptr, "0123456789.");
	if (*endptr == NUL) {
		fprintf(stderr, "File %s line %d block %d: no data after network number.\n",
			alttag, line, blockcount);
		fclose(fptr);
		exit(1);
		}
	*endptr = NUL;
	if (!SToInet(begptr, &a.ipaddr)) {
		fprintf(stderr, "File %s line %d block %d: '%s' is not valid IP address.\n",
			alttag, line, blockcount, begptr);
		fclose(fptr);
		exit(1);
		}

	/* parse mask len */
	for (begptr = endptr + 1; xiswhite(*begptr); begptr++) ;
	endptr = sfindnotin(begptr, "0123456789");
	if (*endptr == NUL) {
		fprintf(stderr, "File %s line %d block %d: no data after network count.\n",
			alttag, line, blockcount);
		fclose(fptr);
		exit(1);
		}
	*endptr = NUL;
	if (!SToN(begptr, &i, 10) || i < 1 || i > 32) {
		fprintf(stderr, "File %s line %d block %d: '%s' is not valid integer between 1 and 32.\n",
			alttag, line, blockcount, begptr);
		fclose(fptr);
		exit(1);
		}
	a.masklen = i;

	/* generate mask */
	/* done in unsigned long: with a plain int, 1 << 31 (masklen 1)
	   overflows.  The result is limited to 32 bits. */
	a.mask = ~((1UL << (32 - a.masklen)) - 1UL) & 0xFFFFFFFFUL;

	/* parse sval */
	for (begptr = endptr + 1; xiswhite(*begptr); begptr++) ;
	endptr = sfindnotin(begptr, "0123456789abcdefABCDEF");
	if (*endptr == NUL) {
		fprintf(stderr, "File %s line %d block %d: no data after sval.\n",
			alttag, line, blockcount);
		fclose(fptr);
		exit(1);
		}
	*endptr = NUL;
	if (!SToN(begptr, &i, 16)) {
		fprintf(stderr, "File %s line %d block %d: '%s' is not a valid hexadecimal value.\n",
			alttag, line, blockcount, begptr);
		fclose(fptr);
		exit(1);
		}
	a.sval = i;

	/* parse dval */
	/* last field: running into the end of the buffer is fine here */
	for (begptr = endptr + 1; xiswhite(*begptr); begptr++) ;
	endptr = sfindnotin(begptr, "0123456789abcdefABCDEF");
	*endptr = NUL;
	if (!SToN(begptr, &i, 16)) {
		fprintf(stderr, "File %s line %d block %d: '%s' is not a valid hexadecimal value.\n",
			alttag, line, blockcount, begptr);
		fclose(fptr);
		exit(1);
		}
	a.dval = i;
	(aptr[blockcount - 1])[which] = a;
	return(line);
	}


/* ------------------------------------------------------------ */

/* Convert the Internet address INET to a string in dotted decimal form
and store it in STR.  Only the low 32 bits of INET are used.  STR must
have room for at least 16 characters ("255.255.255.255" plus the
terminator). */

void InetToS(char *str, INET_T inet) {
	/* the arguments are unsigned long, so the conversion is %lu */
	sprintf(str, "%lu.%lu.%lu.%lu",
		(inet >> 24) & 0xFF,
		(inet >> 16) & 0xFF,
		(inet >>  8) & 0xFF,
		 inet        & 0xFF);
	}


/* ------------------------------------------------------------ */
/* Return true if INNERIP, once masked with OUTERMASK, equals OUTERIP,
i.e. if the inner address lies within the outer network. */

FLAG is_inside(INET_T innerip, INET_T outerip, INET_T outermask) {
	INET_T network = innerip & outermask;

	if (network == outerip) return(1);
	return(0);
	}


/* ------------------------------------------------------------ */

/* Return a pointer to the first character of STR that does NOT occur in
ANY, or to STR's terminating NUL if every character occurs in ANY. */

char *sfindnotin(char *str, char *any) {
	char *scan;

	for (scan = str; *scan != NUL; scan++) {
		/* sindex stops on ANY's terminator when *scan is absent */
		if (*sindex(any, *scan) == NUL) break;
		}
	return(scan);
	}


/* ------------------------------------------------------------ */

/* Return a pointer to the first occurrence of CHR in STR, or to STR's
terminating NUL if CHR does not occur. */

char *sindex(char *str, char chr) {
	for (;;) {
		if (*str == NUL || *str == chr) return(str);
		str++;
		}
	}


/* ------------------------------------------------------------ */

/* Converts the value in STR to an Internet address and returns it in
INETPTR.  STR must contain a value in Internet dot notation (e.g.,
a.b.c.d).  All four components must be present, each in the range 0 to
255, and nothing may follow the fourth one.  Returns TRUE if the
conversion was successful, FALSE if the string was not a valid Internet
address; *INETPTR is only written on success. */

FLAG SToInet(char *str, INET_T *inetptr) {
	char buf[INPMAX];
	char *cptr;
	char *begptr;
	char *endptr;
	INET_T inet;
	int cnt;
	int num;

	if (strlen(str) >= INPMAX) return(FALSE);	/* too long */
	/* work on a copy: the dots are overwritten with terminators */
	xstrncpy(buf, str);

	inet = 0;
	begptr = buf;
	endptr = buf + strlen(buf);
	for (cnt = 0; cnt < 4 && begptr < endptr && *begptr != NUL;
		 cnt++, begptr = cptr) {
		cptr = sindex(begptr, '.');
		*cptr++ = NUL;
		if (!SToN(begptr, &num, 10)) return(FALSE);
		if (num < 0 || num > 255) return(FALSE);
		inet <<= 8;
		inet |= (num & 0xFF);
		}
	if (cnt != 4) return(FALSE);

	/* When the fourth component ran up to the end of the string,
	   begptr is now one past endptr.  Anything else means a dot
	   followed it ("1.2.3.4." or "1.2.3.4.5"). */
	if (begptr <= endptr) return(FALSE);

	*inetptr = inet;
	return(TRUE);
	}


/* ------------------------------------------------------------ */

/* Convert the value in STR to a number and return it in N.  The number
is written in base BASE, using the digits 0-9 and then the letters A-Z
in either case.  Return TRUE if the conversion was successful, FALSE if
the string contains a character that is not a digit of that base or if
the value does not fit in an unsigned int.  *N is only written on
success.

This routine allows for leading whitespace and can handle leading + or
- signs.  A string with no digits at all (empty, or just a sign)
converts to 0. */

FLAG
SToN(char *str, int *n, int base) {
	unsigned val;
	int digit;
	int minus;
	int chr;

	while (*str == SP || *str == TAB) str++;
	if (*str == '-') {
		minus = -1;
		str++;
		}
	else	{
		minus = 1;
		if (*str == '+') ++str;
		}
	for (val = 0; *str; ++str) {
		chr = xtoupper(*str);
		if (xisalpha(chr)) digit = chr - 'A' + 10;
		else if (xisdigit(chr)) digit = chr - '0';
		else return(FALSE);
		/* also rejects every digit when BASE is zero or negative */
		if (digit >= base) return(FALSE);
		/* refuse to wrap around instead of returning garbage */
		if (val > ((unsigned)-1 - (unsigned)digit) / (unsigned)base)
			return(FALSE);
		val = val * base + digit;
		}
	*n = val * minus;
	return(TRUE);
	}


/* ------------------------------------------------------------ */

/* Like ANSI isalpha, but accepts any int: true for A-Z and a-z only. */

FLAG xisalpha(int c) {
	if (xisupper(c)) return(1);
	return(xislower(c) != 0);
	}


/* ------------------------------------------------------------ */

/* Like ANSI isdigit, but accepts any int: true for '0' to '9' only. */

FLAG xisdigit(int c) {
	if (c < '0') return(0);
	return(c <= '9');
	}


/* ------------------------------------------------------------ */

/* Like ANSI islower, but accepts any int: true for 'a' to 'z' only. */

FLAG xislower(int c) {
	if (c < 'a') return(0);
	return(c <= 'z');
	}


/* ------------------------------------------------------------ */

/* Like ANSI isupper, but accepts any int: true for 'A' to 'Z' only. */

FLAG xisupper(int c) {
	if (c < 'A') return(0);
	return(c <= 'Z');
	}


/* ------------------------------------------------------------ */

/* Return true if C is a Space or a Tab; no other character counts as
whitespace here. */

FLAG xiswhite(char c) {
	switch (c) {
	case SP:
	case TAB:
		return(1);
	default:
		return(0);
		}
	}


/* ------------------------------------------------------------ */

/* Like ANSI toupper, but accepts any int: 'a' to 'z' are mapped to 'A'
to 'Z', every other value is returned unchanged. */

int xtoupper(int c) {
	if (!xislower(c)) return(c);
	return(c - 'a' + 'A');
	}


============================================================
new flow-tag.sgml
============================================================
<!DOCTYPE refentry PUBLIC "-//Davenport//DTD DocBook V3.0//EN">
<refentry>

<refmeta>
<refentrytitle>
<application>flow-tag</application>
</refentrytitle>
<manvolnum>1</manvolnum>
</refmeta>

<refnamediv>
<refname>
<application>flow-tag</application>
</refname>
<refpurpose>
Apply tags to flow files.
</refpurpose>
</refnamediv>

<refsynopsisdiv>
<cmdsynopsis>
<command>flow-tag</command>
<arg>-hk</arg>
<arg>-b<replaceable> big</replaceable>|<replaceable>little</replaceable></arg>
<arg>-C<replaceable> comment</replaceable></arg>
<arg>-d<replaceable> debug_level</replaceable></arg>
<arg>-t<replaceable> tag_fname</replaceable></arg>
<arg rep="repeat">-T<replaceable> active_def</replaceable>|<replaceable>active_def,active_def</replaceable></arg>
</cmdsynopsis>
</refsynopsisdiv>

<refsect1>
<title>DESCRIPTION</title>
<para>
The <command>flow-tag</command> utility is used to add or modify
source and destination tags in flow records.  Tags are 32 bit 
identifiers derived from rules and fields in a flow record.  Tags
can be used to group flows with common prefixes, autonomous systems,
next hops, exporter id and/or input/output interface.
<command>flow-stat</command> can be used with tagged flows to produce
group based reports.  For example, all outbound traffic for a customer
where the customer is defined by a list of IP prefixes.
</para>
</refsect1>

<refsect1>
<title>OPTIONS</title>
<variablelist>

<varlistentry>
<term>-a<replaceable> alt_tag_fname</replaceable></term>
<listitem>
<para>
Load tags from the alternate tag file named
<filename>alt_tag_fname</filename>.  No default.  If present, the -t and
-T options are silently ignored.
</para>
</listitem>
</varlistentry>

<varlistentry>
<term>-b<replaceable> big</replaceable>|<replaceable>little</replaceable></term>
<listitem>
<para>
Byte order of output.
</para>
</listitem>
</varlistentry>

<varlistentry>
<term>-C<replaceable> Comment</replaceable></term>
<listitem>
<para>
Add a comment.
</para>
</listitem>
</varlistentry>

<varlistentry>
<term>-d<replaceable> debug_level</replaceable></term>
<listitem>
<para>
Enable debugging.
</para>
</listitem>
</varlistentry>

<varlistentry>
<term>-h</term>
<listitem>
<para>
Display help.
</para>
</listitem>
</varlistentry>

<varlistentry>
<term>-k</term>
<listitem>
<para>
Keep time from input.
</para>
</listitem>  
</varlistentry>

<varlistentry>
<term>-t<replaceable> tag_fname</replaceable></term>
<listitem>
<para>
Load tags from <filename>tag_fname</filename>.  Defaults to
<filename>@localstatedir@/cfg/tag</filename>
</para>
</listitem>
</varlistentry>

<varlistentry>
<term>-T<replaceable> active_def</replaceable>|<replaceable>active_def,active_def...</replaceable></term>
<listitem>
<para>
Use <replaceable>active_def</replaceable> as the active tag definition(s).
</para>
</listitem>
</varlistentry>
</variablelist>

<para>
</para>
<para>
The configuration file is a collection of actions and definitions.  An
action is triggered by a definition and a definition is invoked only
if listed with the <replaceable>-T</replaceable> flag.  Lines beginning
with # are treated as comments and ignored.
</para>
<para>
<screen>
tag-action command                  Description
----------------------------------------------------------------------
tag-action                          Begin tag-action section
                                    tag-action foo

type                                Configure the type of action, one of
                                    src-prefix, dst-prefix, prefix,
                                    src-as, dst-as, as, next-hop.
                                    type src-prefix

match                               Match criteria.  The match condition
                                    depends on the type.  Following the
                                    match condition is one of
                                    set-dst, set-src, or-dst, or-src to
                                    set or logically or a value to the
                                    source or destination tag.
                                    match 128.146/16 set-dst 0x010001

</screen>
</para>
<para>
A definition lists a set of actions which are evaluated if the filter
criteria is met.  Each definition is built with terms.  A term has
its actions evaluated if the filter is passed.
<screen>
definition command                  Description
-----------------------------------------------------------------------
tag-definition                      Begin tag-definition section
                                    tag-definition bar

term                                Begin a list of actions to be
                                    evaluated that match the filter
                                    rule.
                                    term

input-filter                        List of input ifIndexes the flow
                                    must match.
                                    input-filter 1,2,3,4

output-filter                       List of output ifIndexes the flow
                                    must match.
                                    output-filter 1,2,3,4

exporter                            IP address of exporter the flow must
                                    match.
                                    exporter 1.2.3.4

action                              Name of action to evaluate.  Actions
                                    are evaluated in the order they
                                    appear in a definition.
                                    action foo

</screen>
</para>
<para>
</para>
</refsect1>

<refsect1>
<title>EXAMPLES</title>
<informalexample>
<para>
The meaning of a tag is user defined.  The following example uses 
16 bits of a tag as a customer ID and 4 bits as a customer type.
<command>flow-xlate</command> can be used to apply a mask to these
fields.
<programlisting>
# file: gigapop-tags
# tag format
# 
# 0       7         15        23        31
# 0000 0000 0000 0000 0000 0000 0000 0000 (32 bits)
# RRRRRRRRRRRRRR TTTT NNNNNNNNNNNNNNNNNNN
#              |    |                   | Site name
#              |    | Site type
#              | Reserved
#
#
# SITE_NAME_MASK = 0x0000FFFF  
# SITE_TYPE_MASK = 0x00FF0000
#
# ID             Name
#---------------------------------
# 0x0001         OSU
# 0x0002         CWRU
# 0x0003         BGSU   
# ... etc
# 0x0019         MULTICAST
#
# ID             Type  
#------------------------
# 0x01         Participant
# 0x02         SEGP
# 0x03         Sponsored-Participant
# 0x04         Gigapop
# 0x05         MULTICAST

tag-action OHIO-GIGAPOP_DST
 type dst-prefix
# OSU
 match 128.146/16 set-dst     0x010001
 match 164.107/16 set-dst     0x010001
 match 140.254/16 set-dst     0x010001
 match 192.153.26/24 set-dst  0x010001
# CWRU
 match 129.22/16 set-dst      0x010002
 match 192.5.110/24 set-dst   0x010002
# BGSU
 match 129.1/16 set-dst       0x010003
# ...etc
# MULTICAST
 match 224/4 set-dst 0x050019

tag-action OHIO-GIGAPOP_SRC
 type src-prefix
# OSU
 match 128.146/16 set-src     0x010001
 match 164.107/16 set-src     0x010001
 match 140.254/16 set-src     0x010001
 match 192.153.26/24 set-src  0x010001
# CWRU
 match 129.22/16 set-src      0x010002
 match 192.5.110/24 set-src   0x010002
# BGSU
 match 129.1/16 set-src       0x010003
# ...etc

tag-action OTHER_DST
 type dst-prefix
 match 0/0 set-dst 0x0
 
tag-action OTHER_SRC
 type src-prefix
 match 0/0 set-src 0x0

tag-definition OHIO-GIGAPOP
 term
# Abilene interface
 input-filter 25
# clear tag first -- it defaults to 0, so this may not be necessary.
 action OTHER_DST
 action OHIO-GIGAPOP_DST
 term
# Abilene interface
 output-filter 25
# clear tag first -- it defaults to 0, so this may not be necessary.
 action OTHER_SRC
 action OHIO-GIGAPOP_SRC

</programlisting>
</para>
<para>
First populate <filename>@localstatedir@/sym/tag</filename> for <command>flow-stat</command> to use as symbols.
<programlisting>
0x0001 OSU
0x0002 CWRU
0x0003 BGSU
0x0019 MULTICAST
0x010000 PART
0x020000 SEGP
0x030000 SPART
0x040000 GIGAPOP
0x050000 MULTICAST
</programlisting>
</para>
<para>
To generate a report for outgoing traffic to Abilene based on customer ID:
<programlisting>
flow-cat <filename>flows</filename> | flow-filter -I25 | flow-tag -t gigapop-tags -TOHIO-GIGAPOP_SRC | flow-xlate -t0x0000FFFF | flow-stat -n -f30 -S2
</programlisting>
<screen>
#  --- ---- ---- Report Information --- --- ---
#
# Fields:    Total
# Symbols:   Enabled
# Sorting:   Descending Field 2
# Name:      Source Tag
#
# Args:      ../flow-stat -n -f30 -S2 
#
#
# Src Tag   flows                 octets                packets
#
OSU         4942230               181326237007          302476793
CWRU        874883                54358312807           70589318
BGSU        1008797               7600209852            22060870
</screen>
</para>
<para>
To generate a report for inbound traffic from Abilene based on customer type:
<programlisting>
flow-cat <filename>flows</filename> | flow-filter -i25 | flow-tag -t gigapop-tags -TOHIO-GIGAPOP_DST | flow-xlate -T0xFF0000 | flow-stat -n -f31 -S2
</programlisting>
<screen>
#  --- ---- ---- Report Information --- --- ---
#
# Fields:    Total
# Symbols:   Enabled
# Sorting:   Descending Field 2
# Name:      Destination Tag
#
# Args:      ../flow-stat -n -f31 -S2 
#
#
# Dst Tag   flows                 octets                packets
#
PART        15923156              663289954569          981163979
SEGP        4995795               135525076170          196534917
MULTICAST   45171                 49866825003           137798118
GIGAPOP     942209                26422533266           23199961
SPART       73998                 5170323905            7597985
</screen>
</para>
</informalexample>
</refsect1>

<refsect1>
<title>ALTERNATE TAG FILE</title>
<para>
The alternate tag file format provides for much faster (up to 20x)
handling of specialized tag situations.  It only provides for
categorization based on source and/or destination IP address.
</para>
<para>
The specialized situation that spurred this development was the case
where flows in a large network had to be tagged as follows.  (The rest
of this discussion will be in terms of source IP address, but the
format provides for destination addresses as well.)
</para>
<para>
1) Flows having a source address on one of a customer's networks are
tagged with an identifier for that customer.  Each customer can have a
list of networks.
</para>
<para>
2) Flows having a source address that does not match any of the
networks within our system are tagged as being from the Internet as a
whole.
</para>
<para>
The logic can be expressed as follows.
</para>
<para>
Given a flow record, the program is assumed to do the following:
</para>
<para>
<screen>
src_tag = 0
dst_tag = 0
for each block
	for each network in that block
		if (within block flag)
			if srcaddr is in block then
				src_tag |= corresponding &lt;sval&gt;
		else
			if srcaddr is not in block then
				src_tag |= corresponding &lt;sval&gt;
		if (within block flag)
			if dstaddr is in block then
				dst_tag |= corresponding &lt;dval&gt;
		else
			if dstaddr is not in block then
				dst_tag |= corresponding &lt;dval&gt;
</screen>
</para>
<para>
Thus, a tag for a given flow record can accumulate &lt;sval&gt;s or &lt;dval&gt;s
from any of zero to all of the blocks.
</para>
<para>
Files in the alternate file format are plain ASCII text files.  Empty
lines and those that start with any character other than the
digits 0 to 9 are silently skipped. The file is organized overall as
follows.
</para>
<screen>
block count
number of lines in block #1
line 1 of block #1
line 2 of block #1
	...
line n of block #1
number of lines in block #2
line 1 of block #2
line 2 of block #2
	...
line n of block #2
	...
number of lines in block #n
line 1 of block #n
line 2 of block #n
	...
line n of block #n
</screen>
<para>
In other words, the file format provides for N blocks, each block
with its own line count and lines.  The number of blocks is present
as a decimal integer on a line by itself, as are the counts of lines
within each block.  None of the counts can be zero. The purpose of
the counts is to make it easy to both load and to delimit
blocks.
</para>
<para>
If the count is negative, the sense of the block is reversed: the sval/
dval is used if the IP address is _not_ in any of the listed networks.
</para>
<para>
Each line record is of the following form.
</para>
<screen>
	IP	NUM	SVAL	DVAL
</screen>
<para>
Where:
</para>
<para>
IP is the IP network number in standard dotted decimal form.
</para>
<para>
NUM is the number of bits in the network portion.  Bits in IP after
this position must be zero.
</para>
<para>
SVAL is the value that is or-ed in when the source address of the flow
matches the network block for this line.  It is in hexadecimal (with
no leading "0x" or anything).
</para>
<para>
DVAL is the value that is or-ed in when the destination address of the
flow matches the network block for this line.  It is in hexadecimal
(with no leading "0x" or anything).
</para>
<para>
If the block is a not-matched block (i.e., the line count is negative),
the same SVAL values should appear for all lines in the block and
similarly for DVAL values.
</para>
<para>
Items on a line are separated by arbitrary whitespace (SP or TAB, 32
or 9 decimal) characters, but there is no leading whitespace.
</para>
<para>
A line appears for each network block to be matched.
</para>
</refsect1>

<refsect1>
<title>BUGS</title>
<para>
None known.
</para>
</refsect1>

<refsect1>
<title>AUTHOR</title>
<para>
<author>
<firstname>Mark</firstname>
<surname>Fullmer</surname>
</author>
<email>maf@splintered.net</email>
</para>
</refsect1>

<refsect1>
<title>SEE ALSO</title>
<para>
<application>flow-tools</application>(1)
</para>
</refsect1>

</refentry>