Removing possibly unused metadata?

MN m.newton at stanford.edu
Mon Nov 7 18:10:34 EST 2011


Hi Jason - 

These diffs are based on the 2.0.6 distribution, so are old.
They worked well for us over a several-year period.  I believe
the ragator changes made about a 3% improvement.  The timestamp
and other rastrip changes made a variable difference depending
upon the mask, but were substantial.

Now, the asciification and xz compression - and lots more
storage - allow us to keep ~400 days.

Hope these help,
- mike


% diff ragator.c raGATOR.c
37a38,39
> int fromFilesOnly = 0;		/* -- MN */
> 
132a135,141
>       /* if we are not "real-time", then do not purge the queue as often -- MN */
>       if (rflag & !Sflag) {
> 	extern struct timeval RaClientTimeout;
> 	RaClientTimeout.tv_sec = 8;
> 	RaClientTimeout.tv_usec = 0;
>       }
> 
223a233
>    fprintf (stderr, "            -H bins[L]:range   Do Historgram-related processing (range is value-value, where value is %%d[ums]) [UNDOC'ed]"); /* -- MN */
524,526c534,537
< 
< #define RA_MAXQSCAN  25600
< #define RA_MAXQSIZE  250000
---
> /* original multipliers were each 1; RA_MAXQSIZE roughly equals the amount of memory (in K) used */
> #define RA_MAXQSCAN  (1 * 25600)
> #define RA_MAXQSIZE  (2 * 250000)
> /* #define RA_MAXQSIZE  625000 */
527a539,540
> /* Note: this is called once every RaClientTimeout whether or not reading from streams. */
> 
542,559c555,573
<          while (queue->count > RA_MAXQSIZE) {
<             obj = (struct ArgusRecordStore *) RaRemoveFromQueue(RaModelerQueue, RaModelerQueue->start->prv);
<             RaTimeoutArgusStore(obj);
<          }
< 
<          if ((cnt = ((queue->count > RA_MAXQSCAN) ? RA_MAXQSCAN : queue->count)) != 0) {
<             while (cnt--) {
<                if ((obj = (struct ArgusRecordStore *) RaPopQueue(queue)) != NULL) {
<                   if (RaCheckTimeout(obj, NULL))
<                      RaTimeoutArgusStore(obj);
<                   else
<                      RaAddToQueue(queue, &obj->qhdr);
< 
<                } else
<                   cnt++;
<             }
<          }
<          break;
---
> 	while (queue->count > RA_MAXQSIZE) {
> 	  obj = (struct ArgusRecordStore *) RaRemoveFromQueue(RaModelerQueue, RaModelerQueue->start->prv);
> 	  RaTimeoutArgusStore(obj);
> 	}
> 
> 	if ((cnt = ((queue->count > RA_MAXQSCAN) ? RA_MAXQSCAN : queue->count)) != 0) {
> 	  while (cnt--) {
> 	    if ((obj = (struct ArgusRecordStore *) RaPopQueue(queue)) != NULL) {
> 	      if (RaCheckTimeout(obj, NULL))
> 		RaTimeoutArgusStore(obj);
> 	      else
> 		RaAddToQueue(queue, &obj->qhdr);
> 	      
> 	    } else
> 	      cnt++;
> 	  }
> 	}
>    
> 	break;


% diff rastrip.c raxstrip.c
83a84,88
> /* MN: later we should offer these as options, but for now */
> int XFall = 1;			/* do all record zeroings */
> int XFusecs = 1;		/* zero synAckuSecs & ackDatauSecs */
> int XFtimedescusecs = 1;		/* zero .time.start.tv_usec & .time.last.tv_usec */
> 
197c202,205
<    fprintf (stderr, "Rastrip Version %s\n", version);
---
>    fprintf (stderr, "RaXstrip Version %s\n", version);
>    fprintf (stderr, "Does all rastrip processing and also zeros many non-essential fields,\n");
>    fprintf (stderr, "which, with bzip2, produces much higher compression ratios.");
>    fprintf (stderr, " ... zxm.zxnewton at zxstanford.zxedu\n");
222d229
< struct ArgusRecord * RaConstructArgusRecord (struct ArgusRecord *);
223a231,252
> #ifdef SUBSECOND
> /*
>  * for these, the high 12 bits should never be on (usec < 1000000);
>  * USEC_BITS_TO_DROP gives number of low order bits dropped
>  */
> #define struct USEC_BITS_TO_DROP	12
> #define USEC_BITS_MASK	((unsigned) (2^USEC_BITS_TO_DROP) - 1)
> #define USEC_UP(x)	((x+USEC_BITS_MASK) & (~USEC_BITS_MASK))
> 
> void
> trunc_up_timeval(struct timeval *t)
> {
>   if (t->tv_usec & USEC_BITS_MASK) {
>     t->tv_usec &= USEC_BITS_MASK;
>     t->tv_usec += (USEC_BITS_MASK+1);
>     if (t->tv_usec >= 1000000) {
>       t->tv_sec++;
>       t->tv_usec = 0;
>     }
>   }
> }
> #endif /* SUBSECOND */
245a275,277
> 	if (XFall) {
> 	  newarg->ahdr.seqNumber = 0;		/* this may be dangerous */
> 	}
247a280,296
> 	    struct ArgusFarStruct *t = (struct ArgusFarStruct *) &((char *)newarg)[newarg->ahdr.length];
> 	    if (XFall) { /* MN: clear the microseconds part of the timestamps, others */
> 	      t->time.start.tv_usec = 0;		/* truncate start time */
> 	      if (t->time.last.tv_usec) t->time.last.tv_sec++;	      /* round up end time */
> 	      t->time.last.tv_usec = 0;
> 	      t->flow.flow_union.ip.ip_id = 0;		/* clear the ID field */
> 
> 	      t->ArgusTransRefNum = 0;			/* this may be dangerous */
> 
> 	      if (((argus->ahdr.status & 0xFFFF) != ETHERTYPE_ARP) &&
> 		  ((argus->ahdr.status & 0xFFFF) != ETHERTYPE_REVARP)) {
> 		/* these would wipe out ArgusARPAttributes otherwise */
> 	      t->attr.attr_union.ip.soptions = t->attr.attr_union.ip.doptions = 0;
> 	      t->attr.attr_union.ip.sttl = t->attr.attr_union.ip.dttl = 0;
> 	      t->attr.attr_union.ip.stos = t->attr.attr_union.ip.dtos = 0;
> 	      }
> 	    }
257a307,318
> 	    struct ArgusTCPObject *t = (struct ArgusTCPObject *)&((char *)newarg)[newarg->ahdr.length];
> 	    if (XFusecs) {	/* MN: remove performance stats from ArugsTCPObject */
> 	      t->synAckuSecs = t->ackDatauSecs = 0;
> 	      t->src.pad = t->dst.pad = 0; /* should be 0 anyway, but just in case */
> 	      t->src.win = t->dst.win = 0;
> 	      t->src.seqbase = t->dst.seqbase = 0;
> 
> 	      /* more dangerous... */
> 	      t->src.ackbytes = t->dst.ackbytes = 0;
> 	      t->src.rpkts = t->dst.rpkts = 0;
> 	      t->src.bytes = t->dst.bytes = 0;
> 	    }
360c421,425
< 
---
> #ifdef NEVERUSED
> /*
>  * MN: as far as I can tell, this is never used - there does not appear to be any calls,
>  * even in the original rastrip.c or libraries used by it.
>  */
436a502
> #endif /* NEVERUSED */




On Mon, Nov 07, 2011 at 07:57:39PM +0000, Jason Carr wrote:
> Hi Mike,
> 
> If you have any of those scripts handy, I'd appreciate a copy.  I'm
> actually interested in the conversion to ASCII, I didn't think of that
> one.  It makes things a little harder if we want to do things like network
> range queries, but it might be worth it.
> 
> I'm also trying xz.  We are currently using plain gzip for archiving.
> 
> Here's my current result set:
> 
> -rw-r--r-- 1 root root 1146339108 2011-10-27 15:36 core.2011.10.13.14.00
> 
> -rw-r--r-- 1 root root  564863918 2011-10-27 15:37 core.gz
> 
> -rw-r--r-- 1 root root  523034738 2011-10-27 15:37 core.bz2
> 
> -rw-r--r-- 1 root root  358668276 2011-10-27 15:37 core-9.xz
> -rw-r--r-- 1 root root  396348980 2011-10-27 15:37 core-6.xz
> 
> 
> Pretty decent compression with xz.  It takes a long time to compress,
> though: -9 takes 14 minutes in my tests and -6 takes 11 minutes.  For
> longer-term compression, it's probably worth it.
> 
> 
> Thanks Mike,
> 
> Jason
> 
> On 11/4/11 2:46 PM, "MN" <m.newton at stanford.edu> wrote:
> 
> >
> >Formerly, for data that we kept long-term, rounding time stamps to the
> >nearest 1/4 or 1/8 of a second reduced entropy sufficiently to make a
> >significant difference in compressed file sizes (this will not help on
> >non-compressed argus files).  I can send the old code if desired, but
> >it was for an older version of Argus.
> >
> >Now we save our longer term data in ascii format, saving just the fields
> >that we want, and using a combination of -p and RA_TIME_FORMAT.
> >
> >Consider using xz instead of bzip2, especially if you look at the log
> >files frequently, as the decompression time is significantly less - at
> >the cost of longer compression times.  Note xz defaults to '-6'.
> >
> >We've been keeping more than a year's worth of data on roughly ten 1-4g/s
> >links.
> >
> >- mike
> >
> >On Oct 28, 2011, at 5:06 PM, Jason Carr wrote:
> >
> >> We write argus data into five-minute chunked files.  We typically have
> >> +1G files for those 5 minutes.  Is there any metadata that we might be
> >> able to purge to decrease the size significantly?
> >> 
> >> I normally only care about StartTime, flags, proto, src/dst
> >> {mac,ip,port}, direction, packets, bytes, state, and user data in either
> >> direction.
> >> 
> >> I already gzip compress the files.  I tried using bzip2 on a few test
> >> files and got a 1.1G file down to 500M instead of 539M, but I'm looking
> >> for a larger compression and/or size difference.
> >> 
> >> Thanks,
> >> 
> >> Jason
> 



More information about the argus mailing list