root/source3/utils/debugparse.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. dbg_token2string
  2. dbg_char2token
  3. dbg_test

   1 /* ========================================================================== **
   2  *                                debugparse.c
   3  *
   4  * Copyright (C) 1998 by Christopher R. Hertel
   5  *
   6  * Email: crh@ubiqx.mn.org
   7  *
   8  * -------------------------------------------------------------------------- **
   9  * This module is a very simple parser for Samba debug log files.
  10  * -------------------------------------------------------------------------- **
  11  *
  12  *  This library is free software; you can redistribute it and/or
  13  *  modify it under the terms of the GNU Lesser General Public
  14  *  License as published by the Free Software Foundation; either
  15  *  version 3 of the License, or (at your option) any later version.
  16  *
  17  *  This library is distributed in the hope that it will be useful,
  18  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  *  Library General Public License for more details.
  21  *
  22  *  You should have received a copy of the GNU Lesser General Public
  23  *  License along with this library; if not, see <http://www.gnu.org/licenses/>.
  24  *
  25  * -------------------------------------------------------------------------- **
  26  * The important function in this module is dbg_char2token().  The rest is
  27  * basically fluff.  (Potentially useful fluff, but still fluff.)
  28  * ========================================================================== **
  29  */
  30 
  31 #include "debugparse.h"
  32 
  33 /* -------------------------------------------------------------------------- **
  34  * Constants...
  35  *
  36  *  DBG_BSIZE - This internal constant is used only by dbg_test().  It is the
  37  *          size of the read buffer.  I've tested the function using a
  38  *          DBG_BSIZE value of 2.
  39  */
  40 
  41 #define DBG_BSIZE 128
  42 
  43 /* -------------------------------------------------------------------------- **
  44  * Functions...
  45  */
  46 
  47 const char *dbg_token2string( dbg_Token tok )
     /* [<][>][^][v][top][bottom][index][help] */
  48   /* ------------------------------------------------------------------------ **
  49    * Given a token, return a string describing the token.
  50    *
  51    *  Input:  tok - One of the set of dbg_Tokens defined in debugparse.h.
  52    *
  53    *  Output: A string identifying the token.  This is useful for debugging,
  54    *          etc.
  55    *
  56    *  Note:   If the token is not known, this function will return the
  57    *          string "<unknown>".
  58    *
  59    * ------------------------------------------------------------------------ **
  60    */
  61   {
  62   switch( tok )
  63     {
  64     case dbg_null:
  65       return( "null" );
  66     case dbg_ignore:
  67       return( "ignore" );
  68     case dbg_header:
  69       return( "header" );
  70     case dbg_timestamp:
  71       return( "time stamp" );
  72     case dbg_level:
  73       return( "level" );
  74     case dbg_sourcefile:
  75       return( "source file" );
  76     case dbg_function:
  77       return( "function" );
  78     case dbg_lineno:
  79       return( "line number" );
  80     case dbg_message:
  81       return( "message" );
  82     case dbg_eof:
  83       return( "[EOF]" );
  84     }
  85   return( "<unknown>" );
  86   } /* dbg_token2string */
  87 
  88 dbg_Token dbg_char2token( dbg_Token *state, int c )
     /* [<][>][^][v][top][bottom][index][help] */
  89   /* ------------------------------------------------------------------------ **
  90    * Parse input one character at a time.
  91    *
  92    *  Input:  state - A pointer to a token variable.  This is used to
  93    *                  maintain the parser state between calls.  For
  94    *                  each input stream, you should set up a separate
  95    *                  state variable and initialize it to dbg_null.
  96    *                  Pass a pointer to it into this function with each
  97    *                  character in the input stream.  See dbg_test()
  98    *                  for an example.
  99    *          c     - The "current" character in the input stream.
 100    *
 101    *  Output: A token.
 102    *          The token value will change when delimiters are found,
 103    *          which indicate a transition between syntactical objects.
 104    *          Possible return values are:
 105    *
 106    *          dbg_null        - The input character was an end-of-line.
 107    *                            This resets the parser to its initial state
 108    *                            in preparation for parsing the next line.
 109    *          dbg_eof         - Same as dbg_null, except that the character
 110    *                            was an end-of-file.
 111    *          dbg_ignore      - Returned for whitespace and delimiters.
 112    *                            These lexical tokens are only of interest
 113    *                            to the parser.
 114    *          dbg_header      - Indicates the start of a header line.  The
 115    *                            input character was '[' and was the first on
 116    *                            the line.
 117    *          dbg_timestamp   - Indicates that the input character was part
 118    *                            of a header timestamp.
 119    *          dbg_level       - Indicates that the input character was part
 120    *                            of the debug-level value in the header.
 121    *          dbg_sourcefile  - Indicates that the input character was part
 122    *                            of the sourcefile name in the header.
 123    *          dbg_function    - Indicates that the input character was part
 124    *                            of the function name in the header.
 125    *          dbg_lineno      - Indicates that the input character was part
 126    *                            of the DEBUG call line number in the header.
 127    *          dbg_message     - Indicates that the input character was part
 128    *                            of the DEBUG message text.
 129    *
 130    * ------------------------------------------------------------------------ **
 131    */
 132   {
 133   /* The terminating characters that we see will greatly depend upon
 134    * how they are read.  For example, if gets() is used instead of
 135    * fgets(), then we will not see newline characters.  A lot also
 136    * depends on the calling function, which may handle terminators
 137    * itself.
 138    *
 139    * '\n', '\0', and EOF are all considered line terminators.  The
 140    * dbg_eof token is sent back if an EOF is encountered.
 141    *
 142    * Warning:  only allow the '\0' character to be sent if you are
 143    *           using gets() to read whole lines (thus replacing '\n'
 144    *           with '\0').  Sending '\0' at the wrong time will mess
 145    *           up the parsing.
 146    */
 147   switch( c )
 148     {
 149     case EOF:
 150       *state = dbg_null;   /* Set state to null (initial state) so */
 151       return( dbg_eof );   /* that we can restart with new input.  */
 152     case '\n':
 153     case '\0':
 154       *state = dbg_null;   /* A newline or eoln resets to the null state. */
 155       return( dbg_null );
 156     }
 157 
 158   /* When within the body of the message, only a line terminator
 159    * can cause a change of state.  We've already checked for line
 160    * terminators, so if the current state is dbg_msgtxt, simply
 161    * return that as our current token.
 162    */
 163   if( dbg_message == *state )
 164     return( dbg_message );
 165 
 166   /* If we are at the start of a new line, and the input character 
 167    * is an opening bracket, then the line is a header line, otherwise
 168    * it's a message body line.
 169    */
 170   if( dbg_null == *state )
 171     {
 172     if( '[' == c )
 173       {
 174       *state = dbg_timestamp;
 175       return( dbg_header );
 176       }
 177     *state = dbg_message;
 178     return( dbg_message );
 179     }
 180 
 181   /* We've taken care of terminators, text blocks and new lines.
 182    * The remaining possibilities are all within the header line
 183    * itself.
 184    */
 185 
 186   /* Within the header line, whitespace can be ignored *except*
 187    * within the timestamp.
 188    */
 189   if( isspace( c ) )
 190     {
 191     /* Fudge.  The timestamp may contain space characters. */
 192     if( (' ' == c) && (dbg_timestamp == *state) )
 193       return( dbg_timestamp );
 194     /* Otherwise, ignore whitespace. */
 195     return( dbg_ignore );
 196     }
 197 
 198   /* Okay, at this point we know we're somewhere in the header.
 199    * Valid header *states* are: dbg_timestamp, dbg_level,
 200    * dbg_sourcefile, dbg_function, and dbg_lineno.
 201    */
 202   switch( c )
 203     {
 204     case ',':
 205       if( dbg_timestamp == *state )
 206         {
 207         *state = dbg_level;
 208         return( dbg_ignore );
 209         }
 210       break;
 211     case ']':
 212       if( dbg_level == *state )
 213         {
 214         *state = dbg_sourcefile;
 215         return( dbg_ignore );
 216         }
 217       break;
 218     case ':':
 219       if( dbg_sourcefile == *state )
 220         {
 221         *state = dbg_function;
 222         return( dbg_ignore );
 223         }
 224       break;
 225     case '(':
 226       if( dbg_function == *state )
 227         {
 228         *state = dbg_lineno;
 229         return( dbg_ignore );
 230         }
 231       break;
 232     case ')':
 233       if( dbg_lineno == *state )
 234         {
 235         *state = dbg_null;
 236         return( dbg_ignore );
 237         }
 238       break;
 239     }
 240 
 241   /* If the previous block did not result in a state change, then
 242    * return the current state as the current token.
 243    */
 244   return( *state );
 245   } /* dbg_char2token */
 246 
 247 void dbg_test( void );
 248 void dbg_test( void )
     /* [<][>][^][v][top][bottom][index][help] */
 249   /* ------------------------------------------------------------------------ **
 250    * Simple test function.
 251    *
 252    *  Input:  none.
 253    *  Output: none.
 254    *  Notes:  This function was used to test dbg_char2token().  It reads a
 255    *          Samba log file from stdin and prints parsing info to stdout.
 256    *          It also serves as a simple example.
 257    *
 258    * ------------------------------------------------------------------------ **
 259    */
 260   {
 261   char bufr[DBG_BSIZE];
 262   int  i;
 263   int  linecount  = 1;
 264   dbg_Token old   = dbg_null,
 265             newtok= dbg_null,
 266             state = dbg_null;
 267 
 268   while( fgets( bufr, DBG_BSIZE, stdin ) )
 269     {
 270     for( i = 0; bufr[i]; i++ )
 271       {
 272       old = newtok;
 273       newtok = dbg_char2token( &state, bufr[i] );
 274       switch( newtok )
 275         {
 276         case dbg_header:
 277           if( linecount > 1 )
 278             (void)putchar( '\n' );
 279           break;
 280         case dbg_null:
 281           linecount++;
 282           break;
 283         case dbg_ignore:
 284           break;
 285         default:
 286           if( old != newtok )
 287             (void)printf( "\n[%05d]%12s: ", linecount, dbg_token2string(newtok) );
 288           (void)putchar( bufr[i] );
 289         }
 290       }
 291     }
 292   (void)putchar( '\n' );
 293   } /* dbg_test */
 294 
 295 
 296 /* -------------------------------------------------------------------------- **
 297  * This simple main line can be uncommented and used to test the parser.
 298  */
 299 
 300 /*
 301  * int main( void )
 302  *  {
 303  *  dbg_test();
 304  *  return( 0 );
 305  *  }
 306  */
 307 
 308 /* ========================================================================== */

/* [<][>][^][v][top][bottom][index][help] */