/* SGML.h
 * $Id: SGMLstream.h,v 1.3 93/01/06 18:40:29 connolly Exp Locker: connolly $
 */

#ifndef SGML_h
#define SGML_h

#include "c_dialect.h"
#include "HMDoc.h"

/*
 * supported variations on the SGML declaration
 */

/*
 * Optional override: when SGML_DECLARATION names a header, it is included
 * first, so any quantity it defines wins over the defaults below.
 */
#if defined(SGML_DECLARATION)
#include SGML_DECLARATION
#endif

/* Name length; SGML_read_name requires a buffer of at least this many chars. */
#if !defined(SGML_NAMELEN)
#define SGML_NAMELEN 8
#endif

/* Literal length; SGML_read_value requires a buffer of at least this many chars. */
#if !defined(SGML_LITLEN)
#define SGML_LITLEN 240
#endif

/* Presumably the SGML ATTCNT quantity -- not referenced in this header; confirm. */
#if !defined(SGML_ATTCNT)
#define SGML_ATTCNT 40
#endif

/* Presumably the SGML TAGLVL quantity -- not referenced in this header; confirm. */
#if !defined(SGML_TAGLVL)
#define SGML_TAGLVL 24
#endif

/* Presumably a flag for the SHORTTAG feature (non-zero = on) -- confirm. */
#if !defined(SGML_SHORTTAG)
#define SGML_SHORTTAG 1
#endif

/* Presumably the extra (non-alphanumeric) name characters -- confirm. */
#if !defined(SGML_UCNMCHAR)
#define SGML_UCNMCHAR ".-"
#endif

/*
 * SGML content types
 */

enum {
  /* no content at all */
  SGML_EMPTY   = 0,
  /* character data: only "</" is recognized */
  SGML_CDATA   = 1,
  /* replaceable character data: "</" and "&ref;" are recognized */
  SGML_RCDATA  = 2,
  /* elements mixed with parsed character data: all markup is recognized */
  SGML_MIXED   = 3,
  /* element content: any data found is returned as an error */
  SGML_ELEMENT = 4
};

/* Fallback definition for translation units where EOF is not yet defined. */
#if !defined(EOF)
#define EOF (-1)
#endif

/*
 * Special (non-positive) codes. The value -1 is intentionally not assigned
 * to any enumerator here: that slot is EOF.
 */
enum {
  SGML_error      =  0,
  /* -1 is EOF */
  SGML_end_tag    = -2,
  SGML_start_tag  = -3,
  SGML_entity     = -4,
  SGML_record_end = -5
};


/*
 * SGML_parseInstance -- declaration only; the implementation is not in this
 * header. Takes a stream with its getc method, a document, and a read-only
 * document class.
 * NOTE(review): presumably parses the SGML document instance read from
 * stream into document as directed by docclass -- confirm against the
 * implementation.
 */
VOID SGML_parseInstance PARAMS((HMStream stream,
                                HMGetcProc getc,
                                HMDoc *document,
                                CONST HMDoc_Class *docclass));
/* documentation @@
 */

/*
 * SGML_read -- read data or detect markup on stream, storing at most nbytes
 * into buf. The full PRE/POST contract is in the comment that follows this
 * declaration.
 */
int SGML_read PARAMS((HMStream stream,
                      HMGetcProc getc,
                      char *buf,
                      int nbytes,
                      int content,
                      int *inout_lookahead));
/* 
 * PRE:
 * stream -- opaque stream object
 * getc -- getc method for stream. returns -1 on EOF
 * buf -- where to store data
 * nbytes -- how much to read MUST BE > 3!
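 * entities -- opaque entities object
 * expand_entity -- method for entities
 *    (expand_entity)(entities, name, dest)
 *    stores the expansion of name at dest, and returns the length
 * max_entity_length -- upper bound on return value of expand_entity
 *    NOTE(review): entities, expand_entity and max_entity_length are not
 *    parameters of the SGML_read prototype above; these three entries
 *    look stale -- confirm against the implementation.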
 * content -- SGML_CDATA, SGML_RCDATA, SGML_MIXED, or SGML_ELEMENT
 * inout_lookahead -- EOF or first character of input
 *
 * POST:
 * returns value:
 *   SGML_record_end ==> @@ not yet documented
 *   SGML_start_tag ==> sgml start tag found. *inout_lookahead
 *                      is first character of name. rest of name
 *                      follows on stream.
 *   SGML_end_tag   ==> sgml end tag found, like start tag
 *                  NOTE: SGML_read attempts to discard newlines
 *                        between tags: if *inout_lookahead == EOF, newlines
 *                        before tags and markup declarations
 *                        will be discarded.
 *                        There is a pathological case where
 *                        there are more than nbytes-4 leading newlines.
 *                        In that case, the newlines will be treated as data,
 *                        even though they may be between tags.
 *   SGML_entity    ==> entity reference found. name is in the buffer,
 *                      null terminated
 *   EOF            ==> EOF found before any tags or data
 *                      (note that SGML_read may skip over newlines, comments,
 *                       processing instructions, and markup declarations
 *                       to get to EOF)
 *                      *inout_lookahead is not defined.
 *   SGML_error     ==> data found in element content, or
 *                      possible markup was found ('<' or '&')
 *                      and nbytes was not sufficient to determine
 *                      whether the character was markup or data
 *                      NOTE: if nbytes > SGML_NAMELEN+1, SGML_read is
 *                             guaranteed not to return 0.
 *                      *inout_lookahead is set to the last value
 *                       read from stream.
 *   0<ret<nbytes   ==> ret bytes of data found, followed by
 *                      '<', which may begin a tag, or
 *                      '&' with insufficient room to determine
 *                          whether it's data, or insufficient
 *                          room for the name of the entity.
 *   nbytes         ==> nbytes of data found.
 *                      *inout_lookahead = EOF (ready for next call)
 */


/*
 * SGML_read_name -- read a name from stream into buf. The full PRE/POST
 * contract is in the comment that follows this declaration.
 */
int SGML_read_name PARAMS((HMStream stream,
                           HMGetcProc getc,
                           char *buf,
                           int *inout_lookahead));
/* 
 * PRE:
 * stream -- opaque stream object
 * getc -- getc method for stream. returns -1 on EOF
 * buf -- where to store name (must be at least SGML_NAMELEN chars)
 * inout_lookahead -- EOF or first character of input
 *
 * POST:
 * returns value:
 *   0              ==> first character is not a name character.
 *   0<ret<=SGML_NAMELEN
 *                  ==> name is ret bytes long
 *                      folded to lower case and stored at buf.
 *                      trailing whitespace is skipped
 *                      *inout_lookahead = last value read from stream
 */


/*
 * SGML_read_value -- read a value from stream into buf. The full PRE/POST
 * contract is in the comment that follows this declaration.
 */
int SGML_read_value PARAMS((HMStream stream,
                            HMGetcProc getc,
                            char *buf,
                            int *inout_lookahead));
/*
 * PRE:
 * stream -- opaque stream object
 * getc -- getc method for stream. returns -1 on EOF
 * buf -- where to store value (must be at least SGML_LITLEN chars)
 * inout_lookahead -- EOF or first character of input
 *
 * POST:
 * returns value:
 *   0              ==> first character is not a letter, a digit, or a quote.
 *   0<ret<=SGML_LITLEN
 *                  ==> value is ret bytes long
 *                      stored at buf with entities  expanded
 *                      trailing whitespace is skipped
 *                      *inout_lookahead = last value read from stream
 */

#endif /* SGML_h */
