;;; html2mml.l -- translate HyperText Markup Language to Maker Markup Language.
;;; $Id: html2mml.l,v 1.1 92/08/19 18:37:59 connolly Exp $
;;;
;;; USE
;;;  sgmls file.html | xlisp html2mml.l >file.mml
;;;
;;; Where xlisp is Tom Almy's improved release of David Betz's XLISP 2.1,
;;; available in export.lcs.mit.edu:/contrib/winterp/xlisp/xlisp-2.1.almy.tar.Z
;;; and sgmls is built from
;;; ifi.uio.no:/pub/SGML/SGMLS/sgmls-0.8.tar
;;; aka
;;; ftp.uu.net:/pub/text-processing/sgml/sgmls-0.8.tar.Z
;;;
;;; The resulting file will have the OS Banner from XLisp at the
;;; top. For some reason, XLisp writes everything to stdout.
;;; I patched it to write diagnostic output to stderr. I'll have
;;; to get the patches incorporated soon.
;;;
;;; Anyway, just edit the banner out so the first line of the file is
;;; <MML ...>
;;;
;;; Then import the mml file to FrameMaker.
;;;

(setq *tracenable* t)
(setq *breakenable* t)

(princ "<MML \"from html2mml.l by connolly@convex.com\">\n")
(setq *para-tags*
      '(title h1 h2 h3 h4 h5 document ol ul dl menu dir address xmp listing))
(setq *literal-tags* '(xmp listing))

(setq *style-sheet* "
<!DefinePar Title
  <Alignment r>
  <plain> <pts 18> <bold>
>
<!DefinePar H1
  <Alignment c>
  <plain><pts 18>
  <SpaceBefore 12pt><SpaceAfter 12pt>
>
<!DefinePar H2
  <LeftIndent 0in><FirstIndent 0in>
  <SpaceBefore 12pt><SpaceAfter 6pt>
  <Alignment l>
  <plain><pts 14>
>
<!DefinePar H3
  <plain> <bold>
  <LeftIndent 0.25in><FirstIndent 0.25in>
  <SpaceBefore 6pt><SpaceAfter 3pt>
  <Alignment l><pts 12>
>
<!DefinePar H4
  <Alignment l>
>
<!DefinePar H5
  <Alignment l>
>
<!DefinePar DOCUMENT
  <LeftIndent 0.75in><FirstIndent 0.75in>
  <SpaceBefore 3pt><SpaceAfter 3pt><BlockSize 3>
  <Alignment l><plain><pts 12>
>
<!DefinePar OL
  <FirstIndent 1.0in> <LeftIndent 1.5in>
  <TabStops <TabStop 1.25in>>
  <SpaceBefore 3pt><SpaceAfter 3pt><BlockSize 3>
  <AutoNumber Yes> <NumberFormat \"<n+>\t\">
  <Alignment l><plain><pts 12>
>
<!DefinePar UL
  <SpaceBefore 3pt><SpaceAfter 3pt><BlockSize 3>
  <FirstIndent 1.0in> <LeftIndent 1.5in>
  <TabStops <TabStop 1.25in>>
  <AutoNumber Yes> <NumberFormat \"o\\t\">
  <Alignment l><plain><pts 12>
>
<!DefinePar DL
  <AutoNumber No>
  <SpaceBefore 3pt><SpaceAfter 3pt><BlockSize 3>
  <FirstIndent 0.5in> <LeftIndent 2.5in>
  <TabStops <TabStop 2.5in>>
  <Alignment l><plain><pts 12>
>
<!DefinePar MENU
  <AutoNumber No>
  <WithNext yes><WithPrev yes>
  <Alignment l><plain><pts 12>
>
<!DefinePar DIR
  <AutoNumber No>
  <Alignment l><plain><pts 12>
>
<!DefinePar Address
  <AutoNumber No>
  <Alignment r><plain><pts 12>
>
<!DefinePar XMP
  <AutoNumber No>
  <FirstIndent 0in> <LeftIndent 0in>
  <Alignment l><plain>
  <Family Courier><pts 9>
  <SpaceBefore 3pt><SpaceAfter 3pt><BlockSize 3>
>
<!DefinePar LISTING
  <AutoNumber No>
  <FirstIndent 0in> <LeftIndent 0in>
  <Alignment l><plain>
  <Family Courier><pts 8>
  <SpaceBefore 3pt><SpaceAfter 3pt><BlockSize 3>
>
")

(princ *style-sheet*)

;; From almy2.1
;; push and pop treat variable v as a stack
(defmacro push (v l)
        `(setf ,l (cons ,v ,l)))

(defmacro pop (l)
        `(prog1 (first ,l) (setf ,l (rest ,l))))


(defun start-para (stream tag)
  (format stream "<~A>~%" tag)
  )

(defun end-para (stream)
  (format stream "~%~%")
  )


(defun convert-data (literal)
  (do ((c (read-char) (read-char))
       d1 d2 d3
       )
      ((eq c #\Newline) nil)
      
      (cond ((eq c #\\)
	     (cond ((setf d1 (digit-char-p (setf c (read-char))))
		    (setf d2 (digit-char-p (read-char))) 
		    (setf d3 (digit-char-p (read-char)))
		    (princ (int-char (+ d3 (* 8 (+ d2 (* 8 d1))))))
		    )
		   ((eq c #\\) (princ "\\\\"))
		   ((eq c #\n) (format t (if literal "<HardReturn>" " ")))
		   ((eq c #\|) ;;nothing
		    )
		   ((eq c #\s) (princ " "))
		   ) )
	    ((member c '(#\< #\>)) (format t "\\~A" c))
	    ((eq c #\space) (format t (if literal "<HardSpace>" " ")))
	    ((eql c 7) (format t "<tab>"))
	    (t (princ c))
	) ) )

(defun html2mml ()
  (do ((c (read-char) (read-char))
       stack
       tag
       attrs
       )
      ((null c)) ;; quit at end of file
      
      (case c
	    (#\Newline ;; do nothing
	     )
	    
	    (#\( (let ((gi (read))
		       )
		   ;; open tag
		   (push gi stack)
		   (cond ((member gi *para-tags*)
			  (setq tag gi)
			  (start-para t tag)
			  )
			 ((eq gi 'a)
			  (let ((href (second (assoc 'href attrs)))
				)
			    ;; watch out for >'s and 's
			    (format t "<Marker <MType 8> <MText `message www ~A'>><italic>" href)
			    )
			  )
			 )
		   (setq attrs nil)
		   ))
	    (#\) (let ((gi (read))
		       )
		   (pop stack)
		   (cond ((member gi *para-tags*)
			  (setq tag nil))
			 ((eq gi 'a) (format t "<noitalic>"))
			 ((eq gi 'dd) (format t "<tab>"))
			 ((member gi '(p dt li)) (format t "<par>"))
			 )
		   ))
	    
	    (#\-
	     (unless tag
		     (end-para t)
		     (dolist (gi stack)
			     (when (member gi *para-tags*)
				   (setq tag gi)
				   (return)
				   ) )
		     (start-para t tag)
		     )
	     (convert-data (member tag *literal-tags*))
	     )
	    
	    (#\& (let ((name (read))
		       )
		   ;; name
		   ))
	    
	    (#\? (let ((pi (read-line))
		       )
		   ;; processing instruction
		   ))
	    (#\A (let ((name (read))
		       (token (read))
		       )
		   (case token
			 (IMPLIED ;; nothing
			  )
			 (CDATA (let ((data (read-line))
				      )
				  (push (list name data) attrs)
				  ))
			 (TOKEN (let ((tokens (read-line)) ;;@@ read tokens til \n
				      )
				  ;; tokens
				  ))
			 (NOTATION (let ((name (read))
					 )
				     ;; notation
				     ))
			 (ENTITY (let ((name (read))
				       )
				   ;; general entity
				   ))
			 (ID (let ((id (read))
				   )
			       ;; id
			       ))
			 (IDREF (let ((ids (read-line)) ;; @@ read ids til \n
				      )
				  ;; id's
				  ))
			 ) ) )
	    
	    (#\D (read-line) ;; do like A but for external data name
	     )
	    ) )
  )

(html2mml)

