html2mml.l -- FrameMaker support for HTML

Dan Connolly <connolly@pixel.convex.com>
Message-id: <9207240518.AA06296@pixel.convex.com>
To: www-talk@nxoc01.cern.ch
Subject: html2mml.l -- FrameMaker support for HTML
Content-Type: multipart/mixed; boundary="cut-here"
Date: Fri, 24 Jul 92 00:18:34 CDT
From: Dan Connolly <connolly@pixel.convex.com>
--cut-here

Here's a lisp program I cooked up to convert HTML files
to Frame's Maker Markup Language. It uses a real SGML
parser and a lisp interpreter, so you'll have to build
those if you don't have them handy.

I haven't tested it extensively, and I don't have a
down translator (MIF to HTML) yet... but I thought
I'd pass it along.

Oh... it assumes the HTML file conforms to the DTD
I sent out a while ago (I'd give you a WWW HREF/URL,
but the server's down right now and I can't find
it.)

Dan

--cut-here

;;; html2mml.l -- translate HyperText Markup Language to Maker Markup Language.
;;;
;;; USE
;;;  sgmls file.html | xlisp html2mml.l >file.mml
;;;
;;; Where xlisp is Tom Almy's improved release of David Betz's XLISP 2.1,
;;; available in export.lcs.mit.edu:/contrib/winterp/xlisp/xlisp-2.1.almy.tar.Z
;;; and sgmls is built from
;;; ifi.uio.no:/pub/SGML/SGMLS/sgmls-0.8.tar
;;; aka
;;; ftp.uu.net:/pub/text-processing/sgml/sgmls-0.8.tar.Z
;;;
;;; The resulting file will have the OS Banner from XLisp at the
;;; top. For some reason, XLisp writes everything to stdout.
;;; I patched it to write diagnostic output to stderr. I'll have
;;; to get the patches incorporated soon.
;;;
;;; Anyway, just edit the banner out so the first line of the file is
;;; <MML ...>
;;;
;;; Then import the mml file to FrameMaker.
;;;

(setq *tracenable* t)
(setq *breakenable* t)

(princ "<MML \"from html2mml.l by connolly@convex.com\">\n")
(setq *para-tags*
      '(title h1 h2 h3 h4 h5 body ol ul dl menu dir xmp listing))
(setq *literal-tags* '(xmp listing))

(setq *style-sheet* "
<!DefinePar Title
  <Alignment r>
  <plain> <pts 18> <bold>
>
<!DefinePar H1
  <Alignment c>
  <plain><pts 18>
  <SpaceBefore 12pt><SpaceAfter 12pt>
>
<!DefinePar H2
  <LeftIndent 0in><FirstIndent 0in>
  <SpaceBefore 12pt><SpaceAfter 6pt>
  <Alignment l>
  <plain><pts 14>
>
<!DefinePar H3
  <plain> <bold>
  <LeftIndent 0.25in><FirstIndent 0.25in>
  <SpaceBefore 6pt><SpaceAfter 3pt>
  <Alignment l><pts 12>
>
<!DefinePar H4
  <Alignment l>
>
<!DefinePar H5
  <Alignment l>
>
<!DefinePar BODY
  <LeftIndent 0.75in><FirstIndent 0.75in>
  <SpaceBefore 3pt><SpaceAfter 3pt><BlockSize 3>
  <Alignment l><plain><pts 12>
>
<!DefinePar OL
  <FirstIndent 1.0in> <LeftIndent 1.5in>
  <TabStops <TabStop 1.25in>>
  <SpaceBefore 3pt><SpaceAfter 3pt><BlockSize 3>
  <AutoNumber Yes> <NumberFormat \"<n+>\t\">
  <Alignment l><plain><pts 12>
>
<!DefinePar UL
  <SpaceBefore 3pt><SpaceAfter 3pt><BlockSize 3>
  <FirstIndent 1.0in> <LeftIndent 1.5in>
  <TabStops <TabStop 1.25in>>
  <AutoNumber Yes> <NumberFormat \"o\\t\">
  <Alignment l><plain><pts 12>
>
<!DefinePar DL
  <AutoNumber No>
  <SpaceBefore 3pt><SpaceAfter 3pt><BlockSize 3>
  <FirstIndent 0.5in> <LeftIndent 2.5in>
  <TabStops <TabStop 2.5in>>
  <Alignment l><plain><pts 12>
>
<!DefinePar MENU
  <AutoNumber No>
  <WithNext yes><WithPrev yes>
  <Alignment l><plain><pts 12>
>
<!DefinePar DIR
  <AutoNumber No>
  <Alignment l><plain><pts 12>
>
<!DefinePar XMP
  <AutoNumber No>
  <FirstIndent 0in> <LeftIndent 0in>
  <Alignment l><plain>
  <Family Courier><pts 9>
  <SpaceBefore 3pt><SpaceAfter 3pt><BlockSize 3>
>
<!DefinePar LISTING
  <AutoNumber No>
  <FirstIndent 0in> <LeftIndent 0in>
  <Alignment l><plain>
  <Family Courier><pts 8>
  <SpaceBefore 3pt><SpaceAfter 3pt><BlockSize 3>
>
")

(princ *style-sheet*)

;; From almy2.1
;; push and pop treat variable v as a stack
(defmacro push (v l)
        `(setf ,l (cons ,v ,l)))

(defmacro pop (l)
        `(prog1 (first ,l) (setf ,l (rest ,l))))


(defun start-para (stream tag)
  (format stream "<~A>~%" tag)
  )

(defun end-para (stream)
  (format stream "~%~%")
  )


(defun convert-data (literal)
  (do ((c (read-char) (read-char))
       d1 d2 d3
       )
      ((eq c #\Newline) nil)
      
      (cond ((eq c #\\)
	     (cond ((setf d1 (digit-char-p (setf c (read-char))))
		    (setf d2 (digit-char-p (read-char))) 
		    (setf d3 (digit-char-p (read-char)))
		    (princ (int-char (+ d3 (* 8 (+ d2 (* 8 d1))))))
		    )
		   ((eq c #\\) (princ "\\\\"))
		   ((eq c #\n) (format t (if literal "<HardReturn>" " ")))
		   ((eq c #\|) ;;nothing
		    )
		   ((eq c #\s) (princ " "))
		   ) )
	    ((member c '(#\< #\>)) (format t "\\~A" c))
	    ((eq c #\space) (format t (if literal "<HardSpace>" " ")))
	    ((eql c 7) (format t "<tab>"))
	    (t (princ c))
	) ) )

(defun html2mml ()
  (do ((c (read-char) (read-char))
       stack
       tag
       attrs
       )
      ((null c)) ;; quit at end of file
      
      (case c
	    (#\Newline ;; do nothing
	     )
	    
	    (#\( (let ((gi (read))
		       )
		   ;; open tag
		   (push gi stack)
		   (cond ((member gi *para-tags*)
			  (setq tag gi)
			  (start-para t tag)
			  )
			 ((eq gi 'a)
			  (let ((href (second (assoc 'href attrs)))
				)
			    ;; watch out for >'s and 's
			    (format t "<Marker <MType 8> <MText `message www ~A'>><italic>" href)
			    )
			  )
			 )
		   (setq attrs nil)
		   ))
	    (#\) (let ((gi (read))
		       )
		   (pop stack)
		   (cond ((member gi *para-tags*)
			  (setq tag nil))
			 ((eq gi 'a) (format t "<noitalic>"))
			 ((eq gi 'dt) (format t "<tab>"))
			 ((member gi '(p dd li)) (format t "<par>"))
			 )
		   ))
	    
	    (#\-
	     (unless tag
		     (end-para t)
		     (dolist (gi stack)
			     (when (member gi *para-tags*)
				   (setq tag gi)
				   (return)
				   ) )
		     (start-para t tag)
		     )
	     (convert-data (member tag *literal-tags*))
	     )
	    
	    (#\& (let ((name (read))
		       )
		   ;; name
		   ))
	    
	    (#\? (let ((pi (read-line))
		       )
		   ;; processing instruction
		   ))
	    (#\A (let ((name (read))
		       (token (read))
		       )
		   (case token
			 (IMPLIED ;; nothing
			  )
			 (CDATA (let ((data (read-line))
				      )
				  (push (list name data) attrs)
				  ))
			 (TOKEN (let ((tokens (read-line)) ;;@@ read tokens til \n
				      )
				  ;; tokens
				  ))
			 (NOTATION (let ((name (read))
					 )
				     ;; notation
				     ))
			 (ENTITY (let ((name (read))
				       )
				   ;; general entity
				   ))
			 (ID (let ((id (read))
				   )
			       ;; id
			       ))
			 (IDREF (let ((ids (read-line)) ;; @@ read ids til \n
				      )
				  ;; id's
				  ))
			 ) ) )
	    
	    (#\D (read-line) ;; do like A but for external data name
	     )
	    ) )
  )

(html2mml)

--cut-here--