#!/usr/local/bin/csi -script

;; usage: ./convert.scm index.html > index.scm

(use htmlprag) 
(use regex)
;; Open FILE for input, hand the resulting port to html->sxml, and return
;; whatever html->sxml produces.
(define (->sxml file)
  (call-with-input-file file
    (lambda (port)
      (html->sxml port))))
;; Regular expression matching a string that consists solely of one or more
;; whitespace characters (the empty string does not match because of `+`).
;; Used by whitespace-string below.
;; Narrower alternative that only matches runs of newlines and tabs:
; (define ws-only (regexp "^[\\n\\t]+$"))
(define ws-only (regexp "^\\s+$"))

;; Return a copy of TREE with every atom satisfying PRED? dropped, along with
;; any element that is (or becomes) the empty list ().
;; As used in this script PRED? selects whitespace-only strings.  Spaces are
;; not collapsed, since the text may live inside a <pre>; a smarter transform
;; could take a stylesheet-like approach to treat <pre> specially.
(define (remove-elements pred? tree)
  (if (pair? tree)
      ;; let* keeps the order fixed: clean the head first, then the tail.
      (let* ((head (remove-elements pred? (car tree)))
             (tail (remove-elements pred? (cdr tree))))
        (if (null? head)
            tail                    ; head vanished: splice it out
            (cons head tail)))
      ;; Atom case: () stays (), matching atoms turn into (), others survive.
      ;; The null? test comes first so PRED? is never called on ().
      (if (or (null? tree) (pred? tree))
          '()
          tree)))

;; Predicate for remove-elements: true when TREE is a string that matches
;; ws-only.  Returns #f for non-strings and non-matching strings, otherwise
;; the (truthy) result of string-search.
(define (whitespace-string tree)
  (if (string? tree)
      (string-search ws-only tree)
      #f))

;; Script entry point: read the HTML file named by the first command-line
;; argument, convert it to SXML, drop whitespace-only strings and empty
;; lists, and pretty-print the result to standard output.
(let ((args (command-line-arguments)))
  (if (null? args)
      ;; Without this guard a missing argument surfaces as an opaque
      ;; type error from (car '()); print the usage line instead.
      (begin
        (display "usage: ./convert.scm index.html > index.scm"
                 (current-error-port))
        (newline (current-error-port))
        (exit 1))
      (let* ((fn (car args))
             (sxml (->sxml fn)))
        ;; `write` can be substituted for pretty-print for compact output.
        (pretty-print (remove-elements whitespace-string sxml)))))

