perl script to legalize HTML files
Dan Connolly <connolly@pixel.convex.com>
Message-id: <9207160349.AA25229@pixel.convex.com>
To: www-talk@nxoc01.cern.ch
Subject: perl script to legalize HTML files
Date: Wed, 15 Jul 92 22:49:21 CDT
From: Dan Connolly <connolly@pixel.convex.com>
#!/usr/local/bin/perl
#
# USE
# fix-html.pl <W3-file.html >W3-file.sgml
#
# SEE ALSO
# the html.dtd.
#
# Main driver.
#
# Writes a DOCTYPE declaration, then copies the input to the output while
# rewriting two kinds of start-tag:
#   <A ...>       attribute list is normalized by fix_anchor()
#   <NEXTID nnn>  becomes <NEXTID N=nnn>
# Every other "<" is passed through unchanged.
print "<!DOCTYPE HTML SYSTEM>\n";

# Read everything (STDIN, or all files named on the command line) into $_.
# fix_anchor() consumes its tag directly from $_, so the working text has
# to live there.
$_ = join('', <>);

# Each pass removes the text up to and including the next "<" from the
# front of $_ and emits the part that preceded the "<".
while (s/\A([^<]*)<//) {
    out($1);
    if (s/^A\s+//i) {
        # Anchor with attributes: fix_anchor() emits the whole start-tag
        # and strips the remainder of it from $_.
        fix_anchor();
    } elsif (s/^NEXTID\s+(\d+)\s*>//i) {
        # Tag names are matched without regard to case, as for <A> above.
        out("<NEXTID N=$1>");
    } else {
        # Not a tag we rewrite: put the "<" back and carry on.
        out('<');
    }
}

# Whatever follows the last "<" (or the whole input if there was none).
out($_);
# out(TEXT) -- write TEXT to the currently selected output handle.
# All output of the script is funnelled through this one routine.
sub out {
    my ($text) = @_;
    print $text;
}
# _quote_attr_value(VALUE) -- return VALUE wrapped as an attribute value
# literal, using a delimiter that does not occur inside VALUE.
sub _quote_attr_value {
    my ($value) = @_;

    return qq{"$value"} if index($value, '"') < 0;
    return qq{'$value'} if index($value, "'") < 0;

    # Both quote characters occur (only possible for a bare token), so
    # replace the double quotes with a character reference.
    (my $escaped = $value) =~ s/"/&#34;/g;
    return qq{"$escaped"};
}

# fix_anchor -- normalize the attribute list of an anchor start-tag.
#
# Input:  the global $_, positioned just after "<A" and the white space
#         that follows it (the caller has already removed those).
# Effect: removes the rest of the start-tag, through the first ">" left
#         after the attributes, from $_, and writes a rebuilt tag via out().
#
# Only NAME, TYPE and HREF are kept, emitted in that order as quoted
# literals.  Other attributes are dropped, as are attributes whose value
# is empty.  If an attribute is repeated, the last occurrence wins.
sub fix_anchor {
    my %value_of;

    # An attribute value is a literal in double quotes, a literal in
    # single quotes, or a bare token that runs up to white space or ">".
    # The quotes themselves are not captured, so they are not doubled
    # when the tag is written back out.
    while (s/^(\w+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s>]+))\s*//) {
        my $attr = uc $1;

        # Select the alternative that matched by definedness rather than
        # truth, so that a quoted "0" is not mistaken for "no value".
        my $value = defined $2 ? $2
                  : defined $3 ? $3
                  :              $4;

        $value_of{$attr} = $value;
    }

    # Discard anything unparsed up to and including the closing ">".
    s/[^>]*>//;

    out('<A');
    for my $attr (qw(NAME TYPE HREF)) {
        my $value = $value_of{$attr};
        next unless defined $value && length $value;
        out(" $attr=" . _quote_attr_value($value));
    }
    out('>');
}