root/trunk/spec/preprocess_html.pl

Revision 8, 1.4 KB (checked in by oren, 7 months ago)

Language fixes;
Ensuring JSON compatibility (removing Unicode line breaks);
Adding recommended schemas section.

Line 
#!/usr/bin/perl -w

# Reads HTML from the files named on the command line (or from STDIN when
# none are given), rewrites it one line at a time, and accumulates the result
# in $text for the whole-document pass that follows the read loop.

use strict;
use warnings;
use English;

# Nesting level of the <table> elements seen so far. At most one opening and
# one closing tag is counted per input line.
my $table_depth = 0;

# Value of $table_depth recorded at the most recent line that opens a
# <table ... class="simplelist">; -1 until the first such line is seen.
# NOTE(review): this is never set back to -1 once the simplelist table
# closes, so a later table at the same depth also has its <td> rewritten --
# confirm whether that is intended.
my $simple_table_depth = -1;

# The whole document after the per-line rewrites.
my $text = "";

# Plain substitutions applied globally, in this order, to every line.
# Each entry is [ pattern, literal replacement ].
my @line_rewrites = (
    [ qr/span class="index"/, 'span class="appendix"' ],
    [ qr/width="3%"/,         'class="productioncounter"' ],
    [ qr/width="10%"/,        'class="productionlhs"' ],
    [ qr/width="5%"/,         'class="productionseperator"' ],
    [ qr/width="52%"/,        'class="productionrhs"' ],
    [ qr/width="30%"/,        'class="productioncomment"' ],
    [ qr/Symbols/,            'Indicators' ],
    [ qr/<table border="1">/, '<table border="0" style="width: 0%">' ],
);

# Apply every per-line rewrite to one input line and return the result.
#
#   $line   - one line of HTML, normally still carrying its trailing newline.
#   returns - the rewritten line.
#
# Side effect: updates $table_depth and $simple_table_depth, so lines must be
# passed in document order.
sub rewrite_line {
    my ($line) = @_;

    # Dropping the newline joins a class="database" line with the line that
    # follows it once both have been appended to $text.
    $line =~ s/\n// if $line =~ /class="database"/;

    for my $rewrite (@line_rewrites) {
        my ($pattern, $replacement) = @{$rewrite};
        $line =~ s/$pattern/$replacement/g;
    }

    # Turn <em>...</em> into <b>...</b> on lines where an <em> starts with
    # "Byte" or "Encoding"; every "em>" on such a line is rewritten.
    $line =~ s/em>/b>/g if $line =~ /<em>(Byte|Encoding)/;

    # Track table nesting; the closing tag is counted before the <td> check,
    # so the line that closes a table is treated as belonging to its parent.
    $table_depth++ if $line =~ /<table/;
    $simple_table_depth = $table_depth
        if $line =~ /<table.*class="simplelist"/;
    $table_depth-- if $line =~ m{</table};

    # Only the first <td> on the line is rewritten (no /g).
    $line =~ s/<td>/<td width="50%">/
        if $table_depth == $simple_table_depth;

    # Collapse spaces in such link ids.
    # Each pass looks inside an id="..." value for one or more
    # space-terminated words followed by a further space and then more
    # spaces, and deletes those trailing spaces.
    # NOTE(review): as written this leaves two spaces behind and cannot step
    # past the first multi-space run in an id, which makes the six passes
    # look redundant -- confirm the pattern against the repository copy.
    for (1 .. 6) {
        $line =~ s/id="((?:[^" ]+ )+ )[ ]+/id="$1/;
    }

    return $line;
}

while (my $line = <>) {
    $text .= rewrite_line($line);
}
32
33# Make defterm a link to the index entry for unquoted terms.
#
# Whole-document pass over $text, run once after every input line has been
# appended to it, followed by the final print.
#
# NOTE(review): the leading numbers 33-46 on these lines are a line-number
# column that was fused onto the code when the page was captured; they are
# not part of the Perl.
#
# As written, the next statement deletes every "</i>" in $text, which would
# also strip the closing tag of every other <i> element.
# NOTE(review): part of this pattern may have been lost in capture -- confirm
# against the repository copy.
34$text =~ s:</i>::g;
# For an anchor <a id="X/Y"></a> immediately followed by
# <i class="firstterm">, re-emit the anchor and wrap the term in
# <a href="#index-entry-X">. Under /x the literal whitespace in the pattern
# is ignored (\s matches the one real space inside each tag); the line breaks
# in the replacement are emitted verbatim and fall inside the tags, just
# before each ">".
# NOTE(review): the third capture, "([^]*.)", has no closing "]" before the
# end of the pattern, and the pattern line after it is blank, so this does
# not compile as shown. Characters appear to have been lost (possibly a "<"
# inside the class and a closing "</i>") -- TODO restore from the repository
# copy rather than guessing.
35$text =~ s: <a \s id=" ([^/]*) / ([^"]*) ">
36            </a>
37            <i \s class="firstterm">
38            ([^]*.)
39            
40          :<a id="$1/$2"></a
41            ><a href="#index-entry-$1"
42            ><i class="firstterm"
43            >$3</i></a
44          >:gx;
45
# Write the rewritten document to standard output.
46print $text;
Note: See TracBrowser for help on using the browser.