|
Revision 8, 1.4 KB
(checked in by oren, 7 months ago)
|
|
Language fixes;
Ensuring JSON compatibility (removing Unicode line breaks);
Adding recommended schemas section.
|
| Line | |
|---|
| 1 | #!/usr/bin/perl -w |
|---|
| 2 | |
|---|
| 3 | use strict; |
|---|
| 4 | use English; |
|---|
| 5 | |
|---|
| 6 | my $table_depth = 0; |
|---|
| 7 | my $simple_table_depth = -1; |
|---|
| 8 | my $text = ""; |
|---|
| 9 | while (my $line = <>) { |
|---|
| 10 | $line =~ s/\n// if ($line =~ /class="database"/); |
|---|
| 11 | $line =~ s/span class="index"/span class="appendix"/g; |
|---|
| 12 | $line =~ s/width="3%"/class="productioncounter"/g; |
|---|
| 13 | $line =~ s/width="10%"/class="productionlhs"/g; |
|---|
| 14 | $line =~ s/width="5%"/class="productionseperator"/g; |
|---|
| 15 | $line =~ s/width="52%"/class="productionrhs"/g; |
|---|
| 16 | $line =~ s/width="30%"/class="productioncomment"/g; |
|---|
| 17 | $line =~ s/Symbols/Indicators/g; |
|---|
| 18 | $line =~ s/<table border="1">/<table border="0" style="width: 0%">/g; |
|---|
| 19 | $line =~ s/em>/b>/g if $line =~ /<em>(Byte|Encoding)/; |
|---|
| 20 | |
|---|
| 21 | $table_depth++ if $line =~ /<table/; |
|---|
| 22 | $simple_table_depth = $table_depth if $line =~ /<table.*class="simplelist"/; |
|---|
| 23 | $table_depth-- if $line =~ /<\/table/; |
|---|
| 24 | $line =~ s/<td>/<td width="50%">/ if $table_depth == $simple_table_depth; |
|---|
| 25 | |
|---|
| 26 | # Collapse extra spaces inside id="..." link ids. The substitution below has no /g flag, so it is repeated: at most six runs of spaces are removed per input line. NOTE(review): the exact spacing of the pattern may have been altered in this copy; confirm against the repository. |
|---|
| 27 | for (my $i = 0; $i < 6; $i++) { |
|---|
| 28 | $line =~ s/id="((?:[^" ]+ )+ )[ ]+/id="$1/; |
|---|
| 29 | } |
|---|
| 30 | $text .= $line; |
|---|
| 31 | } |
|---|
| 32 | |
|---|
| 33 | # Make defterm a link to the index entry for unquoted terms: an empty anchor whose id has the form X/Y, directly followed by an italic "firstterm" element, is re-emitted with the italic element wrapped in a link to #index-entry-X. Runs over the whole accumulated text (/g) with /x so whitespace in the pattern is ignored. NOTE(review): the bracketed class in the pattern and the pattern's last row look truncated in this copy; confirm against the repository before running. |
|---|
| 34 | $text =~ s:</i>::g; |
|---|
| 35 | $text =~ s: <a \s id=" ([^/]*) / ([^"]*) "> |
|---|
| 36 | </a> |
|---|
| 37 | <i \s class="firstterm"> |
|---|
| 38 | ([^]*.) |
|---|
| 39 | |
|---|
| 40 | :<a id="$1/$2"></a |
|---|
| 41 | ><a href="#index-entry-$1" |
|---|
| 42 | ><i class="firstterm" |
|---|
| 43 | >$3</i></a |
|---|
| 44 | >:gx; |
|---|
| 45 | |
|---|
| 46 | print $text; |
|---|