diff options
Diffstat (limited to 'doc/LinuxDocTools.pm')
-rw-r--r-- | doc/LinuxDocTools.pm | 634 |
1 files changed, 634 insertions, 0 deletions
diff --git a/doc/LinuxDocTools.pm b/doc/LinuxDocTools.pm new file mode 100644 index 0000000..51d4a04 --- /dev/null +++ b/doc/LinuxDocTools.pm @@ -0,0 +1,634 @@ +#! /usr/bin/perl +# +# LinuxDocTools.pm +# +# $Id$ +# +# LinuxDoc-Tools driver core. This contains all the basic functionality +# we need to control all other components. +# +# © Copyright 1996, Cees de Groot. +# © Copyright 2000, Taketoshi Sano +# +# THIS VERSION HAS BEEN HACKED FOR BIRD BY MARTIN MARES +# +package LinuxDocTools; + +require 5.004; +use strict; + +=head1 NAME + +LinuxDocTools - SGML conversion utilities for LinuxDoc DTD. + +=head1 SYNOPSIS + + use LinuxDocTools; + LinuxDocTools::init; + @files = LinuxDocTools::process_options ($0, @ARGV); + for $curfile (@files) { + LinuxDocTools::process_file ($curfile); + } + +=head1 DESCRIPTION + +The LinuxDocTools package encapsulates all the functionality offered by +LinuxDoc-Tools. It is used, of course, by LinuxDoc-Tools; +but the encapsulation should provide for a simple interface for other users as well. + +=head1 FUNCTIONS + +=over 4 + +=cut + +use DirHandle; +use File::Basename; +use File::Find; +use File::Copy; +use FileHandle; +use IPC::Open2; +use Cwd; +use LinuxDocTools::Lang; +use LinuxDocTools::Utils qw(process_options usage cleanup trap_signals remove_tmpfiles create_temp); +use LinuxDocTools::Vars; + +sub BEGIN +{ + # + # Make sure we're always looking here. Note that "use lib" adds + # on the front of the search path, so we first push dist, then + # site, so that site is searched first. + # + use lib "$main::DataDir/dist"; + use lib "$main::DataDir/site"; +} + +=item LinuxDocTools::init + +Takes care of initialization of package-global variables (which are actually +defined in L<LinuxDocTools::Vars>). The package-global variables are I<$global>, +a reference to a hash containing numerous settings, I<%Formats>, a hash +containing all the formats, and I<%FmtList>, a hash containing the currently +active formats for help texts. + +Apart from this, C<LinuxDocTools::init> also finds all distributed and site-local +formatting backends and C<require>s them. + +=cut + +sub init +{ + trap_signals; + + # + # Register the ``global'' pseudoformat. Apart from the global settings, + # we also use $global to keep the global variable name space clean; + # everything that we need to provide to other modules is stuffed + # into $global. + # + $global = {}; + $global->{NAME} = "global"; + $global->{HELP} = ""; + $global->{OPTIONS} = [ + { option => "backend", type => "l", + 'values' => [ "html", "info", "latex", + "lyx", "rtf", "txt", "check" ], + short => "B" }, + { option => "papersize", type => "l", + 'values' => [ "a4", "letter" ], short => "p" }, + { option => "language", type => "l", + 'values' => [ @LinuxDocTools::Lang::Languages ], short => "l" }, + { option => "charset", type => "l", + 'values' => [ "latin", "ascii", "nippon", "euc-kr" ], short => "c" }, + { option => "style", type => "s", short => "S" }, + { option => "tabsize", type => "i", short => "t" }, +# { option => "verbose", type => "f", short => "v" }, + { option => "debug", type => "f", short => "d" }, + { option => "define", type => "s", short => "D" }, + { option => "include", type => "s", short => "i" }, + { option => "pass", type => "s", short => "P" } + ]; + $global->{backend} = "linuxdoc"; + $global->{papersize} = "a4"; + $global->{language} = "en"; + $global->{charset} = "ascii"; + $global->{style} = ""; + $global->{tabsize} = 8; + $global->{verbose} = 0; + $global->{define} = ""; + $global->{debug} = 0; + $global->{include} = ""; + $global->{pass} = ""; + $global->{InFiles} = []; + $Formats{$global->{NAME}} = $global; # All formats we know. + $FmtList{$global->{NAME}} = $global; # List of formats for help msgs. + + # automatic language detection: disabled by default + # { + # my $lang; + # foreach $lang (@LinuxDocTools::Lang::Languages) + # { + # if (($ENV{"LC_ALL"} =~ /^$lang/i) || + # ($ENV{"LC_CTYPE"} =~ /^$lang/i) || + # ($ENV{"LANG"} =~ /^$lang/i)) { + # $global->{language} = Any2ISO($lang); + # } + # } + # } + + # + # Used when the format is "global" (from sgmlcheck). + # + $global->{preNSGMLS} = sub { + $global->{NsgmlsOpts} .= " -s "; + $global->{NsgmlsPrePipe} = "cat $global->{file}"; + }; + + # + # Build up the list of formatters. + # + my $savdir = cwd; + my %Locs; + chdir "$main::DataDir/dist"; + my $dir = new DirHandle("."); + die "Unable to read directory $main::DataDir/dist: $!" unless defined($dir); + foreach my $fmt (grep(/^fmt_.*\.pl$/, $dir->read())) + { + $Locs{$fmt} = "dist"; + } + $dir->close(); + chdir "$main::DataDir/site"; + $dir = new DirHandle("."); + die "Unable to read directory $main::DataDir/site: $!" unless defined($dir); + foreach my $fmt (grep(/^fmt_.*\.pl$/, $dir->read())) + { + $Locs{$fmt} = "site"; + } + $dir->close(); + foreach my $fmt (keys %Locs) + { + require $fmt; + } + chdir $savdir; +} + +=item LinuxDocTools::process_options ($0, @ARGV) + +This function contains all initialization that is bound to the current +invocation of LinuxDocTools. It looks in C<$0> to deduce the backend that +should be used (ld2txt activates the I<txt> backend) and parses the +options array. It returns an array of filenames it encountered during +option processing. + +As a side effect, the environment variables I<SGMLDECL> and +I<SGML_CATALOG_FILES> are modified. + +=cut + +sub process_options +{ + my $progname = shift; + my @args = @_; + + # + # Deduce the format from the caller's file name + # + my ($format, $dummy1, $dummy2) = fileparse ($progname, ""); + $global->{myname} = $format; + $format =~ s/sgml2*(.*)/$1/; + + # + # check the option "--backend / -B" + # + if ($format eq "linuxdoc") { + my @backends = @args; + my $arg; + while (@backends) { + $arg = shift @backends; + if ($arg eq "-B") { + $arg = shift @backends; + $format = $arg; + last; + } + if ( $arg =~ s/--backend=(.*)/$1/ ) { + $format = $arg; + last; + } + } + } + + $format = "global" if $format eq "check"; + usage ("") if $format eq "linuxdoc"; + $format = "latex2e" if $format eq "latex"; + $FmtList{$format} = $Formats{$format} or + usage ("$global->{myname}: unknown format"); + $global->{format} = $format; + + # + # Parse all the options. + # + my @files = LinuxDocTools::Utils::process_options (@args); + $global->{language} = Any2ISO ($global->{language}); + # + # check the number of given files + $#files > -1 || usage ("no filenames given"); + + # + # Setup the SGML environment. + # (Note that Debian package rewrite path to catalog of + # iso-entities using debian/rules so that it can use + # entities from sgml-data pacakge. debian/rules also + # removes iso-entites sub directory after doing make install.) + # + $ENV{SGML_CATALOG_FILES} .= (defined $ENV{SGML_CATALOG_FILES} ? ":" : "") . + "$main::prefix/share/sgml/entities/sgml-iso-entities-8879.1986/catalog"; + $ENV{SGML_CATALOG_FILES} .= ":$main::DataDir/linuxdoc-tools.catalog"; + $ENV{SGML_CATALOG_FILES} .= ":$main::/etc/sgml.catalog"; + if (-f "$main::DataDir/dtd/$format.dcl") + { + $ENV{SGMLDECL} = "$main::DataDir/dtd/$format.dcl"; + } + elsif (-f "$main::DataDir/dtd/$global->{style}.dcl") + { + $ENV{SGMLDECL} = "$main::DataDir/dtd/$global->{style}.dcl"; + } + elsif (-f "$main::DataDir/dtd/sgml.dcl") + { + $ENV{SGMLDECL} = "$main::DataDir/dtd/sgml.dcl"; + } + + # + # OK. Give the list of files we distilled from the options + # back to the caller. + # + return @files; +} + +=item LinuxDocTools::process_file + +With all the configuration done, this routine will take a single filename +and convert it to the currently active backend format. The conversion is +done in a number of steps in tight interaction with the currently active +backend (see also L<LinuxDocTools::BackEnd>): + +=over + +=item 1. Backend: set NSGMLS options and optionally create a pre-NSGMLS pipe. + +=item 2. Here: Run the preprocessor to handle conditionals. + +=item 3. Here: Run NSGMLS. + +=item 4. Backend: run pre-ASP conversion. + +=item 5. Here: Run SGMLSASP. + +=item 6. Backend: run post-ASP conversion, generating the output. + +=back + +All stages are influenced by command-line settings, currently active format, +etcetera. See the code for details. + +=cut + +sub process_file +{ + my $file = shift (@_); + my $saved_umask = umask; + + print "Processing file $file\n"; + umask 0077; + + my ($filename, $filepath, $filesuffix) = fileparse ($file, "\.sgml"); + my $tmpnam = $filepath . '/' . $filename; + $file = $tmpnam . $filesuffix; + -f $file || $file =~ /.*.sgml$/ || ($file .= '.sgml'); + -f $file || ($file = $tmpnam . '.SGML'); + -f $file || die "Cannot find $file\n"; + $global->{filename} = $filename; + $global->{file} = $file; + $global->{filepath} = $filepath; + + my $tmp = new FileHandle "<$file"; + my $dtd; + while ( <$tmp> ) + { + tr/A-Z/a-z/; + # check for [<!doctype ... system] type definition + if ( /<!doctype\s*(\w*)\s*system/ ) + { + $dtd = $1; + last; + } + # check for <!doctype ... PUBLIC ... DTD ... + if ( /<!doctype\s*\w*\s*public\s*.*\/\/dtd\s*(\w*)/mi ) + { + $dtd = $1; + last; + } + # check for <!doctype ... + # PUBLIC ... DTD ... + # (multi-line version) + if ( /<!doctype\s*(\w*)/ ) + { + $dtd = "precheck"; + next; + } + if ( /\s*public\s*.*\/\/dtd\s*(\w*)/ && $dtd eq "precheck" ) + { + $dtd = $1; + last; + } + } + $tmp->close; + if ( $global->{debug} ) + { + print "DTD: " . $dtd . "\n"; + } + $global->{dtd} = $dtd; + + # prepare temporary directory + my $tmpdir = $ENV{'TMPDIR'} || '/tmp'; + $tmpdir = $tmpdir . '/' . 'linuxdoc-dir-' . $$; + mkdir ($tmpdir, 0700) || + die " - temporary files can not be created, aborted - \n"; + + my $tmpbase = $global->{tmpbase} = $tmpdir . '/sgmltmp.' . $filename; + $ENV{"SGML_SEARCH_PATH"} .= ":$filepath"; + + # + # Set up the preprocessing command. Conditionals have to be + # handled here until they can be moved into the DTD, otherwise + # a validating SGML parser will choke on them. + # + # check if output option for latex is pdf or not + if ($global->{format} eq "latex2e") + { + if ($Formats{$global->{format}}{output} eq "pdf") + { + $global->{define} .= " pdflatex=yes"; + } + } + # + my($precmd) = "|sgmlpre output=$global->{format} $global->{define}"; + + # + # You can hack $NsgmlsOpts here, etcetera. + # + $global->{NsgmlsOpts} .= "-D $main::prefix/share/sgml -D $main::DataDir"; + $global->{NsgmlsOpts} .= "-i$global->{include}" if ($global->{include}); + $global->{NsgmlsPrePipe} = "NOTHING"; + if ( defined $Formats{$global->{format}}{preNSGMLS} ) + { + $global->{NsgmlsPrePipe} = &{$Formats{$global->{format}}{preNSGMLS}}; + } + + # + # Run the prepocessor and nsgmls. + # + my ($ifile, $writensgmls); + + if ($global->{NsgmlsPrePipe} eq "NOTHING") + { + $ifile = new FileHandle $file; + } + else + { + $ifile = new FileHandle "$global->{NsgmlsPrePipe}|"; + } + + create_temp("$tmpbase.1"); + $writensgmls = new FileHandle + "$precmd|$main::progs->{NSGMLS} $global->{NsgmlsOpts} $ENV{SGMLDECL} >\"$tmpbase.1\""; + if ($global->{charset} eq "latin") + { + while (<$ifile>) + { + # Outline these commands later on - CdG + #change latin1 characters to SGML + #by Farzad Farid, adapted by Greg Hankins + s/À/\À/g; + s/Á/\Á/g; + s/Â/\Â/g; + s/Ã/\Ã/g; + s/Ä/\Ä/g; + s/Å/\Å/g; + s/Æ/\Æ/g; + s/Ç/\Ç/g; + s/È/\È/g; + s/É/\É/g; + s/Ê/\Ê/g; + s/Ë/\Ë/g; + s/Ì/\Ì/g; + s/Í/\Í/g; + s/Î/\Î/g; + s/Ï/\Ï/g; + s/Ñ/\Ñ/g; + s/Ò/\Ò/g; + s/Ó/\Ó/g; + s/Ô/\Ô/g; + s/Õ/\Õ/g; + s/Ö/\Ö/g; + s/Ø/\Ø/g; + s/Ù/\Ù/g; + s/Ú/\Ú/g; + s/Û/\Û/g; + s/Ü/\Ü/g; + s/Ý/\Ý/g; + s/Þ/\Þ/g; + s/ß/\ß/g; + s/à/\à/g; + s/á/\á/g; + s/â/\â/g; + s/ã/\ã/g; + s/ä/\ä/g; + s/å/\å/g; + s/æ/\æ/g; + s/ç/\ç/g; + s/è/\è/g; + s/é/\é/g; + s/ê/\ê/g; + s/ë/\ë/g; + s/ì/\ì/g; + s/í/\í/g; + s/î/\î/g; + s/ï/\ï/g; + s/µ/\μ/g; + s/ð/\ð/g; + s/ñ/\ñ/g; + s/ò/\ò/g; + s/ó/\ó/g; + s/ô/\ô/g; + s/õ/\õ/g; + s/ö/\ö/g; + s/ø/\ø/g; + s/ù/\ù/g; + s/ú/\ú/g; + s/û/\û/g; + s/ü/\ü/g; + s/ý/\ý/g; + s/þ/\þ/g; + s/ÿ/\ÿ/g; + print $writensgmls $_; + } + } + else + { + while (<$ifile>) + { + print $writensgmls $_; + } + } + $ifile->close; + $writensgmls->close; + + # + # Special case: if format is global, we're just checking. + # + $global->{format} eq "global" && cleanup; + + # + # If the output file is empty, something went wrong. + # + ! -e "$tmpbase.1" and die "can't create file - exiting"; + -z "$tmpbase.1" and die "SGML parsing error - exiting"; + if ( $global->{debug} ) + { + print "Nsgmls stage finished.\n"; + } + + # + # If a preASP stage is defined, let the format handle it. + # + # preASP ($inhandle, $outhandle); + # + my $inpreasp = new FileHandle "<$tmpbase.1"; + my $outpreasp = new FileHandle "$tmpbase.2",O_WRONLY|O_CREAT|O_EXCL,0600; + if (defined $Formats{$global->{format}}{preASP}) + { + &{$Formats{$global->{format}}{preASP}}($inpreasp, $outpreasp) == 0 or + die "error pre-processing $global->{format}.\n"; + } + else + { + copy ($inpreasp, $outpreasp); + } + $inpreasp->close; + $outpreasp->close; + ! -e "$tmpbase.2" and die "can't create file - exiting"; + + if ( $global->{debug} ) + { + print "PreASP stage finished.\n"; + } + + # + # Run sgmlsasp, with an optional style if specified. + # + # Search order: + # - datadir/site/<dtd>/<format> + # - datadir/dist/<dtd>/<format> + # So we need to fetch the doctype from the intermediate. + # + # Note: this is a very simplistic check - but as far as I know, + # it is correct. Am I right? + # + my $tmp = new FileHandle "<$tmpbase.2"; + my $dtd; + while ( ($dtd = <$tmp>) && ! ( $dtd =~ /^\(/) ) { }; + $tmp->close; + $dtd =~ s/^\(//; + $dtd =~ tr/A-Z/a-z/; + chop $dtd; + $global->{dtd} = $dtd; + + my $style = ""; + if ($global->{style}) + { + $style = "$main::DataDir/site/$dtd/$global->{format}/$global->{style}mapping"; + -r $style or + $style = "$main::DataDir/dist/$dtd/$global->{format}/$global->{style}mapping"; + } + my $mapping = "$main::DataDir/site/$dtd/$global->{format}/mapping"; + -r $mapping or $mapping = "$main::DataDir/dist/$dtd/$global->{format}/mapping"; + + $global->{charset} = "nippon" if ($global->{language} eq "ja"); + # + # we don't have Korean groff so charset should be latin1. + # + if ($global->{language} eq "ko") + { + if ($global->{format} eq "groff") + { + $global->{charset} = "latin1"; + } + else + { + $global->{charset} = "euc-kr"; + } + } + + if ($global->{format} eq "groff" or $global->{format} eq "latex2e") + { + if ($dtd eq "linuxdoctr") + { + $mapping = "$main::DataDir/dist/$dtd/$global->{format}/tr-mapping"; + } + } + + create_temp("$tmpbase.3"); + system ("$main::progs->{SGMLSASP} $style $mapping <\"$tmpbase.2\" | + expand -$global->{tabsize} >\"$tmpbase.3\""); + ! -e "$tmpbase.3" and die "can't create file - exiting"; + + + if ( $global->{debug} ) + { + print "ASP stage finished.\n"; + } + + # + # If a postASP stage is defined, let the format handle it. + # It should leave whatever it thinks is right based on $file. + # + # postASP ($inhandle) + # + umask $saved_umask; + my $inpostasp = new FileHandle "<$tmpbase.3"; + if (defined $Formats{$global->{format}}{postASP}) + { + &{$Formats{$global->{format}}{postASP}}($inpostasp) == 0 or + die "error post-processing $global->{format}.\n"; + } + $inpostasp->close; + + if ( $global->{debug} ) + { + print "postASP stage finished.\n"; + } + + # + # All done, remove the temporaries. + # + if( !$global->{debug} ) { + remove_tmpfiles($tmpbase); + } +} + +=pod + +=back + +=head1 SEE ALSO + +Documentation for various sub-packages of LinuxDocTools. + +=head1 AUTHOR +SGMLTools are written by Cees de Groot, C<E<lt>cg@cdegroot.comE<gt>>, +and various SGML-Tools contributors as listed in C<CONTRIBUTORS>. +Taketoshi Sano C<E<lt>sano@debian.org<gt>> rename to LinuxDocTools. + +=cut +1; |