#!/usr/bin/perl
#--------------
# txtattack.pl
#
# Takes plain text as input (by default) and writes DocBook XML (by default),
# guessing at the semantic structure of the text.
#
# The input and output behaviour lives in code references stored in package
# variables, so that other formats can be plugged in later and so that the
# script can serve as the template for a module.
#
# Structure guessing, as implemented below:
#   * the first line of the file is the article title;
#   * consecutive non-blank lines are joined with single spaces into one
#     element;
#   * a blank line ends the current element once exactly $lineallowance
#     blank lines have already been seen since the last text line;
#   * text that follows more than that many blank lines is written as a
#     header rather than as an ordinary element.

use strict;
use warnings;

# Package globals (formerly declared through the obsolete "use vars").
# They stay package variables so code outside this file can still override
# the hooks and settings.
our (
    $articlename, $headertest,    $nextline,  $lnapply,
    $writestart,  $writetitle,    $writeelement, $writeheader,
    $writeend,    $lineallowance, $informat,  $outformat,
    $val,         $marker,        $line,      $isheader,
    $string,      $paranumber,
);

# Handle of the file currently being converted; opened by liberate() and
# read by the "text" input hooks below.
my $source_fh;

$informat      = "text";
$outformat     = "DocBook";
$lineallowance = 0;

# TODO: code for overriding the defaults above should go here.  For now the
# blind defaults are enough for testing; the hooks are already separated so
# the modular part can be added later.

if ($informat eq "text") {

    # Reads the article title: the next line of the input, without its
    # trailing newline.  An empty input yields an empty title.
    $articlename = sub {
        # NOTE(review): the read operator was missing from this copy of the
        # script ("my $val = ;"); reading from the file opened by liberate()
        # is the only handle this script opens - confirm against the original.
        my $title = <$source_fh>;
        $title = '' unless defined $title;
        chomp($title);
        return $title;
    };

    # Returns the next raw input line (newline included), or undef at end
    # of input.
    $nextline = sub {
        return scalar <$source_fh>;
    };

    # Marks the text that follows as a header when the current line is blank
    # and more than $lineallowance blank lines have already been counted.
    $headertest = sub {
        if ($string eq "\n" and $marker > $lineallowance) {
            $isheader = 1;
        }
    };
}

if ($outformat eq "DocBook") {

    # NOTE(review): the literals in these writers are reproduced exactly as
    # found.  Several are empty and no XML markup is emitted at all, so the
    # DocBook tags appear to have been stripped from this copy of the script.
    # Restore them from the original before relying on the output.
    # NOTE(review): the text is printed verbatim; presumably "<" and "&" in
    # the input need escaping once real markup is emitted - confirm.

    # Writes the document preamble.
    $writestart = sub {
        print '';
        print '';
        print "\n\n";
    };

    # Writes the article title, read through the $articlename hook.
    $writetitle = sub {
        print "\n\n ";
        print $articlename->();
        print "";
    };

    # Writes the collected text in $line as an ordinary element.
    $writeelement = sub {
        print "\n\n$line\n\n";
    };

    # Writes the collected text in $line as a header.
    $writeheader = sub {
        # print "";
        # print "\n\n";
        print "$line\n";
    };

    # Writes the document trailer.
    $writeend = sub {
        print "\n\n";
    };
}

# Emits the collected text in $line through the header or the element writer,
# depending on $isheader.
$lnapply = sub {
    if ($isheader == 0) {
        track("break isn't header");
        $writeelement->($line);
    }
    elsif ($isheader == 1) {
        track("break is header");
        $writeheader->($line);
    }
};

#---------------------
# sort out all function aliases before here
#---------------------
# and here we have the actual algorithm

# liberate($file)
#
# Converts the text file $file and prints the result to the currently
# selected output handle (STDOUT unless changed).
#
# Returns nothing on success or when called without a file name (a usage
# message goes to STDERR in that case).  When the file cannot be opened, a
# warning is issued and the system error text is returned.
sub liberate {
    my ($file) = @_;

    unless (defined $file) {
        # Usage goes to STDERR: STDOUT is normally redirected to the outfile.
        print STDERR "usage: semget [file] > [outfile]\n";
        return;
    }

    unless (open $source_fh, '<', $file) {
        my $error = "$!";    # copy before warn() can disturb $!
        warn "cannot open $file: $error\n";
        return $error;
    }

    $marker     = 0;
    $isheader   = 0;
    $paranumber = 0;
    $line       = undef;

    $writestart->();
    $writetitle->();

    while (defined($string = $nextline->())) {
        $headertest->($string);
        if ($string eq "\n") {
            track("found break");
            if ($marker == $lineallowance) {
                track("hit line allowance");
                # Nothing to write when the break comes before any text was
                # collected, e.g. the blank line right after the title.
                if (defined $line) {
                    $paranumber++;
                    $lnapply->();
                }
                $isheader = 0;
                $line     = undef;
            }
            track("redundant break");
            $marker++;
        }
        else {
            chomp($string);
            #track("found text");
            $line = defined $line ? "${line} $string" : $string;
            $marker = 0;
        }
    }

    # Flush text still pending at end of input; without this the last
    # paragraph is lost whenever the file does not end with a blank line.
    if (defined $line) {
        $paranumber++;
        $lnapply->();
        $isheader = 0;
        $line     = undef;
    }

    $writeend->();

    close $source_fh or warn "cannot close $file: $!\n";
    $source_fh = undef;
    return;
}

# track($message)
#
# Debug trace: warns with $message and the current paragraph count.
sub track {
    warn "\ntrack:$_[0] at $paranumber";
}

# Run the conversion on the first command-line argument and signal an
# unreadable input file through the exit status.
my $failure = liberate($ARGV[0]);
exit 1 if defined $failure;

1;