#!/usr/bin/perl
#--------------
#txtattack.pl
#
#this is a script which takes text input by default and outputs DocBoo
+k XML by default having guessed at the semantic structure of the text
+.
#At the moment it's arranged in such a way that allows for expansion,
+including the development of a module based on this as a template
use strict;
use warnings;
use vars qw($articlename $headertest $nextline $lnapply $writestart $w
+ritetitle $writeelement $writeheader $writeend $lineallowance $inform
+at $outformat $val $marker $line $isheader $string $paranumber $artic
+lename);
$informat = "text";
$outformat = "DocBook";
$lineallowance = 0;
#here should go the code for overriding the defaults
#
#but in the meantime i'll happily setup blind defaults and do the modu
+larity bit later, as i've seperated it all cool like
#good for testing ;-)
if($informat eq "text"){
$articlename = sub{
my $val = <SOURCE>;
chomp($val);
return $val;
};
$nextline = sub{
return <SOURCE>;
};
$headertest = sub{
if($string eq "\n" and $marker > $lineallowance){
$isheader = 1;
}
};
}
if($outformat eq "DocBook"){
$writestart = sub{
print '<?xml version="1.0" encoding="UTF-8"?>';
print '<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4
+.1.2//EN" "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd">
+';
print "\n\n";
};
$writetitle = sub{
print "<article>\n <title>";
print &$articlename;
print "</title>";
};
$writeelement = sub {
print "\n<para>\n$line\n</para>\n";
};
$writeheader = sub {
# print "</chapter>";
# print "\n<chapter id=\"$element\">\n";
print "<title>$line</title>\n";
};
$writeend = sub{
print "\n</article>";
};
}
$lnapply = sub {
if($isheader == 0){
track("break isn't header");
&$writeelement($line);
}
elsif($isheader == 1){
track("break is header");
&$writeheader($line);
}
};
#---------------------
#sort out all function aliases before here
#--------------------
#and here we have the actual algorithm
sub liberate{
if (defined $_[0]){
open SOURCE, $ARGV[0] or return("$!");
}
else{
print "usage: semget [file] > [outfile]" and return;
}
$marker = 0;
$isheader = 0;
$paranumber = 0;
&$writestart;
&$writetitle;
while(defined($string = &$nextline)){
&$headertest($string);
if($string eq "\n") {
track("found break");
if($marker == $lineallowance){
track("hit line allowance");
$paranumber++;
&$lnapply;
$isheader = 0;
$line = undef;
}
track("redundant break");
$marker++;
}
else{
chomp($string);
#track("found text");
if (defined $line){
$line = "${line} $string";
}
else{
$line = $string;
}
$marker = 0;
}
#print "$string";
}
&$writeend;
}
liberate($ARGV[0]);
sub track{
warn "\ntrack:$_[0] at $paranumber";
}
|