Beefy Boxes and Bandwidth Generously Provided by pair Networks
Do you know where your variables are?
 
PerlMonks  

Re: rtf to txt conversion

by thundergnat (Deacon)
on Nov 08, 2005 at 16:18 UTC ( [id://506782]=note: print w/replies, xml ) Need Help??


in reply to rtf to txt conversion

You may want to look at using RTF::Tokenizer directly. That's what RTF::TEXT::Converter uses behind the scenes anyway, and it gives you much more control over what you keep and what you throw away.

Here's a basic implementation of an RTF-to-text converter.

use warnings;
use strict;

use RTF::Tokenizer;

# Minimal RTF -> plain-text converter: reads the RTF document embedded after
# __DATA__, skips the document header, and prints every text token that
# follows, each one followed by a single space.

my $tokenizer = RTF::Tokenizer->new( file => \*DATA );

# Start from defined (empty) values so the first loop test below does not
# trigger "uninitialized" warnings.
my ( $token_type, $argument, $parameter ) = ( '', '', '' );

# Get past the header: consume tokens up to and including the first \pard
# control word.  Also stop at 'eof' so that input containing no \pard cannot
# keep this loop running forever.
until ( $token_type eq 'eof'
    or ( $token_type eq 'control' and $argument eq 'pard' ) )
{
    ( $token_type, $argument, $parameter ) = $tokenizer->get_token();
}

# Emit the body text.  If the header scan already hit 'eof' this loop is
# skipped and nothing is printed.
while ( $token_type ne 'eof' ) {
    ( $token_type, $argument, $parameter ) = $tokenizer->get_token();
    print "$argument " if $token_type eq 'text';
}

# NOTE(review): the sample below appears to still contain " +" line-wrap
# markers from the forum's code rendering - confirm and strip them before
# relying on the output.
__DATA__
{\rtf1\ansi\ansicpg1252\uc1 \deff0\deflang1033\deflangfe1033{\fonttbl{ +\f0\froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New R +oman{\*\falt Times New Roman};}{\f1\fswiss\fcharset0\fprq2{\*\panose +020b0604020202020204}Arial{\*\falt Arial};} {\f2\fmodern\fcharset0\fprq1{\*\panose 02070309020205020404}Courier Ne +w{\*\falt Courier New};}{\f17\froman\fcharset238\fprq2 Times New Roma +n CE{\*\falt Times New Roman};}{\f18\froman\fcharset204\fprq2 Times N +ew Roman Cyr{\*\falt Times New Roman};} {\f20\froman\fcharset161\fprq2 Times New Roman Greek{\*\falt Times New + Roman};}{\f21\froman\fcharset162\fprq2 Times New Roman Tur{\*\falt T +imes New Roman};}{\f22\froman\fcharset186\fprq2 Times New Roman Balti +c{\*\falt Times New Roman};} {\f23\fswiss\fcharset238\fprq2 Arial CE{\*\falt Arial};}{\f24\fswiss\f +charset204\fprq2 Arial Cyr{\*\falt Arial};}{\f26\fswiss\fcharset161\f +prq2 Arial Greek{\*\falt Arial};}{\f27\fswiss\fcharset162\fprq2 Arial + Tur{\*\falt Arial};} {\f28\fswiss\fcharset186\fprq2 Arial Baltic{\*\falt Arial};}{\f29\fmod +ern\fcharset238\fprq1 Courier New CE{\*\falt Courier New};}{\f30\fmod +ern\fcharset204\fprq1 Courier New Cyr{\*\falt Courier New};} {\f32\fmodern\fcharset161\fprq1 Courier New Greek{\*\falt Courier New} +;}{\f33\fmodern\fcharset162\fprq1 Courier New Tur{\*\falt Courier New +};}{\f34\fmodern\fcharset186\fprq1 Courier New Baltic{\*\falt Courier + New};}}{\colortbl;\red0\green0\blue0; 
\red0\green0\blue255;\red0\green255\blue255;\red0\green255\blue0;\red2 +55\green0\blue255;\red255\green0\blue0;\red255\green255\blue0;\red255 +\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\g +reen128\blue0;\red128\green0\blue128; \red128\green0\blue0;\red128\green128\blue0;\red128\green128\blue128;\ +red192\green192\blue192;}{\stylesheet{\widctlpar\adjustright \fs20\cg +rid \snext0 Normal;}{\s1\keepn\widctlpar\adjustright \b\f1\cgrid \sba +sedon0 \snext0 heading 1;}{ \s2\qc\keepn\widctlpar\adjustright \b\f1\cgrid \sbasedon0 \snext0 head +ing 2;}{\*\cs10 \additive Default Paragraph Font;}{\s15\widctlpar\tx7 +20\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7 +920\tx8640\tx9360\tx10080\adjustright \f1\fs18\cgrid \snext15 RadPlus Text;}{\s16\nowidctlpar\adjustright \b +\cf2 \sbasedon0 \snext16 \sautoupd RadPlus Title;}{\s17\widctlpar\adj +ustright \f2\fs20\cgrid \sbasedon0 \snext17 Plain Text;}{\s18\widctlp +ar \tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\ +tx7920\tx8640\tx9360\tx10080\adjustright \f1\cgrid \sbasedon15 \snext +18 Arial 12 Point;}{\s19\widctlpar \tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\ +tx7920\tx8640\tx9360\tx10080\adjustright \f1\fs22\cgrid \sbasedon17 \ +snext19 Arial 11 Point;}{\s20\widctlpar \tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\ +tx7920\tx8640\tx9360\tx10080\adjustright \f1\fs20\cgrid \sbasedon15 \ +snext20 Arial 10 Point;}{\s21\widctlpar \tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\ +tx7920\tx8640\tx9360\tx10080\adjustright \f1\fs28\cgrid \sbasedon15 \ +snext21 Arial 14 Point;}{\s22\widctlpar \tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\ +tx7920\tx8640\tx9360\tx10080\adjustright \fs18\cgrid \sbasedon15 \sne +xt22 Times New Roman 9 Point;}{\s23\widctlpar \tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\ +tx7920\tx8640\tx9360\tx10080\adjustright 
\fs20\cgrid \sbasedon15 \sne +xt23 Times New Roman 10 Point;}{\s24\widctlpar \tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\ +tx7920\tx8640\tx9360\tx10080\adjustright \fs22\cgrid \sbasedon15 \sne +xt24 Times New Roman 11 Point;}{\s25\widctlpar \tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\ +tx7920\tx8640\tx9360\tx10080\adjustright \cgrid \sbasedon15 \snext25 +Times New Roman 12 Point;}{\s26\widctlpar \tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\ +tx7920\tx8640\tx9360\tx10080\adjustright \fs28\cgrid \sbasedon15 \sne +xt26 Times New Roman 14 Point;}}{\info{\title CM}{\author RADTRANSWO4 +}{\operator RADTRANSWO6} {\creatim\yr2005\mo2\dy21\hr13\min7}{\revtim\yr2005\mo2\dy21\hr13\min7 +}{\version3}{\edmins0}{\nofpages1}{\nofwords0}{\nofchars0}{\*\company + MGHS}{\nofcharsws0}{\vern113}}\margl1080\margr360\margt360\margb360 \widowctrl\ftnbj\aenddoc\hyphcaps0\viewkind1\viewscale90 \fet0{\*\temp +late C:\\PB\\MLIVE\\word_fls\\QuickTxt.dot}{\*\docvar {ContainerApp}{ +RadPlus (MLIVE)}}{\*\docvar {IniFile}{C:\'5cPB\'5cTEMP\'5csession8\'5 +cmrq16871\'5cqt_coword.ini}} {\*\docvar {ParentHandle}{3015954}}{\*\docvar {TemplateDirectory}{C:\' +5cPB\'5cMLIVE\'5cword_fls\'5c}}\sectd \linex0\sectdefaultcl {\*\pnsec +lvl1\pnucrm\pnstart1\pnindent720\pnhang{\pntxta .}}{\*\pnseclvl2\pnuc +ltr\pnstart1\pnindent720\pnhang{\pntxta .}} {\*\pnseclvl3\pndec\pnstart1\pnindent720\pnhang{\pntxta .}}{\*\pnseclv +l4\pnlcltr\pnstart1\pnindent720\pnhang{\pntxta )}}{\*\pnseclvl5\pndec +\pnstart1\pnindent720\pnhang{\pntxtb (}{\pntxta )}}{\*\pnseclvl6\pnlc +ltr\pnstart1\pnindent720\pnhang{\pntxtb (} {\pntxta )}}{\*\pnseclvl7\pnlcrm\pnstart1\pnindent720\pnhang{\pntxtb ( +}{\pntxta )}}{\*\pnseclvl8\pnlcltr\pnstart1\pnindent720\pnhang{\pntxt +b (}{\pntxta )}}{\*\pnseclvl9\pnlcrm\pnstart1\pnindent720\pnhang{\pnt +xtb (}{\pntxta )}}\trowd \trgaph108\trleft2160 \trbrdrt\brdrdb\brdrw15\brdrcf1 \trbrdrl\brdrdb\brdrw15\brdrcf1 \trbrd 
+rb\brdrdb\brdrw15\brdrcf1 \trbrdrr\brdrdb\brdrw15\brdrcf1 \trbrdrh\br +drs\brdrw15\brdrcf1 \trbrdrv\brdrs\brdrw15\brdrcf1 \clvertalt\clbrdrt +\brdrdb\brdrw15\brdrcf1 \clbrdrl \brdrdb\brdrw15\brdrcf1 \clbrdrb\brdrs\brdrw15\brdrcf1 \clbrdrr\brdrs\ +brdrw15\brdrcf1 \cltxlrtb \cellx4284\clvertalt\clbrdrt\brdrdb\brdrw15 +\brdrcf1 \clbrdrl\brdrs\brdrw15\brdrcf1 \clbrdrb\brdrs\brdrw15\brdrcf +1 \clbrdrr\brdrs\brdrw15\brdrcf1 \cltxlrtb \cellx5670\clvertalt\clbrdrt\brdrdb\brdrw15\brdrcf1 \clbrdrl\brdrs\brd +rw15\brdrcf1 \clbrdrb\brdrs\brdrw15\brdrcf1 \clbrdrr\brdrdb\brdrw15\b +rdrcf1 \cltxlrtb \cellx7920\pard\plain \s18\widctlpar\intbl \tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\ +tx7920\tx8640\tx9360\tx10080\adjustright \f1\cgrid {\cell }\pard \s18 +\qc\widctlpar\intbl \tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\ +tx7920\tx8640\tx9360\tx10080\adjustright {\b CM\cell }\pard \s18\widc +tlpar\intbl \tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\ +tx7920\tx8640\tx9360\tx10080\adjustright {\b WEEKS/DAYS\cell }\pard\p +lain \widctlpar\intbl\adjustright \fs20\cgrid {\caps \row }\trowd \tr +gaph108\trleft2160\trbrdrt \brdrdb\brdrw15\brdrcf1 \trbrdrl\brdrdb\brdrw15\brdrcf1 \trbrdrb\brdrd +b\brdrw15\brdrcf1 \trbrdrr\brdrdb\brdrw15\brdrcf1 \trbrdrh\brdrs\brdr +w15\brdrcf1 \trbrdrv\brdrs\brdrw15\brdrcf1 \clvertalt\clbrdrt\brdrs\b +rdrw15\brdrcf1 \clbrdrl\brdrdb\brdrw15\brdrcf1 \clbrdrb\brdrs\brdrw15\brdrcf1 \clbrdrr\brdrs\brdrw15\brdrcf1 \cltxlrt +b \cellx4284\clvertalt\clbrdrt\brdrs\brdrw15\brdrcf1 \clbrdrl\brdrs\b +rdrw15\brdrcf1 \clbrdrb\brdrs\brdrw15\brdrcf1 \clbrdrr\brdrs\brdrw15\ +brdrcf1 \cltxlrtb \cellx5670\clvertalt\clbrdrt \brdrs\brdrw15\brdrcf1 \clbrdrl\brdrs\brdrw15\brdrcf1 \clbrdrb\brdrs\b +rdrw15\brdrcf1 \clbrdrr\brdrdb\brdrw15\brdrcf1 \cltxlrtb \cellx7920\p +ard\plain \s18\widctlpar\intbl \tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\ 
+tx7920\tx8640\tx9360\tx10080\adjustright \f1\cgrid {\b BPD\cell }\par +d \s18\qc\widctlpar\intbl \tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\ +tx7920\tx8640\tx9360\tx10080\adjustright {\cell }\pard \s18\widctlpar +\intbl \tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\ +tx7920\tx8640\tx9360\tx10080\adjustright {\cell }\pard\plain \widctlp +ar\intbl\adjustright \fs20\cgrid {\row }\pard\plain \s18\widctlpar\in +tbl \tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\ +tx7920\tx8640\tx9360\tx10080\adjustright \f1\cgrid {\b HC\cell }\pard + \s18\qc\widctlpar\intbl \tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\ +tx7920\tx8640\tx9360\tx10080\adjustright {\cell }\pard \s18\widctlpar +\intbl \tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\ +tx7920\tx8640\tx9360\tx10080\adjustright {\cell }\pard\plain \widctlp +ar\intbl\adjustright \fs20\cgrid {\row }\pard\plain \s18\widctlpar\in +tbl \tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\ +tx7920\tx8640\tx9360\tx10080\adjustright \f1\cgrid {\b AC\cell }\pard + \s18\qc\widctlpar\intbl \tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\ +tx7920\tx8640\tx9360\tx10080\adjustright {\cell }\pard \s18\widctlpar +\intbl \tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\ +tx7920\tx8640\tx9360\tx10080\adjustright {\cell }\pard\plain \widctlp +ar\intbl\adjustright \fs20\cgrid {\row }\pard\plain \s18\widctlpar\in +tbl \tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\ +tx7920\tx8640\tx9360\tx10080\adjustright \f1\cgrid {\b Femur Length\c +ell }\pard \s18\qc\widctlpar\intbl \tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\ +tx7920\tx8640\tx9360\tx10080\adjustright {\cell }\pard \s18\widctlpar +\intbl \tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\ +tx7920\tx8640\tx9360\tx10080\adjustright {\cell }\pard\plain 
\widctlp +ar\intbl\adjustright \fs20\cgrid {\row }\trowd \trgaph108\trleft2160\ +trkeep\trbrdrt\brdrdb\brdrw15\brdrcf1 \trbrdrl \brdrdb\brdrw15\brdrcf1 \trbrdrb\brdrdb\brdrw15\brdrcf1 \trbrdrr\brdrd +b\brdrw15\brdrcf1 \trbrdrh\brdrs\brdrw15\brdrcf1 \trbrdrv\brdrs\brdrw +15\brdrcf1 \clvertalt\clbrdrt\brdrs\brdrw15\brdrcf1 \clbrdrl\brdrdb\b +rdrw15\brdrcf1 \clbrdrb\brdrdb\brdrw15\brdrcf1 \clbrdrr\brdrdb\brdrw15\brdrcf1 \cltxlrtb \cellx7920\pard\plain \s18\w +idctlpar\intbl\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760 +\tx6480\tx7200\tx7920\tx8640\tx9360\tx10080\adjustright \f1\cgrid {A +regular fetal heart rate of ? beats per minute is seen.\cell }\pard\plain \widctlpar\intbl\adjustright \fs20\cgrid { +\row }\pard\plain \s15\widctlpar\tx720\tx1440\tx2160\tx2880\tx3600\tx +4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\tx9360\tx10080\adjustr +ight \f1\fs18\cgrid { \par }}

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: note [id://506782]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others examining the Monastery: (8)
As of 2024-03-28 15:02 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found