Beefy Boxes and Bandwidth Generously Provided by pair Networks
Clear questions and runnable code
get the best and fastest answer
 
PerlMonks  

CGI Script for Reading Newest Nodes

by voyager (Friar)
on Jun 14, 2001 at 01:08 UTC ( [id://88207]=sourcecode: print w/replies, xml ) Need Help??
Category: PerlMonks Related Scripts
Author/Contact Info voyager
Description: This script fetches the XML version of Newest Nodes and creates a page similar to the Newest Nodes page. With this script you can control which sections show up and in what order.
#! /perl/bin/perl -w

use strict;
use CGI();
use CGI::Carp qw(fatalsToBrowser warningsToBrowser);
use LWP::Simple;
use XML::Parser;

use vars qw(
    $q
    $TITLE
    $URL
    $NODETYPE_TITLES
    $NODETYPES_TO_SHOW_FIRST
    $NODETYPES_TO_IGNORE
    $SHOW_AUTHOR
    $xml_source_link
    $nodes_by_type $current_node $current_node_char
    $authors
    $info
);

main();

sub main {
    init();
    get_and_parse_xml();
    print
          $q->header
        , $q->start_html({-title => $TITLE})
        , $q->h1($TITLE)
        , $q->i($info->{cdata})
        , (' ' x 4) . $xml_source_link
    ;
    nodes_to_html();
    ignored_nodes_html();
    print $q->end_html;
}

sub init {
    $| = 1;
    $q = CGI->new;
    $URL = "http://www.perlmonks.org/index.pl?node=Newest%20Nodes%20XM
+L%20Generator";
    $xml_source_link = $q->a({-href => "view-source:$URL"}, 'View the 
+XML Source');
    $TITLE = "Perl Monks - Newest Nodes via XML";
    $NODETYPE_TITLES = {
          perlquestion => "Questions"
        , perlmeditation => "Meditations"
        , user => "Users"
        , note => "Notes"
        , sourcecode => "Code"
        , CUFP => "Cool Uses For Perl"
        , poem => "Poetry"
        , perltutorial => "Tutorials"
        , modulereview => "Module Reviews"
        , obfuscated => "Obfuscation"
        , "categorized question" => "Categorized Questions"
        , "categorized answer" => "Categorized Answers"
        , "monkdiscuss" => "Discussion"
    };
    $NODETYPES_TO_SHOW_FIRST = [
          "perlquestion"
        , "perlmeditation"
        , "monkdiscuss"
        , "CUFP"
        , "sourcecode"
        , "modulereview"
        , "poem"
        , "categorized question"
        , "categorized answer"
    ];
    $NODETYPES_TO_IGNORE = [
          "user"
        , "note"
        , "obfuscated"
    ];
    $SHOW_AUTHOR = 1;
    $nodes_by_type = {};
    $authors = {};
}

####
# subs for parsing xml, handling tags, etc.
####

sub get_and_parse_xml {
    my $xml = get($URL) || die " couldn't get xml";
    my $p = XML::Parser->new (
        Handlers =>{
        Start => \&start_tag,
        End   => \&end_tag,
        Char  => \&char_data
    },) || die "Couldn't create new Parser";
    eval { $p->parse($xml) };
    die "couldn't parse: $@" if $@;
}
sub start_tag {
    my ($p, $el, %attrs) = @_;
    $current_node = {};
    %$current_node = %attrs;
    $current_node_char = '';
}
sub end_tag {
    my ($p, $el) = @_;
    $current_node->{cdata} = $current_node_char;
    INFO_tag() if $el eq 'INFO';
    NODE_tag() if $el eq 'NODE';
    AUTHOR_tag() if $el eq 'AUTHOR';
}
sub char_data {
    my ($p, $string) = @_;
    $current_node_char .= $string;
}
sub INFO_tag {
    $info = $current_node;
}
sub NODE_tag {
    my $nodetype = $current_node->{nodetype};
    push(@{$nodes_by_type->{$nodetype}}, $current_node);    
}
sub AUTHOR_tag {
    my $author_id = $current_node->{node_id};
    my $author_name = $current_node->{cdata};
    $authors->{$author_id} = $author_name;
}

###
# subs for taking parsed nodes and generating html
####

sub nodes_to_html {
    my $html = '';
    foreach my $nodetype (@$NODETYPES_TO_SHOW_FIRST) {
        $html .= html_by_type($nodetype);
        delete $nodes_by_type->{$nodetype};
    }
    foreach my $nodetype (@$NODETYPES_TO_IGNORE) {
        delete $nodes_by_type->{$nodetype};
    }
    foreach my $nodetype (keys %$nodes_by_type) {
        $html .= html_by_type($nodetype);
    }
}

sub html_by_type {
    my $nodetype = shift;
    print $q->h2( node_title($nodetype) );
    my $nodes = $nodes_by_type->{$nodetype} || [];
    unless (@$nodes) {
        print $q->ul($q->li('No nodes') );
        return;
    }
    my $url_base = 'http://www.perlmonks.org/?node_id=';
    print '<ul>';
    foreach my $node (@$nodes) {
        my $link = $q->a({-href => "$url_base$node->{node_id}"}, $node
+->{cdata});
        my $node_text = $q->li($link);
        my $author = $authors->{$node->{author_user}};
        $node_text .= ('&nbsp;' x 2) . $q->small(" by $author") if $SH
+OW_AUTHOR;
        print $node_text;
    }
    print '</ul>';
}

sub ignored_nodes_html {
    my $html = '';
    foreach my $ignore_type (@$NODETYPES_TO_IGNORE) {
        $html .= $q->li( node_title($ignore_type) );
    }
    return unless $html;
    print $q->h2('Ignored Node Types')
        , $q->ul($html)
    ;
}

sub node_title { $NODETYPE_TITLES->{$_[0]} || $_[0] }
Replies are listed 'Best First'.
Re: CGI Script for Reading Newest Nodes
by epoptai (Curate) on Jun 14, 2001 at 07:57 UTC
    Looking good voyager. Here's a small patch that will convert any UTF-8 extended ascii from XML::Parser to latin1 so odd nodetitles or usernames look right.

    Example nodetitle from today's new nodes:

    Before: Reactionary Coding—One-Shot Programs
    After : Reactionary Coding—One-Shot Programs

    Add this conversion subroutine:

    sub UTF8_latin1 { # UTF-8 to latin1 regex from XML::TiePYX (thanks to mirod) my($text) = @_; $text =~ s{([\xc0-\xc3])(.)}{ my $hi = ord($1); my $lo = ord($2); chr((($hi & 0x03) <<6) | ($lo & 0x3F)) }ge; return $text; }
    Add two lines to the foreach loop in sub html_by_type:
    foreach my $node (@$nodes) { $node->{cdata} = UTF8_latin1($node->{cdata}); # ADDED my $link = $q->a({-href => "$url_base$node->{node_id}"}, $node +->{cdata}); my $node_text = $q->li($link); my $author = $authors->{$node->{author_user}}; $author = UTF8_latin1($author); # ADDED $node_text .= ('&nbsp;' x 2) . $q->small(" by $author") if $SH +OW_AUTHOR; print $node_text; }
    Enjoy.

    --
    Check out my Perlmonks Related Scripts like framechat, reputer, and xNN.

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: sourcecode [id://88207]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others pondering the Monastery: (8)
As of 2024-03-28 10:10 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found