use strict; use warnings; use Data::Dumper::Concise; # Lists in the input are prefixed by indications of their level # and type: # # * is an unordered list # # is an ordered list # #3 a number indicates the value # # Lists are contained in 'inlist' structures. # # An inlist structure is an array of two elements: Type and # Content. # # Type is 'o' for an ordered list or 'u' for an unordered list. # # Content is an array of items in the list. # # The elements of the Contents array are either text or array # refs. # # If the list item has no attributes, it is present as text. # # If the list item has attributes, it is present as reference to # an array of two elements: the text of the element and a # reference to a hash of attributes. # # # Lists may be nested to arbitrary depth. A nested list is contained in # the value of the 'inlist' attribute of its parent list item. # my $lists = [ new_inlist() ]; # Stack is an array of pointers to inlist structures. Each inlist # structure contains the elements of a list. The stack grows as # items are added to more deeply nested lists. # # # # Bottom of stack is the current level 1 list. # # Every other element of the stack is a pointer to the inlist # structure containing the current list at some level of nesting. # # The top of stack is a pointer to the inlist structure for the # most deeply nested, current list. # # my $stack = [ $lists->[-1] ]; while(my $line = ) { chomp($line); if($line =~ m/^\s*$/) { unless( @$stack == 1 and # level 1 @{$stack->[-1]->[1]} == 0 # with no contents ) { push(@$lists, new_inlist()); $stack = [ $lists->[-1] ]; } } else { parse_line_and_add_to_list($stack, $line); } } print Dumper($lists); exit(0); # new_inlist returns an empty inlist data structure # sub new_inlist { return([ undef, [] ]); } # parse_line_and_add_to_list parses an input line into level, # type, value and text, then adds an item to the appropriate list, # according to level, creating sub-lists as necessary. # sub parse_line_and_add_to_list { my ($stack, $line) = @_; my ($pre, $value, $text) = $line =~ m/^([*#]*)([^ ]*)? (.*)/; my $level = length($pre); my $type_marker = substr($pre, -1); my $type = { '*' => 'u', '#' => 'o', }->{$type_marker}; die "unknown list type marker $type_marker" unless($type); while($level < @$stack) { pop(@$stack); } while($level > @$stack) { start_sub_list($stack, $type); } my $item = length($value) ? [ $text, { value => $value } ] : $text; my $inlist = $stack->[-1]; $inlist->[0] = $type unless(defined($inlist->[0])); die "inconsistent type on list element" unless($inlist->[0] eq $type); push(@{$inlist->[1]}, $item); } # start_sub_list adds a sub-list to the last item in the list at # the top of the stack. sub start_sub_list { my ($stack, $type) = @_; # Top of stack points to the innermost inlist structure my (undef, $contents) = @{$stack->[-1]}; my $last_item = $contents->[-1]; $last_item = [ $last_item, {} ] unless(ref($last_item) eq 'ARRAY'); my $attributes = $last_item->[1]; die "Attempt to initialize sub-list on an item with a sub-list" if(exists($attributes->{inlist})); $attributes->{inlist} = [ $type, [] ]; $contents->[-1] = $last_item; push(@$stack, $attributes->{inlist}); } __DATA__ * list 1 unordered item 1 * list 1 unordered item 2 *# list 1 unordered item 2 ordered item 1 *# list 1 unordered item 2 ordered item 2 *# list 1 unordered item 2 ordered item 3 * list 1 unordered item 3 ** list 1 unordered item unordered item 1 ** list 1 unordered item unordered item 2 ** list 1 unordered item unordered item 3 **# list 1 unordered item unordered item 3 ordered item 1 **# list 1 unordered item unordered item 3 ordered item 2 **# list 1 unordered item unordered item 3 ordered item 3 # list 2 ordered item 1 #3 list 2 ordered item 2 # list 2 ordered item 3 #* list 2 ordered item 3 unordered item 1 #* list 2 ordered item 3 unordered item 2 #* list 2 ordered item 3 unordered item 3