Congratulations, you've found a bug!
The problem is that if the start and end tags both belong to the same text() node, inserting the end tag creates new text() nodes that replace the old one, so inserting the start tag into the old text() node doesn't insert it into the newly created one. This is easily fixed by the following patch:
@@ -12,18 +12,20 @@
my $after = substr $text, $pos;
my $parent = $text->parentNode;
- $parent->insertBefore('XML::LibXML::Text'->new($before), $text);
+ my $preceding
+ = $parent->insertBefore('XML::LibXML::Text'->new($before), $t
+ext);
$parent->insertAfter('XML::LibXML::Text'->new($after), $text);
my $tag = 'XML::LibXML::Element'->new($tag_name);
$parent->replaceChild($tag, $text);
$tag->{query} = $query;
+ return $preceding
}
my $xml = "<foo>The quick br<bar>o</bar>wn <baz>f<bar>o</bar>x</baz>"
+;
$xml .= " jumps over the lazy d<bar>o</bar>g.</foo>";
my $new_element = "canid";
-my @queried = ("lazy dog", "quick brown fox",);
+my @queried = ("lazy dog", "quick brown fox", "the");
my $dom = 'XML::LibXML'->load_xml(string => $xml);
@@ -48,8 +50,10 @@
my $subtext_length = sum(map length, @texts[ $from .. $to ]);
my $last_pos = length($texts[$to]) - ($subtext_length - $foun
+d - length $query);
- insert_tag($texts[$to], $last_pos, 'end', $query);
- insert_tag($texts[$from], $found, 'start', $query);
+ my $preceding = insert_tag($texts[$to], $last_pos, 'end', $qu
+ery);
+
+ my $start_text = $from == $to ? $preceding : $texts[$from];
+ insert_tag($start_text, $found, 'start', $query);
last OUTER;
}
I.e., the insert_tag subroutine now returns the newly created text() node preceding the tag, and that node is used as the target text() when $from == $to, i.e. when both tags belong to the same text() node.
The code as written doesn't handle multiple occurrences of the query. Again, the fix is easy:
@@ -25,7 +25,7 @@
$xml .= " jumps over the lazy d<bar>o</bar>g.</foo>";
my $new_element = "canid";
-my @queried = ("lazy dog", "quick brown fox", "the");
+my @queried = ("lazy dog", "quick brown fox", "he", "e");
my $dom = 'XML::LibXML'->load_xml(string => $xml);
@@ -55,7 +55,10 @@
my $start_text = $from == $to ? $preceding : $texts[$from];
insert_tag($start_text, $found, 'start', $query);
- last OUTER;
+ @texts = $dom->findnodes('//text()');
+ $from += $from == $to ? 1 : 2;
+
+ last OUTER if $from > @texts;
}
}
print $dom;
I.e., after the replacement, reload the text() nodes to search (this could probably be optimized* to replace only the split node with the nodes it's been split into), and start searching from the text where the end tag was inserted. (When both tags were inserted into the same text() node, that node was split into three text() nodes; if they belong to different text() nodes, each of them was split into two nodes, so there are four new nodes.)
*) Update: Here's the optimization:
@@ -14,11 +14,12 @@
my $parent = $text->parentNode;
my $preceding
= $parent->insertBefore('XML::LibXML::Text'->new($before), $t
+ext);
- $parent->insertAfter('XML::LibXML::Text'->new($after), $text);
+ my $following
+ = $parent->insertAfter('XML::LibXML::Text'->new($after), $tex
+t);
my $tag = 'XML::LibXML::Element'->new($tag_name);
$parent->replaceChild($tag, $text);
$tag->{query} = $query;
- return $preceding
+ return $preceding, $following
}
my $xml = "<foo>The quick br<bar>o</bar>wn <baz>f<bar>o</bar>x</baz>"
+;
@@ -50,12 +51,14 @@
my $subtext_length = sum(map length, @texts[ $from .. $to ]);
my $last_pos = length($texts[$to]) - ($subtext_length - $foun
+d - length $query);
- my $preceding = insert_tag($texts[$to], $last_pos, 'end', $qu
+ery);
+ my @new_texts = insert_tag($texts[$to], $last_pos, 'end', $qu
+ery);
+
+ splice @texts, $to, 1, @new_texts;
+
+ my $start_text = $from == $to ? $new_texts[0] : $texts[$from]
+;
- my $start_text = $from == $to ? $preceding : $texts[$from];
insert_tag($start_text, $found, 'start', $query);
- @texts = $dom->findnodes('//text()');
$from += $from == $to ? 1 : 2;
last OUTER if $from > @texts;
Note that there's no need to splice in the texts around the start tag, because that part has already been searched for the current query.
($q=q:Sq=~/;[c](.)(.)/;chr(-||-|5+lengthSq)`"S|oS2"`map{chr |+ord
}map{substrSq`S_+|`|}3E|-|`7**2-3:)=~y+S|`+$1,++print+eval$q,q,a,
|