This updated version needs a Unicode::UCD that provides prop_invlist, which first shipped with perl 5.016.
See perlunicode, perluniprops, perlrecharclass, Unicode::UCD, Data::Dump, List::MoreUtils
#!/usr/bin/perl --
use strict;
use warnings;
use Data::Dump qw/ dd pp /;
use List::MoreUtils qw' uniq ';
use Unicode::UCD qw/ prop_invlist /;
# Script entry point: pass the command-line arguments to Main, then
# terminate with a success status.
Main(@ARGV);
exit 0;
# uRanges( $property )
# Delegates to RangeIt, formatting every code point as a \uXXXX escape.
# The range format is left for RangeIt to derive from the single format.
sub uRanges {
    my ($property) = @_;
    return RangeIt( $property, '\\u%04.4X' );
}
# pRanges( $property )
# Delegates to RangeIt, formatting every code point as a \N{U+XXXX} escape.
# The range format is left for RangeIt to derive from the single format.
sub pRanges {
    my ($property) = @_;
    return RangeIt( $property, '\\N{U+%04.4X}' );
}
# iRanges( $property )
# Delegates to RangeIt with bare hexadecimal formats: a single code point
# becomes "XXXX", a range becomes "XXXX YYYY" (space separated).
sub iRanges {
    my ($property) = @_;
    my $single_format = '%04.4X';
    my $range_format  = '%04.4X %04.4X';
    return RangeIt( $property, $single_format, $range_format );
}
# RangeIt( $punct, $format1, $format2 )
#
# Turns the inversion list that Unicode::UCD::prop_invlist returns for
# $punct into a list of formatted strings, one per contiguous run of
# code points.
#
#   $punct   - property name (or "property=value") handed to prop_invlist
#   $format1 - sprintf format for a run holding exactly one code point;
#              defaults to '\N{U+%04.4X}'
#   $format2 - sprintf format for a run of two or more code points (it
#              receives the lower and the upper bound); defaults to
#              "$format1-$format1"
#
# Returns the formatted strings. When prop_invlist yields nothing, a
# warning is emitted and an empty list is returned.
sub RangeIt {
    my( $punct, $format1, $format2 ) = @_;
    $format1 ||= '\\N{U+%04.4X}';
    $format2 ||= join '-', $format1, $format1;

    my @invlist = prop_invlist( $punct );
    unless( @invlist ){
        warn "## empty for $punct \n";
        return;
    }

    my @ranges;
    # Even indexes of an inversion list start a run of matching code
    # points; the following odd index is the first code point after it.
    for (my $i = 0; $i < @invlist; $i += 2) {
        my $lower = $invlist[ $i ];

        # A final run without a closing entry extends "to infinity".
        # You may want to stop much much earlier; going this high may
        # expose perl deficiencies with very large numbers.
        my $upper = ($i + 1) < @invlist
            ? $invlist[ $i + 1 ] - 1        # In range
            : $Unicode::UCD::MAX_CP;        # To infinity

        if( $lower != $upper ){
            push @ranges, sprintf $format2, $lower, $upper;
        }
        else {
            push @ranges, sprintf $format1, $lower;
        }
    }
    return @ranges;
}
# Main( @args )
#
# Command-line driver. Parses the option flags out of @args, treats every
# remaining argument as a property name, and prints one block of output
# per distinct property name using the selected formatter:
#
#   -j (js, java, javascript)  "Name => ..." using uRanges     (default)
#   -p (perl)                  "Name => ..." using pRanges
#   -i (is, in)                source of an IsName sub listing iRanges
#   -u (utf)                   source of an IsName sub returning +utf8::Name
#   -h (help)                  print the usage text
#
# Usage() is also printed when option parsing fails or when no property
# names are left after the options.
sub Main {
    use Getopt::Long qw/ GetOptionsFromArray /;

    # Work on a copy so option parsing never touches the caller's list.
    my @args = @_;
    my %opt;
    GetOptionsFromArray(
        \@args,
        \%opt,
        q{i|is|in!},
        q{p|perl!},
        q{j|js|java|javascript!},
        q{u|utf!},
        q{h|help!},
    ) or return Usage();

    $opt{h} and return Usage();
    @args or return Usage();

    my %rangers = (
        j => sub {
            my ($prop) = @_;
            printf "%s => %s\n\n", $prop, join '', uRanges( $prop );
        },
        p => sub {
            my ($prop) = @_;
            printf "%s => %s\n\n", $prop, join '', pRanges( $prop );
        },
        i => sub {
            my ($prop) = @_;
            my $body = join "\n", iRanges( $prop );
            print "sub Is${prop} { return <<'${prop}';\n${body}\n${prop}\n} ## end of Is${prop}\n\n";
        },
        u => sub {
            my ($prop) = @_;
            print "sub Is${prop} { return <<'${prop}';\n+utf8::${prop}\n${prop}\n} ## end of Is${prop}\n\n";
        },
    );

    # Pick the first flag that is actually switched on, in a fixed order,
    # so the choice does not depend on hash ordering and a negated flag
    # (e.g. --no-h) can never select a formatter that does not exist.
    my ($mode) = grep { $opt{$_} } qw/ j p i u /;
    $mode = 'j' unless defined $mode;
    my $ranger = $rangers{$mode};

    for my $k ( uniq( @args ) ){
        $ranger->( $k );
    }
}
# Usage()
# Prints the command synopsis, a sample invocation and example sessions
# for each output mode to the currently selected output handle.
sub Usage {
    my $synopsis = "\nUsage:\n $0 [ -i -j -u -p ] Punctuation\n";
    my $sample   = "\n $0 PerlSpace Title Bopo Dingbats AHex \n";
    # Disabled sample: "\n $0 ASCII_Hex_Digit=Yes ASCII_Hex_Digit=No \n"

    # Single-quoted heredoc: the example text is printed verbatim. The
    # blank first line reproduces the leading newline of the listing.
    my $examples = <<'EXAMPLES';

$ perl unicharproptoregexrange.pl Dingbats
Dingbats => \u2700-\u27BF
$ perl unicharproptoregexrange.pl -j Dingbats
Dingbats => \u2700-\u27BF
$ perl unicharproptoregexrange.pl -p Dingbats
Dingbats => \N{U+2700}-\N{U+27BF}
$ perl unicharproptoregexrange.pl -i Dingbats
sub IsDingbats { return <<'Dingbats';
2700 27BF
Dingbats
} ## end of IsDingbats
$ perl unicharproptoregexrange.pl -u Dingbats
sub IsDingbats { return <<'Dingbats';
+utf8::Dingbats
Dingbats
} ## end of IsDingbats
See perldoc perluniprops
EXAMPLES

    print $synopsis;
    print $sample;
    return print $examples;
}
__END__
-
Are you posting in the right place? Check out Where do I post X? to know for sure.
-
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big>
<blockquote> <br /> <dd>
<dl> <dt> <em> <font>
<h1> <h2> <h3> <h4>
<h5> <h6> <hr /> <i>
<li> <nbsp> <ol> <p>
<small> <strike> <strong>
<sub> <sup> <table>
<td> <th> <tr> <tt>
<u> <ul>
-
Snippets of code should be wrapped in
<code> tags not
<pre> tags. In fact, <pre>
tags should generally be avoided. If they must
be used, extreme care should be
taken to ensure that their contents do not
have long lines (<70 chars), in order to prevent
horizontal scrolling (and possible janitor
intervention).
-
Want more info? How to link
or How to display code and escape characters
are good places to start.