sub truncate_utf8 {
my ($utf8, $len) = @_;
$len += 0; # Make sure $len is a number.
return $utf8 =~ s/^.{0,$len)\K(?![\x80-\xBF]).*//sr;
}
Unmatched ) in regex, and if I fix that:
use warnings;
use strict;
use Test::More tests=>15;
use open qw/:std :utf8/;
my $in = "\N{U+CF} \N{U+2764} \N{U+1F42A}";
is truncate_utf8($in, 11), "\N{U+CF} \N{U+2764} \N{U+1F42A}";
is truncate_utf8($in, $_), "\N{U+CF} \N{U+2764} " for 10,9,8,7;
is truncate_utf8($in, 6), "\N{U+CF} \N{U+2764}";
is truncate_utf8($in, $_), "\N{U+CF} " for 5,4,3;
is truncate_utf8($in, 2), "\N{U+CF}";
is truncate_utf8($in, $_), "" for 1,0;
$in = "\xE4b";
utf8::downgrade($in); # make sure this really is a non-UTF8 string
is truncate_utf8($in, 2), "\xE4b";
is truncate_utf8($in, 1), "\xE4";
is truncate_utf8($in, 0), "";
sub truncate_utf8 {
my ($utf8, $len) = @_;
$len += 0; # Make sure $len is a number.
return $utf8 =~ s/^.{0,$len}\K(?![\x80-\xBF]).*//sr;
}
__END__
1..15
ok 1
not ok 2
# Failed test at x.pl line 9.
Wide character in print at /opt/perl5.28/lib/site_perl/5.28.1/Test2/Fo
+rmatter/TAP.pm line 112.
# got: 'Ï ❤ 🐪'
# expected: 'Ï ❤ '
not ok 3
# Failed test at x.pl line 9.
Wide character in print at /opt/perl5.28/lib/site_perl/5.28.1/Test2/Fo
+rmatter/TAP.pm line 112.
# got: 'Ï ❤ 🐪'
# expected: 'Ï ❤ '
not ok 4
# Failed test at x.pl line 9.
Wide character in print at /opt/perl5.28/lib/site_perl/5.28.1/Test2/Fo
+rmatter/TAP.pm line 112.
# got: 'Ï ❤ 🐪'
# expected: 'Ï ❤ '
not ok 5
# Failed test at x.pl line 9.
Wide character in print at /opt/perl5.28/lib/site_perl/5.28.1/Test2/Fo
+rmatter/TAP.pm line 112.
# got: 'Ï ❤ 🐪'
# expected: 'Ï ❤ '
not ok 6
# Failed test at x.pl line 10.
Wide character in print at /opt/perl5.28/lib/site_perl/5.28.1/Test2/Fo
+rmatter/TAP.pm line 112.
# got: 'Ï ❤ 🐪'
# expected: 'Ï ❤'
not ok 7
# Failed test at x.pl line 11.
Wide character in print at /opt/perl5.28/lib/site_perl/5.28.1/Test2/Fo
+rmatter/TAP.pm line 112.
# got: 'Ï ❤ 🐪'
# expected: 'Ï '
not ok 8
# Failed test at x.pl line 11.
Wide character in print at /opt/perl5.28/lib/site_perl/5.28.1/Test2/Fo
+rmatter/TAP.pm line 112.
# got: 'Ï ❤ '
# expected: 'Ï '
not ok 9
# Failed test at x.pl line 11.
Wide character in print at /opt/perl5.28/lib/site_perl/5.28.1/Test2/Fo
+rmatter/TAP.pm line 112.
# got: 'Ï ❤'
# expected: 'Ï '
not ok 10
# Failed test at x.pl line 12.
# got: '� '
# expected: '�'
not ok 11
# Failed test at x.pl line 13.
# got: '�'
# expected: ''
ok 12
ok 13
ok 14
ok 15
# Looks like you failed 10 tests of 15.
sub truncate_utf8 {
my ($utf8, $len) = @_;
return $utf8 if length($utf8) <= $len;
my $next = substr($utf8, $len, length($utf8)-$len, '');
$next = chop($utf8) while (ord($next) & 0xC0) == 0x80;
return $utf8;
}
use warnings;
use strict;
use Test::More tests=>15;
use open qw/:std :utf8/;
my $in = "\N{U+CF} \N{U+2764} \N{U+1F42A}";
is truncate_utf8($in, 11), "\N{U+CF} \N{U+2764} \N{U+1F42A}";
is truncate_utf8($in, $_), "\N{U+CF} \N{U+2764} " for 10,9,8,7;
is truncate_utf8($in, 6), "\N{U+CF} \N{U+2764}";
is truncate_utf8($in, $_), "\N{U+CF} " for 5,4,3;
is truncate_utf8($in, 2), "\N{U+CF}";
is truncate_utf8($in, $_), "" for 1,0;
$in = "\xE4b";
utf8::downgrade($in); # make sure this really is a non-UTF8 string
is truncate_utf8($in, 2), "\xE4b";
is truncate_utf8($in, 1), "\xE4";
is truncate_utf8($in, 0), "";
sub truncate_utf8 {
my ($utf8, $len) = @_;
return $utf8 if length($utf8) <= $len;
my $next = substr($utf8, $len, length($utf8)-$len, '');
$next = chop($utf8) while (ord($next) & 0xC0) == 0x80;
return $utf8;
}
__END__
1..15
ok 1
not ok 2
# Failed test at x.pl line 9.
Wide character in print at /opt/perl5.28/lib/site_perl/5.28.1/Test2/Fo
+rmatter/TAP.pm line 112.
# got: 'Ï ❤ 🐪'
# expected: 'Ï ❤ '
not ok 3
# Failed test at x.pl line 9.
Wide character in print at /opt/perl5.28/lib/site_perl/5.28.1/Test2/Fo
+rmatter/TAP.pm line 112.
# got: 'Ï ❤ 🐪'
# expected: 'Ï ❤ '
not ok 4
# Failed test at x.pl line 9.
Wide character in print at /opt/perl5.28/lib/site_perl/5.28.1/Test2/Fo
+rmatter/TAP.pm line 112.
# got: 'Ï ❤ 🐪'
# expected: 'Ï ❤ '
not ok 5
# Failed test at x.pl line 9.
Wide character in print at /opt/perl5.28/lib/site_perl/5.28.1/Test2/Fo
+rmatter/TAP.pm line 112.
# got: 'Ï ❤ 🐪'
# expected: 'Ï ❤ '
not ok 6
# Failed test at x.pl line 10.
Wide character in print at /opt/perl5.28/lib/site_perl/5.28.1/Test2/Fo
+rmatter/TAP.pm line 112.
# got: 'Ï ❤ 🐪'
# expected: 'Ï ❤'
not ok 7
# Failed test at x.pl line 11.
Wide character in print at /opt/perl5.28/lib/site_perl/5.28.1/Test2/Fo
+rmatter/TAP.pm line 112.
# got: 'Ï ❤ 🐪'
# expected: 'Ï '
not ok 8
# Failed test at x.pl line 11.
Wide character in print at /opt/perl5.28/lib/site_perl/5.28.1/Test2/Fo
+rmatter/TAP.pm line 112.
# got: 'Ï ❤ '
# expected: 'Ï '
not ok 9
# Failed test at x.pl line 11.
Wide character in print at /opt/perl5.28/lib/site_perl/5.28.1/Test2/Fo
+rmatter/TAP.pm line 112.
# got: 'Ï ❤'
# expected: 'Ï '
not ok 10
# Failed test at x.pl line 12.
# got: '� '
# expected: '�'
not ok 11
# Failed test at x.pl line 13.
# got: '�'
# expected: ''
ok 12
ok 13
ok 14
ok 15
# Looks like you failed 10 tests of 15.
-
Are you posting in the right place? Check out Where do I post X? to know for sure.
-
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big>
<blockquote> <br /> <dd>
<dl> <dt> <em> <font>
<h1> <h2> <h3> <h4>
<h5> <h6> <hr /> <i>
<li> <nbsp> <ol> <p>
<small> <strike> <strong>
<sub> <sup> <table>
<td> <th> <tr> <tt>
<u> <ul>
-
Snippets of code should be wrapped in
<code> tags not
<pre> tags. In fact, <pre>
tags should generally be avoided. If they must
be used, extreme care should be
taken to ensure that their contents do not
have long lines (<70 chars), in order to prevent
horizontal scrolling (and possible janitor
intervention).
-
Want more info? How to link
or How to display code and escape characters
are good places to start.