Why not interpolate the stuff into a variable? It can sometimes be a useful technique, but to my eye the following is more readable and obvious to people who follow.
# Build the body of a regex character class out of named, individually
# commented code-point ranges, then compile it once into $XML_BaseChar.
#
# Each "\x{....}-\x{....}" literal is a double-quoted string, so it holds the
# two real endpoint characters joined by a hyphen; inside [...] that reads as
# a range. None of the endpoints is special inside a character class, so no
# escaping is needed.
#
# NOTE(review): only the ranges listed below are accepted (for example,
# lowercase a-z is not among them). This looks like an excerpt of a longer
# list -- confirm before relying on it as a complete definition.
my $valid_XML_BaseChars =
    join('',
        "\x{0041}-\x{005A}",    # Uppercase A-Z
        "\x{0100}-\x{0131}",    # Extended Latin A subset
                                # Skipping ligatures 0132, 0133
        "\x{0134}-\x{013E}",    # Continuing Ext. Latin A
                                # Skipping middle dots 013F, 0140
        "\x{0141}-\x{0148}",    # Finishing Ext. Latin A
        "\x{01FA}-\x{0217}",    # Extended Latin B subset
        "\x{0250}-\x{02A8}",    # IPA Extensions
    );

# Matches exactly one character from the ranges above.
# The interpolated name must be the variable declared above; a misspelled
# name would interpolate as empty and leave an invalid "[]" class.
# No /o flag: qr// already compiles the pattern once when it is evaluated.
my $XML_BaseChar = qr/[$valid_XML_BaseChars]/;
-ben