#!/usr/bin/perl

use strict;
use warnings;

# Insert spaces between runs of ideographic characters and adjacent
# printable-ASCII text, after folding "fullwidth" ASCII to plain ASCII.
#
# Usage:
#     script < input > output        (pipe or redirection)
#     script file1 file2 ... > output
#
# Input and output are both cp936-encoded.  Files named on the command
# line are opened with the same encoding layer as STDIN, so either way
# of supplying input is decoded identically.  An argument of "-" means
# STDIN.
#
# (you could add a command-line option to select
# a different input/output character encoding)

# add_boundary_spaces($line)
#
# Takes one line of already-decoded text and returns the converted line:
#   1. fullwidth ASCII (U+FF01-U+FF5E) is mapped onto ASCII 0x21-0x7E;
#   2. wherever a printable ASCII character touches a run of ideographic
#      characters, a single space is inserted between them.
# Existing whitespace at a boundary is left alone (no doubled spaces),
# and a line with no ideographic characters comes back unchanged apart
# from the fullwidth conversion.
sub add_boundary_spaces {
    my ($line) = @_;

    # ff01-ff5e is the unicode range for "fullwidth ascii", and it
    # can be transferred directly to the ascii range 0x21-0x7e.
    $line =~ tr/\x{ff01}-\x{ff5e}/!-~/;

    # Split into chunks: ideographic vs. non-ideographic.  The capturing
    # parens around the split regex keep the ideographic runs in the list.
    my @chunks = split /(\p{Ideographic}+)/, $line;

    # Pad the inner edges only: the first chunk never gets a leading
    # space and the last chunk never gets a trailing one.  The patterns
    # match printable ASCII only, so ideographic chunks pass through
    # untouched, and a lone chunk (no ideographs) is not modified at all.
    for my $i ( 0 .. $#chunks ) {
        $chunks[$i] =~ s/([!-~])$/$1 / unless $i == $#chunks;
        $chunks[$i] =~ s/^([!-~])/ $1/ unless $i == 0;
    }

    return join '', @chunks;
}

# main()
#
# Converts every input line and prints the result to STDOUT.  Reads the
# files named in @ARGV, or STDIN when there are none.  A file that
# cannot be opened is reported on STDERR and skipped.
sub main {
    my $layer = ':encoding(cp936)';

    binmode( STDIN,  $layer );
    binmode( STDOUT, $layer );

    my @sources = @ARGV ? @ARGV : ('-');

    for my $source (@sources) {
        my $fh;
        if ( $source eq '-' ) {
            $fh = \*STDIN;
        }
        elsif ( !open $fh, "<$layer", $source ) {
            # Three-argument open: the name is always treated as a plain
            # file, and the handle gets the same decoding layer as STDIN.
            warn "Can't open $source: $!\n";
            next;
        }

        while ( my $line = <$fh> ) {
            print add_boundary_spaces($line);
        }

        close $fh unless $source eq '-';
    }

    return;
}

# Run only when executed as a script, so the file can also be loaded
# (e.g. by a test) without consuming STDIN.
main() unless caller;