#!/opt/perl/bin/perl -w
#
# Convert ISO-8859-1 to UTF-8.
# Convert "\uXXXX" sequences to Unicode characters.
#

use strict;

# Everything printed to STDOUT is encoded as UTF-8.
binmode STDOUT, ":utf8";

# parseutf($text)
#
# Return a copy of $text in which every "\uXXXX" escape (a backslash, a
# lowercase "u", and exactly four hexadecimal digits) is replaced by the
# character whose code point is XXXX.  Text that contains no such escape,
# including escapes with fewer than four hex digits, is returned unchanged.
#
# The hex digits may be upper or lower case ("\u00e9" and "\u00E9" both
# yield U+00E9).  Matching is left to right and non-overlapping.
sub parseutf {
    my ($text) = @_;

    # hex() parses the four captured digits directly, so no string eval is
    # needed; a single s///ge pass avoids re-copying the tail of the string
    # after every match.
    $text =~ s/\\u([0-9a-fA-F]{4})/chr(hex($1))/ge;

    return $text;
}

# Filter mode: read lines from the files named on the command line (or
# from STDIN when none are given) and print each one after conversion.
# Skipped when this file is loaded via require/do, so parseutf() can be
# called without consuming any input.
unless (caller) {
    while (my $line = <>) {
        print parseutf($line);
    }
}