#!/usr/bin/perl -nw
# Name: b5tm
# Author: Chao-Kuei Hung http://www.cyut.edu.tw/~ckhung/
# Release date: Oct 1999
# Purpose: Find potential "trouble-makers" in a text file containing
#          big-5 characters
# See also: http://www.cyut.edu.tw/~ckhung/b/pl/big5.shtml
#
# Usage: b5tm chars-to-avoid [f1 f2 ... fn]
#
# For every two-byte big-5 character whose trailing byte is one of the
# characters in "chars-to-avoid", one report line is written to STDOUT:
#
#     FILE LINE: <lead><trail> <trail> <lead hex> <trail hex>
#
# Bytes that do not fit the big-5 lead/trail ranges checked below are
# reported on STDERR.
#
# Because of the -n switch, everything outside the BEGIN block runs once
# per input line, with the current line in $_.
#
# To make sure that this program is not affected by the big-5 problem,
# we insist on writing comments in English.

use strict;
use vars qw($trouble);

# Runs at compile time, i.e. before the implicit -n read loop starts, so the
# first command-line argument is removed from @ARGV before the remaining
# arguments are opened as input files.
BEGIN {
    $trouble = shift;
    die "usage: b5tm chars-to-avoid [f1 f2 ... fn]" unless defined $trouble;
}

# Split the line into single characters.
# NOTE(review): this assumes input is read as raw bytes (no decoding I/O
# layer in effect), so that each element is one byte -- confirm if the
# script is ever run with -C or PERL_UNICODE set.
my @bytes     = split //, $_;
my $lead      = shift @bytes;    # candidate leading byte
my $lead_code = ord $lead;       # its numeric code

foreach my $trail (@bytes) {     # candidate trailing byte
    my $trail_code = ord $trail; # its numeric code

    if ($lead_code < 0xa1 or $lead_code > 0xf9) {
        # This can not be the leading byte of a big-5 character.
        # Complain about control characters other than TAB and LF.
        printf STDERR "strange character 0x%02x\n", $lead_code
            if ($lead_code < 0x20 and $lead_code != 0x09 and $lead_code != 0x0a);
        # Slide the window: the current byte becomes the next candidate lead.
        ($lead, $lead_code) = ($trail, $trail_code);
        next;
    }

    # Valid trailing bytes are 0x40-0x7e and 0xa1-0xfe.
    if ($trail_code < 0x40
        or ($trail_code > 0x7e and $trail_code < 0xa1)
        or $trail_code > 0xfe) {
        printf STDERR "strange character pair 0x%02x-%02x\n",
            $lead_code, $trail_code;
        # Resynchronise on the offending byte and skip the report below:
        # without this "next" the report would be printed with the lead
        # byte already overwritten by the trailing byte.
        ($lead, $lead_code) = ($trail, $trail_code);
        next;
    }

    # File name and data bytes are passed as arguments, never interpolated
    # into the format, so a literal "%" in either cannot corrupt the output.
    printf "%s %4d: %s%s %s %02x %02x\n",
        $ARGV, $., $lead, $trail, $trail, $lead_code, $trail_code
        if (index($trouble, $trail) >= 0);

    # The pair has been consumed. A blank is not a valid lead byte, so the
    # next iteration simply promotes the following byte to candidate lead.
    ($lead, $lead_code) = (" ", ord " ");
}
# print "$ARGV $.: $_";

# Closing ARGV at the end of each file resets $. so that reported line
# numbers restart at 1 for every input file.
close ARGV if eof;

# End of program; the original author switched to Chinese comments here.
# Below are some sample Chinese characters whose big-5 encoding contains
# codes such as []{}\ : 開現陣程認加久址功因也設許淚
# From a UNIX shell, run   b5tm '[]{}\@' b5tm   to see them reported.
# NOTE(review): this self-test presumably only works when this file itself
# is stored in big-5 encoding -- confirm before relying on it.