#!/usr/bin/tclsh
# Name: b5tm
# Author: Chao-Kuei Hung http://www.cyut.edu.tw/~ckhung/
# Release date: Mar 1999
# Purpose: Find potential "trouble-makers" in a text file containing
#          big-5 characters
# See also: http://www.cyut.edu.tw/~ckhung/b/tcl/big5.shtml
#
# Usage: b5tm chars-to-avoid [f1 f2 ... fn]
#   chars-to-avoid  characters to look for in the trailing byte of a
#                   big-5 character (NOTE: this argument is compared
#                   as-is against single input bytes, so it is expected
#                   to consist of plain ASCII characters)
#   f1 ... fn       files to scan; "-" means standard input, which is
#                   also used when no file is given
#
# Output (stdout): one line per big-5 character whose trailing byte is
# one of chars-to-avoid, showing the line number, the two-byte character,
# its trailing byte alone, and the hex codes of both bytes.
# Diagnostics about unexpected bytes are written to stderr.
#
# To make sure that this program is not affected by the big-5 problem,
# we insist on writing comments in English.

if {$argc < 1} {
    # Plain message plus non-zero exit status instead of a Tcl stack trace.
    puts stderr {usage: b5tm chars-to-avoid [f1 f2 ... fn]}
    exit 1
}

set trouble [lindex $argv 0]
set files [lrange $argv 1 end]
if {[llength $files] == 0} {
    # No file operands: scan standard input.
    set files [list -]
}

# The scanner works on individual bytes.  iso8859-1 maps every byte to the
# character with the same code (and back), so using it on stdout makes the
# two bytes of a reported big-5 character leave the program unchanged
# instead of being re-encoded into the system encoding.
fconfigure stdout -encoding iso8859-1

# ord --
#   Return the numeric code (0..255 for byte input) of the first character
#   of ch, or -1 when ch is the empty string.
proc ord ch {
    if {[scan $ch %c code] < 1} {
        return -1
    }
    return $code
}

# scan_file --
#   Read channel inf byte by byte, print every big-5 character whose
#   trailing byte occurs in trouble, and report unexpected bytes on stderr.
#   Line numbers start at 1 and are advanced on every newline (0x0a).
proc scan_file {inf trouble} {
    set lineno 1
    set ch0 [read $inf 1]                   ;# candidate leading byte
    while {$ch0 ne ""} {
        set asc0 [ord $ch0]

        if {$asc0 < 0xa1 || $asc0 > 0xf9} {
            # This can not be the leading byte of a big-5 character.
            if {$asc0 < 0x20 && $asc0 != 0x09 && $asc0 != 0x0a} {
                puts stderr "strange character 0x[format {%02x} $asc0]"
            }
            if {$asc0 == 0x0a} {
                incr lineno
            }
            set ch0 [read $inf 1]
            continue
        }

        set ch1 [read $inf 1]               ;# the trailing byte
        if {$ch1 eq ""} {
            # Input ended in the middle of a two-byte character.
            puts stderr "incomplete big-5 character 0x[format {%02x} $asc0] at end of input"
            break
        }
        set asc1 [ord $ch1]

        # Accepted trailing bytes are 0x40-0x7e and 0xa1-0xfe.
        if {$asc1 < 0x40 || ($asc1 > 0x7e && $asc1 < 0xa1) || $asc1 > 0xfe} {
            puts stderr "strange character pair 0x[format {%02x} $asc0]-[format {%02x} $asc1]"
            # Re-examine the second byte as a possible leading byte.
            set ch0 $ch1
            continue
        }

        if {[string first $ch1 $trouble] >= 0} {
            # The bytes are passed as arguments, never spliced into the
            # format string itself.
            puts [format "%4d: %s%s %s %02x %02x" $lineno $ch0 $ch1 $ch1 $asc0 $asc1]
        }
        set ch0 [read $inf 1]
    }
}

foreach fn $files {
    if {$fn eq "-"} {
        set inf stdin
    } else {
        set inf [open $fn]
    }
    # One character per input byte; end-of-line translation keeps its
    # default setting.
    fconfigure $inf -encoding iso8859-1
    scan_file $inf $trouble
    if {$fn ne "-"} {
        close $inf
    }
}

# OK, the program ends here, so comments may now be written in Chinese :-)
# Below are some Chinese characters whose codes contain []{}\ and the like:
# 開現陣程認加久址功因也設許
# They can be inspected from a UNIX shell with: b5tm '[]{}\' b5tm