#!/usr/bin/perl -w # extract fields from the CIA factbook # usage: # cd factbook/print # ciafbxf *.html use strict; use Getopt::Std; my ($field, $key, $val); $field = [ 'Geographic coordinates' => '(\S.*\S)', 'Map references' => '(\S.*\S)', 'Area' => 'total:<\/i>\s*(.*) sq km', 'Land boundaries' => '(\d\S*) km', 'Coastline' => '(\d\S*) km', 'Population' => '(\S+)', 'Unemployment rate' => '(\S+%)', ]; print <[$i]"; } print "\n"; $field = { @$field }; while (<>) { my ($k); if ($. == 1) { $ARGV =~ /(\w+)\.htm/; print $1; } if (/>Introduction; $k =~ />(.*?)\s*$k\s*:\s*\s*$key\s*:\s*) { last if m#) { last if m#{$key}/; $val = $1; last; } $val = "undef" unless defined $val; $val = $1 * 1e6 if ($key eq "Area" and $val =~ /(\S+)\s*million/i); $val =~ s/,//g if $val =~ /^\s*[\d,]*(\.\d*)?\s*$/; print ":$val"; } continue { if (eof) { print "\n"; close ARGV; } } print "\n";
#
# NOTE(review): this script appears to have been damaged in transit and will
# not run as stored. All line breaks are gone (so everything after the "#!"
# is one comment to Perl), and the text between a '<' and the next '>' looks
# to have been stripped -- presumably by an HTML tag filter; confirm against
# the original file. The visibly truncated spots are:
#   * `print <[$i]"; }`            -- the header-printing statement/loop
#   * `if (/>Introduction; ...`    -- the section and field-matching regexes
#   * `last if m#) { last if m#`   -- the patterns that end a field's scan
# Restore these from a pristine copy rather than guessing at them.
#
# What the surviving statements show:
#   * $field starts as an ordered list of (field name => capture regex)
#     pairs and is converted to a hash with `$field = { @$field }` after the
#     header has been printed, so the array form supplies the column order.
#   * Input files come from @ARGV via `while (<>)`. On the first line of each
#     file ($. == 1) the word before ".htm" in $ARGV is printed as the row
#     label. `close ARGV` in the continue block at `eof` ends the output row
#     with "\n" and resets $. so the next file is again seen as line 1.
#   * A field with no captured value is printed as the literal string "undef".
#   * For the "Area" field only, a value of the form "<number> million" is
#     replaced by <number> * 1e6.
#   * Thousands separators are removed (s/,//g) only when the whole value is
#     a plain number made of digits, commas and an optional decimal part.
#   * Each value is emitted as ":$val", i.e. rows are colon-separated.
#   * Getopt::Std is loaded, but no getopts/getopt call is visible in the
#     surviving text -- TODO confirm whether options were ever parsed.