#!/usr/local/bin/perl

# unicat.pl (c) 2003-2005 exeal
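# Helper script for CEditView.
# Reads UnicodeData.txt and, for the code points that belong to the given category,
# prints them as a comma-separated list (with a line break after every 8 entries).
# Takes one or two arguments: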
#
#   unicat.pl <category> [-d:directory]
#
# "category" is either one of the following names or a valid category name (2 characters):
#
#   mark -- Mn, Mc, Me
#   numeric -- Nd, Nl, No
#   space -- Zs
#   format -- Cf
#   id_continue -- Mn, Mc, Nd, Pc
#   punctuation -- Pc, Pd, Ps, Pe, Pi, Pf, Po
#   symbol -- Sm, Sc, Sk, So
#   control -- Cc, Cf
#
# Output is written to standard output.

use strict;
use integer;

# Abort the script with an error message followed by the usage line.
#   argument -- text describing what is wrong with the invocation
# Does not return: the message and the usage text are raised through die().
sub usage($) {
	my $reason = shift();
	my $text = "\n" . $reason;
	$text .= "\nUsage: unicat.pl <category> [-d:directory]\n\n";
	die($text);
}

# ---------------------------------------------------------------------------
# Main script.
#
# 1. Turns the <category> argument into a regular expression that is matched
#    against the two-character category field of each data line.
# 2. Resolves the location of UnicodeData.txt (current directory unless a
#    -d:<directory> switch is given).
# 3. Scans the file and prints every matching code point as "0x<cp>,",
#    eight entries per line, to standard output.
# ---------------------------------------------------------------------------
use File::Spec;

my $fileName = 'UnicodeData.txt';

# Shorthand names accepted as <category> and the category codes they stand for.
my %patternOf = (
	mark        => 'Mn|Mc|Me',
	numeric     => 'Nd|Nl|No',
	space       => 'Zs',
	format      => 'Cf',
	id_continue => 'Mn|Mc|Nd|Pc',
	punctuation => 'Pc|Pd|Ps|Pe|Pi|Pf|Po',
	symbol      => 'Sm|Sc|Sk|So',
	control     => 'Cc|Cf',
);

usage("Bad parameter number.") if(@ARGV != 1 and @ARGV != 2);
usage("Category must not be empty.") if($ARGV[0] eq '');

# Anything that is not a known shorthand is used as the pattern itself,
# which allows a plain category code such as "Lu".
my $pattern = exists($patternOf{$ARGV[0]}) ? $patternOf{$ARGV[0]} : $ARGV[0];

# Compile once, up front. The (?:...) wrapper keeps the alternation self-contained,
# and a malformed user pattern is reported before any output is produced.
my $matcher = eval { qr/(?:$pattern)/ };
usage("Category is not a valid pattern.") unless(defined($matcher));

if(@ARGV == 2) {
	# The switch must be the whole argument, not merely appear somewhere in it.
	usage("Second parameter is illegal.") unless($ARGV[1] =~ /\A-d:(.+)\z/s);
	my $path = $1;
	# Strip quotes left around the directory. The /d modifier is required:
	# without it tr/// only counts the characters and removes nothing.
	$path =~ tr/"//d;
	usage("Second parameter is illegal.") if($path eq '');
	# Let File::Spec choose the separator so this is not limited to Windows.
	$fileName = File::Spec->catfile($path, $fileName);
}

my $matchCount = 0;

# Three-argument open: the mode is fixed to read-only, so characters such as
# '>' or '|' in a user-supplied directory cannot be taken as an open mode.
open(my $fh, '<', $fileName)
	or die("\nCannot open $fileName: $!\nIf UnicodeData.txt is not current directory, you can use -d:<directory> switch to specify where the file is.\n");

while(my $line = <$fh>) {
	# Fields are separated by ';': the first is the code point and the third is
	# the two-character category. [^;]* keeps each field inside its own separators.
	next unless($line =~ m/^([^;]*);[^;]*;(\w\w)/);
	my ($cp, $category) = ($1, $2);
	next unless($category =~ $matcher);

	# Start a new line before every 8th entry (including the very first one);
	# otherwise separate entries with a single space.
	print(($matchCount++ % 8 == 0) ? "\n" : " ");
	print("0x$cp,");
}

close($fh);

# print "\n$matchCount characters output.\n\n";

__END__