#!/usr/local/bin/perl
#
# $Id: kcode,v 0.1 2004/04/01 07:40:11 hirose31 Exp $
#
# Copyright (c) 2004 HIROSE, Masaaki. All rights reserved.
#

=encoding utf8
=head1 NAME

B<kcode> - Ƽʸɤɽ

=head1 SYNOPSIS

B<kcode> [ B<-d> I<level> ] [ B<-x> ] [ B<-i> I<CES> ] [ B<-o> I<CES> ] I<char|hex>

=head1 DESCRIPTION

ɸϤ⤷ϥޥɥ饤顢ʸΤΤʸ󥳡ǥ
󥰤16ʿɽꡢƼʸ󥳡ǥ󥰤Ǥ16ʿɽ
Unicode, IncUnihan DatabaseURLϤ롣

Ϥ16ʿɽʸ󥳡ǥ󥰤μϰʲ̤ꡣ

  Shift_JIS
  EUC-JP
  ISO-2022-JP
  UCS-2
  UTF-8

դǧˤUnihan DatabaseURL˥Ф褤

=head1 OPTIONS

=over 4

=item B<-x>

ϤʸǤϤʤʸ󥳡ǥ󥰤16ʿɽȤƽ롣

=item B<-i> I<CES>

ʸʸ󥳡ǥ󥰤λꡣάeucȤʤ롣

=item B<-o> I<CES>

ʸʸ󥳡ǥ󥰤λꡣάeucȤʤ롣

=item B<-d> I<level>

ǥХå⡼ɡǥХå٥(12)ǻꤹ롣

=item B<-h>

إפɽƽλ롣

=item B<CES>

ʸ󥳡ǥ(Character Encoding Scheme)
ʲΤΤΤ줫

  sjis
  euc
  jis
  ucs2
  utf8

=item B<char>

ʸΤΡ

=item B<hex>

ʸ16ʿɽ

=back

=head1 EXAMPLES

=head2 Shift_JISǡؤ٤ΥɤäƤʤä?

  echo '' | kcode

=head2 UCS-2U+3042äƤɤʸä?

  kcode -i ucs2 -x 3042

=cut

use strict;
use Getopt::Std;

my $RCSID = q$Id: kcode,v 0.1 2004/04/01 07:40:11 hirose31 Exp $;
my $REVISION = $RCSID =~ /,v ([\d.]+)/ ? $1 : 'unknown';
my $VERSION = '1.0';
my $PROG = substr($0, rindex($0, '/')+1);

my $Debug = 0;

sub dprint (@) {
	return unless $Debug;
	my @m = @_;
	chomp @m;
	print STDERR 'DEBUG: ', @m,"\n";
}

sub dprint2(@) {
	dprint @_ if $Debug >= 2;
}

sub usage() {
	my $mesg = shift;

	print "[ERROR] $mesg\n" if $mesg;
	print "usage:\n";
	print "  $PROG [ -d level ] [-x] [-i CES] [-o CES] char|hex\n";
	print "
    -x        assume input is hexadecimal.
    -i CES    set input character encoding to CES.
    -o CES    set output character encoding to CES.
       CES = sjis | euc | jis | ucs2 | utf8
    -d level  set debug level.
    -h        show this help.

v$VERSION (Rev $REVISION)
";
	exit 1;
}

use constant ENC_SJIS => 0;
use constant ENC_EUC  => 1;
use constant ENC_JIS  => 2;
use constant ENC_UCS2 => 3;
use constant ENC_UTF8 => 4;

our $Converter;
our @Encodings;
our $From_Enc;
our $To_Enc;

# --------------------------------------------------------------------
# M A I N
# --------------------------------------------------------------------

MAIN: {
	my %opt;
	getopts('d:xi:o:',\%opt) or &usage();
	$Debug = exists $opt{'d'} ? ( $opt{'d'} or 1 ) : 0;
	unless ($Debug =~ /^\d+$/) {
		&usage("invalid argument -d=$opt{'d'}");
	}
	dprint "DEBUG MODE LEVEL=$Debug";

	my $k_from_to = sub {};
 LOAD: {
		eval q( require Encode );
		unless ($@) {
			dprint "load Encode";
			$Converter = 'Encode';
			$k_from_to = \&_kft_encode;
			@Encodings = qw( cp932 euc-jp iso-2022-jp ucs-2be utf8 );
		} else {
			eval q( require Jcode );
			unless ($@) {
				dprint "load Jcode";
				$Converter = 'Jcode';
				$k_from_to = \&_kft_jcode;
				@Encodings = qw( sjis euc jis ucs2 utf8 );
			} else {
				die "failed to load Encode and Jcode.";
			}
		}
	}

	if (exists $opt{'i'}) {
		$From_Enc = &select_enc($opt{'i'});
	} else {
		$From_Enc = $Encodings[ENC_EUC];
	}
	dprint "From_Enc=$From_Enc";

	if (exists $opt{'o'}) {
		$To_Enc = &select_enc($opt{'o'});
	} else {
		$To_Enc = $Encodings[ENC_EUC];
	}
	dprint "To_Enc=$To_Enc";

	# memo : U+3042 e3 81 82(utf8) / a4 a2 (euc) / 82 a0 (sjis)
	my @in = @ARGV ? @ARGV : map { chomp($_); $_; } <>;
	foreach my $inx (@in) {
		my $ins = exists $opt{'x'} ? pack('H*', $inx) : $inx;
		# header
		printf "%-14s: %-18s (%s)\n", $From_Enc, $inx, &{$k_from_to}($ins, $From_Enc, $To_Enc);
		print '='x70,"\n";

		# all charset
		foreach my $cs (@Encodings) {
			printf "%-14s: %-18s %s\n", $cs, &hexdump(&{$k_from_to}($ins, $From_Enc, $cs));
		}
		printf "http://www.unicode.org/cgi-bin/GetUnihanData.pl?codepoint=%s\n",
			&hexdump(&{$k_from_to}($ins, $From_Enc, $Encodings[ENC_UCS2]));
		print "\n";
	}
	printf "%70s\n", "using $Converter";

	exit 0;
}

# --------------------------------------------------------------------
# S U B  R O U T I N E
# --------------------------------------------------------------------

sub _kft_encode {
	my $s = shift;
	my $from_code = shift;
	my $to_code = shift;

	my $tmp = $s;
	Encode::from_to($tmp, $from_code, $to_code);
	return $tmp;
}

sub _kft_jcode {
	my $s = shift;
	my $from_code = shift;
	my $to_code = shift;

	return Jcode->new($s, $from_code)->$to_code;
}

sub hexdump {
	my $s = shift;
	my $h = lc(unpack('H*', $s));
	my @ret;
	push @ret, uc($h);
	$h =~ s/(..)/\\x$1/g;
	push @ret, '"'.$h.'"';
	return wantarray ? @ret : $ret[0];
}

sub select_enc {
	my $enc = shift;

	foreach ($enc) {
		/(?:s|shift|shift_)jis/i
			and do { return $Encodings[ENC_SJIS]; };
		/(?:cp932|ms932|windowd-31j)/i
			and do { return $Encodings[ENC_SJIS]; };
		/euc(?:jp|-jp|)/i
			and do { return $Encodings[ENC_EUC];  };
		/(?:jis|iso-2022-jp)/i
			and do { return $Encodings[ENC_JIS];  };
		/ucs(?:2|-2)/i
			and do { return $Encodings[ENC_UCS2]; };
		/utf(?:8|-8)/i
			and do { return $Encodings[ENC_UTF8]; };
		/.*/ and do { die "unknown encoding: $enc"; };
	}
}

__END__

=head1 SEE ALSO

L<Module::Jcode>,
L<Module::Encode>

L<http://www.irori.org/tool/kcode.html>

=head1 AUTHOR

HIROSE, Masaaki E<lt>hirose31@irori.orgE<gt>

=cut

# for Emacsen
# Local Variables:
# tab-width: 4
# coding: euc-jp
# End:
