# -*- project-name: VASM -*-
package VASM::Catalog::Message;

use strict;
use warnings;
use VASM::Tree;
use XML::Parser;
use XML::Writer;
use Encode qw/encode/;
use Encode::Alias;
use Carp;

our $VERSION = '3.03';

# Known aliases for Encode -- the value returned by langinfo(CODESET) in
# VASM::Resource::Catalog::Message do not always agree with the codesets and
# aliases defined by Encode itself. Presently the only one I know of needs
# aliasing is TIS-620 (standard 8-bit encoding for Thai)
my %aliases = (
  # "Ha, ain't that a bitch; I'm /relish/, motherf---er! Mustard... ZAP YO
  # DUMB ASS!" -- Intrachit
  'TIS-620' => 'tis620'
);

# Now define them
for my $alias (keys %aliases) {
  define_alias($alias, $aliases{$alias});
}

sub new {
  # The IO::Handle to parse
  my ($self, %args) = @_; my $instance = {};

  $instance->{catalog} = VASM::Tree->new; # The message catalog itself
  $instance->{identifiers} = VASM::Tree->new; # Quantity identifiers
  bless $instance, $self;

  # If a file handle was given to parse:
  defined $args{handle} and $instance->parse($args{handle});

  # If UTF-8 is not desired for rendered messages, an encoding may also be
  # specified. If this field is left blank, the attribute will remain
  # undefined
  $instance->{encoding} = $args{encoding};

  return $instance;
}

sub store {
  # $id represents the original message in English and the quantity expressed.
  # $message is the translated message.
  my ($self, $id, $message) = @_;

  # All traits must be defined
  croak 'ID and message argument must be given' 
    unless defined $id and defined $message;

  # It is now very simple to store the message in the catalog itself
  $self->{catalog}->store($id => $message);
  
  return;
}

sub render {
  my ($self, $id, @arguments) = @_; my $message;

  croak 'ID argument must be given' unless defined $id;

  $message = $self->{catalog}->retrieve($id);
  # Try '_default' message unless already successful
  $message = $self->{catalog}->retrieve('_default') unless defined $message;
  croak 'Message does not exist and no default given' unless defined $message;

  # Add arguments
  $message = sprintf $message, @arguments;

  # If an alternative encoding was given, recode to it before returning the
  # string
  return $self->_recodeString($message);
}

sub delete {
  my ($self, $id) = @_;

  croak 'ID argument must be given' unless defined $id;
  $self->{catalog}->delete($id);

  return;
}

# Mainly for internal use
sub _recodeString {
  my ($self, $string) = @_;
 
  return defined $self->{encoding} ? 
    encode($self->{encoding}, $string) : 
    $string; # Identity
}

# List the messages in a given catalog
sub list {
  my ($self) = @_;

  return grep { $_ ne '_default' } $self->{catalog}->children;
}

sub _parseMessage {
  my ($self, $message) = @_;

  # Message tags must have the 'id' attribute
  croak '<message> tag lacks id attribute' unless length $message->[0]->{id};
  # Make sure the content of the tag is textual
  croak 'Non-textual content in <message> tag' unless $message->[1] eq '0';

  # Remove extraneous whitespace from the message
  $message->[2] =~ s!(?:^\s*|\s*$)!!gs; $message->[2] =~ s!\s*\n\s*! !g;
  # If all is well, store the message
  $self->store($message->[0]->{id}, $message->[2]);

  return;
}

# Piggyback on the facilities of XML::Writer to instantiate a message catalog
# from the contents of an XML file for which an IO::File is given as input.
sub parse {
  my ($self, $handle) = @_;
  # If this fails, XML::Parser will emit an error message and end the program.
  # Unfortunately, this message is always English. :( There is at least an
  # error context, which can be universally understood by the programmer.
  my $parser = XML::Parser->new(Style => 'Tree', ErrorContext => 3);
  my $tree = $parser->parse($handle); my $tags = $tree->[1];
  
  # Main loop: iterate through tags under <catalog>
  for my $index (grep { $_ % 2 } (0..$#{ $tags })) {
    next if $tags->[$index] eq '0'; # Skip text elements

    if ($tags->[$index] eq 'message') {
      $self->_parseMessage($tags->[$index + 1]);
    } else {
      croak 'Invalid tag in message catalog';
    }
  }
  
  return;
}

sub write {
  my ($self, $handle) = @_;

  croak 'Handle argument must be given' unless defined $handle;

  # Now create an XML::Writer instance and turn that puppy out
  my $xmlHandle = XML::Writer->new(
    OUTPUT => $handle, ENCODING => 'utf-8',
    DATA_MODE => 1, DATA_INDENT => 2
  );

  # Open up the document
  $xmlHandle->xmlDecl;
  $xmlHandle->startTag('catalog');
  $xmlHandle->comment('Generated by VASM');

  # Now emit the 'message' tags
  for my $message ($self->list) {
    $xmlHandle->dataElement(
      'message', $self->{catalog}->retrieve($message), id => $message
    );
  }

  $xmlHandle->endTag('catalog'); $xmlHandle->end; # End the document...

  return;
}

1;

__END__

=head1 NAME

VASM::Catalog::Message - i18n message catalogs with XML serialization

=head1 SYNOPSIS

    use VASM::Catalog::Message;

    my $instance = VASM::Catalog::Message->new($ioHandle);
    print $instance->render('phone call');
    # -> Peter ruft mich an
    print $instance->render('start the game')
    # -> Starte das verdammte Spiel!

=head1 DESCRIPTION

VASM::Catalog::Message provides for message translation from a source language
(presumably English) to the user's native language and includes support for
arguments in the traditional sprintf notation. In addition to the simple
object interface, VASM::Catalog::Message instances can digest an XML file
containing translated messages.

=head1 METHODS

These are all the methods generally useful to a programmer utilizing the
VASM::Catalog::Message class.

=over

=item new([ $handle ])

new constructs a new instance of the VASM::Catalog::Message class, and accepts
an optional argument, indicating an IO::Handle representing an XML file, whose
catalog definition you wish to introduce to the instance upon construction.
See L</FORMAT> below.

=item store(id => $message)

The method store accepts a hash constructor of three key/value pairs: 'id',
the untranslated source language message; 'quantity', a name designating a
linguistic quantity such as 'nullar', 'singular', 'paucal', or 'plural'; and
'message', the translated message itself, representing these other two
qualities. This translated message may contain all the directives accepted by
Perl's sprintf function; see L<perlfunc> and the render method below. This
method returns nothing.

=item render(id => ..., [ quantity => .., arguments => ... ])

The method render accepts a hash constructor similar to that prescribed for
the store and SetQuantity methods (noticing a pattern?), whose keys and values
are id, again referring to the original source message; quantity, an integer
which introduces a grammatical context to the translated message and defaults
to -1 if not given (see SetQuantity above); and arguments, an optional listref
value containing values for any sprintf directives in the original message.
These arguments, if any, are applied to the translated message, which render
returns. This, for example:

    $instance->render(id => 'Greetings', arguments => [ qw/Hanumizzle/ ]);

...would yield 'Namaste, Hanumizzle', if the undefined or sunya value for the
message 'Greetings' was 'Namaste, %s'. If the target language(s) gramatically
inflected the contents of the strings given as arguments, it would be
necessary to compose calls to the render object; that is, pass the rendition
of a string from a completely different catalog in as a member of the
arguments arrayref.

render will also accept a single scalar argument, which will be construed as
the 'id' key in the arguments hash. quantity, as usual, will default to -1.
This is useful for the common idiom of an constant string such as 'OK' or
'Cancel', e.g., $instance->render('OK');

=item list

Returns a list of the messages translated in the catalog. Each member of this
list will correspond to the 'id' field passed to the store method.

=item parse($handle)

This method accepts a single argument, an IO::Handle whose referent XML file
to intern in the instance's message collection. (See </FORMAT> below.) parse
returns nothing, but will croak if unsuccessful.

=item write($handle)

The write method serves as the counterpart to the parse method, accepting an
IO::Handle with which to serialize the invocant instance. The output produced
takes human beings into account, and will follow reasonable indenting
guidelines.

=back

=head1 FORMAT

As said before, VASM::Catalog::Message introduces an extremely simple XML
format for serializing message catalogs. It is almost self-explanatory upon
examination:

    <?xml version="1.0" encoding="utf-8"?>

    <catalog>
      <message id='pra ted'>pradesh - country, state</message>
      <message id='sab'>sab - every, all</message>
      <message id='ayutthaya'>Ayodhya - indestructable</message>
      <message id='na korn'>nagar - city</message>
    </catalog>

'quantity' tags contain a 'degree' and 'match' attribute, which indicate a
linguistic quantity and matching regular expression, respectively. The 'match'
attribute may be empty, but 'degree' attributes may not; see rationale above.
Furthermore, any textual content of 'quantity' tags will simply be ignored, as
well as any other deviation that does not seriously conflict with the
semantics of the format itself. (To test this, try swapping the root tag name,
'catalog', with 'bananas'.) 'message' tags must have an 'id' attribute,
indicating the original message, and the remaining tags therein will be
interpreted as message translations for a given quantity, the only stipulation
being that they must correspond to a previously defined 'quantity'.

=head1 AUTHORS

hanumizzle L<mailto:hanumizzle@gmail.com>: principal author
cintyram: design suggestions
YaP: the recoding facility and implementation details

=cut
