# -*- project-name: VASM -*-
package VASM::Message;

use strict;
use warnings;
use base qw/VASM::Tree/;
use Carp;
use XML::Parser;
use XML::Writer;

our $VERSION = '1.12';

sub Store {
  # $args{qw/ID Quantity/} represents the original message in English and the
  # quantity expressed. $args{Message} is the translated message bearing all
  # these other qualities.
  my ($self, %args) = @_;

  # All traits must be defined
  croak 'All arguments must be given to Store!' 
    unless (grep { defined $args{$_} } qw/ID Quantity Message/) == 3;

  # It is now very simple to store the message in the catalog itself.
  $self->SUPER::Store('Catalog', @args{qw/ID Quantity/},
                      $args{Message}); # Separated for clarity
  
  return;
}

sub SetQuantity {
  my ($self, $quantity, $identifier) = @_;

  # All traits must be defined
  croak 'All arguments must be given to SetQuantity!' unless @_ == 3;

  # Put the new identifier in the Identifiers tree
  $self->SUPER::Store('Identifiers', $quantity, qr/$identifier/);

  return;
}

sub matchQuantity {
  my ($self, $quantity) = @_;

  croak 'Quantity undefined!' unless defined $quantity;

  # Iterate over the patterns provided in the Identifiers tree
  for my $key ($self->Children('Identifiers')) {
    return $key if
      $quantity =~ $self->Retrieve('Identifiers', $key);
  }

  return;
}

sub definedQuantity {
  my ($self, $name) = @_;

  croak 'Name argument undefined!' unless defined $name;
  return 1 if defined $self->Retrieve('Identifiers', $name);
  return; # Fall through
}

sub Render {
  my $self = shift; my %args;

  # If there is a single argument, it is shorthand for specifying the ID
  # attribute. Otherwise, the arguments specify a whole hash constructor.
  if (@_ == 1) { $args{ID} = $_[0] }
  else { %args = @_ }
  
  # This will become a /linguistic/ quantity, as opposed to $args{Quantity},
  # which is a number.
  my ($quantity, @arguments);
  
  # If no /numerical/ quantity was given, assume -1
  $args{Quantity} = -1 unless defined $args{Quantity};

  # At this point, Quantity and ID need to be defined in %args
  croak 'All arguments must be given to Render!' 
    unless (grep { defined $args{$_} } qw/Quantity ID/) == 2;
  
  # Here, give a linguistic identification to the raw number, and croak if
  # unsuccessful
  croak 'Quantity identifier undefined!'
    unless $quantity = $self->matchQuantity($args{Quantity});

  # Return the formatted message with all remaining arguments, after removing
  # the extraneous whitespace from it
  my $message = $self->Retrieve('Catalog', $args{ID}, $quantity);
  croak 'Message does not exist!' unless defined $message;
  
  $message =~ s!(?:^\s*|\s*$)!!gs; $message =~ s!\s*\n\s*! !g;
  defined $args{Arguments} and @arguments = @{ $args{Arguments} };
  return sprintf $message, @arguments;
}

# List the messages in a given catalog
sub List {
  my ($self) = @_;
  
  return $self->Children('Catalog');
}

sub parseHeavyQuantity {
  my ($self, $attrs) = @_;
  # Quantity tags must have the 'match' and 'degree' attributes. 'match',
  # however, may be null.
  croak 'Attributes not given in <quantity> tag!'
    unless exists $attrs->{match} and defined $attrs->{degree};
  # Add the new quantity definition
  $self->SetQuantity($attrs->{degree}, $attrs->{match});  
}

sub parseHeavyMessage {
  my ($self, $messages) = @_;

  # Message tags must have the 'id' attribute
  croak '<message> tag lacks id attribute!' 
    unless defined $messages->[0]->{id};
  # The remaining tags represent quantities and their translated messages
  for my $index (grep { $_ % 2 } (0..$#{ $messages })) {
    next if $messages->[$index] eq '0'; # Skip text elements

    # The quantity must be defined and the textual content of the tag must
    # be a defined value
    croak 'Undefined quantity or non-textual content in <message> tag!'
      unless $messages->[$index + 1]->[1] eq '0'
        and defined $messages->[$index + 1]->[2]
          and $self->definedQuantity($messages->[$index]);
    # And intern it...
    $self->Store(ID => $messages->[0]->{id},
                 Quantity => $messages->[$index],
                 Message => $messages->[$index + 1]->[2]);
  }
}

# Piggyback on the facilities of XML::Parser to instantiate a message catalog
# from the contents of an XML file for which an IO::Handle is given as input.
sub Parse {
  my ($self, $handle) = @_;
  # If this fails, XML::Parser will emit an error message and end the program.
  # Unfortunately, this message is always English. :( There is at least an
  # error context, which can be universally understood by the programmer.
  my $parser = XML::Parser->new(Style => 'Tree', ErrorContext => 3);
  my $tree = $parser->parse($handle); my $tags = $tree->[1];
  
  # Main loop: iterate through tags under <catalog>
  for my $index (grep { $_ % 2 } (0..$#{ $tags })) {
    next if $tags->[$index] eq '0'; # Skip text elements

    if ($tags->[$index] eq 'quantity') {
      $self->parseHeavyQuantity($tags->[$index + 1]->[0]);
    } elsif ($tags->[$index] eq 'message') {
      $self->parseHeavyMessage($tags->[$index + 1]);
    }
  }
  
  return 1; # Success!
}

sub Write {
  my ($self, $handle) = @_;

  croak 'Handle argument must be given to Write!' unless defined $handle;

  # Now create an XML::Writer instance and turn that puppy out
  my $xml = XML::Writer->new(OUTPUT => $handle, ENCODING => 'utf-8',
                             DATA_MODE => 1, DATA_INDENT => 2);

  # Open up the document
  $xml->xmlDecl;
  $xml->startTag('catalog');
  $xml->comment('Generated by VASM');

  # Emit the 'quantity' tags
  for my $quantity ($self->Children('Identifiers')) {
    $xml->emptyTag('quantity',
                   degree => $quantity,
                   match => $self->Retrieve('Identifiers', $quantity));
  }

  # Now emit the 'message' tags
  for my $message ($self->Children('Catalog')) {
    $xml->startTag('message', id => $message);
    # Message and quantity
    for my $quantity ($self->Children('Catalog', $message)) {
      $xml->dataElement(
        $quantity, $self->Retrieve('Catalog', $message, $quantity));
    }
    $xml->endTag('message');
  }

  $xml->endTag('catalog'); $xml->end; # End the document...

  return 1; # Success!
}

1;

__END__

=head1 NAME

VASM::Message - i18n message catalogs with XML serialization

=head1 SYNOPSIS

    use VASM::Message;

    # Typical usage; I'm getting CTS in my elbows
    # for some reason...
    my $instance = VASM::Message->new($ioHandle);
    print $instance->Render(ID => 'Phone Call',
                            Quantity => 0);
    # -> Niemand ruft mich an
    print $instance->Render(ID => 'Phone Call',
                            Quantity => 1, 
                            Arguments => [ 'Peter' ]);
    # -> Peter ruft mich an
    print $instance->Render(ID => 'Phone Call',
                            Quantity => 2,
                            Arguments => [ 'Peter und Sabine' ]);
    # -> Peter und Sabine rufen mich an

=head1 DESCRIPTION

VASM::Message provides for message translation from a source language
(presumably English) to the user's native language, including flexible support
for quantities expressed in the language via regexes and arguments in the
traditional sprintf notation. In addition to the simple object interface,
VASM::Message instances can digest an XML file containing quantity definitions
and translated messages. VASM::Message even allows the use of proper
inflection or plural forms when issuing messages in the source language
itself, when varying quantities are expected.

=head1 METHODS

These are all the methods generally useful to a programmer utilizing the
VASM::Message class. A few methods, such as matchQuantity, are really only
useful internally. If you wish to fully understand the inner workings of the
class, it will be necessary to peruse the source.

=over

=item new

new constructs a new instance of the VASM::Message class, and accepts an
optional argument, indicating an IO::Handle representing an XML file, whose
catalog definition you wish to introduce to the instance upon construction.
See L</FORMAT> below.

=item Store

The method Store accepts a hash constructor of three key/value pairs: 'ID',
the untranslated source language message; 'Quantity', a name designating a
linguistic quantity such as 'Nullar', 'Singular', 'Paucal', or 'Plural'; and
'Message', the translated message itself, representing these other two
qualities. This translated message may contain all the directives accepted by
Perl's sprintf function; see L<perlfunc> and the Render method below. This
method returns nothing.

=item SetQuantity

The SetQuantity method associates a named, gramatically significant quantity
with a regular expression, so that the use of a raw number in Render (see
below) will identify that proper gramatical form in emitting the translated
message; the names you give quantities are not important so long as the use is
consistent. Furthermore, the matching regular expression is usually anchored
on either side, to prevent a value like '10' to be considered nullar or
singular, for instance.

This method has a particularly interesting property: when multiple quantity
definitions might match a given number, the one interned in the instance
earlier takes precedence. Most languages, for instance, have some way of
expressing the idea of nothing (nullar), the idea of a single entity
(singular), and the idea of two or more entities (plural), some more elaborate
than others. Languages like Lihir and Polish really go to town here, but for a
relatively simple language like English, it's simple enough to add patterns
for the nullar and singular quantities and let a null pattern trap everything
else for plural:

    $instance->SetQuantity(Quantity => 'Nullar',
                           Identifier => '^0$');
    $instance->SetQuantity(Quantity => 'Singular',
                           Identifier => '^1$');
    $instance->SetQuantity(Quantity => 'Plural',
                           Identifier => '');

...and the XML catalog will let you do this, too. This implicit matching is
way more convenient than a fully qualified expression like
'^([2-9]|\p{IsDigit} {2,}$', which explictly matches plural quantities.

In cases where there is really no discernable quantity to speak of, one might
use a name like 'Undefined' and associate it with the regex '^-1$'. In a Hindi
message catalog, it might make sense to call such values 'Nirguna'. :D

SetQuantity is the strong, silent type, and returns nothing.

=item Render

The method Render accepts a hash constructor similar to that prescribed for
the Store and SetQuantity methods (noticing a pattern?), whose keys and values
are ID, again referring to the original source message; Quantity, an integer
which introduces a grammatical context to the translated message and defaults
to -1 if not given (see SetQuantity above); and Arguments, an optional listref
value containing values for any sprintf directives in the original message.
These arguments, if any, are applied to the translated message, which Render
returns. This, for example:

    $instance->Render(ID => 'Greetings',
                      Arguments => [ qw/Hanumizzle/ ]);

...would yield 'Namaste, Hanumizzle', if the undefined or sunya value for
the message 'Greetings' was 'Namaste, %s'.

Render will also accept a single scalar argument, which will be construed as
the 'ID' key in the arguments hash. Quantity, as usual, will default to -1.
This is useful for the common idiom of an constant string such as 'OK' or
'Cancel', e.g., $instance->Render('OK');

=item List

Returns a list of the messages translated in the catalog. Each member of this
list will correspond to the 'ID' field passed to the Store method.

=item Parse

This method accepts a single argument, an IO::Handle whose referent XML file
to intern in the instance's message collection. (See </FORMAT> below.) Because
this process is somewhat more complex than the methods previously described
and requires validation of user input (those damn translators!), Parse returns
a true value if successful.

=item Write

The Write method serves as the counterpart to the Parse method, accepting an
IO::Handle with which to serialize the invocant instance. The output produced
takes human beings into account, and will follow reasonable indenting
guidelines. Like Parse, Write also returns a true value if successful.

=back

=head1 FORMAT

As said before, VASM::Message introduces an extremely simple XML format for
serializing message catalogs. It is almost self-explanatory upon examination:

    <?xml version="1.0" encoding="utf-8"?>

    <catalog>
      <quantity degree='nullar' match='^0$'/>
      <quantity degree='singular' match='^1$'/>
      <quantity degree='plural' match=''/>
      <message id='Phone Call'>
        <nullar>Niemand ruft mich an</nullar>
        <singular>%s ruft mich an</singular>
        <plural>%s rufen mich an</plural>
      </message>
    </catalog>

'quantity' tags contain a 'degree' and 'match' attribute, which indicate a
linguistic quantity and matching regular expression, respectively. The 'match'
attribute may be empty, but 'degree' attributes may not; see rationale above.
Furthermore, any textual content of 'quantity' tags will simply be ignored, as
well as any other deviation that does not seriously conflict with the
semantics of the format itself. (To test this, try swapping the root tag name,
'catalog', with 'bananas'.) 'message' tags must have an 'id' attribute,
indicating the original message, and the remaining tags therein will be
interpreted as message translations for a given quantity, the only stipulation
being that they must correspond to a previously defined 'quantity'.

=head1 AUTHORS

hanumizzle L<mailto:hanumizzle@gmail.com> wrote VASM::Message.

=cut
