# -*- project-name: VASM -*-
package VASM::Message;

use strict;
use warnings;
use XML::Parser;
use VASM::Tree;

our $VERSION = '1.02';

sub new {
  # Constructor. $class is the invocant; $file is an optional path to an XML
  # catalog that is handed to Parse straight after construction.
  my ($class, $file) = @_;

  # Two trees back every instance: one for the translated messages and one
  # for the quantity identifiers.
  my $catalog = VASM::Tree->new;
  my $identifiers = VASM::Tree->new;

  my $instance = bless {
    Catalog     => $catalog,
    Identifiers => $identifiers,
  }, $class;

  # Load the catalog file when one was named; the result of Parse is not
  # inspected here.
  $instance->Parse($file) if defined $file;

  return $instance;
}

sub Store {
  # Named arguments:
  #   ID       - the original (untranslated) message
  #   Quantity - the name of the quantity the translation expresses
  #   Message  - the translated message for that ID and quantity
  my ($self, %args) = @_;

  my ($id, $quantity, $message) = @args{qw/ID Quantity Message/};

  # The catalog tree is keyed first by ID, then by quantity name.
  $self->{Catalog}->Store($id, $quantity, $message);

  return;
}

sub SetQuantity {
  # Named arguments:
  #   Quantity   - the name of the quantity being defined
  #   Identifier - a regular expression (as a string) recognizing it
  my ($self, %args) = @_;

  my $name = $args{Quantity};
  # Compile the pattern once, here, rather than on every match
  my $pattern = qr/$args{Identifier}/;

  $self->{Identifiers}->Store($name, $pattern);

  return;
}
  
sub MatchQuantity {
  # Given a value, answer the name of the first quantity (in the order that
  # the Identifiers tree lists its children) whose pattern matches it, or
  # nothing at all when no pattern matches.
  my ($self, $quantity) = @_;
  my $identifiers = $self->{Identifiers};

  for my $name ($identifiers->Children()) {
    my $pattern = $identifiers->Retrieve($name);
    next unless $quantity =~ $pattern;
    return $name;
  }

  return;
}

sub DefinedQuantity {
  # Answer 1 when the Identifiers tree holds a defined value under $name,
  # and nothing otherwise.
  my ($self, $name) = @_;

  my $pattern = $self->{Identifiers}->Retrieve($name);
  return unless defined $pattern;

  return 1;
}

sub Render {
  # Named arguments:
  #   ID        - the original (untranslated) message to look up
  #   Quantity  - a /numerical/ quantity; -1 is assumed when not given
  #   Arguments - optional array reference of values for the sprintf
  #               directives in the translated message
  # Returns the formatted translation, or nothing when no quantity
  # definition matches the number or the catalog holds no translation for
  # the ID under the matched quantity.
  my ($self, %args) = @_;

  # If no /numerical/ quantity was given, assume -1
  my $number = defined $args{Quantity} ? $args{Quantity} : -1;

  # Give a /linguistic/ identification to the raw number. Test with defined
  # rather than truth, so that a quantity whose name happens to be false
  # ('0' or the empty string) is not mistaken for a failed match.
  my $quantity = $self->MatchQuantity($number);
  return unless defined $quantity;

  my $message = $self->{Catalog}->Retrieve($args{ID}, $quantity);
  my @arguments = defined $args{Arguments} ? @{ $args{Arguments} } : ();
  return unless defined $message;

  # Fill in the translated message with the supplied arguments
  return sprintf $message, @arguments;
}

# Remove a message, with every one of its translations, from the catalog.
# The message is named by its ID, i.e. the original untranslated text.
sub Delete {
  my ($self, $id) = @_;

  # Without an ID there is nothing sensible to delete; refusing here also
  # guards against wiping out the whole catalog.
  if (defined $id) {
    $self->{Catalog}->Delete($id);
  }

  return;
}

# Piggyback on the facilities of XML::Parser to populate this instance from
# the XML catalog file whose path is given as input. The 'Tree' style of
# XML::Parser represents an element as a pair (tag name, content), where the
# content is an array reference of the form
#
#   [ \%attributes, tag, content, tag, content, ... ]
#
# and a run of text appears as the pseudo-tag '0' followed by the string
# itself. Read the POD for XML::Parser (the part about the 'Tree' style) for
# the full story.
#
# Returns 1 on success and nothing as soon as a 'quantity' or 'message'
# element fails validation. Anything interned before the offending element
# stays in the instance.
sub Parse {
  my ($self, $file) = @_;
  # If this fails, XML::Parser will emit an error message and end the program.
  # Unfortunately, this message is always English. :( There is at least an
  # error context, which can be universally understood by the programmer.
  my $parser = XML::Parser->new(Style => 'Tree', ErrorContext => 3);
  my $tree = $parser->parsefile($file);
  # $tree is [ root tag name, root content ]; the name of the root tag is
  # never examined.
  my $tags = $tree->[1];

  # Main loop: iterate through tags under the root element. Index 0 holds
  # the attributes of the root, so tag names sit at the odd indices and
  # their content at the even index that follows.
  for my $main_idx (grep { $_ % 2 } (0..$#{ $tags })) {
    my ($tag, $content) = @{ $tags }[$main_idx, $main_idx + 1];
    next if $tag eq '0'; # Skip text elements

    if ($tag eq 'quantity') {
      # Quantity tags must have the 'match' and 'degree' attributes. 'match',
      # however, may be null.
      my $attributes = $content->[0];
      return unless exists $attributes->{match}
        and defined $attributes->{degree};
      # Add the new quantity definition
      $self->SetQuantity(Quantity => $attributes->{degree},
                         Identifier => $attributes->{match});
    } elsif ($tag eq 'message') {
      # Message tags must have the 'id' attribute
      my $id = $content->[0]->{id};
      return unless defined $id;
      # The remaining tags represent quantities and their translated messages
      for my $msg_idx (grep { $_ % 2 } (0..$#{ $content })) {
        my ($degree, $body) = @{ $content }[$msg_idx, $msg_idx + 1];
        next if $degree eq '0'; # Skip text elements

        # The first child of the tag must be text with a defined value, and
        # the quantity must have been defined beforehand. An empty tag (such
        # as <plural/>) has no children at all, hence the defined test ahead
        # of the string comparison: it fails validation quietly rather than
        # with an 'uninitialized value' warning.
        return unless defined $body->[1]
          and $body->[1] eq '0'
            and defined $body->[2]
              and $self->DefinedQuantity($degree);
        # And intern it...
        $self->Store(ID => $id,
                     Quantity => $degree,
                     Message => $body->[2]);
      }
    }
  }

  return 1; # Success!
}

1;

__END__

=head1 NAME

VASM::Message - i18n message catalogs with XML serialization

=head1 SYNOPSIS

    use VASM::Message;

    # Typical usage; I'm getting CTS in my elbows
    # for some reason and I feel lazy...
    my $instance = VASM::Message->new('Deutsch.xml');
    print $instance->Render(ID => 'Phone Call',
                            Quantity => 0);
    # -> Niemand ruft mich an
    print $instance->Render(ID => 'Phone Call',
                            Quantity => 1, 
                            Arguments => [ 'Peter' ]);
    # -> Peter ruft mich an
    print $instance->Render(ID => 'Phone Call',
                            Quantity => 2,
                            Arguments => [ 'Peter und Sabine' ]);
    # -> Peter und Sabine rufen mich an

=head1 DESCRIPTION

VASM::Message provides for message translation from a source language
(presumably English) to the user's native language, including flexible support
for quantities expressed in the language via regexes and arguments in the
traditional sprintf notation. In addition to the simple object interface,
VASM::Message instances can digest an XML file containing quantity definitions
and translated messages. VASM::Message even allows the use of proper
inflection or plural forms when issuing messages in the source language
itself, when varying quantities are expected.

=head1 METHODS

These are all the methods generally useful to a programmer utilizing the
VASM::Message class. A few methods, such as MatchQuantity, are really only
useful internally, and Delete is just in there to be safe. If you wish to
fully understand the workings of the class, it will be necessary to peruse the
source.

=over 4

=item new

new constructs a new instance of the VASM::Message class, and accepts an
optional argument, indicating the pathname of an XML file whose catalog
definition you wish to introduce to the instance upon construction. See
L</FORMAT> below.

=item Store

The method Store accepts a hash constructor of three key/value pairs: 'ID',
the untranslated source language message; 'Quantity', a name designating a
linguistic quantity such as 'Nullar', 'Singular', 'Paucal', or 'Plural'; and
'Message', the translated message itself, representing these other two
qualities. This translated message may contain all the directives accepted by
Perl's sprintf function; see L<perlfunc> and the Render method below. This
method returns nothing.

=item SetQuantity

The SetQuantity method associates a named, grammatically significant quantity
with a regular expression, so that the use of a raw number in Render (see
below) will identify that proper grammatical form in emitting the translated
message; the names you give quantities are not important so long as the use is
consistent. Furthermore, the matching regular expression is usually anchored
on either side, to prevent a value like '10' from being considered nullar or
singular, for instance.

This method has a particularly interesting property: when multiple quantity
definitions might match a given number, the one interned in the instance
earlier takes precedence. Most languages, for instance, have some way of
expressing the idea of nothing (nullar), the idea of a single entity
(singular), and the idea of two or more entities (plural), some more elaborate
than others. Languages like Lihir and Polish really go to town here, but for a
relatively simple language like English, it's simple enough to add patterns
for the nullar and singular quantities and let a null pattern trap everything
else for plural:

    $instance->SetQuantity(Quantity => 'Nullar',
                           Identifier => '^0$');
    $instance->SetQuantity(Quantity => 'Singular',
                           Identifier => '^1$');
    $instance->SetQuantity(Quantity => 'Plural',
                           Identifier => '');

...and the XML catalog will let you do this, too. This implicit matching is
way more convenient than a fully qualified expression like
'^([2-9]|\p{IsDigit}{2,})$', which explicitly matches plural quantities.

In cases where there is really no discernable quantity to speak of, one might
use a name like 'Undefined' and associate it with the regex '^-1$'. In a Hindi
message catalog, it might make sense to call such values 'Nirguna'. :D

SetQuantity is the strong, silent type, and returns nothing.

=item Render

The method Render accepts a hash constructor similar to that prescribed for
the Store and SetQuantity methods (noticing a pattern?), whose keys and values
are ID, again referring to the original source message; Quantity, an integer
which introduces a grammatical context to the translated message and defaults
to -1 if not given (see SetQuantity above); and Arguments, an optional listref
value containing values for any sprintf directives in the original message.
These arguments, if any, are applied to the translated message, which Render
returns. This, for example:

    $instance->Render(ID => 'Greetings',
                      Arguments => [ qw/Hanumizzle/ ]);

...would yield 'Namaste, Hanumizzle', if the Undefined or Nirguna value for
the message 'Greetings' was 'Namaste, %s'.

=item Parse

This method accepts a single argument naming an XML file to intern in the
instance's message collection. (See L</FORMAT> below.) Because this process is
somewhat more complex than the methods previously described and requires
validation of user input (those damn translators!), Parse returns a true value
if successful.

=back

=head1 FORMAT

As said before, VASM::Message introduces an extremely simple XML format for
serializing message catalogs. It is almost self-explanatory upon examination:

    <catalog>
      <quantity degree='nullar' match='^0$'/>
      <quantity degree='singular' match='^1$'/>
      <quantity degree='plural' match=''/>
      <message id='Phone Call'>
        <nullar>Niemand ruft mich an</nullar>
        <singular>%s ruft mich an</singular>
        <plural>%s rufen mich an</plural>
      </message>
    </catalog>

'quantity' tags contain a 'degree' and 'match' attribute, which indicate a
linguistic quantity and matching regular expression, respectively. The 'match'
attribute may be empty, but 'degree' attributes may not; see rationale above.
Furthermore, any textual content of 'quantity' tags will simply be ignored, as
well as any other deviation that does not seriously conflict with the
semantics of the format itself. (To test this, try swapping the root tag name,
'catalog', with 'bananas'.) 'message' tags must have an 'id' attribute,
indicating the original message, and the remaining tags therein will be
interpreted as message translations for a given quantity, the only stipulation
being that they must correspond to a previously defined 'quantity'.

=head1 AUTHORS

hanumizzle L<mailto:hanumizzle@gmail.com> wrote VASM::Message.

=cut
