package RIB::BIDMParser;

use RIB::Util;
require HTML::Parser;
@ISA = qw(HTML::Parser);

use HTML::Entities ();

use strict;
use vars qw( $VERSION $DEBUG $DONE );
$DEBUG = 1;
$DONE = 0;
$VERSION = 0.9;

sub new {
    my $class = shift;
    my $self = bless HTML::Parser->new,$class;
    $self->{'META'} = {}; # Hash of name=>content pair. Each key of the hash
			   # is a BIDM string. Each value is a list
			   # of one or more values for the BIDM string.
    $self->{'LINK'} = {}; # Hash of rel=>href and and rev=>href pairs.

    $self->{'_errormsg'} = [];

    return $self;
}

sub parse {
    my $self = shift;
    eval { $self->SUPER::parse(@_) };
    if ($@) {
        print $@ if $DEBUG;
	$self->{'_buf'} = '';  # flush rest of buffer
        push @{$self->{'_errormsg'}}, "HTML::Parser::Parse Failed!";
        return '';
    }
    return 1;
}

sub ErrorMsg {
    shift @{shift->{'_errormsg'}};
}

sub parse_file {
    my ($self, $file) = @_;
    unless (open(F, $file)){
        push @{$self->{'_errormsg'}}, "Can't open $file: $!";
        return 0;
    }
    my $chunk = '';
    while(read(F, $chunk, 2048)) {
        $self->parse($chunk);
    }
    close(F);
    $self->parse(undef); #EOF
    return 1;
}

sub error_msg {
    my $self = shift;
    shift @{$self->{'_errormsg'}};
}

sub start {
    my ($self,$tag,$attr) = @_;

    if ( $tag eq "meta"){ 
	if (exists $attr->{'name'}){
	    my $buf = pretty($attr->{'name'});
	    my $field;
	    if ($buf =~ /^(.*)\.(.*)\.(.*)$/){
		unless (exists $self->{'CLASS'}){
		    $self->{'CLASS'} = $2;
		}
		$field = $3;
	    } elsif ($buf =~ /^(.*)\.(.*)$/){
		unless (exists $self->{'CLASS'}){
                    $self->{'CLASS'} = $1;
                }
                $field = $2;
	    }
	    unless (exists $self->{'META'}{$field}){
		$self->{'META'}{$field} = [];
	    }
	    my $array = $self->{'META'}{$field};
	    push @$array, pretty($attr->{'content'});
	}
    } elsif ($tag eq "link"){
	if (exists $attr->{'rel'}){
	    my $buf = pretty($attr->{'rel'});
	    my $field;
	    if ($buf =~ /^(.*)\.(.*)\.(.*)\.(.*)$/){
                unless (exists $self->{'CLASS'}){
                    $self->{'CLASS'} = $2;
                }
                $field = $3;
            } elsif ($buf =~ /^(.*)\.(.*)\.(.*)$/){
                unless (exists $self->{'CLASS'}){
                    $self->{'CLASS'} = $1;
                }
                $field = $2;
            }
	    unless (exists $self->{'LINK'}{$field}){
		$self->{'LINK'}{$field} = [];
	    }
	    my $array = $self->{'LINK'}{$field};
	    push @$array, pretty($attr->{'href'});
	}
	if (exists $attr->{'rev'}){
	    my $buf = pretty($attr->{'rev'});
	    my $field;
	    if ($buf =~ /^(.*)\.(.*)\.(.*)\.(.*)$/){
                unless (exists $self->{'CLASS'}){
                    $self->{'CLASS'} = $2;
                }
                $field = $3;
            } elsif ($buf =~ /^(.*)\.(.*)\.(.*)$/){
                unless (exists $self->{'CLASS'}){
                    $self->{'CLASS'} = $1;
                }
                $field = $2;
            }
	    unless (exists $self->{'LINK'}{$field}){
		$self->{'LINK'}{$field} = [];
	    }
	    my $array = $self->{'LINK'}{$field};
	    push @$array, pretty($attr->{'href'});
	}
    }
}

sub link {
    my $self = shift;
    return %{$self->{'LINK'}};
}

sub meta {
    my $self = shift;
    return %{$self->{'META'}};
}

sub pretty {
    my $buf = shift;
    HTML::Entities::decode($buf);
    $buf =~ s/^\s+//;
    $buf =~ s/\s+$//;
    $buf =~ s/\s+/ /g;

    return $buf;
}

sub valuesof {
    my ($self,$field) = @_;
    return @{$self->{'META'}->{$field}} if (exists $self->{'META'}->{$field});
    return @{$self->{'LINK'}->{$field}} if (exists $self->{'LINK'}->{$field});
    return ();
}

sub valueof {
    my ($self,$field) = @_;
    return () unless $self->valuesof($field);
    my @array = $self->valuesof($field);
    return $array[0];
}

1;
