# Html.pm: Class Used for HTML Macros
package RDA::Library::Html;
# $Id: Html.pm,v 1.9 2014/11/07 18:06:49 RDA Exp $
# ARCS: $Header: /home/cvs/cvs/RDA_8/src/scripting/lib/RDA/Library/Html.pm,v 1.9 2014/11/07 18:06:49 RDA Exp $
#
# Change History
# 20141107 MSC Add the refresh method.
=head1 NAME
RDA::Library::Html - Class Used for HTML Macros
=head1 SYNOPSIS
require RDA::Library::Html;
=head1 DESCRIPTION
The objects of the C<RDA::Library::Html> class are used to interface with HTML
macros.
The following methods are available:
=cut
use strict;
BEGIN
{ use Exporter;
use RDA::Text qw(get_string);
use RDA::Driver::Library;
use RDA::Object;
use RDA::Object::Html;
use RDA::Value::List;
use RDA::Value::Scalar qw(:value);
}
# Define the global public variables
# NOTE(review): $STRINGS is declared here but is not assigned or read in the
# visible part of this file - confirm whether it is still needed.
use vars qw($STRINGS $SUSPEND $VERSION @ISA);
# Attribute handlers: 'trc' maps to a code reference that reads the 'HTML'
# trace level again from the collector stored under the object '_col' key.
# NOTE(review): presumably consumed by the 'suspend' support inherited from
# RDA::Driver::Library - confirm against that class.
$SUSPEND = {
trc => sub {return $_[0]->{'_col'}->get_trace('HTML')},
};
# Derive the version from the revision keyword: the major number followed by
# the minor number padded to two digits (revision 1.9 gives '1.09')
$VERSION = sprintf('%d.%02d', q$Revision: 1.9 $ =~ /(\d+)\.(\d+)/);
@ISA = qw(RDA::Driver::Library Exporter);
# Define the global private constants
# Class name that ref() of a macro argument must equal to be treated as a
# HTML object
my $HTML = 'RDA::Object::Html';
# Define the global private variables
# Dispatch table: macro name => [implementation, return type code].
# The run method converts the result of a 'L' macro with
# RDA::Value::List::new_from_data; any other code is passed as the type to
# RDA::Value::Scalar->new.
# NOTE(review): 'N', 'O', and 'T' presumably stand for number, object, and
# text - confirm against RDA::Value::Scalar.
# NOTE(review): htmlFix is typed 'N' although its documentation states that it
# returns the parser object reference (htmlDisable and htmlFilter use 'O') -
# confirm which one is intended.
my %tb_fct = (
'htmlAttributes' => [\&_m_attrs, 'L'],
'htmlContent' => [\&_m_content, 'L'],
'htmlDisable' => [\&_m_disable, 'O'],
'htmlExists' => [\&_m_exists, 'N'],
'htmlFilter' => [\&_m_filter, 'O'],
'htmlFind' => [\&_m_find, 'L'],
'htmlFix' => [\&_m_fix, 'N'],
'htmlLoadFile' => [\&_m_load_file, 'O'],
'htmlLoadResponse' => [\&_m_load_rsp, 'O'],
'htmlName' => [\&_m_name, 'T'],
'htmlParser' => [\&_m_parser, 'O'],
'htmlError' => [\&_m_error, 'N'],
'htmlTable' => [\&_m_table, 'L'],
'htmlText' => [\&_m_text, 'T'],
'htmlType' => [\&_m_type, 'T'],
'htmlValue' => [\&_m_value, 'T'],
'setHtmlTrace' => [\&_m_trace, 'N'],
);
# Give the version number of this package, as stored in $VERSION
sub Version { return $VERSION; }
=head2 S<$h = RDA::Library::Html-E<gt>new($driver,$collector)>
The object constructor. It takes the library driver and collector references as
arguments.
C<RDA::Library::Html> is represented by a blessed hash reference. The following
special keys are used:
=over 12
=item S< B<'trc' > > HTML trace flag
=item S< B<'_col'> > Reference to the collector object
=back
Internal keys are prefixed by an underscore.
=cut
sub new
{ my ($cls, $drv, $col) = @_;

  # Instantiate the macro object; the constructor accepts a class name or an
  # existing object
  my $pkg = ref($cls) || $cls;
  my $slf = bless {}, $pkg;

  # Declare all macro names to the driver, passing 'refresh' and 'suspend' as
  # extra arguments
  my @nam = keys(%tb_fct);
  $drv->register($slf, \@nam, 'refresh', 'suspend');

  # Attach the collector and hand the object reference back
  return refresh($slf, $col);
}
=head2 S<$h-E<gt>call($name,...)>
This method executes the macro code.
=cut
# Execute the code of the named macro, passing the library object followed by
# the remaining arguments. It returns whatever the macro implementation
# returns, in the context of the caller.
sub call
{ my ($slf, $nam, @arg) = @_;

  # Copy the table entry into a lexical before dereferencing it, as the run
  # method does. Dereferencing $tb_fct{$nam} directly would autovivify an
  # empty entry for an unknown macro name and leave it in the dispatch table.
  # An unknown name still fails when the undefined code reference is called.
  my $fct = $tb_fct{$nam};

  return &{$fct->[0]}($slf, @arg);
}
=head2 S<$h-E<gt>delete_object>
This method deletes the library control object.
=cut
sub delete_object
{ # Operate on $_[0] itself: it aliases the variable of the caller, which
  # must be cleared also

  # Report the caller when the object deletions are traced
  if ($RDA::Object::DELETE)
  { RDA::Object::dump_caller($_[0], 'Library');
  }

  # Empty the object hash, then undefine the reference held by the caller
  undef %{$_[0]};
  undef $_[0];
  return;
}
=head2 S<$h-E<gt>refresh($col)>
This method updates the library control object for a new collector.
=cut
sub refresh
{ my ($slf, $col) = @_;

  # Get the 'HTML' trace level from the new collector first
  my $trc = $col->get_trace('HTML');

  # Store the trace level and the collector reference in the object
  $slf->{'trc'} = $trc;
  $slf->{'_col'} = $col;

  # Return the object reference
  return $slf;
}
=head2 S<$h-E<gt>run($name,$arg,$ctx)>
This method executes the macro with the specified argument list in a given
context.
=cut
sub run
{ my ($slf, $nam, $arg, $ctx) = @_;

  # Get the macro definition: the implementation and its return type code
  my $def = $tb_fct{$nam};
  my $typ = $def->[1];

  # A list macro is called in list context and its result becomes a list value
  if ($typ eq 'L')
  { return RDA::Value::List::new_from_data(
      &{$def->[0]}($slf, $ctx, $arg->eval_as_array));
  }

  # Any other macro is called in scalar context; an undefined result is
  # reported as the undefined value
  my $ret = &{$def->[0]}($slf, $ctx, $arg->eval_as_array);
  return $VAL_UNDEF unless defined($ret);
  return RDA::Value::Scalar->new($typ, $ret);
}
=head1 HTML MACROS
=head2 S<htmlAttributes($node)>
This macro returns the list of node attributes.
=cut
# Implement htmlAttributes: return the result of get_attr for the node, or an
# empty list when the argument is not a RDA::Object::Html object. The unused
# local array of the previous version has been removed.
sub _m_attrs
{ my ($slf, $ctx, $htm) = @_;

  return () unless ref($htm) eq $HTML;
  return $htm->get_attr;
}
=head2 S<htmlContent($node[,$types[,$re]])>
This macro returns the list of child nodes after resolving the conditions. The
second argument specifies the list of child types to consider. The third
argument specifies a regular expression to identify objects that must be
replaced by their content. By default, it returns all child nodes.
=cut
# Implement htmlContent: return the result of get_content for the node, given
# the type filter and the replacement pattern, or an empty list when the first
# argument is not a RDA::Object::Html object. The unused local array of the
# previous version has been removed.
sub _m_content
{ my ($slf, $ctx, $htm, $flt, $cln) = @_;

  return () unless ref($htm) eq $HTML;
  return $htm->get_content($flt, $cln);
}
=head2 S<htmlError($node)>
This macro returns the number of parsing errors.
=cut
sub _m_error
{ my ($slf, $ctx, $htm) = @_;

  # Report no errors when the argument is not a HTML object
  return 0 unless ref($htm) eq $HTML;

  # Otherwise, ask the HTML object
  return $htm->get_error;
}
=head2 S<htmlExists($node,$key)>
This macro indicates whether the attribute exists in the specified node.
=cut
# Implement htmlExists: return the result of the exists method of the node for
# the specified key, or a false value when the first argument is not a
# RDA::Object::Html object. Any extra argument is ignored; the unused local
# that captured it in the previous version has been removed.
sub _m_exists
{ my ($slf, $ctx, $htm, $key) = @_;

  return ref($htm) eq $HTML && $htm->exists($key);
}
=head2 S<htmlFind($node,$query)>
This macro performs the query on the HTML object. It returns the result as an
object list.
=cut
sub _m_find
{ my ($slf, $ctx, $htm, $qry) = @_;

  # Run the query on a HTML object only; anything else gives an empty list
  return (ref($htm) eq $HTML) ? $htm->find($qry) : ();
}
=head2 S<htmlLoadFile($file[,$parser])>
This macro parses a HTML file and returns the resulting HTML object. You can
specify a parser as an argument to control what information is extracted.
=cut
sub _m_load_file
{ my ($slf, $ctx, $fil, $htm) = @_;

  # Create a parser with the current trace level when none is provided
  if (ref($htm) ne $HTML)
  { $htm = RDA::Object::Html->new($slf->{'trc'});
  }

  # Parse the file and return the result of the parse_file method
  return $htm->parse_file($fil);
}
=head2 S<htmlLoadResponse($response[,$parser])>
This macro parses the HTTP response content and returns the resulting HTML
object. You can specify a parser as an argument to control what information is
extracted.
=cut
sub _m_load_rsp
{ my ($slf, $ctx, $rsp, $htm) = @_;

  # Create a parser with the current trace level when none is provided
  if (ref($htm) ne $HTML)
  { $htm = RDA::Object::Html->new($slf->{'trc'});
  }

  # Without a response object, return the parser as it is
  return $htm unless ref($rsp) eq 'RDA::Object::Response';

  # Feed each element of the response content to the parser, then signal the
  # end of the input
  foreach my $buf (@{$rsp->get_content})
  { $htm->parse($buf);
  }
  $htm->eof;
  return $htm;
}
=head2 S<htmlName($node)>
This macro returns the node name when defined. Otherwise, it returns an
undefined value.
=cut
sub _m_name
{ my ($slf, $ctx, $htm) = @_;

  # Give an undefined value when the argument is not a HTML object
  return undef unless ref($htm) eq $HTML;

  # Otherwise, ask the HTML object for its name
  return $htm->get_name;
}
=head2 S<htmlTable($node,$level,$format)>
This macro extracts all significant tables from the parsed document. Cells in
bold or containing C or C in their style name are taken as
headings. It converts single cell rows in the header of the specified level. It
considers horizontal rulers and header lines also. The macro returns the result
as a list of raw data lines.
=cut
sub _m_table
{ my ($slf, $ctx, $htm, $lvl, $fmt) = @_;

  # Extract the tables from a HTML object only; anything else gives an empty
  # list
  return (ref($htm) eq $HTML) ? $htm->get_tables($lvl, $fmt) : ();
}
=head2 S<htmlText($node)>
This macro extracts the texts contained in the specified node. It returns an
empty string when it finds no text.
=cut
sub _m_text
{ my ($slf, $ctx, $htm) = @_;

  # Give an empty string when the argument is not a HTML object
  return q{} unless ref($htm) eq $HTML;

  # Otherwise, ask the HTML object for its text
  return $htm->get_text;
}
=head2 S<htmlType($node)>
This macro returns the node type.
=cut
sub _m_type
{ my ($slf, $ctx, $htm) = @_;

  # Give an undefined value when the argument is not a HTML object
  return undef unless ref($htm) eq $HTML;

  # Otherwise, ask the HTML object for its type
  return $htm->get_type;
}
=head2 S<htmlValue($node,$key,$default)>
This macro returns the value of the attribute in the specified node. When the
attribute is not defined, it returns the default value.
=cut
sub _m_value
{ my ($slf, $ctx, $htm, $key, $dft) = @_;

  # Fall back to the default value when the argument is not a HTML object
  return $dft unless ref($htm) eq $HTML;

  # Otherwise, let the HTML object resolve the attribute
  return $htm->get_value($key, $dft);
}
=head1 PARSER MACROS
=head2 S<htmlDisable($parser,$types)>
This macro indicates the list of child types to ignore. When the list is
empty, any type filtering is disabled. It returns the parser object reference.
=cut
sub _m_disable
{ my ($slf, $ctx, $htm, $flt) = @_;

  # Give an undefined value when the argument is not a HTML object
  return undef unless ref($htm) eq $HTML;

  # Otherwise, return the result of the disable method
  return $htm->disable($flt);
}
=head2 S<htmlFilter($parser,$tag,...)>
This macro specifies the list of the tags to consider when parsing the
document. When the list is empty, any tag filtering is disabled. It returns the
parser object reference.
=cut
sub _m_filter
{ # Detach the three leading arguments; the elements left in @_ are passed
  # to the filter method
  my ($slf, $ctx, $htm) = splice(@_, 0, 3);

  # Give an undefined value when the argument is not a HTML object
  return undef unless ref($htm) eq $HTML;

  # Otherwise, return the result of the filter method
  return $htm->filter(@_);
}
=head2 S<htmlFix($parser,$flag)>
This macro indicates that the parser can fix incorrect HTML code. It returns
the parser object reference.
=cut
sub _m_fix
{ my ($slf, $ctx, $htm, $flg) = @_;

  # Give an undefined value when the argument is not a HTML object
  return undef unless ref($htm) eq $HTML;

  # Otherwise, return the result of the fix method
  return $htm->fix($flg);
}
=head2 S<htmlParser()>
This macro initializes a new HTML parser and returns its reference.
=cut
sub _m_parser
{ my ($slf) = @_;

  # Create a parser that uses the current trace level of the library
  return RDA::Object::Html->new($slf->{'trc'});
}
=head2 S<setHtmlTrace([$level])>
This macro sets the HTML parsing level:
=over 7
=item B< 0 > No trace
=item B< 1 > Trace the HTML parsing
=back
The level is unchanged if the new level is not defined.
It returns the previous level.
=cut
sub _m_trace
{ my ($slf, $ctx, $lvl) = @_;

  # Remember the current level before any change
  my $prv = $slf->{'trc'};

  # Replace the level only when a new level is defined
  if (defined($lvl))
  { $slf->{'trc'} = $lvl;
  }

  # Return the previous level
  return $prv;
}
1;
__END__
=head1 SEE ALSO
L,
L,
L,
L,
L,
L,
L,
L
=head1 COPYRIGHT NOTICE
Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved.
=head1 TRADEMARK NOTICE
Oracle and Java are registered trademarks of Oracle and/or its
affiliates. Other names may be trademarks of their respective owners.
=cut