#!/usr/bin/perl -w
# See also HTML::Form module
use HTML::PullParser ();
use HTML::Entities qw(decode_entities);
use Data::Dump qw(dump);
my @FORM_TAGS = qw(form input textarea button select option);
my $p = HTML::PullParser->new(file => shift || "xxx.html",
start => 'tag, attr',
end => 'tag',
text => '@{text}',
report_tags => \@FORM_TAGS,
) || die "$!";
# a little helper function
sub get_text {
my($p, $stop) = @_;
my $text;
while (defined(my $t = $p->get_token)) {
if (ref $t) {
$p->unget_token($t) unless $t->[0] eq $stop;
last;
}
else {
$text .= $t;
}
}
return $text;
}
my @forms;
while (defined(my $t = $p->get_token)) {
next unless ref $t; # skip text
if ($t->[0] eq "form") {
shift @$t;
push(@forms, $t);
while (defined(my $t = $p->get_token)) {
next unless ref $t; # skip text
last if $t->[0] eq "/form";
if ($t->[0] eq "select") {
my $sel = $t;
push(@{$forms[-1]}, $t);
while (defined(my $t = $p->get_token)) {
next unless ref $t; # skip text
last if $t->[0] eq "/select";
#print "select ", dump($t), "\n";
if ($t->[0] eq "option") {
my $value = $t->[1]->{value};
my $text = get_text($p, "/option");
unless (defined $value) {
$value = decode_entities($text);
}
push(@$sel, $value);
}
else {
warn "$t->[0] inside select";
}
}
}
elsif ($t->[0] =~ /^\/?option$/) {
warn "option tag outside select";
}
elsif ($t->[0] eq "textarea") {
push(@{$forms[-1]}, $t);
$t->[1]{value} = get_text($p, "/textarea");
}
elsif ($t->[0] =~ m,^/,) {
warn "stray $t->[0] tag";
}
else {
push(@{$forms[-1]}, $t);
}
}
}
else {
warn "form tag $t->[0] outside form";
}
}
print dump(\@forms), "\n";