# Mojo::DOM::HTML: package preamble, attribute declarations ("xml", "tree")
# and the precompiled regexes used for tokenizing, followed by the body of a
# recursive node renderer (it calls $self->_render on each child).
#
# NOTE(review): this line appears to have lost its original line breaks and
# every "<...>"-shaped span of text. As a result the end of $TOKEN_RE and the
# head of the rendering sub (presumably "sub _render") are missing, and
# several string literals below are visibly damaged (e.g. the comment/CDATA/
# end-tag strings). Because the text sits on one physical line, the first "#"
# also comments out everything after it. Restore from a pristine copy before
# relying on any of this; nothing below has been altered.
package Mojo::DOM::HTML; use Mojo::Base -base; use Mojo::Util qw(html_unescape xml_escape); use Scalar::Util 'weaken'; has 'xml'; has tree => sub { ['root'] }; my $ATTR_RE = qr/ ([^<>=\s]+) # Key (?: \s*=\s* (?: "([^"]*?)" # Quotation marks | '([^']*?)' # Apostrophes | ([^>\s]*) # Unquoted ) )? \s* /x; my $END_RE = qr!^\s*/\s*(.+)\s*!; my $TOKEN_RE = qr/ ([^<]*) # Text (?: <\?(.*?)\?> # Processing Instruction | ' if $e eq 'comment'; # CDATA return '[1] . ']]>' if $e eq 'cdata'; # Processing instruction return '' . $tree->[1] . '?>' if $e eq 'pi'; # Start tag my $start = 1; my $content = ''; if ($e eq 'tag') { $start = 4; # Open tag my $tag = $tree->[1]; $content .= "<$tag"; # Attributes my @attrs; for my $key (sort keys %{$tree->[2]}) { my $value = $tree->[2]{$key}; # No value push @attrs, $key and next unless defined $value; # Key and value push @attrs, qq{$key="} . xml_escape($value) . '"'; } my $attrs = join ' ', @attrs; $content .= " $attrs" if $attrs; # Element without end tag return $self->xml || $VOID{$tag} ? "$content />" : "$content>$tag>" unless $tree->[4]; # Close tag $content .= '>'; } # Render whole tree $content .= $self->_render($tree->[$_]) for $start .. $#$tree; # End tag $content .= '' . $tree->[1] . 
'>' if $e eq 'tag';

  return $content;
}

# Open a new element node and make it the current node.
#
#   $start   - tag name of the element being opened
#   $attrs   - attributes for the element; stored untouched in slot 2 of the
#              new node
#   $current - reference to the scalar that holds the current node; it is
#              updated in place to point at the newly created node
#
# Unless the "xml" attribute is true or the current node is the root, some
# start tags first close elements whose end tag may be omitted in HTML, by
# delegating to _close/_end.
# NOTE(review): _close, _end, %PARAGRAPH and %TABLE are defined outside this
# block; presumably _close/_end walk up from the current node and move
# $$current accordingly -- confirm against their definitions.
#
# Node layout built here: ['tag', name, attrs, parent, children...]. Children
# are appended with push, so they start at index 4.
sub _start {
  my ($self, $start, $attrs, $current) = @_;

  # Autoclose optional HTML elements
  if (!$self->xml && $$current->[0] ne 'root') {

    # "li"
    if ($start eq 'li') { $self->_close($current, {li => 1}, 'ul') }

    # "p"
    elsif ($PARAGRAPH{$start}) { $self->_end('p', $current) }

    # "head"
    elsif ($start eq 'body') { $self->_end('head', $current) }

    # "optgroup"
    elsif ($start eq 'optgroup') { $self->_end('optgroup', $current) }

    # "option"
    elsif ($start eq 'option') { $self->_end('option', $current) }

    # "colgroup", "thead", "tbody" and "tfoot"
    elsif (grep { $_ eq $start } qw(colgroup thead tbody tfoot)) {
      $self->_close($current, \%TABLE, 'table');
    }

    # "tr"
    elsif ($start eq 'tr') { $self->_close($current, {tr => 1}, 'table') }

    # "th" and "td"
    elsif ($start eq 'th' || $start eq 'td') {
      $self->_close($current, {$_ => 1}, 'table') for qw(th td);
    }

    # "dt" and "dd"
    elsif ($start eq 'dt' || $start eq 'dd') {
      $self->_end($_, $current) for qw(dt dd);
    }

    # "rt" and "rp"
    elsif ($start eq 'rt' || $start eq 'rp') {
      $self->_end($_, $current) for qw(rt rp);
    }
  }

  # New tag; the parent link in slot 3 is weakened so the parent <-> child
  # cycle does not keep the tree alive
  my $new = ['tag', $start, $attrs, $$current];
  weaken $new->[3];
  push @$$current, $new;
  $$current = $new;
}

1;

=encoding utf8

=head1 NAME

Mojo::DOM::HTML - HTML/XML engine

=head1 SYNOPSIS

  use Mojo::DOM::HTML;

  # Turn HTML into DOM tree
  my $html = Mojo::DOM::HTML->new;
  $html->parse('
A
B