package Mojo::DOM::HTML; use Mojo::Base -base; use Mojo::Util qw(html_unescape xml_escape); use Scalar::Util 'weaken'; has 'xml'; has tree => sub { ['root'] }; my $ATTR_RE = qr/ ([^<>=\s]+) # Key (?: \s*=\s* (?: "([^"]*?)" # Quotation marks | '([^']*?)' # Apostrophes | ([^>\s]*) # Unquoted ) )? \s* /x; my $END_RE = qr!^\s*/\s*(.+)\s*!; my $TOKEN_RE = qr/ ([^<]*) # Text (?: <\?(.*?)\?> # Processing Instruction | ' if $e eq 'comment'; # CDATA return '[1] . ']]>' if $e eq 'cdata'; # Processing instruction return '[1] . '?>' if $e eq 'pi'; # Start tag my $start = 1; my $content = ''; if ($e eq 'tag') { $start = 4; # Open tag my $tag = $tree->[1]; $content .= "<$tag"; # Attributes my @attrs; for my $key (sort keys %{$tree->[2]}) { my $value = $tree->[2]{$key}; # No value push @attrs, $key and next unless defined $value; # Key and value push @attrs, qq{$key="} . xml_escape($value) . '"'; } my $attrs = join ' ', @attrs; $content .= " $attrs" if $attrs; # Element without end tag return $self->xml || $VOID{$tag} ? "$content />" : "$content>" unless $tree->[4]; # Close tag $content .= '>'; } # Render whole tree $content .= $self->_render($tree->[$_]) for $start .. $#$tree; # End tag $content .= '[1] . 
'>' if $e eq 'tag'; return $content; }

# Open a new element below the current node and make it the current node.
#
#   $start   - name of the element being opened
#   $attrs   - attributes of the element, stored in the new node unchanged
#   $current - reference to the scalar holding the current node, updated in
#              place to point at the new node
#
# In HTML mode (and unless the current node is the root) elements whose end
# tag is implied by the new start tag are closed first. Returns the new node.
sub _start {
  my ($self, $start, $attrs, $current) = @_;

AUTOCLOSE: {

    # Nothing is closed implicitly in XML mode or directly below the root
    last AUTOCLOSE if $self->xml || $$current->[0] eq 'root';

    # "li"
    if ($start eq 'li') {
      $self->_close($current, {li => 1}, 'ul');
    }

    # "p" (checked before the remaining element names)
    elsif ($PARAGRAPH{$start}) {
      $self->_end('p', $current);
    }

    # "head"
    elsif ($start eq 'body') {
      $self->_end('head', $current);
    }

    # "optgroup"
    elsif ($start eq 'optgroup') {
      $self->_end('optgroup', $current);
    }

    # "option"
    elsif ($start eq 'option') {
      $self->_end('option', $current);
    }

    # "colgroup", "thead", "tbody" and "tfoot"
    elsif ($start =~ /\A(?:colgroup|thead|tbody|tfoot)\z/) {
      $self->_close($current, \%TABLE, 'table');
    }

    # "tr"
    elsif ($start eq 'tr') {
      $self->_close($current, {tr => 1}, 'table');
    }

    # "th" and "td" (either one closes both kinds of cell)
    elsif ($start =~ /\A(?:th|td)\z/) {
      $self->_close($current, {$_ => 1}, 'table') for qw(th td);
    }

    # "dt" and "dd"
    elsif ($start =~ /\A(?:dt|dd)\z/) {
      $self->_end($_, $current) for qw(dt dd);
    }

    # "rt" and "rp"
    elsif ($start =~ /\A(?:rt|rp)\z/) {
      $self->_end($_, $current) for qw(rt rp);
    }
  }

  # New tag, the parent is only looked up after the implied end tags above
  # have been processed
  my $parent = $$current;
  my $node = ['tag', $start, $attrs, $parent];

  # The parent link is weakened so parent and child do not keep each other
  # alive
  weaken $node->[3];
  push @$parent, $node;

  return $$current = $node;
}

1;

=encoding utf8

=head1 NAME

Mojo::DOM::HTML - HTML/XML engine

=head1 SYNOPSIS

  use Mojo::DOM::HTML;

  # Turn HTML into DOM tree
  my $html = Mojo::DOM::HTML->new;
  $html->parse('

  <div><div id="a">A</div><div id="b">B</div></div>

  ');
  my $tree = $html->tree;

=head1 DESCRIPTION

L<Mojo::DOM::HTML> is the HTML/XML engine used by L<Mojo::DOM>.

=head1 ATTRIBUTES

L<Mojo::DOM::HTML> implements the following attributes.

=head2 tree

  my $tree = $html->tree;
  $html = $html->tree(['root', ['text', 'foo']]);

Document Object Model. Note that this structure should only be used very
carefully since it is very dynamic.

=head2 xml

  my $bool = $html->xml;
  $html = $html->xml($bool);

Disable HTML semantics in parser and activate case sensitivity, defaults to
auto detection based on processing instructions.

=head1 METHODS

L<Mojo::DOM::HTML> inherits all methods from L<Mojo::Base> and implements the
following new ones.

=head2 parse

  $html = $html->parse('test');

Parse HTML/XML fragment.

=head2 render

  my $xml = $html->render;

Render DOM to XML.

=head1 SEE ALSO

L<Mojolicious>, L<Mojolicious::Guides>, L<http://mojolicio.us>.

=cut