package Mojo::DOM;
use Mojo::Base -strict;
use overload
  '%{}'    => sub { shift->attr },
  bool     => sub {1},
  '""'     => sub { shift->to_xml },
  fallback => 1;

# "Fry: This snow is beautiful. I'm glad global warming never happened.
#  Leela: Actually, it did. But thank God nuclear winter canceled it out."
use Carp 'croak';
use Mojo::Collection;
use Mojo::DOM::CSS;
use Mojo::DOM::HTML;
use Mojo::Util 'squish';
use Scalar::Util qw(blessed weaken);

  my $self = shift;

  my ($package, $method) = our $AUTOLOAD =~ /^([\w:]+)::(\w+)$/;
  croak "Undefined subroutine &${package}::$method called"
    unless blessed $self && $self->isa(__PACKAGE__);

  # Search children of current element
  my $children = $self->children($method);
  return @$children > 1 ? $children : $children->[0] if @$children;
  croak qq{Can't locate object method "$method" via package "$package"};

# Empty destructor, defined explicitly so that object destruction is not
# dispatched to AUTOLOAD
sub DESTROY { }

# Construct a new array-based object whose first slot holds a fresh
# Mojo::DOM::HTML instance. Works as a class or an instance method. If any
# arguments are given they are handed to "parse" and its result is returned.
sub new {
  my $class = shift;
  my $self = bless [Mojo::DOM::HTML->new], ref $class || $class;
  return @_ ? $self->parse(@_) : $self;
}

# Text content of this node including nested elements (recursion flag set);
# remaining arguments are forwarded to "_content"
sub all_text {
  my $self = shift;
  return $self->_content(1, @_);
}

# Collect the ancestors of this element and optionally filter them with a
# selector
sub ancestors {
  my ($self, $selector) = @_;
  return _select($self->_collect(_ancestors($self->tree)), $selector);
}

# Insert a fragment after this element (offset 1 relative to its position)
sub append {
  my $self = shift;
  return $self->_add(1, @_);
}

# Parse the (stringified) fragment and push its nodes onto the end of this
# node's content, linking them to this node as parent. Returns the invocant.
sub append_content {
  my ($self, $new) = @_;
  my $tree = $self->tree;
  push @$tree, _link($self->_parse("$new"), $tree);
  return $self;
}

# First result of "find" for the given arguments
sub at {
  my $self = shift;
  return $self->find(@_)->[0];
}

# Attribute accessor.
#   no arguments            - returns the attribute hash reference (a new empty
#                             hash for the root node)
#   one non-reference value - returns that attribute, or '' if it is undefined
#   hash ref or key/value   - merges the values into the attributes and returns
#                             the invocant
sub attr {
  my $self = shift;

  # Hash
  my $tree = $self->tree;
  my $attrs = $tree->[0] eq 'root' ? {} : $tree->[2];
  return $attrs unless @_;

  # Get
  return defined $attrs->{$_[0]} ? $attrs->{$_[0]} : ''
    unless @_ > 1 || ref $_[0];

  # Set
  %$attrs = (%$attrs, %{ref $_[0] ? $_[0] : {@_}});

  return $self;
}

# Collect the direct child nodes of type "tag" and filter them with the
# optional selector passed in the remaining arguments
sub children {
  my $self = shift;
  return _select(
    $self->_collect(grep { $_->[0] eq 'tag' } _nodes($self->tree)), @_);
}

# Render every content node of this node (not the node itself) and join the
# results into one string, using this object's xml setting
sub content_xml {
  my $self = shift;
  my $xml  = $self->xml;
  return join '', map { _render($_, $xml) } _nodes($self->tree);
}

# Run the selector arguments through Mojo::DOM::CSS against this node's tree
# and wrap the resulting nodes in a collection of objects
sub find {
  my $self = shift;
  my $results = Mojo::DOM::CSS->new(tree => $self->tree)->select(@_);
  return $self->_collect(@$results);
}

# Return the invocant if Mojo::DOM::CSS reports a match for the given
# arguments against this node's tree, undef otherwise
sub match {
  my $self = shift;
  return undef unless Mojo::DOM::CSS->new(tree => $self->tree)->match(@_);
  return $self;
}

# Find the namespace of this element. Returns '' for the root node. If the
# element type carries a prefix ("prefix:name"), the "xmlns:prefix" attribute
# is looked up on this element and then on each parent; otherwise the first
# defined "xmlns" attribute found on the way up is used. Returns '' if nothing
# is found before reaching the root.
sub namespace {
  my $self = shift;

  return '' if (my $current = $self->tree)->[0] eq 'root';

  # Extract namespace prefix and search parents
  my $ns = $current->[1] =~ /^(.*?):/ ? "xmlns:$1" : undef;
  while ($current->[0] ne 'root') {

    # Namespace for prefix
    my $attrs = $current->[2];
    if ($ns) { /^\Q$ns\E$/ and return $attrs->{$_} for keys %$attrs }

    # Namespace attribute
    elsif (defined $attrs->{xmlns}) { return $attrs->{xmlns} }

    # Continue with the parent node
    $current = $current->[3];
  }

  return '';
}

# First element of the "after" list produced by "_siblings"
sub next {
  my $self = shift;
  return $self->_siblings->[1][0];
}

# Return a new object for the parent node (slot 3 of this node) carrying the
# same xml setting, or undef if this node is the root
sub parent {
  my $self = shift;
  return undef if (my $tree = $self->tree)->[0] eq 'root';
  return $self->new->tree($tree->[3])->xml($self->xml);
}

# Hand the first argument to the wrapped object's "parse" via "_delegate"
sub parse {
  my ($self, $markup) = @_;
  return $self->_delegate(parse => $markup);
}

# Insert a fragment before this element (offset 0 relative to its position)
sub prepend {
  my $self = shift;
  return $self->_add(0, @_);
}

# Parse the (stringified) fragment and insert its nodes at the start of this
# node's content, linking them to this node as parent. Returns the invocant.
sub prepend_content {
  my ($self, $new) = @_;
  my $tree = $self->tree;
  splice @$tree, _offset($tree), 0, _link($self->_parse("$new"), $tree);
  return $self;
}

# Last element of the "before" list produced by "_siblings"
sub previous {
  my $self = shift;
  return $self->_siblings->[0][-1];
}

# Removal is implemented as replacement with an empty fragment
sub remove {
  my $self = shift;
  return $self->replace('');
}

# Replace this node with a parsed fragment. For the root node the xml setting
# is reset to undef and the new content is parsed in place; otherwise the node
# is swapped out inside its parent and the result of "_replace" is returned.
sub replace {
  my ($self, $new) = @_;
  my $tree = $self->tree;
  return $self->xml(undef)->parse($new) if $tree->[0] eq 'root';
  return $self->_replace($tree, $self->_parse("$new"));
}

# Replace the content of this node with a parsed fragment and return the
# invocant. The splice length ($#$tree) is at least the number of content
# nodes, so everything from the content offset onwards is removed.
sub replace_content {
  my ($self, $new) = @_;
  my $tree = $self->tree;
  splice @$tree, _offset($tree), $#$tree, _link($self->_parse("$new"), $tree);
  return $self;
}

# Return an object for the topmost ancestor found by "_ancestors", keeping the
# xml setting, or the invocant itself if there is no such ancestor
sub root {
  my $self = shift;
  return $self unless my $tree = _ancestors($self->tree, 1);
  return $self->new->tree($tree)->xml($self->xml);
}

# Merge the sibling lists from "_siblings" into one collection and optionally
# filter it with a selector
sub siblings {
  my ($self, $selector) = @_;
  my $merged = _siblings($self, 1);
  return _select(Mojo::Collection->new(@$merged), $selector);
}

# Replace this element with its own content nodes. The content is wrapped in
# a temporary 'root' array because "_link" skips the first slot of the
# structure it is given. The root node itself is returned unchanged.
sub strip {
  my $self = shift;
  my $tree = $self->tree;
  return $self if $tree->[0] eq 'root';
  return $self->_replace($tree, ['root', _nodes($tree)]);
}

# Delegate to the "tap" implementation in Mojo::Base
sub tap {
  my $self = shift;
  return $self->Mojo::Base::tap(@_);
}

# Text content of this node without descending into nested elements
# (recursion flag cleared); remaining arguments are forwarded to "_content"
sub text {
  my $self = shift;
  return $self->_content(0, @_);
}

# Extract the text of the nodes that follow this element inside its parent, up
# to (not including) the next element. Returns '' for the root node. $trim is
# passed on to "_trim" together with the parent node.
sub text_after {
  my ($self, $trim) = @_;

  return '' if (my $tree = $self->tree)->[0] eq 'root';

  my (@nodes, $started);
  for my $n (_nodes($tree->[3])) {

    # Start collecting right after this element
    ++$started and next if $n eq $tree;
    next unless $started;

    # Stop at the next element
    last if $n->[0] eq 'tag';
    push @nodes, $n;
  }

  return _text(\@nodes, 0, _trim($tree->[3], $trim));
}

# Extract the text of the nodes that precede this element inside its parent,
# starting after the closest preceding element. Returns '' for the root node.
# $trim is passed on to "_trim" together with the parent node.
sub text_before {
  my ($self, $trim) = @_;

  return '' if (my $tree = $self->tree)->[0] eq 'root';

  my @nodes;
  for my $n (_nodes($tree->[3])) {
    last if $n eq $tree;
    push @nodes, $n;

    # An element resets the list, only nodes after it are of interest
    @nodes = () if $n->[0] eq 'tag';
  }

  return _text(\@nodes, 0, _trim($tree->[3], $trim));
}

# Render via the object stored in the first slot of the invocant
sub to_xml {
  my $self = shift;
  return $self->[0]->render;
}

# Get or set the tree of the wrapped object via "_delegate"
sub tree {
  my $self = shift;
  return $self->_delegate(tree => @_);
}

# Element type accessor. Returns '' for the root node, the current type (slot
# 1 of the node) when called without a true argument, and otherwise stores the
# new type and returns the invocant.
sub type {
  my ($self, $type) = @_;
  return '' if (my $tree = $self->tree)->[0] eq 'root';
  return $tree->[1] unless $type;
  $tree->[1] = $type;
  return $self;
}

# Get or set the xml flag of the wrapped object via "_delegate"
sub xml {
  my $self = shift;
  return $self->_delegate(xml => @_);
}

# Insert a parsed fragment into the parent of this element, at the element's
# position plus $offset (0 inserts before it, 1 after it). Does nothing for the
# root node. Returns the invocant.
sub _add {
  my ($self, $offset, $new) = @_;

  return $self if (my $tree = $self->tree)->[0] eq 'root';

  my $parent = $tree->[3];
  splice @$parent, _parent($parent, $tree) + $offset, 0,
    _link($self->_parse("$new"), $parent);

  return $self;
}

# Walk up the parent links (slot 3) starting at $tree. With a true $root the
# topmost node reached is returned (undef if $tree has no parent); otherwise
# the list of ancestors without that topmost node is returned.
sub _ancestors {
  my ($tree, $root) = @_;
  my @ancestors;
  push @ancestors, $tree while ($tree->[0] eq 'tag') && ($tree = $tree->[3]);
  return $root ? $ancestors[-1] : @ancestors[0 .. $#ancestors - 1];
}

# Wrap each given tree node in a new object of the invocant's class that
# shares the invocant's xml setting, and return them as a Mojo::Collection
sub _collect {
  my $self = shift;
  my $xml  = $self->xml;
  return Mojo::Collection->new(@_)
    ->map(sub { $self->new->tree($_)->xml($xml) });
}

# Shared implementation of "text" and "all_text". The first argument after the
# invocant is the recursion flag; whatever remains is passed to "_trim" as the
# trim setting.
sub _content {
  my $tree = shift->tree;

  # The bare "shift" consumes the recursion flag before @_ is used for "_trim"
  return _text([_nodes($tree)], shift, _trim($tree, @_));
}

# Forward a method call to the object stored in the first slot of the
# invocant. Without further arguments this is a getter and returns the wrapped
# method's result; with arguments they are passed to the wrapped method and the
# invocant is returned to allow chaining.
sub _delegate {
  my ($self, $method) = (shift, shift);
  return $self->[0]->$method unless @_;

  # Setter: the arguments have to reach the wrapped object
  $self->[0]->$method(@_);
  return $self;
}

# Return the nodes of $children (everything after its first slot) as a list,
# pointing the parent slot of every 'tag' node at $parent. The parent
# reference is weakened.
sub _link {
  my ($children, $parent) = @_;

  # Link parent to children
  my @new;
  for my $n (@$children[1 .. $#$children]) {
    push @new, $n;
    next unless $n->[0] eq 'tag';
    $n->[3] = $parent;
    weaken $n->[3];
  }

  return @new;
}

# Return the content nodes of a tree node, that is everything from the content
# offset to the end of the array. Returns nothing for a false argument.
sub _nodes {
  return unless my $n = shift;
  return @$n[_offset($n) .. $#$n];
}

# Index of the first content node: 1 for a 'root' node, 4 for anything else
sub _offset {
  my $node = shift;
  return $node->[0] eq 'root' ? 1 : 4;
}

# Return the index of $child within the array of $parent. If $child is not
# among the content nodes the index one past the last node is returned.
sub _parent {
  my ($parent, $child) = @_;

  # Find parent offset for child
  my $i = _offset($parent);
  for my $n (@$parent[$i .. $#$parent]) {
    last if $n == $child;

    # Keep the index in step with the node being inspected
    $i++;
  }

  return $i;
}

# Parse a fragment with a fresh Mojo::DOM::HTML object that uses the
# invocant's xml setting and return the resulting tree
sub _parse {
  my ($self, $markup) = @_;
  my $parser = Mojo::DOM::HTML->new(xml => $self->xml);
  return $parser->parse($markup)->tree;
}

# Render a tree with a fresh Mojo::DOM::HTML object and the given xml setting
sub _render {
  my ($tree, $xml) = @_;
  return Mojo::DOM::HTML->new(tree => $tree, xml => $xml)->render;
}

# Swap the node $tree inside its parent for the nodes of $new (linked to that
# parent) and return the result of calling "parent" on the invocant
sub _replace {
  my ($self, $tree, $new) = @_;
  my $parent = $tree->[3];
  splice @$parent, _parent($parent, $tree), 1, _link($new, $parent);
  return $self->parent;
}

# Filter a collection down to the elements whose "match" accepts $selector.
# Without a defined selector the collection is returned untouched.
sub _select {
  my ($self, $selector) = @_;
  return defined $selector ? $self->grep(sub { $_->match($selector) }) : $self;
}

# Collect the sibling elements of the invocant. Without $merge the result is
# [\@before, \@after], with a true $merge one flat array reference. If the
# invocant has no parent the corresponding empty structure is returned.
sub _siblings {
  my ($self, $merge) = @_;

  return $merge ? [] : [[], []] unless my $parent = $self->parent;

  my $tree = $self->tree;
  my (@before, @after, $match);
  for my $child ($parent->children->each) {

    # Skip the invocant itself and remember that it has been passed
    ++$match and next if $child->tree eq $tree;
    $match ? push @after, $child : push @before, $child;
  }

  return $merge ? [@before, @after] : [\@before, \@after];
}

# Turn a list of nodes into a string.
#   $nodes   - array reference of nodes, successive text nodes are merged in
#              place
#   $recurse - true to include the text of nested elements
#   $trim    - true to squish text nodes and drop chunks without any
#              non-whitespace character
sub _text {
  my ($nodes, $recurse, $trim) = @_;

  # Merge successive text nodes
  my $i = 0;
  while (my $next = $nodes->[$i + 1]) {
    ++$i and next unless $nodes->[$i][0] eq 'text' && $next->[0] eq 'text';
    splice @$nodes, $i, 2, ['text', $nodes->[$i][1] . $next->[1]];
  }

  my $text = '';
  for my $n (@$nodes) {
    my $type = $n->[0];

    # Nested tag
    my $content = '';
    if ($type eq 'tag' && $recurse) {
      $content = _text([_nodes($n)], 1, _trim($n, $trim));
    }

    # Text
    elsif ($type eq 'text') { $content = $trim ? squish($n->[1]) : $n->[1] }

    # CDATA or raw text
    elsif ($type eq 'cdata' || $type eq 'raw') { $content = $n->[1] }

    # Add leading whitespace if punctuation allows it
    $content = " $content" if $text =~ /\S\z/ && $content =~ /^[^.!?,;:\s]+/;

    # Trim whitespace blocks
    $text .= $content if $content =~ /\S+/ || !$trim;
  }

  return $text;
}

# Decide whether whitespace trimming applies to node $e. Returns 0 if there is
# no node, if $trim is defined but false, or if $e or one of its ancestors is
# a "pre" element; returns 1 otherwise. An undefined $trim counts as enabled.
sub _trim {
  my ($e, $trim) = @_;

  # Disabled
  return 0 unless $e && ($trim = defined $trim ? $trim : 1);

  # Detect "pre" tag
  while ($e->[0] eq 'tag') {
    return 0 if $e->[1] eq 'pre';
    last unless $e = $e->[3];
  }

  return 1;
}

1;

=encoding utf8

=head1 NAME

Mojo::DOM - Minimalistic HTML/XML DOM parser with CSS selectors


=head1 SYNOPSIS

  use Mojo::DOM;

  # Parse
  my $dom = Mojo::DOM->new('<div><p id="a">A</p><p id="b">B</p></div>');

  # Find
  say $dom->at('#b')->text;
  say $dom->find('p')->text;
  say $dom->find('[id]')->attr('id');

  # Walk
  say $dom->div->p->[0]->text;
  say $dom->div->children('p')->first->{id};

  # Iterate
  $dom->find('p[id]')->each(sub { say shift->{id} });

  # Loop
  for my $e ($dom->find('p[id]')->each) {
    say $e->text;
  }

  # Modify
  $dom->div->p->[1]->append('<p id="c">C</p>');

  # Render
  say "$dom";


=head1 DESCRIPTION

L<Mojo::DOM> is a minimalistic and relaxed HTML/XML DOM parser with CSS
selector support. It will even try to interpret broken XML, so you should not
use it for validation.


=head1 CASE SENSITIVITY

L<Mojo::DOM> defaults to HTML semantics, that means all tags and attributes
are lowercased and selectors need to be lowercase as well.

  my $dom = Mojo::DOM->new('<P ID="greeting">Hi!</P>');
  say $dom->at('p')->text;
  say $dom->p->{id};

If XML processing instructions are found, the parser will automatically switch
into XML mode and everything becomes case sensitive.

  my $dom = Mojo::DOM->new('<?xml version="1.0"?><P ID="greeting">Hi!</P>');
  say $dom->at('P')->text;
  say $dom->P->{ID};

XML detection can also be disabled with the L</"xml"> method.

  # Force XML semantics
  $dom->xml(1);

  # Force HTML semantics
  $dom->xml(0);

=head1 METHODS

L<Mojo::DOM> implements the following methods.

=head2 new

  my $dom = Mojo::DOM->new;
  my $dom = Mojo::DOM->new('<foo bar="baz">test</foo>');

Construct a new array-based L<Mojo::DOM> object and L</"parse"> HTML/XML
fragment if necessary.

=head2 all_text

  my $trimmed   = $dom->all_text;
  my $untrimmed = $dom->all_text(0);

Extract all text content from DOM structure, smart whitespace trimming is
enabled by default.

  # "foo bar baz"

  # "foo\nbarbaz\n"

=head2 ancestors

  my $collection = $dom->ancestors;
  my $collection = $dom->ancestors('div');

Find all ancestors of this element matching the CSS selector and return a
L<Mojo::Collection> object containing these elements as L<Mojo::DOM> objects.
All selectors from L<Mojo::DOM::CSS> are supported.

  # List types of ancestor elements
  say $dom->ancestors->type;

=head2 append

  $dom = $dom->append('<p>Hi!</p>');

Append HTML/XML fragment to element.

  # "<div><h1>A</h1><h2>B</h2></div>"

=head2 append_content

  $dom = $dom->append_content('<p>Hi!</p>');

Append HTML/XML fragment to element content.

  # "<div><h1>AB</h1></div>"

=head2 at

  my $result = $dom->at('html title');

Find first element matching the CSS selector and return it as a L<Mojo::DOM>
object or return C<undef> if none could be found. All selectors from
L<Mojo::DOM::CSS> are supported.

  # Find first element with "svg" namespace definition
  my $namespace = $dom->at('[xmlns\:svg]')->{'xmlns:svg'};

=head2 attr

  my $attrs = $dom->attr;
  my $foo   = $dom->attr('foo');
  $dom      = $dom->attr({foo => 'bar'});
  $dom      = $dom->attr(foo => 'bar');

Element attributes.

  # List id attributes
  say $dom->find('*')->attr('id')->compact;

=head2 children

  my $collection = $dom->children;
  my $collection = $dom->children('div');

Find all children of this element matching the CSS selector and return a
L<Mojo::Collection> object containing these elements as L<Mojo::DOM> objects.
All selectors from L<Mojo::DOM::CSS> are supported.

  # Show type of random child element
  say $dom->children->shuffle->first->type;

=head2 content_xml

  my $xml = $dom->content_xml;

Render content of this element to XML.

  # "<b>test</b>"

=head2 find

  my $collection = $dom->find('html title');

Find all elements matching the CSS selector and return a L<Mojo::Collection>
object containing these elements as L<Mojo::DOM> objects. All selectors from
L<Mojo::DOM::CSS> are supported.

  # Find a specific element and extract information
  my $id = $dom->find('div')->[23]{id};

  # Extract information from multiple elements
  my @headers = $dom->find('h1, h2, h3')->text->each;
  my @links   = $dom->find('a[href]')->attr('href')->each;

=head2 match

  my $result = $dom->match('html title');

Match the CSS selector against this element and return it as a L<Mojo::DOM>
object or return C<undef> if it didn't match. All selectors from
L<Mojo::DOM::CSS> are supported.

=head2 namespace

  my $namespace = $dom->namespace;

Find element namespace.

  # Find namespace for an element with namespace prefix
  my $namespace = $dom->at('svg > svg\:circle')->namespace;

  # Find namespace for an element that may or may not have a namespace prefix
  my $namespace = $dom->at('svg > circle')->namespace;

=head2 next

  my $sibling = $dom->next;

Return L<Mojo::DOM> object for next sibling of element or C<undef> if there
are no more siblings.

  # "<h2>B</h2>"

=head2 parent

  my $parent = $dom->parent;

Return L<Mojo::DOM> object for parent of element or C<undef> if this element
has no parent.

=head2 parse

  $dom = $dom->parse('<foo bar="baz">test</foo>');

Parse HTML/XML fragment with L<Mojo::DOM::HTML>.

  # Parse XML
  my $dom = Mojo::DOM->new->xml(1)->parse($xml);

=head2 prepend

  $dom = $dom->prepend('<p>Hi!</p>');

Prepend HTML/XML fragment to element.

  # "<div><h1>A</h1><h2>B</h2></div>"

=head2 prepend_content

  $dom = $dom->prepend_content('<p>Hi!</p>');

Prepend HTML/XML fragment to element content.

  # "<div><h2>AB</h2></div>"

=head2 previous

  my $sibling = $dom->previous;

Return L<Mojo::DOM> object for previous sibling of element or C<undef> if
there are no more siblings.

  # "<h1>A</h1>"

=head2 remove

  my $parent = $dom->remove;

Remove element and return L<Mojo::DOM> object for parent of element.

  # "<div></div>"

=head2 replace

  my $parent = $dom->replace('<div>test</div>');

Replace element with HTML/XML fragment and return L<Mojo::DOM> object for
parent of element.

  # "<div><h2>B</h2></div>"

  # "<div></div>"

=head2 replace_content

  $dom = $dom->replace_content('<p>test</p>');

Replace element content with HTML/XML fragment.

  # "<div><h1>B</h1></div>"

  # "<div><h1></h1></div>"

=head2 root

  my $root = $dom->root;

Return L<Mojo::DOM> object for root node.

=head2 siblings

  my $collection = $dom->siblings;
  my $collection = $dom->siblings('div');

Find all siblings of this element matching the CSS selector and return a
L<Mojo::Collection> object containing these elements as L<Mojo::DOM> objects.
All selectors from L<Mojo::DOM::CSS> are supported.

  # List types of sibling elements
  say $dom->siblings->type;

=head2 strip

  my $parent = $dom->strip;

Remove element while preserving its content and return L<Mojo::DOM> object for
parent of element.

  # "<div>A</div>"

=head2 tap

  $dom = $dom->tap(sub {...});

Alias for L<Mojo::Base/"tap">.

=head2 text

  my $trimmed   = $dom->text;
  my $untrimmed = $dom->text(0);

Extract text content from element only (not including child elements), smart
whitespace trimming is enabled by default.

  # "foo baz"

  # "foo\nbaz\n"

=head2 text_after

  my $trimmed   = $dom->text_after;
  my $untrimmed = $dom->text_after(0);

Extract text content immediately following element, smart whitespace trimming
is enabled by default.

  # "baz"

  # "baz\n"

=head2 text_before

  my $trimmed   = $dom->text_before;
  my $untrimmed = $dom->text_before(0);

Extract text content immediately preceding element, smart whitespace trimming
is enabled by default.

  # "foo"

  # "foo\n"

=head2 to_xml

  my $xml = $dom->to_xml;
  my $xml = "$dom";

Render this element and its content to XML.

  # "<b>test</b>"

=head2 tree

  my $tree = $dom->tree;
  $dom     = $dom->tree(['root', ['text', 'foo']]);

Document Object Model. Note that this structure should only be used very
carefully since it is very dynamic.

=head2 type

  my $type = $dom->type;
  $dom     = $dom->type('div');

Element type.

  # List types of child elements
  say $dom->children->type;

=head2 xml

  my $bool = $dom->xml;
  $dom     = $dom->xml($bool);

Disable HTML semantics in parser and activate case sensitivity, defaults to
auto detection based on processing instructions.


=head1 CHILD ELEMENTS

In addition to the methods above, many child elements are also automatically
available as object methods, which return a L<Mojo::DOM> or
L<Mojo::Collection> object, depending on number of children.

  say $dom->p->text;
  say $dom->div->[23]->text;
  say $dom->div->text;


=head1 ELEMENT ATTRIBUTES

Direct hash reference access to element attributes is also possible.

  say $dom->{foo};
  say $dom->div->{id};

=head1 SEE ALSO

