Newer Older
798 lines | 18.732kb
add files
Yuki Kimoto authored on 2014-03-26
1
package Mojo::DOM;
2
use Mojo::Base -strict;
3
use overload
4
  '%{}'    => sub { shift->attr },
5
  bool     => sub {1},
6
  '""'     => sub { shift->to_xml },
7
  fallback => 1;
8

            
9
# "Fry: This snow is beautiful. I'm glad global warming never happened.
10
#  Leela: Actually, it did. But thank God nuclear winter canceled it out."
11
use Carp 'croak';
12
use Mojo::Collection;
13
use Mojo::DOM::CSS;
14
use Mojo::DOM::HTML;
15
use Mojo::Util 'squish';
16
use Scalar::Util qw(blessed weaken);
17

            
18
# Resolve unknown method calls as child element lookups: returns the single
# matching child, a collection if several match, and croaks otherwise.
sub AUTOLOAD {
  my $self = shift;

  # Split the fully qualified name into package and method
  our $AUTOLOAD;
  my ($package, $method) = $AUTOLOAD =~ /^([\w:]+)::(\w+)$/;
  my $is_dom = blessed $self && $self->isa(__PACKAGE__);
  croak "Undefined subroutine &${package}::$method called" unless $is_dom;

  # Search children of current element
  my $children = $self->children($method);
  croak qq{Can't locate object method "$method" via package "$package"}
    unless @$children;
  return @$children > 1 ? $children : $children->[0];
}
30

            
31
# Empty destructor, defined explicitly so that object destruction is not
# dispatched to AUTOLOAD
sub DESTROY { }
32

            
33
# Construct a new array-based object holding a fresh Mojo::DOM::HTML instance
# and parse the remaining arguments if there are any.
sub new {
  my ($class, @args) = @_;
  my $self = bless [Mojo::DOM::HTML->new], ref $class || $class;
  return $self unless @args;
  return $self->parse(@args);
}
38

            
39
# Text content with the recurse flag of "_content" switched on
sub all_text {
  my $self = shift;
  return $self->_content(1, @_);
}
40

            
41
# Collection of ancestor elements, optionally filtered by a CSS selector
sub ancestors {
  my ($self, $selector) = @_;
  return _select($self->_collect(_ancestors($self->tree)), $selector);
}
42

            
43
# Insert fragment after this element (offset 1 relative to its position)
sub append {
  my $self = shift;
  return $self->_add(1, @_);
}
44

            
45
# Parse fragment and add its nodes to the end of this element's tree
sub append_content {
  my ($self, $new) = @_;
  my $tree     = $self->tree;
  my $fragment = $self->_parse("$new");
  push @$tree, _link($fragment, $tree);
  return $self;
}
51

            
52
# First result of "find" for the given selector
sub at {
  my $self = shift;
  return $self->find(@_)->[0];
}
53

            
54
# Attribute accessor: no arguments returns the attribute hash, a single plain
# name returns its value (or an empty string), anything else merges new values
# and returns the invocant.
sub attr {
  my ($self, @args) = @_;

  # Root nodes have no attribute slot, so they get a fresh empty hash
  my $tree  = $self->tree;
  my $attrs = $tree->[0] eq 'root' ? {} : $tree->[2];

  # Hash
  return $attrs unless @args;

  # Get
  return $attrs->{$args[0]} // '' if @args == 1 && !ref $args[0];

  # Set
  my $new = ref $args[0] ? $args[0] : +{@args};
  %$attrs = (%$attrs, %$new);
  return $self;
}
70

            
71
# Collection of direct child tags, optionally filtered by a CSS selector
sub children {
  my ($self, @selector) = @_;
  my @tags = grep { $_->[0] eq 'tag' } _nodes($self->tree);
  return _select($self->_collect(@tags), @selector);
}
76

            
77
# Render every child node of this element and concatenate the results
sub content_xml {
  my $self   = shift;
  my $xml    = $self->xml;
  my @chunks = map { _render($_, $xml) } _nodes($self->tree);
  return join '', @chunks;
}
82

            
83
# Run a CSS selector against this tree and wrap the results in a collection
sub find {
  my ($self, @args) = @_;
  my $css = Mojo::DOM::CSS->new(tree => $self->tree);
  return $self->_collect(@{$css->select(@args)});
}
88

            
89
# Return the invocant if the CSS selector matches this element, undef otherwise
sub match {
  my ($self, @args) = @_;
  my $css = Mojo::DOM::CSS->new(tree => $self->tree);
  return $css->match(@args) ? $self : undef;
}
94

            
95
# Find the namespace of this element by walking from the element up to the
# root. For a prefixed tag ("svg:circle") the matching "xmlns:svg" attribute is
# used, otherwise the nearest "xmlns" attribute. Returns an empty string for
# the root node or when no declaration is found.
sub namespace {
  my $self = shift;

  return '' if (my $current = $self->tree)->[0] eq 'root';

  # Extract namespace prefix and search parents
  my $ns = $current->[1] =~ /^(.*?):/ ? "xmlns:$1" : undef;

  # Parent links are weakened in "_link", so also stop once a link is gone
  while ($current && $current->[0] ne 'root') {
    my $attrs = $current->[2];

    # Namespace for prefix (exact attribute name, value returned as is)
    if ($ns) { return $attrs->{$ns} if exists $attrs->{$ns} }

    # Namespace attribute
    elsif (defined $attrs->{xmlns}) { return $attrs->{xmlns} }

    $current = $current->[3];
  }

  return '';
}
116

            
117
# First sibling element following this one, if any
sub next {
  my $self  = shift;
  my $after = $self->_siblings->[1];
  return $after->[0];
}
118

            
119
# New object for the parent node carrying over the "xml" setting, or undef
# when this is the root node
sub parent {
  my $self = shift;
  my $tree = $self->tree;
  return undef if $tree->[0] eq 'root';
  my $parent = $self->new;
  return $parent->tree($tree->[3])->xml($self->xml);
}
124

            
125
# Hand exactly one argument to the "parse" method of the wrapped object
sub parse {
  my ($self, $markup) = (shift, shift);
  return $self->_delegate(parse => $markup);
}
126

            
127
# Insert fragment before this element (offset 0 relative to its position)
sub prepend {
  my $self = shift;
  return $self->_add(0, @_);
}
128

            
129
# Parse fragment and insert its nodes in front of this element's child nodes
sub prepend_content {
  my ($self, $new) = @_;
  my $tree     = $self->tree;
  my @children = _link($self->_parse("$new"), $tree);
  splice @$tree, _offset($tree), 0, @children;
  return $self;
}
135

            
136
# Last sibling element preceding this one, if any
sub previous {
  my $self   = shift;
  my $before = $self->_siblings->[0];
  return $before->[-1];
}
137

            
138
# Removal is a replacement with an empty fragment
sub remove {
  my $self = shift;
  return $self->replace('');
}
139

            
140
# Replace this element with a parsed fragment; on the root node the "xml"
# setting is reset and the new content is parsed in place instead
sub replace {
  my ($self, $new) = @_;
  my $tree = $self->tree;

  # Root
  if ($tree->[0] eq 'root') { return $self->xml(undef)->parse($new) }

  # Element
  my $fragment = $self->_parse("$new");
  return $self->_replace($tree, $fragment);
}
146

            
147
# Swap all child nodes of this element for the nodes of a parsed fragment
sub replace_content {
  my ($self, $new) = @_;
  my $tree  = $self->tree;
  my $first = _offset($tree);
  my $count = $#$tree;
  my @fresh = _link($self->_parse("$new"), $tree);
  splice @$tree, $first, $count, @fresh;
  return $self;
}
153

            
154
# New object for the topmost ancestor, or the invocant if there is none
sub root {
  my $self = shift;
  my $tree = _ancestors($self->tree, 1);
  return $self unless $tree;
  return $self->new->tree($tree)->xml($self->xml);
}
159

            
160
# Collection of sibling elements, optionally filtered by a CSS selector
sub siblings {
  my ($self, $selector) = @_;
  my $all = _siblings($self, 1);
  return _select(Mojo::Collection->new(@$all), $selector);
}
161

            
162
# Replace this element with its own child nodes; the root node is left alone
sub strip {
  my $self = shift;
  my $tree = $self->tree;
  return $tree->[0] eq 'root'
    ? $self
    : $self->_replace($tree, ['root', _nodes($tree)]);
}
168

            
169
# Forward to the "tap" implementation in Mojo::Base
sub tap {
  my $self = shift;
  return $self->Mojo::Base::tap(@_);
}
170

            
171
# Text content with the recurse flag of "_content" switched off
sub text {
  my $self = shift;
  return $self->_content(0, @_);
}
172

            
173
# Text of the nodes between this element and the next tag in the parent;
# empty string for the root node
sub text_after {
  my ($self, $trim) = @_;

  my $tree = $self->tree;
  return '' if $tree->[0] eq 'root';

  # Skip everything up to this element, then collect until the next tag
  my $parent = $tree->[3];
  my $seen   = 0;
  my @nodes;
  for my $node (_nodes($parent)) {
    if ($node eq $tree) { $seen++; next }
    next unless $seen;
    last if $node->[0] eq 'tag';
    push @nodes, $node;
  }

  return _text(\@nodes, 0, _trim($parent, $trim));
}
188

            
189
# Text of the nodes between the previous tag in the parent and this element;
# empty string for the root node
sub text_before {
  my ($self, $trim) = @_;

  my $tree = $self->tree;
  return '' if $tree->[0] eq 'root';

  # Every tag encountered before this element resets the collected nodes
  my $parent = $tree->[3];
  my @nodes;
  for my $node (_nodes($parent)) {
    last if $node eq $tree;
    if   ($node->[0] eq 'tag') { @nodes = () }
    else                       { push @nodes, $node }
  }

  return _text(\@nodes, 0, _trim($parent, $trim));
}
203

            
204
# Render via the wrapped object stored in the first array slot
sub to_xml {
  my $self = shift;
  return $self->[0]->render;
}
205

            
206
# Accessor delegated to the "tree" method of the wrapped object
sub tree {
  my $self = shift;
  return $self->_delegate(tree => @_);
}
207

            
208
# Element type accessor: returns the type without a true argument, otherwise
# stores the new type and returns the invocant; root nodes yield an empty string
sub type {
  my ($self, $type) = @_;

  my $tree = $self->tree;
  return '' if $tree->[0] eq 'root';

  # Set
  if ($type) {
    $tree->[1] = $type;
    return $self;
  }

  # Get
  return $tree->[1];
}
215

            
216
# Accessor delegated to the "xml" method of the wrapped object
sub xml {
  my $self = shift;
  return $self->_delegate(xml => @_);
}
217

            
218
# Insert a parsed fragment into the parent next to this element; $offset is
# added to the element's own position (0 before, 1 after). No-op on the root.
sub _add {
  my ($self, $offset, $new) = @_;

  my $tree = $self->tree;
  return $self if $tree->[0] eq 'root';

  my $parent = $tree->[3];
  my $index  = _parent($parent, $tree) + $offset;
  my @nodes  = _link($self->_parse("$new"), $parent);
  splice @$parent, $index, 0, @nodes;

  return $self;
}
229

            
230
# Follow parent links upwards from a tag node. With a true $root only the
# last node reached is returned, otherwise all nodes reached except the last.
sub _ancestors {
  my ($tree, $root) = @_;

  my @chain;
  while ($tree->[0] eq 'tag') {
    last unless $tree = $tree->[3];
    push @chain, $tree;
  }

  return $root ? $chain[-1] : @chain[0 .. $#chain - 1];
}
236

            
237
# Wrap each given tree in a new object that carries over the "xml" setting
sub _collect {
  my ($self, @trees) = @_;
  my $xml  = $self->xml;
  my $wrap = sub { $self->new->tree($_)->xml($xml) };
  return Mojo::Collection->new(@trees)->map($wrap);
}
243

            
244
# Shared implementation of "text" and "all_text": first argument is the
# recurse flag, the rest is passed on to "_trim"
sub _content {
  my ($self, $recurse, @trim) = @_;
  my $tree = $self->tree;
  return _text([_nodes($tree)], $recurse, _trim($tree, @trim));
}
248

            
249
# Call a method on the wrapped object: without arguments its result is
# returned, with arguments the invocant is returned to allow chaining
sub _delegate {
  my ($self, $method, @args) = @_;
  my $html = $self->[0];

  # Getter
  return $html->$method unless @args;

  # Setter
  $html->$method(@args);
  return $self;
}
255

            
256
# Return the nodes of a parsed fragment (everything after the first slot)
# and point the parent slot of every tag node at $parent
sub _link {
  my ($children, $parent) = @_;

  my @new = @$children[1 .. $#$children];

  # Link parent to children, weakened to avoid circular references
  for my $node (grep { $_->[0] eq 'tag' } @new) {
    $node->[3] = $parent;
    weaken $node->[3];
  }

  return @new;
}
270

            
271
# Child nodes of a tree node, nothing for a false argument
sub _nodes {
  my $node = shift;
  return unless $node;
  my $first = _offset($node);
  return @$node[$first .. $#$node];
}
275

            
276
# Index of the first child node: 1 for the root node, 4 for everything else
sub _offset {
  my $node = shift;
  return 1 if $node->[0] eq 'root';
  return 4;
}
277

            
278
# Position of $child among the nodes of $parent; one past the last index if
# it is not found
sub _parent {
  my ($parent, $child) = @_;

  # Find parent offset for child
  my $i = _offset($parent);
  $i++ while $i <= $#$parent && $parent->[$i] != $child;

  return $i;
}
290

            
291
# Parse a fragment with a new parser using the current "xml" setting and
# return the resulting tree
sub _parse {
  my ($self, $markup) = (shift, shift);
  my $html = Mojo::DOM::HTML->new(xml => $self->xml);
  return $html->parse($markup)->tree;
}
292

            
293
# Render a single tree with the given "xml" setting
sub _render {
  my ($tree, $xml) = (shift, shift);
  return Mojo::DOM::HTML->new(tree => $tree, xml => $xml)->render;
}
294

            
295
# Swap $tree inside its parent for the nodes of $new and return an object
# for the parent
sub _replace {
  my ($self, $tree, $new) = @_;
  my $parent = $tree->[3];
  my $index  = _parent($parent, $tree);
  splice @$parent, $index, 1, _link($new, $parent);
  return $self->parent;
}
301

            
302
# Filter a collection with "match" if a selector is defined, otherwise return
# the collection unchanged
sub _select {
  my ($self, $selector) = @_;
  return $self unless defined $selector;
  return $self->grep(sub { $_->match($selector) });
}
306

            
307
# Child elements of the parent other than this element. With a true $merge a
# flat array reference is returned, otherwise a pair of array references
# holding the elements before and after this one.
sub _siblings {
  my ($self, $merge) = @_;

  my $parent = $self->parent;
  unless ($parent) { return $merge ? [] : [[], []] }

  # Everything seen before the own tree goes left, the rest goes right
  my $tree = $self->tree;
  my (@before, @after);
  my $bucket = \@before;
  for my $child ($parent->children->each) {
    if ($child->tree eq $tree) { $bucket = \@after; next }
    push @$bucket, $child;
  }

  return $merge ? [@before, @after] : [\@before, \@after];
}
321

            
322
# Build a string from a list of nodes. Nested tags are only descended into
# when $recurse is true; with a true $trim text nodes are squished and
# whitespace-only chunks are dropped. Note that the node list is modified in
# place while merging text nodes.
sub _text {
  my ($nodes, $recurse, $trim) = @_;

  # Merge successive text nodes
  my $i = 0;
  while (my $next = $nodes->[$i + 1]) {
    my $current = $nodes->[$i];
    if ($current->[0] eq 'text' && $next->[0] eq 'text') {
      splice @$nodes, $i, 2, ['text', $current->[1] . $next->[1]];
    }
    else { $i++ }
  }

  my $text = '';
  for my $node (@$nodes) {
    my $type  = $node->[0];
    my $chunk = '';

    # Text
    if ($type eq 'text') { $chunk = $trim ? squish($node->[1]) : $node->[1] }

    # CDATA or raw text
    elsif ($type eq 'cdata' || $type eq 'raw') { $chunk = $node->[1] }

    # Nested tag
    elsif ($type eq 'tag' && $recurse) {
      $chunk = _text([_nodes($node)], 1, _trim($node, $trim));
    }

    # Add leading whitespace if punctuation allows it
    my $needs_space = $text =~ /\S\z/ && $chunk =~ /^[^.!?,;:\s]+/;
    $chunk = " $chunk" if $needs_space;

    # Trim whitespace blocks
    next if $trim && $chunk !~ /\S+/;
    $text .= $chunk;
  }

  return $text;
}
357

            
358
# Decide whether whitespace trimming applies to a node: 0 if there is no
# node, trimming was switched off, or the node is inside a "pre" tag, and 1
# otherwise. An undefined $trim counts as enabled.
sub _trim {
  my ($e, $trim) = @_;

  # Disabled
  return 0 unless $e;
  $trim //= 1;
  return 0 unless $trim;

  # Detect "pre" tag
  my $current = $e;
  while ($current && $current->[0] eq 'tag') {
    return 0 if $current->[1] eq 'pre';
    $current = $current->[3];
  }

  return 1;
}
372

            
373
1;
374

            
375
=encoding utf8
376

            
377
=head1 NAME
378

            
379
Mojo::DOM - Minimalistic HTML/XML DOM parser with CSS selectors
380

            
381
=head1 SYNOPSIS
382

            
383
  use Mojo::DOM;
384

            
385
  # Parse
386
  my $dom = Mojo::DOM->new('<div><p id="a">A</p><p id="b">B</p></div>');
387

            
388
  # Find
389
  say $dom->at('#b')->text;
390
  say $dom->find('p')->text;
391
  say $dom->find('[id]')->attr('id');
392

            
393
  # Walk
394
  say $dom->div->p->[0]->text;
395
  say $dom->div->children('p')->first->{id};
396

            
397
  # Iterate
398
  $dom->find('p[id]')->each(sub { say shift->{id} });
399

            
400
  # Loop
401
  for my $e ($dom->find('p[id]')->each) {
402
    say $e->text;
403
  }
404

            
405
  # Modify
406
  $dom->div->p->[1]->append('<p id="c">C</p>');
407
  $dom->find(':not(p)')->strip;
408

            
409
  # Render
410
  say "$dom";
411

            
412
=head1 DESCRIPTION
413

            
414
L<Mojo::DOM> is a minimalistic and relaxed HTML/XML DOM parser with CSS
415
selector support. It will even try to interpret broken XML, so you should not
416
use it for validation.
417

            
418
=head1 CASE SENSITIVITY
419

            
420
L<Mojo::DOM> defaults to HTML semantics, that means all tags and attributes
421
are lowercased and selectors need to be lowercase as well.
422

            
423
  my $dom = Mojo::DOM->new('<P ID="greeting">Hi!</P>');
424
  say $dom->at('p')->text;
425
  say $dom->p->{id};
426

            
427
If XML processing instructions are found, the parser will automatically switch
428
into XML mode and everything becomes case sensitive.
429

            
430
  my $dom = Mojo::DOM->new('<?xml version="1.0"?><P ID="greeting">Hi!</P>');
431
  say $dom->at('P')->text;
432
  say $dom->P->{ID};
433

            
434
XML detection can also be disabled with the L</"xml"> method.
435

            
436
  # Force XML semantics
437
  $dom->xml(1);
438

            
439
  # Force HTML semantics
440
  $dom->xml(0);
441

            
442
=head1 METHODS
443

            
444
L<Mojo::DOM> implements the following methods.
445

            
446
=head2 new
447

            
448
  my $dom = Mojo::DOM->new;
449
  my $dom = Mojo::DOM->new('<foo bar="baz">test</foo>');
450

            
451
Construct a new array-based L<Mojo::DOM> object and L</"parse"> HTML/XML
452
fragment if necessary.
453

            
454
=head2 all_text
455

            
456
  my $trimmed   = $dom->all_text;
457
  my $untrimmed = $dom->all_text(0);
458

            
459
Extract all text content from DOM structure, smart whitespace trimming is
460
enabled by default.
461

            
462
  # "foo bar baz"
463
  $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->div->all_text;
464

            
465
  # "foo\nbar baz\n"
466
  $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->div->all_text(0);
467

            
468
=head2 ancestors
469

            
470
  my $collection = $dom->ancestors;
471
  my $collection = $dom->ancestors('div');
472

            
473
Find all ancestors of this element matching the CSS selector and return a
474
L<Mojo::Collection> object containing these elements as L<Mojo::DOM> objects.
475
All selectors from L<Mojo::DOM::CSS> are supported.
476

            
477
  # List types of ancestor elements
478
  say $dom->ancestors->type;
479

            
480
=head2 append
481

            
482
  $dom = $dom->append('<p>Hi!</p>');
483

            
484
Append HTML/XML fragment to element.
485

            
486
  # "<div><h1>A</h1><h2>B</h2></div>"
487
  $dom->parse('<div><h1>A</h1></div>')->at('h1')->append('<h2>B</h2>')->root;
488

            
489
=head2 append_content
490

            
491
  $dom = $dom->append_content('<p>Hi!</p>');
492

            
493
Append HTML/XML fragment to element content.
494

            
495
  # "<div><h1>AB</h1></div>"
496
  $dom->parse('<div><h1>A</h1></div>')->at('h1')->append_content('B')->root;
497

            
498
=head2 at
499

            
500
  my $result = $dom->at('html title');
501

            
502
Find first element matching the CSS selector and return it as a L<Mojo::DOM>
503
object or return C<undef> if none could be found. All selectors from
504
L<Mojo::DOM::CSS> are supported.
505

            
506
  # Find first element with "svg" namespace definition
507
  my $namespace = $dom->at('[xmlns\:svg]')->{'xmlns:svg'};
508

            
509
=head2 attr
510

            
511
  my $attrs = $dom->attr;
512
  my $foo   = $dom->attr('foo');
513
  $dom      = $dom->attr({foo => 'bar'});
514
  $dom      = $dom->attr(foo => 'bar');
515

            
516
Element attributes.
517

            
518
  # List id attributes
519
  say $dom->find('*')->attr('id')->compact;
520

            
521
=head2 children
522

            
523
  my $collection = $dom->children;
524
  my $collection = $dom->children('div');
525

            
526
Find all children of this element matching the CSS selector and return a
527
L<Mojo::Collection> object containing these elements as L<Mojo::DOM> objects.
528
All selectors from L<Mojo::DOM::CSS> are supported.
529

            
530
  # Show type of random child element
531
  say $dom->children->shuffle->first->type;
532

            
533
=head2 content_xml
534

            
535
  my $xml = $dom->content_xml;
536

            
537
Render content of this element to XML.
538

            
539
  # "<b>test</b>"
540
  $dom->parse('<div><b>test</b></div>')->div->content_xml;
541

            
542
=head2 find
543

            
544
  my $collection = $dom->find('html title');
545

            
546
Find all elements matching the CSS selector and return a L<Mojo::Collection>
547
object containing these elements as L<Mojo::DOM> objects. All selectors from
548
L<Mojo::DOM::CSS> are supported.
549

            
550
  # Find a specific element and extract information
551
  my $id = $dom->find('div')->[23]{id};
552

            
553
  # Extract information from multiple elements
554
  my @headers = $dom->find('h1, h2, h3')->text->each;
555
  my @links   = $dom->find('a[href]')->attr('href')->each;
556

            
557
=head2 match
558

            
559
  my $result = $dom->match('html title');
560

            
561
Match the CSS selector against this element and return it as a L<Mojo::DOM>
562
object or return C<undef> if it didn't match. All selectors from
563
L<Mojo::DOM::CSS> are supported.
564

            
565
=head2 namespace
566

            
567
  my $namespace = $dom->namespace;
568

            
569
Find element namespace.
570

            
571
  # Find namespace for an element with namespace prefix
572
  my $namespace = $dom->at('svg > svg\:circle')->namespace;
573

            
574
  # Find namespace for an element that may or may not have a namespace prefix
575
  my $namespace = $dom->at('svg > circle')->namespace;
576

            
577
=head2 next
578

            
579
  my $sibling = $dom->next;
580

            
581
Return L<Mojo::DOM> object for next sibling of element or C<undef> if there
582
are no more siblings.
583

            
584
  # "<h2>B</h2>"
585
  $dom->parse('<div><h1>A</h1><h2>B</h2></div>')->at('h1')->next;
586

            
587
=head2 parent
588

            
589
  my $parent = $dom->parent;
590

            
591
Return L<Mojo::DOM> object for parent of element or C<undef> if this element
592
has no parent.
593

            
594
=head2 parse
595

            
596
  $dom = $dom->parse('<foo bar="baz">test</foo>');
597

            
598
Parse HTML/XML fragment with L<Mojo::DOM::HTML>.
599

            
600
  # Parse XML
601
  my $dom = Mojo::DOM->new->xml(1)->parse($xml);
602

            
603
=head2 prepend
604

            
605
  $dom = $dom->prepend('<p>Hi!</p>');
606

            
607
Prepend HTML/XML fragment to element.
608

            
609
  # "<div><h1>A</h1><h2>B</h2></div>"
610
  $dom->parse('<div><h2>B</h2></div>')->at('h2')->prepend('<h1>A</h1>')->root;
611

            
612
=head2 prepend_content
613

            
614
  $dom = $dom->prepend_content('<p>Hi!</p>');
615

            
616
Prepend HTML/XML fragment to element content.
617

            
618
  # "<div><h2>AB</h2></div>"
619
  $dom->parse('<div><h2>B</h2></div>')->at('h2')->prepend_content('A')->root;
620

            
621
=head2 previous
622

            
623
  my $sibling = $dom->previous;
624

            
625
Return L<Mojo::DOM> object for previous sibling of element or C<undef> if
626
there are no more siblings.
627

            
628
  # "<h1>A</h1>"
629
  $dom->parse('<div><h1>A</h1><h2>B</h2></div>')->at('h2')->previous;
630

            
631
=head2 remove
632

            
633
  my $parent = $dom->remove;
634

            
635
Remove element and return L<Mojo::DOM> object for parent of element.
636

            
637
  # "<div></div>"
638
  $dom->parse('<div><h1>A</h1></div>')->at('h1')->remove;
639

            
640
=head2 replace
641

            
642
  my $parent = $dom->replace('<div>test</div>');
643

            
644
Replace element with HTML/XML fragment and return L<Mojo::DOM> object for
645
parent of element.
646

            
647
  # "<div><h2>B</h2></div>"
648
  $dom->parse('<div><h1>A</h1></div>')->at('h1')->replace('<h2>B</h2>');
649

            
650
  # "<div></div>"
651
  $dom->parse('<div><h1>A</h1></div>')->at('h1')->replace('');
652

            
653
=head2 replace_content
654

            
655
  $dom = $dom->replace_content('<p>test</p>');
656

            
657
Replace element content with HTML/XML fragment.
658

            
659
  # "<div><h1>B</h1></div>"
660
  $dom->parse('<div><h1>A</h1></div>')->at('h1')->replace_content('B')->root;
661

            
662
  # "<div><h1></h1></div>"
663
  $dom->parse('<div><h1>A</h1></div>')->at('h1')->replace_content('')->root;
664

            
665
=head2 root
666

            
667
  my $root = $dom->root;
668

            
669
Return L<Mojo::DOM> object for root node.
670

            
671
=head2 siblings
672

            
673
  my $collection = $dom->siblings;
674
  my $collection = $dom->siblings('div');
675

            
676
Find all siblings of this element matching the CSS selector and return a
677
L<Mojo::Collection> object containing these elements as L<Mojo::DOM> objects.
678
All selectors from L<Mojo::DOM::CSS> are supported.
679

            
680
  # List types of sibling elements
681
  say $dom->siblings->type;
682

            
683
=head2 strip
684

            
685
  my $parent = $dom->strip;
686

            
687
Remove element while preserving its content and return L<Mojo::DOM> object for
688
parent of element.
689

            
690
  # "<div>A</div>"
691
  $dom->parse('<div><h1>A</h1></div>')->at('h1')->strip;
692

            
693
=head2 tap
694

            
695
  $dom = $dom->tap(sub {...});
696

            
697
Alias for L<Mojo::Base/"tap">.
698

            
699
=head2 text
700

            
701
  my $trimmed   = $dom->text;
702
  my $untrimmed = $dom->text(0);
703

            
704
Extract text content from element only (not including child elements), smart
705
whitespace trimming is enabled by default.
706

            
707
  # "foo baz"
708
  $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->div->text;
709

            
710
  # "foo\nbaz\n"
711
  $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->div->text(0);
712

            
713
=head2 text_after
714

            
715
  my $trimmed   = $dom->text_after;
716
  my $untrimmed = $dom->text_after(0);
717

            
718
Extract text content immediately following element, smart whitespace trimming
719
is enabled by default.
720

            
721
  # "baz"
722
  $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->div->p->text_after;
723

            
724
  # "baz\n"
725
  $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->div->p->text_after(0);
726

            
727
=head2 text_before
728

            
729
  my $trimmed   = $dom->text_before;
730
  my $untrimmed = $dom->text_before(0);
731

            
732
Extract text content immediately preceding element, smart whitespace trimming
733
is enabled by default.
734

            
735
  # "foo"
736
  $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->div->p->text_before;
737

            
738
  # "foo\n"
739
  $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->div->p->text_before(0);
740

            
741
=head2 to_xml
742

            
743
  my $xml = $dom->to_xml;
744
  my $xml = "$dom";
745

            
746
Render this element and its content to XML.
747

            
748
  # "<b>test</b>"
749
  $dom->parse('<div><b>test</b></div>')->div->b->to_xml;
750

            
751
=head2 tree
752

            
753
  my $tree = $dom->tree;
754
  $dom     = $dom->tree(['root', ['text', 'foo']]);
755

            
756
Document Object Model. Note that this structure should only be used very
757
carefully since it is very dynamic.
758

            
759
=head2 type
760

            
761
  my $type = $dom->type;
762
  $dom     = $dom->type('div');
763

            
764
Element type.
765

            
766
  # List types of child elements
767
  say $dom->children->type;
768

            
769
=head2 xml
770

            
771
  my $bool = $dom->xml;
772
  $dom     = $dom->xml($bool);
773

            
774
Disable HTML semantics in parser and activate case sensitivity, defaults to
775
auto detection based on processing instructions.
776

            
777
=head1 CHILD ELEMENTS
778

            
779
In addition to the methods above, many child elements are also automatically
780
available as object methods, which return a L<Mojo::DOM> or
781
L<Mojo::Collection> object, depending on number of children.
782

            
783
  say $dom->p->text;
784
  say $dom->div->[23]->text;
785
  say $dom->div->text;
786

            
787
=head1 ELEMENT ATTRIBUTES
788

            
789
Direct hash reference access to element attributes is also possible.
790

            
791
  say $dom->{foo};
792
  say $dom->div->{id};
793

            
794
=head1 SEE ALSO
795

            
796
L<Mojolicious>, L<Mojolicious::Guides>, L<http://mojolicio.us>.
797

            
798
=cut