add files
|
1 |
package Mojo::DOM; |
2 |
use Mojo::Base -strict; |
|
3 |
use overload |
|
4 |
'%{}' => sub { shift->attr }, |
|
5 |
bool => sub {1}, |
|
6 |
'""' => sub { shift->to_xml }, |
|
7 |
fallback => 1; |
|
8 | ||
9 |
# "Fry: This snow is beautiful. I'm glad global warming never happened. |
|
10 |
# Leela: Actually, it did. But thank God nuclear winter canceled it out." |
|
11 |
use Carp 'croak'; |
|
12 |
use Mojo::Collection; |
|
13 |
use Mojo::DOM::CSS; |
|
14 |
use Mojo::DOM::HTML; |
|
15 |
use Mojo::Util 'squish'; |
|
16 |
use Scalar::Util qw(blessed weaken); |
|
17 | ||
18 |
# Resolve unknown method calls by treating the method name as a child element
# type. Returns the single matching child, or the whole collection if there
# are several, and croaks if there is none or the invocant is not an object
# of this package.
sub AUTOLOAD {
  my $invocant = shift;

  # Split the fully qualified name into package and method
  my ($pkg, $name) = our $AUTOLOAD =~ /^([\w:]+)::(\w+)$/;
  unless (blessed $invocant && $invocant->isa(__PACKAGE__)) {
    croak "Undefined subroutine &${pkg}::$name called";
  }

  # Search children of current element
  my $found = $invocant->children($name);
  my $count = @$found;
  croak qq{Can't locate object method "$name" via package "$pkg"}
    unless $count;
  return $count > 1 ? $found : $found->[0];
}
|
30 | ||
31 |
# Explicit no-op destructor, so object destruction is not routed through
# AUTOLOAD
sub DESTROY { return }
|
32 | ||
33 |
# Construct a new array-based object holding a fresh Mojo::DOM::HTML instance.
# Works as class and as instance method; any arguments are handed to "parse".
sub new {
  my ($invocant, @args) = @_;
  my $dom = bless [Mojo::DOM::HTML->new], (ref $invocant || $invocant);
  return $dom unless @args;
  return $dom->parse(@args);
}
|
38 | ||
39 |
# Text content with recursion into child elements enabled; remaining
# arguments (the trim flag) are passed on to "_content"
sub all_text {
  my ($self, @args) = @_;
  return $self->_content(1, @args);
}
|
40 | ||
41 |
# Collect all ancestors of this element as objects and optionally filter them
# with a CSS selector
sub ancestors {
  my ($self, $selector) = @_;
  my @nodes = _ancestors($self->tree);
  return _select($self->_collect(@nodes), $selector);
}
|
42 | ||
43 |
# Insert a fragment after this element ("_add" with offset 1)
sub append {
  my ($self, @args) = @_;
  return $self->_add(1, @args);
}
|
44 | ||
45 |
# Parse a fragment and add its nodes to the end of this element's content.
# Returns the invocant.
sub append_content {
  my ($self, $new) = @_;
  my $node = $self->tree;

  # The fragment is explicitly stringified before parsing
  my @children = _link($self->_parse("$new"), $node);
  push @$node, @children;

  return $self;
}
|
51 | ||
52 |
# First result of "find" for the given selector, undef if nothing matched
sub at {
  my ($self, @args) = @_;
  my $matches = $self->find(@args);
  return $matches->[0];
}
|
53 | ||
54 |
# Element attributes. Without arguments the attribute hash is returned, with
# a single plain name its value (empty string if missing), and with a hash
# reference or key/value pairs the attributes are merged and the invocant is
# returned.
sub attr {
  my ($self, @args) = @_;

  # The root node has no attributes, a throwaway hash stands in for them
  my $tree  = $self->tree;
  my $attrs = $tree->[0] eq 'root' ? {} : $tree->[2];

  # Hash
  return $attrs unless @args;

  # Get
  my $first = $args[0];
  if (@args == 1 && !ref $first) {
    return $attrs->{$first} // '';
  }

  # Set
  my $new = ref $first ? $first : {@args};
  @$attrs{keys %$new} = values %$new;

  return $self;
}
|
70 | ||
71 |
# Wrap all child nodes of type "tag" in objects and optionally filter them
# with a CSS selector
sub children {
  my ($self, @args) = @_;
  my @tags = grep { $_->[0] eq 'tag' } _nodes($self->tree);
  return _select($self->_collect(@tags), @args);
}
|
76 | ||
77 |
# Render every content node of this element and concatenate the results
sub content_xml {
  my $self = shift;
  my $xml  = $self->xml;

  my @chunks;
  push @chunks, _render($_, $xml) for _nodes($self->tree);

  return join '', @chunks;
}
|
82 | ||
83 |
# Run a CSS selector against this tree with Mojo::DOM::CSS and wrap the
# selected nodes in objects
sub find {
  my ($self, @args) = @_;
  my $css   = Mojo::DOM::CSS->new(tree => $self->tree);
  my $nodes = $css->select(@args);
  return $self->_collect(@$nodes);
}
|
88 | ||
89 |
# Return the invocant if Mojo::DOM::CSS matches the selector against this
# tree, undef otherwise
sub match {
  my ($self, @args) = @_;
  my $css = Mojo::DOM::CSS->new(tree => $self->tree);
  return $css->match(@args) ? $self : undef;
}
|
94 | ||
95 |
# Find the namespace of this element. For a prefixed tag name the matching
# "xmlns:prefix" attribute is looked up on the element and its ancestors, for
# an unprefixed one the closest "xmlns" attribute. Returns an empty string for
# the root node and when no declaration is found.
sub namespace {
  my $self = shift;

  # The root node is not an element
  my $current = $self->tree;
  return '' if $current->[0] eq 'root';

  # Extract namespace prefix and search parents
  my $ns = $current->[1] =~ /^(.*?):/ ? "xmlns:$1" : undef;
  while ($current->[0] ne 'root') {
    my $attrs = $current->[2];

    # Namespace for prefix (exact attribute name, no need to scan all keys)
    if ($ns) { return $attrs->{$ns} if exists $attrs->{$ns} }

    # Namespace attribute
    elsif (defined $attrs->{xmlns}) { return $attrs->{xmlns} }

    # Parent links are weakened and may be gone for detached nodes, stop
    # there instead of looping forever
    last unless $current = $current->[3];
  }

  return '';
}
|
116 | ||
117 |
# First sibling element after this one, undef if there is none
sub next {
  my $self = shift;
  my (undef, $after) = @{$self->_siblings};
  return $after->[0];
}
|
118 | ||
119 |
# Object for the parent node, carrying over the XML mode of this object.
# Returns undef for the root node.
sub parent {
  my $self = shift;

  my $node = $self->tree;
  return undef if $node->[0] eq 'root';

  my $parent = $self->new->tree($node->[3]);
  return $parent->xml($self->xml);
}
|
124 | ||
125 |
# Hand the fragment to the wrapped parser object through "_delegate"
sub parse {
  my ($self, $markup) = @_;
  return $self->_delegate(parse => $markup);
}
|
126 | ||
127 |
# Insert a fragment before this element ("_add" with offset 0)
sub prepend {
  my ($self, @args) = @_;
  return $self->_add(0, @args);
}
|
128 | ||
129 |
# Parse a fragment and insert its nodes at the start of this element's
# content. Returns the invocant.
sub prepend_content {
  my ($self, $new) = @_;
  my $node = $self->tree;

  # Content begins at the node specific offset
  my $start = _offset($node);
  splice @$node, $start, 0, _link($self->_parse("$new"), $node);

  return $self;
}
|
135 | ||
136 |
# Last sibling element before this one, undef if there is none
sub previous {
  my $self = shift;
  my ($before) = @{$self->_siblings};
  return $before->[-1];
}
|
137 | ||
138 |
# Remove this element by replacing it with an empty fragment
sub remove {
  my $self = shift;
  return $self->replace('');
}
|
139 | ||
140 |
# Replace this element with a parsed fragment and return the result of
# "_replace". For the root node the XML mode is reset with xml(undef) and the
# fragment is parsed as the new document instead.
sub replace {
  my ($self, $new) = @_;

  my $node = $self->tree;
  if ($node->[0] eq 'root') {
    return $self->xml(undef)->parse($new);
  }

  return $self->_replace($node, $self->_parse("$new"));
}
|
146 | ||
147 |
# Replace the whole content of this element with a parsed fragment. Returns
# the invocant.
sub replace_content {
  my ($self, $new) = @_;
  my $node = $self->tree;

  # The length is deliberately generous, splice stops at the end of the array
  my $start = _offset($node);
  splice @$node, $start, $#$node, _link($self->_parse("$new"), $node);

  return $self;
}
|
153 | ||
154 |
# Object for the topmost node above this element, or the invocant itself if
# there is nothing above it
sub root {
  my $self = shift;

  my $top = _ancestors($self->tree, 1);
  return $self unless $top;

  return $self->new->tree($top)->xml($self->xml);
}
|
159 | ||
160 |
# Collection of all sibling elements, optionally filtered with a CSS selector
sub siblings {
  my ($self, $selector) = @_;
  my $others = _siblings($self, 1);
  return _select(Mojo::Collection->new(@$others), $selector);
}
|
161 | ||
162 |
# Replace this element with its own content nodes. The root node cannot be
# stripped and the invocant is returned unchanged in that case.
sub strip {
  my $self = shift;

  my $node = $self->tree;
  return $self if $node->[0] eq 'root';

  # Wrap the content in a fragment, like the parser would return it
  my $fragment = ['root', _nodes($node)];
  return $self->_replace($node, $fragment);
}
|
168 | ||
169 |
# Explicitly call the "tap" implementation of Mojo::Base on this object
sub tap {
  my ($self, @args) = @_;
  return $self->Mojo::Base::tap(@args);
}
|
170 | ||
171 |
# Text content with recursion into child elements disabled; remaining
# arguments (the trim flag) are passed on to "_content"
sub text {
  my ($self, @args) = @_;
  return $self->_content(0, @args);
}
|
172 | ||
173 |
# Text of the nodes that follow this element in its parent, up to the next
# element. Returns an empty string for the root node.
sub text_after {
  my ($self, $trim) = @_;

  my $tree = $self->tree;
  return '' if $tree->[0] eq 'root';

  # Skip everything up to this element, then collect until the next tag
  my $parent = $tree->[3];
  my $seen   = 0;
  my @following;
  for my $sibling (_nodes($parent)) {
    if ($sibling eq $tree) { $seen++; next }
    next unless $seen;
    last if $sibling->[0] eq 'tag';
    push @following, $sibling;
  }

  return _text(\@following, 0, _trim($parent, $trim));
}
|
188 | ||
189 |
# Text of the nodes that precede this element in its parent, back to the
# previous element. Returns an empty string for the root node.
sub text_before {
  my ($self, $trim) = @_;

  my $tree = $self->tree;
  return '' if $tree->[0] eq 'root';

  # Collect nodes until this element is reached, starting over at every tag
  my $parent = $tree->[3];
  my @preceding;
  for my $sibling (_nodes($parent)) {
    last if $sibling eq $tree;
    if   ($sibling->[0] eq 'tag') { @preceding = () }
    else                          { push @preceding, $sibling }
  }

  return _text(\@preceding, 0, _trim($parent, $trim));
}
|
203 | ||
204 |
# Render through the wrapped parser object stored in the first array slot
sub to_xml {
  my $self = shift;
  my $html = $self->[0];
  return $html->render;
}
|
205 | ||
206 |
# Get or set the tree of the wrapped parser object through "_delegate"
sub tree {
  my ($self, @args) = @_;
  return $self->_delegate(tree => @args);
}
|
207 | ||
208 |
# Element type. Without a true argument the current type is returned, with
# one the type is changed and the invocant returned. The root node always
# yields an empty string.
sub type {
  my ($self, $type) = @_;

  my $node = $self->tree;
  return '' if $node->[0] eq 'root';

  # Set
  if ($type) {
    $node->[1] = $type;
    return $self;
  }

  # Get
  return $node->[1];
}
|
215 | ||
216 |
# Get or set the XML mode of the wrapped parser object through "_delegate"
sub xml {
  my ($self, @args) = @_;
  return $self->_delegate(xml => @args);
}
|
217 | ||
218 |
# Insert a parsed fragment into the parent of this element, either at the
# position of the element (offset 0) or right after it (offset 1). Nothing
# happens for the root node. Returns the invocant.
sub _add {
  my ($self, $offset, $new) = @_;

  my $tree = $self->tree;
  return $self if $tree->[0] eq 'root';

  my $parent = $tree->[3];
  my $index  = _parent($parent, $tree) + $offset;
  splice @$parent, $index, 0, _link($self->_parse("$new"), $parent);

  return $self;
}
|
229 | ||
230 |
# Follow the parent links upwards from a node. With a true second argument
# only the topmost node reached is returned, otherwise all nodes on the way
# except the topmost one.
sub _ancestors {
  my ($node, $root) = @_;

  # Record every node above the starting one
  my @chain;
  while ($node->[0] eq 'tag') {
    last unless $node = $node->[3];
    push @chain, $node;
  }

  return $root ? $chain[-1] : @chain[0 .. $#chain - 1];
}
|
236 | ||
237 |
# Turn a list of tree nodes into a Mojo::Collection of objects that share the
# XML mode of this object
sub _collect {
  my ($self, @nodes) = @_;
  my $xml  = $self->xml;
  my $wrap = sub { $self->new->tree($_)->xml($xml) };
  return Mojo::Collection->new(@nodes)->map($wrap);
}
|
243 | ||
244 |
# Extract text from the content nodes of this element. The first argument
# controls recursion into child elements, the rest goes to "_trim".
sub _content {
  my ($self, $recurse, @trim) = @_;
  my $tree = $self->tree;
  return _text([_nodes($tree)], $recurse, _trim($tree, @trim));
}
|
248 | ||
249 |
# Call a method on the wrapped object in the first array slot. Without
# further arguments its result is returned (getter), otherwise the arguments
# are passed along and the invocant is returned (setter).
sub _delegate {
  my ($self, $method, @args) = @_;
  my $html = $self->[0];
  return $html->$method unless @args;
  $html->$method(@args);
  return $self;
}
|
255 | ||
256 |
# Take the nodes of a fragment (everything after its first entry), point the
# parent slot of each tag node to the new parent with a weakened reference,
# and return the nodes.
sub _link {
  my ($children, $parent) = @_;

  my @nodes = @$children[1 .. $#$children];

  # Link parent to children
  for my $node (grep { $_->[0] eq 'tag' } @nodes) {
    $node->[3] = $parent;
    weaken $node->[3];
  }

  return @nodes;
}
|
270 | ||
271 |
# Content nodes of a tree node, nothing if no node was given
sub _nodes {
  my $node = shift;
  return unless $node;
  my $first = _offset($node);
  return @$node[$first .. $#$node];
}
|
275 | ||
276 |
# Index of the first content node, 1 for the root node and 4 for anything
# else
sub _offset {
  my $node = shift;
  return 1 if $node->[0] eq 'root';
  return 4;
}
|
277 | ||
278 |
# Index of a child node within its parent. If the child is not found, the
# index right behind the last node is returned.
sub _parent {
  my ($parent, $child) = @_;

  # Find parent offset for child
  my $index = _offset($parent);
  $index++ while $index <= $#$parent && $parent->[$index] != $child;

  return $index;
}
|
290 | ||
291 |
# Parse a fragment with a temporary Mojo::DOM::HTML object that uses the XML
# mode of this object and return the resulting tree
sub _parse {
  my ($self, $markup) = @_;
  my $html = Mojo::DOM::HTML->new(xml => $self->xml);
  return $html->parse($markup)->tree;
}
|
292 | ||
293 |
# Render a tree with a temporary Mojo::DOM::HTML object in the given XML mode
sub _render {
  my ($tree, $xml) = @_;
  return Mojo::DOM::HTML->new(tree => $tree, xml => $xml)->render;
}
|
294 | ||
295 |
# Swap a node for the nodes of a fragment within its parent and return the
# result of "parent" for this object
sub _replace {
  my ($self, $tree, $new) = @_;
  my $parent = $tree->[3];
  my $index  = _parent($parent, $tree);
  splice @$parent, $index, 1, _link($new, $parent);
  return $self->parent;
}
|
301 | ||
302 |
# Filter a collection with a CSS selector; without a defined selector the
# collection is returned as is
sub _select {
  my ($collection, $selector) = @_;
  return $collection unless defined $selector;
  return $collection->grep(sub { $_->match($selector) });
}
|
306 | ||
307 |
# Sibling elements of this element. Returns a reference to one flat list if
# the second argument is true, otherwise a reference to two lists with the
# siblings before and after this element.
sub _siblings {
  my ($self, $merge) = @_;

  # Without a parent there are no siblings
  my $parent = $self->parent;
  unless ($parent) { return $merge ? [] : [[], []] }

  # Everything up to this element goes into the first list, the rest into
  # the second one
  my $own = $self->tree;
  my (@before, @after);
  my $bucket = \@before;
  for my $child ($parent->children->each) {
    if ($child->tree eq $own) { $bucket = \@after; next }
    push @$bucket, $child;
  }

  return $merge ? [@before, @after] : [\@before, \@after];
}
|
321 | ||
322 |
# Build a text string from a list of nodes. Child elements are only included
# if recursion is enabled, and with trimming enabled text nodes are squished
# and chunks without any non-whitespace characters are dropped. Note that the
# node list passed in is modified when neighbouring text nodes get merged.
sub _text {
  my ($nodes, $recurse, $trim) = @_;

  # Merge successive text nodes
  my $pos = 0;
  while (my $following = $nodes->[$pos + 1]) {
    my $current = $nodes->[$pos];
    if ($current->[0] eq 'text' && $following->[0] eq 'text') {
      splice @$nodes, $pos, 2, ['text', $current->[1] . $following->[1]];
    }
    else { $pos++ }
  }

  my $result = '';
  for my $node (@$nodes) {
    my $kind  = $node->[0];
    my $chunk = '';

    # Text
    if ($kind eq 'text') { $chunk = $trim ? squish($node->[1]) : $node->[1] }

    # CDATA or raw text
    elsif ($kind eq 'cdata' || $kind eq 'raw') { $chunk = $node->[1] }

    # Nested tag
    elsif ($kind eq 'tag' && $recurse) {
      $chunk = _text([_nodes($node)], 1, _trim($node, $trim));
    }

    # Add leading whitespace if punctuation allows it
    if ($result =~ /\S\z/ && $chunk =~ /^[^.!?,;:\s]+/) { $chunk = " $chunk" }

    # Trim whitespace blocks
    next unless $chunk =~ /\S+/ || !$trim;
    $result .= $chunk;
  }

  return $result;
}
|
357 | ||
358 |
# Decide if whitespace trimming applies to a node. Returns 0 if no node was
# given, the flag is false, or the node is (inside of) a "pre" element, and 1
# otherwise. An undefined flag counts as enabled.
sub _trim {
  my ($node, $trim) = @_;

  # Disabled
  $trim //= 1;
  return 0 unless $node && $trim;

  # Detect "pre" tag
  for (; $node && $node->[0] eq 'tag'; $node = $node->[3]) {
    return 0 if $node->[1] eq 'pre';
  }

  return 1;
}
|
372 | ||
373 |
1; |
|
374 | ||
375 |
=encoding utf8 |
|
376 | ||
377 |
=head1 NAME |
|
378 | ||
379 |
Mojo::DOM - Minimalistic HTML/XML DOM parser with CSS selectors |
|
380 | ||
381 |
=head1 SYNOPSIS |
|
382 | ||
383 |
use Mojo::DOM; |
|
384 | ||
385 |
# Parse |
|
386 |
my $dom = Mojo::DOM->new('<div><p id="a">A</p><p id="b">B</p></div>'); |
|
387 | ||
388 |
# Find |
|
389 |
say $dom->at('#b')->text; |
|
390 |
say $dom->find('p')->text; |
|
391 |
say $dom->find('[id]')->attr('id'); |
|
392 | ||
393 |
# Walk |
|
394 |
say $dom->div->p->[0]->text; |
|
395 |
say $dom->div->children('p')->first->{id}; |
|
396 | ||
397 |
# Iterate |
|
398 |
$dom->find('p[id]')->each(sub { say shift->{id} }); |
|
399 | ||
400 |
# Loop |
|
401 |
for my $e ($dom->find('p[id]')->each) { |
|
402 |
say $e->text; |
|
403 |
} |
|
404 | ||
405 |
# Modify |
|
406 |
$dom->div->p->[1]->append('<p id="c">C</p>'); |
|
407 |
$dom->find(':not(p)')->strip; |
|
408 | ||
409 |
# Render |
|
410 |
say "$dom"; |
|
411 | ||
412 |
=head1 DESCRIPTION |
|
413 | ||
414 |
L<Mojo::DOM> is a minimalistic and relaxed HTML/XML DOM parser with CSS |
|
415 |
selector support. It will even try to interpret broken XML, so you should not |
|
416 |
use it for validation. |
|
417 | ||
418 |
=head1 CASE SENSITIVITY |
|
419 | ||
420 |
L<Mojo::DOM> defaults to HTML semantics, that means all tags and attributes |
|
421 |
are lowercased and selectors need to be lowercase as well. |
|
422 | ||
423 |
my $dom = Mojo::DOM->new('<P ID="greeting">Hi!</P>'); |
|
424 |
say $dom->at('p')->text; |
|
425 |
say $dom->p->{id}; |
|
426 | ||
427 |
If XML processing instructions are found, the parser will automatically switch |
|
428 |
into XML mode and everything becomes case sensitive. |
|
429 | ||
430 |
my $dom = Mojo::DOM->new('<?xml version="1.0"?><P ID="greeting">Hi!</P>'); |
|
431 |
say $dom->at('P')->text; |
|
432 |
say $dom->P->{ID}; |
|
433 | ||
434 |
XML detection can also be disabled with the L</"xml"> method. |
|
435 | ||
436 |
# Force XML semantics |
|
437 |
$dom->xml(1); |
|
438 | ||
439 |
# Force HTML semantics |
|
440 |
$dom->xml(0); |
|
441 | ||
442 |
=head1 METHODS |
|
443 | ||
444 |
L<Mojo::DOM> implements the following methods. |
|
445 | ||
446 |
=head2 new |
|
447 | ||
448 |
my $dom = Mojo::DOM->new; |
|
449 |
my $dom = Mojo::DOM->new('<foo bar="baz">test</foo>'); |
|
450 | ||
451 |
Construct a new array-based L<Mojo::DOM> object and L</"parse"> HTML/XML |
|
452 |
fragment if necessary. |
|
453 | ||
454 |
=head2 all_text |
|
455 | ||
456 |
my $trimmed = $dom->all_text; |
|
457 |
my $untrimmed = $dom->all_text(0); |
|
458 | ||
459 |
Extract all text content from DOM structure, smart whitespace trimming is |
|
460 |
enabled by default. |
|
461 | ||
462 |
# "foo bar baz" |
|
463 |
$dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->div->all_text; |
|
464 | ||
465 |
# "foo\nbarbaz\n" |
|
466 |
$dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->div->all_text(0); |
|
467 | ||
468 |
=head2 ancestors |
|
469 | ||
470 |
my $collection = $dom->ancestors; |
|
471 |
my $collection = $dom->ancestors('div'); |
|
472 | ||
473 |
Find all ancestors of this element matching the CSS selector and return a |
|
474 |
L<Mojo::Collection> object containing these elements as L<Mojo::DOM> objects. |
|
475 |
All selectors from L<Mojo::DOM::CSS> are supported. |
|
476 | ||
477 |
# List types of ancestor elements |
|
478 |
say $dom->ancestors->type; |
|
479 | ||
480 |
=head2 append |
|
481 | ||
482 |
$dom = $dom->append('<p>Hi!</p>'); |
|
483 | ||
484 |
Append HTML/XML fragment to element. |
|
485 | ||
486 |
# "<div><h1>A</h1><h2>B</h2></div>" |
|
487 |
$dom->parse('<div><h1>A</h1></div>')->at('h1')->append('<h2>B</h2>')->root; |
|
488 | ||
489 |
=head2 append_content |
|
490 | ||
491 |
$dom = $dom->append_content('<p>Hi!</p>'); |
|
492 | ||
493 |
Append HTML/XML fragment to element content. |
|
494 | ||
495 |
# "<div><h1>AB</h1></div>" |
|
496 |
$dom->parse('<div><h1>A</h1></div>')->at('h1')->append_content('B')->root; |
|
497 | ||
498 |
=head2 at |
|
499 | ||
500 |
my $result = $dom->at('html title'); |
|
501 | ||
502 |
Find first element matching the CSS selector and return it as a L<Mojo::DOM> |
|
503 |
object or return C<undef> if none could be found. All selectors from |
|
504 |
L<Mojo::DOM::CSS> are supported. |
|
505 | ||
506 |
# Find first element with "svg" namespace definition |
|
507 |
my $namespace = $dom->at('[xmlns\:svg]')->{'xmlns:svg'}; |
|
508 | ||
509 |
=head2 attr |
|
510 | ||
511 |
my $attrs = $dom->attr; |
|
512 |
my $foo = $dom->attr('foo'); |
|
513 |
$dom = $dom->attr({foo => 'bar'}); |
|
514 |
$dom = $dom->attr(foo => 'bar'); |
|
515 | ||
516 |
Element attributes. |
|
517 | ||
518 |
# List id attributes |
|
519 |
say $dom->find('*')->attr('id')->compact; |
|
520 | ||
521 |
=head2 children |
|
522 | ||
523 |
my $collection = $dom->children; |
|
524 |
my $collection = $dom->children('div'); |
|
525 | ||
526 |
Find all children of this element matching the CSS selector and return a |
|
527 |
L<Mojo::Collection> object containing these elements as L<Mojo::DOM> objects. |
|
528 |
All selectors from L<Mojo::DOM::CSS> are supported. |
|
529 | ||
530 |
# Show type of random child element |
|
531 |
say $dom->children->shuffle->first->type; |
|
532 | ||
533 |
=head2 content_xml |
|
534 | ||
535 |
my $xml = $dom->content_xml; |
|
536 | ||
537 |
Render content of this element to XML. |
|
538 | ||
539 |
# "<b>test</b>" |
|
540 |
$dom->parse('<div><b>test</b></div>')->div->content_xml; |
|
541 | ||
542 |
=head2 find |
|
543 | ||
544 |
my $collection = $dom->find('html title'); |
|
545 | ||
546 |
Find all elements matching the CSS selector and return a L<Mojo::Collection> |
|
547 |
object containing these elements as L<Mojo::DOM> objects. All selectors from |
|
548 |
L<Mojo::DOM::CSS> are supported. |
|
549 | ||
550 |
# Find a specific element and extract information |
|
551 |
my $id = $dom->find('div')->[23]{id}; |
|
552 | ||
553 |
# Extract information from multiple elements |
|
554 |
my @headers = $dom->find('h1, h2, h3')->text->each; |
|
555 |
my @links = $dom->find('a[href]')->attr('href')->each; |
|
556 | ||
557 |
=head2 match |
|
558 | ||
559 |
my $result = $dom->match('html title'); |
|
560 | ||
561 |
Match the CSS selector against this element and return it as a L<Mojo::DOM> |
|
562 |
object or return C<undef> if it didn't match. All selectors from |
|
563 |
L<Mojo::DOM::CSS> are supported. |
|
564 | ||
565 |
=head2 namespace |
|
566 | ||
567 |
my $namespace = $dom->namespace; |
|
568 | ||
569 |
Find element namespace. |
|
570 | ||
571 |
# Find namespace for an element with namespace prefix |
|
572 |
my $namespace = $dom->at('svg > svg\:circle')->namespace; |
|
573 | ||
574 |
# Find namespace for an element that may or may not have a namespace prefix |
|
575 |
my $namespace = $dom->at('svg > circle')->namespace; |
|
576 | ||
577 |
=head2 next |
|
578 | ||
579 |
my $sibling = $dom->next; |
|
580 | ||
581 |
Return L<Mojo::DOM> object for next sibling of element or C<undef> if there |
|
582 |
are no more siblings. |
|
583 | ||
584 |
# "<h2>B</h2>" |
|
585 |
$dom->parse('<div><h1>A</h1><h2>B</h2></div>')->at('h1')->next; |
|
586 | ||
587 |
=head2 parent |
|
588 | ||
589 |
my $parent = $dom->parent; |
|
590 | ||
591 |
Return L<Mojo::DOM> object for parent of element or C<undef> if this element |
|
592 |
has no parent. |
|
593 | ||
594 |
=head2 parse |
|
595 | ||
596 |
$dom = $dom->parse('<foo bar="baz">test</foo>'); |
|
597 | ||
598 |
Parse HTML/XML fragment with L<Mojo::DOM::HTML>. |
|
599 | ||
600 |
# Parse XML |
|
601 |
my $dom = Mojo::DOM->new->xml(1)->parse($xml); |
|
602 | ||
603 |
=head2 prepend |
|
604 | ||
605 |
$dom = $dom->prepend('<p>Hi!</p>'); |
|
606 | ||
607 |
Prepend HTML/XML fragment to element. |
|
608 | ||
609 |
# "<div><h1>A</h1><h2>B</h2></div>" |
|
610 |
$dom->parse('<div><h2>B</h2></div>')->at('h2')->prepend('<h1>A</h1>')->root; |
|
611 | ||
612 |
=head2 prepend_content |
|
613 | ||
614 |
$dom = $dom->prepend_content('<p>Hi!</p>'); |
|
615 | ||
616 |
Prepend HTML/XML fragment to element content. |
|
617 | ||
618 |
# "<div><h2>AB</h2></div>" |
|
619 |
$dom->parse('<div><h2>B</h2></div>')->at('h2')->prepend_content('A')->root; |
|
620 | ||
621 |
=head2 previous |
|
622 | ||
623 |
my $sibling = $dom->previous; |
|
624 | ||
625 |
Return L<Mojo::DOM> object for previous sibling of element or C<undef> if |
|
626 |
there are no more siblings. |
|
627 | ||
628 |
# "<h1>A</h1>" |
|
629 |
$dom->parse('<div><h1>A</h1><h2>B</h2></div>')->at('h2')->previous; |
|
630 | ||
631 |
=head2 remove |
|
632 | ||
633 |
my $parent = $dom->remove; |
|
634 | ||
635 |
Remove element and return L<Mojo::DOM> object for parent of element. |
|
636 | ||
637 |
# "<div></div>" |
|
638 |
$dom->parse('<div><h1>A</h1></div>')->at('h1')->remove; |
|
639 | ||
640 |
=head2 replace |
|
641 | ||
642 |
my $parent = $dom->replace('<div>test</div>'); |
|
643 | ||
644 |
Replace element with HTML/XML fragment and return L<Mojo::DOM> object for |
|
645 |
parent of element. |
|
646 | ||
647 |
# "<div><h2>B</h2></div>" |
|
648 |
$dom->parse('<div><h1>A</h1></div>')->at('h1')->replace('<h2>B</h2>'); |
|
649 | ||
650 |
# "<div></div>" |
|
651 |
$dom->parse('<div><h1>A</h1></div>')->at('h1')->replace(''); |
|
652 | ||
653 |
=head2 replace_content |
|
654 | ||
655 |
$dom = $dom->replace_content('<p>test</p>'); |
|
656 | ||
657 |
Replace element content with HTML/XML fragment. |
|
658 | ||
659 |
# "<div><h1>B</h1></div>" |
|
660 |
$dom->parse('<div><h1>A</h1></div>')->at('h1')->replace_content('B')->root; |
|
661 | ||
662 |
# "<div><h1></h1></div>" |
|
663 |
$dom->parse('<div><h1>A</h1></div>')->at('h1')->replace_content('')->root; |
|
664 | ||
665 |
=head2 root |
|
666 | ||
667 |
my $root = $dom->root; |
|
668 | ||
669 |
Return L<Mojo::DOM> object for root node. |
|
670 | ||
671 |
=head2 siblings |
|
672 | ||
673 |
my $collection = $dom->siblings; |
|
674 |
my $collection = $dom->siblings('div'); |
|
675 | ||
676 |
Find all siblings of this element matching the CSS selector and return a |
|
677 |
L<Mojo::Collection> object containing these elements as L<Mojo::DOM> objects. |
|
678 |
All selectors from L<Mojo::DOM::CSS> are supported. |
|
679 | ||
680 |
# List types of sibling elements |
|
681 |
say $dom->siblings->type; |
|
682 | ||
683 |
=head2 strip |
|
684 | ||
685 |
my $parent = $dom->strip; |
|
686 | ||
687 |
Remove element while preserving its content and return L<Mojo::DOM> object for |
|
688 |
parent of element. |
|
689 | ||
690 |
# "<div>A</div>" |
|
691 |
$dom->parse('<div><h1>A</h1></div>')->at('h1')->strip; |
|
692 | ||
693 |
=head2 tap |
|
694 | ||
695 |
$dom = $dom->tap(sub {...}); |
|
696 | ||
697 |
Alias for L<Mojo::Base/"tap">. |
|
698 | ||
699 |
=head2 text |
|
700 | ||
701 |
my $trimmed = $dom->text; |
|
702 |
my $untrimmed = $dom->text(0); |
|
703 | ||
704 |
Extract text content from element only (not including child elements), smart |
|
705 |
whitespace trimming is enabled by default. |
|
706 | ||
707 |
# "foo baz" |
|
708 |
$dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->div->text; |
|
709 | ||
710 |
# "foo\nbaz\n" |
|
711 |
$dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->div->text(0); |
|
712 | ||
713 |
=head2 text_after |
|
714 | ||
715 |
my $trimmed = $dom->text_after; |
|
716 |
my $untrimmed = $dom->text_after(0); |
|
717 | ||
718 |
Extract text content immediately following element, smart whitespace trimming |
|
719 |
is enabled by default. |
|
720 | ||
721 |
# "baz" |
|
722 |
$dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->div->p->text_after; |
|
723 | ||
724 |
# "baz\n" |
|
725 |
$dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->div->p->text_after(0); |
|
726 | ||
727 |
=head2 text_before |
|
728 | ||
729 |
my $trimmed = $dom->text_before; |
|
730 |
my $untrimmed = $dom->text_before(0); |
|
731 | ||
732 |
Extract text content immediately preceding element, smart whitespace trimming |
|
733 |
is enabled by default. |
|
734 | ||
735 |
# "foo" |
|
736 |
$dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->div->p->text_before; |
|
737 | ||
738 |
# "foo\n" |
|
739 |
$dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->div->p->text_before(0); |
|
740 | ||
741 |
=head2 to_xml |
|
742 | ||
743 |
my $xml = $dom->to_xml; |
|
744 |
my $xml = "$dom"; |
|
745 | ||
746 |
Render this element and its content to XML. |
|
747 | ||
748 |
# "<b>test</b>" |
|
749 |
$dom->parse('<div><b>test</b></div>')->div->b->to_xml; |
|
750 | ||
751 |
=head2 tree |
|
752 | ||
753 |
my $tree = $dom->tree; |
|
754 |
$dom = $dom->tree(['root', ['text', 'foo']]); |
|
755 | ||
756 |
Document Object Model. Note that this structure should only be used very |
|
757 |
carefully since it is very dynamic. |
|
758 | ||
759 |
=head2 type |
|
760 | ||
761 |
my $type = $dom->type; |
|
762 |
$dom = $dom->type('div'); |
|
763 | ||
764 |
Element type. |
|
765 | ||
766 |
# List types of child elements |
|
767 |
say $dom->children->type; |
|
768 | ||
769 |
=head2 xml |
|
770 | ||
771 |
my $bool = $dom->xml; |
|
772 |
$dom = $dom->xml($bool); |
|
773 | ||
774 |
Disable HTML semantics in parser and activate case sensitivity, defaults to |
|
775 |
auto detection based on processing instructions. |
|
776 | ||
777 |
=head1 CHILD ELEMENTS |
|
778 | ||
779 |
In addition to the methods above, many child elements are also automatically |
|
780 |
available as object methods, which return a L<Mojo::DOM> or |
|
781 |
L<Mojo::Collection> object, depending on number of children. |
|
782 | ||
783 |
say $dom->p->text; |
|
784 |
say $dom->div->[23]->text; |
|
785 |
say $dom->div->text; |
|
786 | ||
787 |
=head1 ELEMENT ATTRIBUTES |
|
788 | ||
789 |
Direct hash reference access to element attributes is also possible. |
|
790 | ||
791 |
say $dom->{foo}; |
|
792 |
say $dom->div->{id}; |
|
793 | ||
794 |
=head1 SEE ALSO |
|
795 | ||
796 |
L<Mojolicious>, L<Mojolicious::Guides>, L<http://mojolicio.us>. |
|
797 | ||
798 |
=cut |