Newer Older
587 lines | 13.71kb
add files
Yuki Kimoto authored on 2014-03-26
1
package Mojo::Content;
2
use Mojo::Base 'Mojo::EventEmitter';
3

            
4
use Carp 'croak';
5
use Compress::Raw::Zlib qw(WANT_GZIP Z_STREAM_END);
6
use Mojo::Headers;
7

            
8
has [qw(auto_relax relaxed skip_body)];
9
has headers           => sub { Mojo::Headers->new };
10
has max_buffer_size   => sub { $ENV{MOJO_MAX_BUFFER_SIZE} || 262144 };
11
has max_leftover_size => sub { $ENV{MOJO_MAX_LEFTOVER_SIZE} || 262144 };
12

            
13
sub body_contains {
14
  croak 'Method "body_contains" not implemented by subclass';
15
}
16

            
17
sub body_size { croak 'Method "body_size" not implemented by subclass' }
18

            
19
sub boundary {
20
  return undef unless my $type = shift->headers->content_type;
21
  $type =~ m!multipart.*boundary\s*=\s*(?:"([^"]+)"|([\w'(),.:?\-+/]+))!i
22
    and return $1 // $2;
23
  return undef;
24
}
25

            
26
sub build_body    { shift->_build('get_body_chunk') }
27
sub build_headers { shift->_build('get_header_chunk') }
28

            
29
sub charset {
30
  my $type = shift->headers->content_type // '';
31
  return $type =~ /charset\s*=\s*"?([^"\s;]+)"?/i ? $1 : undef;
32
}
33

            
34
sub clone {
35
  my $self = shift;
36
  return undef if $self->is_dynamic;
37
  return $self->new(headers => $self->headers->clone);
38
}
39

            
40
sub generate_body_chunk {
41
  my ($self, $offset) = @_;
42

            
43
  $self->emit(drain => $offset)
44
    if !delete $self->{delay} && !length($self->{body_buffer} // '');
45
  my $chunk = delete $self->{body_buffer} // '';
46
  return $self->{eof} ? '' : undef unless length $chunk;
47

            
48
  return $chunk;
49
}
50

            
51
sub get_body_chunk {
52
  croak 'Method "get_body_chunk" not implemented by subclass';
53
}
54

            
55
sub get_header_chunk {
56
  my ($self, $offset) = @_;
57

            
58
  unless (defined $self->{header_buffer}) {
59
    my $headers = $self->headers->to_string;
60
    $self->{header_buffer}
61
      = $headers ? "$headers\x0d\x0a\x0d\x0a" : "\x0d\x0a";
62
  }
63

            
64
  return substr $self->{header_buffer}, $offset, 131072;
65
}
66

            
67
sub header_size { length shift->build_headers }
68

            
69
sub is_chunked { !!shift->headers->transfer_encoding }
70

            
71
sub is_compressed { (shift->headers->content_encoding // '') =~ /^gzip$/i }
72

            
73
sub is_dynamic { $_[0]{dynamic} && !defined $_[0]->headers->content_length }
74

            
75
sub is_finished { (shift->{state} // '') eq 'finished' }
76

            
77
sub is_limit_exceeded { !!shift->{limit} }
78

            
79
sub is_multipart {undef}
80

            
81
sub is_parsing_body { (shift->{state} // '') eq 'body' }
82

            
83
sub leftovers { shift->{buffer} }
84

            
85
sub parse {
86
  my $self = shift;
87

            
88
  # Headers
89
  $self->_parse_until_body(@_);
90
  return $self if $self->{state} eq 'headers';
91
  $self->emit('body') unless $self->{body}++;
92

            
93
  # Chunked content
94
  $self->{real_size} //= 0;
95
  if ($self->is_chunked && $self->{state} ne 'headers') {
96
    $self->_parse_chunked;
97
    $self->{state} = 'finished' if ($self->{chunk_state} // '') eq 'finished';
98
  }
99

            
100
  # Not chunked, pass through to second buffer
101
  else {
102
    $self->{real_size} += length $self->{pre_buffer};
103
    my $limit = $self->is_finished
104
      && length($self->{buffer}) > $self->max_leftover_size;
105
    $self->{buffer} .= $self->{pre_buffer} unless $limit;
106
    $self->{pre_buffer} = '';
107
  }
108

            
109
  # No content
110
  if ($self->skip_body) {
111
    $self->{state} = 'finished';
112
    return $self;
113
  }
114

            
115
  # Relaxed parsing
116
  my $headers = $self->headers;
117
  if ($self->auto_relax) {
118
    my $connection = $headers->connection     // '';
119
    my $len        = $headers->content_length // '';
120
    $self->relaxed(1)
121
      if !length $len && ($connection =~ /close/i || $headers->content_type);
122
  }
123

            
124
  # Chunked or relaxed content
125
  if ($self->is_chunked || $self->relaxed) {
126
    $self->{size} += length($self->{buffer} //= '');
127
    $self->_uncompress($self->{buffer});
128
    $self->{buffer} = '';
129
  }
130

            
131
  # Normal content
132
  else {
133
    my $len = $headers->content_length || 0;
134
    $self->{size} ||= 0;
135
    if ((my $need = $len - $self->{size}) > 0) {
136
      my $len = length $self->{buffer};
137
      my $chunk = substr $self->{buffer}, 0, $need > $len ? $len : $need, '';
138
      $self->_uncompress($chunk);
139
      $self->{size} += length $chunk;
140
    }
141
    $self->{state} = 'finished' if $len <= $self->progress;
142
  }
143

            
144
  return $self;
145
}
146

            
147
sub parse_body {
148
  my $self = shift;
149
  $self->{state} = 'body';
150
  return $self->parse(@_);
151
}
152

            
153
sub progress {
154
  my $self = shift;
155
  return 0 unless my $state = $self->{state};
156
  return 0 unless $state eq 'body' || $state eq 'finished';
157
  return $self->{raw_size} - ($self->{header_size} || 0);
158
}
159

            
160
sub write {
161
  my ($self, $chunk, $cb) = @_;
162

            
163
  $self->{dynamic} = 1;
164
  if (defined $chunk) { $self->{body_buffer} .= $chunk }
165
  else                { $self->{delay} = 1 }
166
  $self->once(drain => $cb) if $cb;
167
  $self->{eof} = 1 if defined $chunk && $chunk eq '';
168

            
169
  return $self;
170
}
171

            
172
sub write_chunk {
173
  my ($self, $chunk, $cb) = @_;
174
  $self->headers->transfer_encoding('chunked') unless $self->is_chunked;
175
  $self->write(defined $chunk ? $self->_build_chunk($chunk) : $chunk, $cb);
176
  $self->{eof} = 1 if defined $chunk && $chunk eq '';
177
  return $self;
178
}
179

            
180
sub _build {
181
  my ($self, $method) = @_;
182

            
183
  my $buffer = '';
184
  my $offset = 0;
185
  while (1) {
186

            
187
    # No chunk yet, try again
188
    next unless defined(my $chunk = $self->$method($offset));
189

            
190
    # End of part
191
    last unless my $len = length $chunk;
192

            
193
    $offset += $len;
194
    $buffer .= $chunk;
195
  }
196

            
197
  return $buffer;
198
}
199

            
200
sub _build_chunk {
201
  my ($self, $chunk) = @_;
202

            
203
  # End
204
  return "\x0d\x0a0\x0d\x0a\x0d\x0a" if length $chunk == 0;
205

            
206
  # First chunk has no leading CRLF
207
  my $crlf = $self->{chunks}++ ? "\x0d\x0a" : '';
208
  return $crlf . sprintf('%x', length $chunk) . "\x0d\x0a$chunk";
209
}
210

            
211
sub _parse_chunked {
212
  my $self = shift;
213

            
214
  # Trailing headers
215
  return $self->_parse_chunked_trailing_headers
216
    if ($self->{chunk_state} // '') eq 'trailing_headers';
217

            
218
  while (my $len = length $self->{pre_buffer}) {
219

            
220
    # Start new chunk (ignore the chunk extension)
221
    unless ($self->{chunk_len}) {
222
      last
223
        unless $self->{pre_buffer} =~ s/^(?:\x0d?\x0a)?([0-9a-fA-F]+).*\x0a//;
224
      next if $self->{chunk_len} = hex $1;
225

            
226
      # Last chunk
227
      $self->{chunk_state} = 'trailing_headers';
228
      last;
229
    }
230

            
231
    # Remove as much as possible from payload
232
    $len = $self->{chunk_len} if $self->{chunk_len} < $len;
233
    $self->{buffer} .= substr $self->{pre_buffer}, 0, $len, '';
234
    $self->{real_size} += $len;
235
    $self->{chunk_len} -= $len;
236
  }
237

            
238
  # Trailing headers
239
  $self->_parse_chunked_trailing_headers
240
    if ($self->{chunk_state} // '') eq 'trailing_headers';
241

            
242
  # Check buffer size
243
  $self->{limit} = $self->{state} = 'finished'
244
    if length($self->{pre_buffer} // '') > $self->max_buffer_size;
245
}
246

            
247
sub _parse_chunked_trailing_headers {
248
  my $self = shift;
249

            
250
  my $headers = $self->headers->parse(delete $self->{pre_buffer});
251
  return unless $headers->is_finished;
252
  $self->{chunk_state} = 'finished';
253

            
254
  # Replace Transfer-Encoding with Content-Length
255
  $headers->remove('Transfer-Encoding');
256
  $headers->content_length($self->{real_size}) unless $headers->content_length;
257
}
258

            
259
sub _parse_headers {
260
  my $self = shift;
261

            
262
  my $headers = $self->headers->parse(delete $self->{pre_buffer});
263
  return unless $headers->is_finished;
264
  $self->{state} = 'body';
265

            
266
  # Take care of leftovers
267
  my $leftovers = $self->{pre_buffer} = $headers->leftovers;
268
  $self->{header_size} = $self->{raw_size} - length $leftovers;
269
  $self->emit('body') unless $self->{body}++;
270
}
271

            
272
sub _parse_until_body {
273
  my ($self, $chunk) = @_;
274

            
275
  $self->{raw_size} += length($chunk //= '');
276
  $self->{pre_buffer} .= $chunk;
277

            
278
  unless ($self->{state}) {
279
    $self->{header_size} = $self->{raw_size} - length $self->{pre_buffer};
280
    $self->{state}       = 'headers';
281
  }
282
  $self->_parse_headers if ($self->{state} // '') eq 'headers';
283
}
284

            
285
sub _uncompress {
286
  my ($self, $chunk) = @_;
287

            
288
  # No compression
289
  return $self->emit(read => $chunk) unless $self->is_compressed;
290

            
291
  # Uncompress
292
  $self->{post_buffer} .= $chunk;
293
  my $gz = $self->{gz}
294
    //= Compress::Raw::Zlib::Inflate->new(WindowBits => WANT_GZIP);
295
  my $status = $gz->inflate(\$self->{post_buffer}, my $out);
296
  $self->emit(read => $out) if defined $out;
297

            
298
  # Replace Content-Encoding with Content-Length
299
  $self->headers->content_length($gz->total_out)->remove('Content-Encoding')
300
    if $status == Z_STREAM_END;
301

            
302
  # Check buffer size
303
  $self->{limit} = $self->{state} = 'finished'
304
    if length($self->{post_buffer} // '') > $self->max_buffer_size;
305
}
306

            
307
1;
308

            
309
=encoding utf8
310

            
311
=head1 NAME
312

            
313
Mojo::Content - HTTP content base class
314

            
315
=head1 SYNOPSIS
316

            
317
  package Mojo::Content::MyContent;
318
  use Mojo::Base 'Mojo::Content';
319

            
320
  sub body_contains  {...}
321
  sub body_size      {...}
322
  sub get_body_chunk {...}
323

            
324
=head1 DESCRIPTION
325

            
326
L<Mojo::Content> is an abstract base class for HTTP content as described in
327
RFC 2616.
328

            
329
=head1 EVENTS
330

            
331
L<Mojo::Content> inherits all events from L<Mojo::EventEmitter> and can emit
332
the following new ones.
333

            
334
=head2 body
335

            
336
  $content->on(body => sub {
337
    my $content = shift;
338
    ...
339
  });
340

            
341
Emitted once all headers have been parsed and the body starts.
342

            
343
  $content->on(body => sub {
344
    my $content = shift;
345
    $content->auto_upgrade(0) if $content->headers->header('X-No-MultiPart');
346
  });
347

            
348
=head2 drain
349

            
350
  $content->on(drain => sub {
351
    my ($content, $offset) = @_;
352
    ...
353
  });
354

            
355
Emitted once all data has been written.
356

            
357
  $content->on(drain => sub {
358
    my $content = shift;
359
    $content->write_chunk(time);
360
  });
361

            
362
=head2 read
363

            
364
  $content->on(read => sub {
365
    my ($content, $bytes) = @_;
366
    ...
367
  });
368

            
369
Emitted when a new chunk of content arrives.
370

            
371
  $content->unsubscribe('read');
372
  $content->on(read => sub {
373
    my ($content, $bytes) = @_;
374
    say "Streaming: $bytes";
375
  });
376

            
377
=head1 ATTRIBUTES
378

            
379
L<Mojo::Content> implements the following attributes.
380

            
381
=head2 auto_relax
382

            
383
  my $bool = $content->auto_relax;
384
  $content = $content->auto_relax($bool);
385

            
386
Try to detect when relaxed parsing is necessary.
387

            
388
=head2 headers
389

            
390
  my $headers = $content->headers;
391
  $content    = $content->headers(Mojo::Headers->new);
392

            
393
Content headers, defaults to a L<Mojo::Headers> object.
394

            
395
=head2 max_buffer_size
396

            
397
  my $size = $content->max_buffer_size;
398
  $content = $content->max_buffer_size(1024);
399

            
400
Maximum size in bytes of buffer for content parser, defaults to the value of
401
the MOJO_MAX_BUFFER_SIZE environment variable or C<262144>.
402

            
403
=head2 max_leftover_size
404

            
405
  my $size = $content->max_leftover_size;
406
  $content = $content->max_leftover_size(1024);
407

            
408
Maximum size in bytes of buffer for pipelined HTTP requests, defaults to the
409
value of the MOJO_MAX_LEFTOVER_SIZE environment variable or C<262144>.
410

            
411
=head2 relaxed
412

            
413
  my $bool = $content->relaxed;
414
  $content = $content->relaxed($bool);
415

            
416
Activate relaxed parsing for responses that are terminated with a connection
417
close.
418

            
419
=head2 skip_body
420

            
421
  my $bool = $content->skip_body;
422
  $content = $content->skip_body($bool);
423

            
424
Skip body parsing and finish after headers.
425

            
426
=head1 METHODS
427

            
428
L<Mojo::Content> inherits all methods from L<Mojo::EventEmitter> and
429
implements the following new ones.
430

            
431
=head2 body_contains
432

            
433
  my $bool = $content->body_contains('foo bar baz');
434

            
435
Check if content contains a specific string. Meant to be overloaded in a
436
subclass.
437

            
438
=head2 body_size
439

            
440
  my $size = $content->body_size;
441

            
442
Content size in bytes. Meant to be overloaded in a subclass.
443

            
444
=head2 boundary
445

            
446
  my $boundary = $content->boundary;
447

            
448
Extract multipart boundary from C<Content-Type> header.
449

            
450
=head2 build_body
451

            
452
  my $str = $content->build_body;
453

            
454
Render whole body.
455

            
456
=head2 build_headers
457

            
458
  my $str = $content->build_headers;
459

            
460
Render all headers.
461

            
462
=head2 charset
463

            
464
  my $charset = $content->charset;
465

            
466
Extract charset from C<Content-Type> header.
467

            
468
=head2 clone
469

            
470
  my $clone = $content->clone;
471

            
472
Clone content if possible, otherwise return C<undef>.
473

            
474
=head2 generate_body_chunk
475

            
476
  my $bytes = $content->generate_body_chunk(0);
477

            
478
Generate dynamic content.
479

            
480
=head2 get_body_chunk
481

            
482
  my $bytes = $content->get_body_chunk(0);
483

            
484
Get a chunk of content starting from a specific position. Meant to be
485
overloaded in a subclass.
486

            
487
=head2 get_header_chunk
488

            
489
  my $bytes = $content->get_header_chunk(13);
490

            
491
Get a chunk of the headers starting from a specific position.
492

            
493
=head2 header_size
494

            
495
  my $size = $content->header_size;
496

            
497
Size of headers in bytes.
498

            
499
=head2 is_chunked
500

            
501
  my $bool = $content->is_chunked;
502

            
503
Check if content is chunked.
504

            
505
=head2 is_compressed
506

            
507
  my $bool = $content->is_compressed;
508

            
509
Check if content is C<gzip> compressed.
510

            
511
=head2 is_dynamic
512

            
513
  my $bool = $content->is_dynamic;
514

            
515
Check if content will be dynamically generated, which prevents L</"clone">
516
from working.
517

            
518
=head2 is_finished
519

            
520
  my $bool = $content->is_finished;
521

            
522
Check if parser is finished.
523

            
524
=head2 is_limit_exceeded
525

            
526
  my $bool = $content->is_limit_exceeded;
527

            
528
Check if buffer has exceeded L</"max_buffer_size">.
529

            
530
=head2 is_multipart
531

            
532
  my $false = $content->is_multipart;
533

            
534
False.
535

            
536
=head2 is_parsing_body
537

            
538
  my $bool = $content->is_parsing_body;
539

            
540
Check if body parsing started yet.
541

            
542
=head2 leftovers
543

            
544
  my $bytes = $content->leftovers;
545

            
546
Get leftover data from content parser.
547

            
548
=head2 parse
549

            
550
  $content
551
    = $content->parse("Content-Length: 12\x0d\x0a\x0d\x0aHello World!");
552

            
553
Parse content chunk.
554

            
555
=head2 parse_body
556

            
557
  $content = $content->parse_body('Hi!');
558

            
559
Parse body chunk and skip headers.
560

            
561
=head2 progress
562

            
563
  my $size = $content->progress;
564

            
565
Size of content already received from message in bytes.
566

            
567
=head2 write
568

            
569
  $content = $content->write($bytes);
570
  $content = $content->write($bytes => sub {...});
571

            
572
Write dynamic content non-blocking, the optional drain callback will be
573
invoked once all data has been written.
574

            
575
=head2 write_chunk
576

            
577
  $content = $content->write_chunk($bytes);
578
  $content = $content->write_chunk($bytes => sub {...});
579

            
580
Write dynamic content non-blocking with C<chunked> transfer encoding, the
581
optional drain callback will be invoked once all data has been written.
582

            
583
=head1 SEE ALSO
584

            
585
L<Mojolicious>, L<Mojolicious::Guides>, L<http://mojolicio.us>.
586

            
587
=cut