Newer Older
659 lines | 13.764kb
add files
Yuki Kimoto authored on 2014-03-26
1
package Mojo::Util;
2
use Mojo::Base 'Exporter';
3

            
4
use Carp qw(carp croak);
5
use Data::Dumper ();
6
use Digest::MD5 qw(md5 md5_hex);
7
use Digest::SHA qw(hmac_sha1 sha1 sha1_hex);
8
use Encode 'find_encoding';
9
use File::Basename 'dirname';
10
use File::Spec::Functions 'catfile';
11
use MIME::Base64 qw(decode_base64 encode_base64);
12
use Time::HiRes ();
13

            
14
# Check for monotonic clock support
15
use constant MONOTONIC => eval
16
  '!!Time::HiRes::clock_gettime(Time::HiRes::CLOCK_MONOTONIC())';
17

            
18
# Punycode bootstring parameters
19
use constant {
20
  PC_BASE         => 36,
21
  PC_TMIN         => 1,
22
  PC_TMAX         => 26,
23
  PC_SKEW         => 38,
24
  PC_DAMP         => 700,
25
  PC_INITIAL_BIAS => 72,
26
  PC_INITIAL_N    => 128
27
};
28

            
29
# To update HTML entities run this command
30
# perl examples/entities.pl > lib/Mojo/entities.txt
31
my %ENTITIES;
32
for my $line (split "\x0a", slurp(catfile dirname(__FILE__), 'entities.txt')) {
33
  next unless $line =~ /^(\S+)\s+U\+(\S+)(?:\s+U\+(\S+))?/;
34
  $ENTITIES{$1} = defined $3 ? (chr(hex $2) . chr(hex $3)) : chr(hex $2);
35
}
36

            
37
# Encoding cache
38
my %CACHE;
39

            
40
our @EXPORT_OK = (
41
  qw(b64_decode b64_encode camelize class_to_file class_to_path decamelize),
42
  qw(decode deprecated dumper encode get_line hmac_sha1_sum html_unescape),
43
  qw(md5_bytes md5_sum monkey_patch punycode_decode punycode_encode quote),
44
  qw(secure_compare sha1_bytes sha1_sum slurp split_header spurt squish),
45
  qw(steady_time trim unquote url_escape url_unescape xml_escape xor_encode)
46
);
47

            
48
sub b64_decode { decode_base64($_[0]) }
49
sub b64_encode { encode_base64($_[0], $_[1]) }
50

            
51
sub camelize {
52
  my $str = shift;
53
  return $str if $str =~ /^[A-Z]/;
54

            
55
  # CamelCase words
56
  return join '::', map {
57
    join '', map { ucfirst lc } split /_/, $_
58
  } split /-/, $str;
59
}
60

            
61
sub class_to_file {
62
  my $class = shift;
63
  $class =~ s/::|'//g;
64
  $class =~ s/([A-Z])([A-Z]*)/$1.lc($2)/ge;
65
  return decamelize($class);
66
}
67

            
68
sub class_to_path { join '.', join('/', split /::|'/, shift), 'pm' }
69

            
70
sub decamelize {
71
  my $str = shift;
72
  return $str if $str !~ /^[A-Z]/;
73

            
74
  # Module parts
75
  my @parts;
76
  for my $part (split /::/, $str) {
77

            
78
    # snake_case words
79
    my @words;
80
    push @words, lc $1 while $part =~ s/([A-Z]{1}[^A-Z]*)//;
81
    push @parts, join '_', @words;
82
  }
83

            
84
  return join '-', @parts;
85
}
86

            
87
sub decode {
88
  my ($encoding, $bytes) = @_;
89
  return undef
90
    unless eval { $bytes = _encoding($encoding)->decode("$bytes", 1); 1 };
91
  return $bytes;
92
}
93

            
94
sub deprecated {
95
  local $Carp::CarpLevel = 1;
96
  $ENV{MOJO_FATAL_DEPRECATIONS} ? croak(@_) : carp(@_);
97
}
98

            
99
sub dumper { Data::Dumper->new([@_])->Indent(1)->Sortkeys(1)->Terse(1)->Dump }
100

            
101
sub encode { _encoding($_[0])->encode("$_[1]") }
102

            
103
sub get_line {
104

            
105
  # Locate line ending
106
  return undef if (my $pos = index ${$_[0]}, "\x0a") == -1;
107

            
108
  # Extract line and ending
109
  my $line = substr ${$_[0]}, 0, $pos + 1, '';
110
  $line =~ s/\x0d?\x0a$//;
111

            
112
  return $line;
113
}
114

            
115
sub hmac_sha1_sum { unpack 'H*', hmac_sha1(@_) }
116

            
117
sub html_unescape {
118
  my $str = shift;
119
  return $str if index($str, '&') == -1;
120
  $str =~ s/&(?:\#((?:\d{1,7}|x[0-9a-fA-F]{1,6}));|(\w+;?))/_decode($1, $2)/ge;
121
  return $str;
122
}
123

            
124
sub md5_bytes { md5(@_) }
125
sub md5_sum   { md5_hex(@_) }
126

            
127
sub monkey_patch {
128
  my ($class, %patch) = @_;
129
  no strict 'refs';
130
  no warnings 'redefine';
131
  *{"${class}::$_"} = $patch{$_} for keys %patch;
132
}
133

            
134
# Direct translation of RFC 3492
135
sub punycode_decode {
136
  my $input = shift;
137
  use integer;
138

            
139
  my $n    = PC_INITIAL_N;
140
  my $i    = 0;
141
  my $bias = PC_INITIAL_BIAS;
142
  my @output;
143

            
144
  # Consume all code points before the last delimiter
145
  push @output, split //, $1 if $input =~ s/(.*)\x2d//s;
146

            
147
  while (length $input) {
148
    my $oldi = $i;
149
    my $w    = 1;
150

            
151
    # Base to infinity in steps of base
152
    for (my $k = PC_BASE; 1; $k += PC_BASE) {
153
      my $digit = ord substr $input, 0, 1, '';
154
      $digit = $digit < 0x40 ? $digit + (26 - 0x30) : ($digit & 0x1f) - 1;
155
      $i += $digit * $w;
156
      my $t = $k - $bias;
157
      $t = $t < PC_TMIN ? PC_TMIN : $t > PC_TMAX ? PC_TMAX : $t;
158
      last if $digit < $t;
159
      $w *= PC_BASE - $t;
160
    }
161

            
162
    $bias = _adapt($i - $oldi, @output + 1, $oldi == 0);
163
    $n += $i / (@output + 1);
164
    $i = $i % (@output + 1);
165
    splice @output, $i++, 0, chr $n;
166
  }
167

            
168
  return join '', @output;
169
}
170

            
171
# Direct translation of RFC 3492
172
sub punycode_encode {
173
  my $output = shift;
174
  use integer;
175

            
176
  my $n     = PC_INITIAL_N;
177
  my $delta = 0;
178
  my $bias  = PC_INITIAL_BIAS;
179

            
180
  # Extract basic code points
181
  my $len   = length $output;
182
  my @input = map {ord} split //, $output;
183
  my @chars = sort grep { $_ >= PC_INITIAL_N } @input;
184
  $output =~ s/[^\x00-\x7f]+//gs;
185
  my $h = my $b = length $output;
186
  $output .= "\x2d" if $b > 0;
187

            
188
  for my $m (@chars) {
189
    next if $m < $n;
190
    $delta += ($m - $n) * ($h + 1);
191
    $n = $m;
192

            
193
    for (my $i = 0; $i < $len; $i++) {
194
      my $c = $input[$i];
195

            
196
      if ($c < $n) { $delta++ }
197
      elsif ($c == $n) {
198
        my $q = $delta;
199

            
200
        # Base to infinity in steps of base
201
        for (my $k = PC_BASE; 1; $k += PC_BASE) {
202
          my $t = $k - $bias;
203
          $t = $t < PC_TMIN ? PC_TMIN : $t > PC_TMAX ? PC_TMAX : $t;
204
          last if $q < $t;
205
          my $o = $t + (($q - $t) % (PC_BASE - $t));
206
          $output .= chr $o + ($o < 26 ? 0x61 : 0x30 - 26);
207
          $q = ($q - $t) / (PC_BASE - $t);
208
        }
209

            
210
        $output .= chr $q + ($q < 26 ? 0x61 : 0x30 - 26);
211
        $bias = _adapt($delta, $h + 1, $h == $b);
212
        $delta = 0;
213
        $h++;
214
      }
215
    }
216

            
217
    $delta++;
218
    $n++;
219
  }
220

            
221
  return $output;
222
}
223

            
224
sub quote {
225
  my $str = shift;
226
  $str =~ s/(["\\])/\\$1/g;
227
  return qq{"$str"};
228
}
229

            
230
sub secure_compare {
231
  my ($a, $b) = @_;
232
  return undef if length $a != length $b;
233
  my $r = 0;
234
  $r |= ord(substr $a, $_) ^ ord(substr $b, $_) for 0 .. length($a) - 1;
235
  return $r == 0;
236
}
237

            
238
sub sha1_bytes { sha1(@_) }
239
sub sha1_sum   { sha1_hex(@_) }
240

            
241
sub slurp {
242
  my $path = shift;
243
  croak qq{Can't open file "$path": $!} unless open my $file, '<', $path;
244
  my $content = '';
245
  while ($file->sysread(my $buffer, 131072, 0)) { $content .= $buffer }
246
  return $content;
247
}
248

            
249
sub split_header {
250
  my $str = shift;
251

            
252
  my (@tree, @token);
253
  while ($str =~ s/^[,;\s]*([^=;, ]+)\s*//) {
254
    push @token, $1, undef;
255
    $token[-1] = unquote($1)
256
      if $str =~ s/^=\s*("(?:\\\\|\\"|[^"])*"|[^;, ]*)\s*//;
257

            
258
    # Separator
259
    $str =~ s/^;\s*//;
260
    next unless $str =~ s/^,\s*//;
261
    push @tree, [@token];
262
    @token = ();
263
  }
264

            
265
  # Take care of final token
266
  return [@token ? (@tree, \@token) : @tree];
267
}
268

            
269
sub spurt {
270
  my ($content, $path) = @_;
271
  croak qq{Can't open file "$path": $!} unless open my $file, '>', $path;
272
  croak qq{Can't write to file "$path": $!}
273
    unless defined $file->syswrite($content);
274
  return $content;
275
}
276

            
277
sub squish {
278
  my $str = trim(@_);
279
  $str =~ s/\s+/ /g;
280
  return $str;
281
}
282

            
283
sub steady_time () {
284
  MONOTONIC
285
    ? Time::HiRes::clock_gettime(Time::HiRes::CLOCK_MONOTONIC())
286
    : Time::HiRes::time;
287
}
288

            
289
sub trim {
290
  my $str = shift;
291
  $str =~ s/^\s+|\s+$//g;
292
  return $str;
293
}
294

            
295
sub unquote {
296
  my $str = shift;
297
  return $str unless $str =~ s/^"(.*)"$/$1/g;
298
  $str =~ s/\\\\/\\/g;
299
  $str =~ s/\\"/"/g;
300
  return $str;
301
}
302

            
303
sub url_escape {
304
  my ($str, $pattern) = @_;
305
  $pattern ||= '^A-Za-z0-9\-._~';
306
  $str =~ s/([$pattern])/sprintf('%%%02X',ord($1))/ge;
307
  return $str;
308
}
309

            
310
sub url_unescape {
311
  my $str = shift;
312
  return $str if index($str, '%') == -1;
313
  $str =~ s/%([0-9a-fA-F]{2})/chr(hex($1))/ge;
314
  return $str;
315
}
316

            
317
sub xml_escape {
318
  my $str = shift;
319

            
320
  $str =~ s/&/&amp;/g;
321
  $str =~ s/</&lt;/g;
322
  $str =~ s/>/&gt;/g;
323
  $str =~ s/"/&quot;/g;
324
  $str =~ s/'/&#39;/g;
325

            
326
  return $str;
327
}
328

            
329
sub xor_encode {
330
  my ($input, $key) = @_;
331

            
332
  # Encode with variable key length
333
  my $len = length $key;
334
  my $buffer = my $output = '';
335
  $output .= $buffer ^ $key
336
    while length($buffer = substr($input, 0, $len, '')) == $len;
337
  return $output .= $buffer ^ substr($key, 0, length $buffer, '');
338
}
339

            
340
sub _adapt {
341
  my ($delta, $numpoints, $firsttime) = @_;
342
  use integer;
343

            
344
  $delta = $firsttime ? $delta / PC_DAMP : $delta / 2;
345
  $delta += $delta / $numpoints;
346
  my $k = 0;
347
  while ($delta > ((PC_BASE - PC_TMIN) * PC_TMAX) / 2) {
348
    $delta /= PC_BASE - PC_TMIN;
349
    $k += PC_BASE;
350
  }
351

            
352
  return $k + (((PC_BASE - PC_TMIN + 1) * $delta) / ($delta + PC_SKEW));
353
}
354

            
355
sub _decode {
356
  my ($point, $name) = @_;
357

            
358
  # Code point
359
  return chr($point !~ /^x/ ? $point : hex $point) unless defined $name;
360

            
361
  # Find entity name
362
  my $rest = '';
363
  while (length $name) {
364
    return "$ENTITIES{$name}$rest" if exists $ENTITIES{$name};
365
    $rest = chop($name) . $rest;
366
  }
367
  return "&$rest";
368
}
369

            
370
sub _encoding {
371
  $CACHE{$_[0]} //= find_encoding($_[0]) // croak "Unknown encoding '$_[0]'";
372
}
373

            
374
1;
375

            
376
=encoding utf8
377

            
378
=head1 NAME
379

            
380
Mojo::Util - Portable utility functions
381

            
382
=head1 SYNOPSIS
383

            
384
  use Mojo::Util qw(b64_encode url_escape url_unescape);
385

            
386
  my $str = 'test=23';
387
  my $escaped = url_escape $str;
388
  say url_unescape $escaped;
389
  say b64_encode $escaped, '';
390

            
391
=head1 DESCRIPTION
392

            
393
L<Mojo::Util> provides portable utility functions for L<Mojo>.
394

            
395
=head1 FUNCTIONS
396

            
397
L<Mojo::Util> implements the following functions.
398

            
399
=head2 b64_decode
400

            
401
  my $bytes = b64_decode $b64;
402

            
403
Base64 decode bytes.
404

            
405
=head2 b64_encode
406

            
407
  my $b64 = b64_encode $bytes;
408
  my $b64 = b64_encode $bytes, "\n";
409

            
410
Base64 encode bytes, the line ending defaults to a newline.
411

            
412
=head2 camelize
413

            
414
  my $camelcase = camelize $snakecase;
415

            
416
Convert snake_case string to CamelCase and replace C<-> with C<::>.
417

            
418
  # "FooBar"
419
  camelize 'foo_bar';
420

            
421
  # "FooBar::Baz"
422
  camelize 'foo_bar-baz';
423

            
424
  # "FooBar::Baz"
425
  camelize 'FooBar::Baz';
426

            
427
=head2 class_to_file
428

            
429
  my $file = class_to_file 'Foo::Bar';
430

            
431
Convert a class name to a file.
432

            
433
  # "foo_bar"
434
  class_to_file 'Foo::Bar';
435

            
436
  # "foobar"
437
  class_to_file 'FOO::Bar';
438

            
439
  # "foo_bar"
440
  class_to_file 'FooBar';
441

            
442
  # "foobar"
443
  class_to_file 'FOOBar';
444

            
445
=head2 class_to_path
446

            
447
  my $path = class_to_path 'Foo::Bar';
448

            
449
Convert class name to path.
450

            
451
  # "Foo/Bar.pm"
452
  class_to_path 'Foo::Bar';
453

            
454
  # "FooBar.pm"
455
  class_to_path 'FooBar';
456

            
457
=head2 decamelize
458

            
459
  my $snakecase = decamelize $camelcase;
460

            
461
Convert CamelCase string to snake_case and replace C<::> with C<->.
462

            
463
  # "foo_bar"
464
  decamelize 'FooBar';
465

            
466
  # "foo_bar-baz"
467
  decamelize 'FooBar::Baz';
468

            
469
  # "foo_bar-baz"
470
  decamelize 'foo_bar-baz';
471

            
472
=head2 decode
473

            
474
  my $chars = decode 'UTF-8', $bytes;
475

            
476
Decode bytes to characters and return C<undef> if decoding failed.
477

            
478
=head2 deprecated
479

            
480
  deprecated 'foo is DEPRECATED in favor of bar';
481

            
482
Warn about deprecated feature from perspective of caller. You can also set the
483
MOJO_FATAL_DEPRECATIONS environment variable to make them die instead.
484

            
485
=head2 dumper
486

            
487
  my $perl = dumper {some => 'data'};
488

            
489
Dump a Perl data structure with L<Data::Dumper>.
490

            
491
=head2 encode
492

            
493
  my $bytes = encode 'UTF-8', $chars;
494

            
495
Encode characters to bytes.
496

            
497
=head2 get_line
498

            
499
  my $line = get_line \$str;
500

            
501
Extract whole line from string or return C<undef>. Lines are expected to end
502
with C<0x0d 0x0a> or C<0x0a>.
503

            
504
=head2 hmac_sha1_sum
505

            
506
  my $checksum = hmac_sha1_sum $bytes, 'passw0rd';
507

            
508
Generate HMAC-SHA1 checksum for bytes.
509

            
510
=head2 html_unescape
511

            
512
  my $str = html_unescape $escaped;
513

            
514
Unescape all HTML entities in string.
515

            
516
=head2 md5_bytes
517

            
518
  my $checksum = md5_bytes $bytes;
519

            
520
Generate binary MD5 checksum for bytes.
521

            
522
=head2 md5_sum
523

            
524
  my $checksum = md5_sum $bytes;
525

            
526
Generate MD5 checksum for bytes.
527

            
528
=head2 monkey_patch
529

            
530
  monkey_patch $package, foo => sub {...};
531
  monkey_patch $package, foo => sub {...}, bar => sub {...};
532

            
533
Monkey patch functions into package.
534

            
535
  monkey_patch 'MyApp',
536
    one   => sub { say 'One!' },
537
    two   => sub { say 'Two!' },
538
    three => sub { say 'Three!' };
539

            
540
=head2 punycode_decode
541

            
542
  my $str = punycode_decode $punycode;
543

            
544
Punycode decode string.
545

            
546
=head2 punycode_encode
547

            
548
  my $punycode = punycode_encode $str;
549

            
550
Punycode encode string.
551

            
552
=head2 quote
553

            
554
  my $quoted = quote $str;
555

            
556
Quote string.
557

            
558
=head2 secure_compare
559

            
560
  my $bool = secure_compare $str1, $str2;
561

            
562
Constant time comparison algorithm to prevent timing attacks.
563

            
564
=head2 sha1_bytes
565

            
566
  my $checksum = sha1_bytes $bytes;
567

            
568
Generate binary SHA1 checksum for bytes.
569

            
570
=head2 sha1_sum
571

            
572
  my $checksum = sha1_sum $bytes;
573

            
574
Generate SHA1 checksum for bytes.
575

            
576
=head2 slurp
577

            
578
  my $bytes = slurp '/etc/passwd';
579

            
580
Read all data at once from file.
581

            
582
=head2 split_header
583

            
584
   my $tree = split_header 'foo="bar baz"; test=123, yada';
585

            
586
Split HTTP header value.
587

            
588
  # "one"
589
  split_header('one; two="three four", five=six')->[0][0];
590

            
591
  # "three four"
592
  split_header('one; two="three four", five=six')->[0][3];
593

            
594
  # "five"
595
  split_header('one; two="three four", five=six')->[1][0];
596

            
597
=head2 spurt
598

            
599
  $bytes = spurt $bytes, '/etc/passwd';
600

            
601
Write all data at once to file.
602

            
603
=head2 squish
604

            
605
  my $squished = squish $str;
606

            
607
Trim whitespace characters from both ends of string and then change all
608
consecutive groups of whitespace into one space each.
609

            
610
=head2 steady_time
611

            
612
  my $time = steady_time;
613

            
614
High resolution time, resilient to time jumps if a monotonic clock is
615
available through L<Time::HiRes>.
616

            
617
=head2 trim
618

            
619
  my $trimmed = trim $str;
620

            
621
Trim whitespace characters from both ends of string.
622

            
623
=head2 unquote
624

            
625
  my $str = unquote $quoted;
626

            
627
Unquote string.
628

            
629
=head2 url_escape
630

            
631
  my $escaped = url_escape $str;
632
  my $escaped = url_escape $str, '^A-Za-z0-9\-._~';
633

            
634
Percent encode unsafe characters in string, the pattern used defaults to
635
C<^A-Za-z0-9\-._~>.
636

            
637
=head2 url_unescape
638

            
639
  my $str = url_unescape $escaped;
640

            
641
Decode percent encoded characters in string.
642

            
643
=head2 xml_escape
644

            
645
  my $escaped = xml_escape $str;
646

            
647
Escape unsafe characters C<&>, C<E<lt>>, C<E<gt>>, C<"> and C<'> in string.
648

            
649
=head2 xor_encode
650

            
651
  my $encoded = xor_encode $str, $key;
652

            
653
XOR encode string with variable length key.
654

            
655
=head1 SEE ALSO
656

            
657
L<Mojolicious>, L<Mojolicious::Guides>, L<http://mojolicio.us>.
658

            
659
=cut