1 contributor
    
    
      
    
        package Mojo::Util;
use Mojo::Base 'Exporter';
use Carp qw(carp croak);
use Data::Dumper ();
use Digest::MD5 qw(md5 md5_hex);
BEGIN {eval {require Digest::SHA; import Digest::SHA qw(hmac_sha1 sha1 sha1_hex)}}
use Encode 'find_encoding';
use File::Basename 'dirname';
use File::Spec::Functions 'catfile';
use MIME::Base64 qw(decode_base64 encode_base64);
use Time::HiRes ();
# Check for monotonic clock support
use constant MONOTONIC => eval
  '!!Time::HiRes::clock_gettime(Time::HiRes::CLOCK_MONOTONIC())';
# Punycode bootstring parameters
use constant {
  PC_BASE         => 36,
  PC_TMIN         => 1,
  PC_TMAX         => 26,
  PC_SKEW         => 38,
  PC_DAMP         => 700,
  PC_INITIAL_BIAS => 72,
  PC_INITIAL_N    => 128
};
# To update HTML entities run this command
# perl examples/entities.pl > lib/Mojo/entities.txt
my %ENTITIES;
for my $line (split "\x0a", slurp(catfile dirname(__FILE__), 'entities.txt')) {
  next unless $line =~ /^(\S+)\s+U\+(\S+)(?:\s+U\+(\S+))?/;
  $ENTITIES{$1} = defined $3 ? (chr(hex $2) . chr(hex $3)) : chr(hex $2);
}
# Encoding cache
my %CACHE;
our @EXPORT_OK = (
  qw(b64_decode b64_encode camelize class_to_file class_to_path decamelize),
  qw(decode deprecated dumper encode get_line hmac_sha1_sum html_unescape),
  qw(md5_bytes md5_sum monkey_patch punycode_decode punycode_encode quote),
  qw(secure_compare sha1_bytes sha1_sum slurp split_header spurt squish),
  qw(steady_time trim unquote url_escape url_unescape xml_escape xor_encode)
);
sub b64_decode { decode_base64($_[0]) }
sub b64_encode { encode_base64($_[0], $_[1]) }
sub camelize {
  my $str = shift;
  return $str if $str =~ /^[A-Z]/;
  # CamelCase words
  return join '::', map {
    join '', map { ucfirst lc } split /_/, $_
  } split /-/, $str;
}
sub class_to_file {
  my $class = shift;
  $class =~ s/::|'//g;
  $class =~ s/([A-Z])([A-Z]*)/$1.lc($2)/ge;
  return decamelize($class);
}
sub class_to_path { join '.', join('/', split /::|'/, shift), 'pm' }
sub decamelize {
  my $str = shift;
  return $str if $str !~ /^[A-Z]/;
  # Module parts
  my @parts;
  for my $part (split /::/, $str) {
    # snake_case words
    my @words;
    push @words, lc $1 while $part =~ s/([A-Z]{1}[^A-Z]*)//;
    push @parts, join '_', @words;
  }
  return join '-', @parts;
}
sub decode {
  my ($encoding, $bytes) = @_;
  return undef
    unless eval { $bytes = _encoding($encoding)->decode("$bytes", 1); 1 };
  return $bytes;
}
sub deprecated {
  local $Carp::CarpLevel = 1;
  $ENV{MOJO_FATAL_DEPRECATIONS} ? croak(@_) : carp(@_);
}
sub dumper { Data::Dumper->new([@_])->Indent(1)->Sortkeys(1)->Terse(1)->Dump }
sub encode { _encoding($_[0])->encode("$_[1]") }
sub get_line {
  # Locate line ending
  return undef if (my $pos = index ${$_[0]}, "\x0a") == -1;
  # Extract line and ending
  my $line = substr ${$_[0]}, 0, $pos + 1, '';
  $line =~ s/\x0d?\x0a$//;
  return $line;
}
sub hmac_sha1_sum { unpack 'H*', hmac_sha1(@_) }
sub html_unescape {
  my $str = shift;
  return $str if index($str, '&') == -1;
  $str =~ s/&(?:\#((?:\d{1,7}|x[0-9a-fA-F]{1,6}));|(\w+;?))/_decode($1, $2)/ge;
  return $str;
}
sub md5_bytes { md5(@_) }
sub md5_sum   { md5_hex(@_) }
sub monkey_patch {
  my ($class, %patch) = @_;
  no strict 'refs';
  no warnings 'redefine';
  *{"${class}::$_"} = $patch{$_} for keys %patch;
}
# Direct translation of RFC 3492
sub punycode_decode {
  my $input = shift;
  use integer;
  my $n    = PC_INITIAL_N;
  my $i    = 0;
  my $bias = PC_INITIAL_BIAS;
  my @output;
  # Consume all code points before the last delimiter
  push @output, split //, $1 if $input =~ s/(.*)\x2d//s;
  while (length $input) {
    my $oldi = $i;
    my $w    = 1;
    # Base to infinity in steps of base
    for (my $k = PC_BASE; 1; $k += PC_BASE) {
      my $digit = ord substr $input, 0, 1, '';
      $digit = $digit < 0x40 ? $digit + (26 - 0x30) : ($digit & 0x1f) - 1;
      $i += $digit * $w;
      my $t = $k - $bias;
      $t = $t < PC_TMIN ? PC_TMIN : $t > PC_TMAX ? PC_TMAX : $t;
      last if $digit < $t;
      $w *= PC_BASE - $t;
    }
    $bias = _adapt($i - $oldi, @output + 1, $oldi == 0);
    $n += $i / (@output + 1);
    $i = $i % (@output + 1);
    splice @output, $i++, 0, chr $n;
  }
  return join '', @output;
}
# Direct translation of RFC 3492
sub punycode_encode {
  my $output = shift;
  use integer;
  my $n     = PC_INITIAL_N;
  my $delta = 0;
  my $bias  = PC_INITIAL_BIAS;
  # Extract basic code points
  my $len   = length $output;
  my @input = map {ord} split //, $output;
  my @chars = sort grep { $_ >= PC_INITIAL_N } @input;
  $output =~ s/[^\x00-\x7f]+//gs;
  my $h = my $b = length $output;
  $output .= "\x2d" if $b > 0;
  for my $m (@chars) {
    next if $m < $n;
    $delta += ($m - $n) * ($h + 1);
    $n = $m;
    for (my $i = 0; $i < $len; $i++) {
      my $c = $input[$i];
      if ($c < $n) { $delta++ }
      elsif ($c == $n) {
        my $q = $delta;
        # Base to infinity in steps of base
        for (my $k = PC_BASE; 1; $k += PC_BASE) {
          my $t = $k - $bias;
          $t = $t < PC_TMIN ? PC_TMIN : $t > PC_TMAX ? PC_TMAX : $t;
          last if $q < $t;
          my $o = $t + (($q - $t) % (PC_BASE - $t));
          $output .= chr $o + ($o < 26 ? 0x61 : 0x30 - 26);
          $q = ($q - $t) / (PC_BASE - $t);
        }
        $output .= chr $q + ($q < 26 ? 0x61 : 0x30 - 26);
        $bias = _adapt($delta, $h + 1, $h == $b);
        $delta = 0;
        $h++;
      }
    }
    $delta++;
    $n++;
  }
  return $output;
}
sub quote {
  my $str = shift;
  $str =~ s/(["\\])/\\$1/g;
  return qq{"$str"};
}
sub secure_compare {
  my ($a, $b) = @_;
  return undef if length $a != length $b;
  my $r = 0;
  $r |= ord(substr $a, $_) ^ ord(substr $b, $_) for 0 .. length($a) - 1;
  return $r == 0;
}
sub sha1_bytes { sha1(@_) }
sub sha1_sum   { sha1_hex(@_) }
sub slurp {
  my $path = shift;
  croak qq{Can't open file "$path": $!} unless open my $file, '<', $path;
  my $content = '';
  while ($file->sysread(my $buffer, 131072, 0)) { $content .= $buffer }
  return $content;
}
sub split_header {
  my $str = shift;
  my (@tree, @token);
  while ($str =~ s/^[,;\s]*([^=;, ]+)\s*//) {
    push @token, $1, undef;
    $token[-1] = unquote($1)
      if $str =~ s/^=\s*("(?:\\\\|\\"|[^"])*"|[^;, ]*)\s*//;
    # Separator
    $str =~ s/^;\s*//;
    next unless $str =~ s/^,\s*//;
    push @tree, [@token];
    @token = ();
  }
  # Take care of final token
  return [@token ? (@tree, \@token) : @tree];
}
sub spurt {
  my ($content, $path) = @_;
  croak qq{Can't open file "$path": $!} unless open my $file, '>', $path;
  croak qq{Can't write to file "$path": $!}
    unless defined $file->syswrite($content);
  return $content;
}
sub squish {
  my $str = trim(@_);
  $str =~ s/\s+/ /g;
  return $str;
}
sub steady_time () {
  MONOTONIC
    ? Time::HiRes::clock_gettime(Time::HiRes::CLOCK_MONOTONIC())
    : Time::HiRes::time;
}
sub trim {
  my $str = shift;
  $str =~ s/^\s+|\s+$//g;
  return $str;
}
sub unquote {
  my $str = shift;
  return $str unless $str =~ s/^"(.*)"$/$1/g;
  $str =~ s/\\\\/\\/g;
  $str =~ s/\\"/"/g;
  return $str;
}
sub url_escape {
  my ($str, $pattern) = @_;
  $pattern ||= '^A-Za-z0-9\-._~';
  $str =~ s/([$pattern])/sprintf('%%%02X',ord($1))/ge;
  return $str;
}
sub url_unescape {
  my $str = shift;
  return $str if index($str, '%') == -1;
  $str =~ s/%([0-9a-fA-F]{2})/chr(hex($1))/ge;
  return $str;
}
sub xml_escape {
  my $str = shift;
  $str =~ s/&/&/g;
  $str =~ s/</</g;
  $str =~ s/>/>/g;
  $str =~ s/"/"/g;
  $str =~ s/'/'/g;
  return $str;
}
sub xor_encode {
  my ($input, $key) = @_;
  # Encode with variable key length
  my $len = length $key;
  my $buffer = my $output = '';
  $output .= $buffer ^ $key
    while length($buffer = substr($input, 0, $len, '')) == $len;
  return $output .= $buffer ^ substr($key, 0, length $buffer, '');
}
sub _adapt {
  my ($delta, $numpoints, $firsttime) = @_;
  use integer;
  $delta = $firsttime ? $delta / PC_DAMP : $delta / 2;
  $delta += $delta / $numpoints;
  my $k = 0;
  while ($delta > ((PC_BASE - PC_TMIN) * PC_TMAX) / 2) {
    $delta /= PC_BASE - PC_TMIN;
    $k += PC_BASE;
  }
  return $k + (((PC_BASE - PC_TMIN + 1) * $delta) / ($delta + PC_SKEW));
}
sub _decode {
  my ($point, $name) = @_;
  # Code point
  return chr($point !~ /^x/ ? $point : hex $point) unless defined $name;
  # Find entity name
  my $rest = '';
  while (length $name) {
    return "$ENTITIES{$name}$rest" if exists $ENTITIES{$name};
    $rest = chop($name) . $rest;
  }
  return "&$rest";
}
sub _encoding {
  $CACHE{$_[0]} = defined $CACHE{$_[0]} ? $CACHE{$_[0]} : defined find_encoding($_[0]) ? find_encoding($_[0]) : croak "Unknown encoding '$_[0]'";
}
1;
=encoding utf8
=head1 NAME
Mojo::Util - Portable utility functions
=head1 SYNOPSIS
  use Mojo::Util qw(b64_encode url_escape url_unescape);
  my $str = 'test=23';
  my $escaped = url_escape $str;
  say url_unescape $escaped;
  say b64_encode $escaped, '';
=head1 DESCRIPTION
L<Mojo::Util> provides portable utility functions for L<Mojo>.
=head1 FUNCTIONS
L<Mojo::Util> implements the following functions.
=head2 b64_decode
  my $bytes = b64_decode $b64;
Base64 decode bytes.
=head2 b64_encode
  my $b64 = b64_encode $bytes;
  my $b64 = b64_encode $bytes, "\n";
Base64 encode bytes, the line ending defaults to a newline.
=head2 camelize
  my $camelcase = camelize $snakecase;
Convert snake_case string to CamelCase and replace C<-> with C<::>.
  # "FooBar"
  camelize 'foo_bar';
  # "FooBar::Baz"
  camelize 'foo_bar-baz';
  # "FooBar::Baz"
  camelize 'FooBar::Baz';
=head2 class_to_file
  my $file = class_to_file 'Foo::Bar';
Convert a class name to a file.
  # "foo_bar"
  class_to_file 'Foo::Bar';
  # "foobar"
  class_to_file 'FOO::Bar';
  # "foo_bar"
  class_to_file 'FooBar';
  # "foobar"
  class_to_file 'FOOBar';
=head2 class_to_path
  my $path = class_to_path 'Foo::Bar';
Convert class name to path.
  # "Foo/Bar.pm"
  class_to_path 'Foo::Bar';
  # "FooBar.pm"
  class_to_path 'FooBar';
=head2 decamelize
  my $snakecase = decamelize $camelcase;
Convert CamelCase string to snake_case and replace C<::> with C<->.
  # "foo_bar"
  decamelize 'FooBar';
  # "foo_bar-baz"
  decamelize 'FooBar::Baz';
  # "foo_bar-baz"
  decamelize 'foo_bar-baz';
=head2 decode
  my $chars = decode 'UTF-8', $bytes;
Decode bytes to characters and return C<undef> if decoding failed.
=head2 deprecated
  deprecated 'foo is DEPRECATED in favor of bar';
Warn about deprecated feature from perspective of caller. You can also set the
MOJO_FATAL_DEPRECATIONS environment variable to make them die instead.
=head2 dumper
  my $perl = dumper {some => 'data'};
Dump a Perl data structure with L<Data::Dumper>.
=head2 encode
  my $bytes = encode 'UTF-8', $chars;
Encode characters to bytes.
=head2 get_line
  my $line = get_line \$str;
Extract whole line from string or return C<undef>. Lines are expected to end
with C<0x0d 0x0a> or C<0x0a>.
=head2 hmac_sha1_sum
  my $checksum = hmac_sha1_sum $bytes, 'passw0rd';
Generate HMAC-SHA1 checksum for bytes.
=head2 html_unescape
  my $str = html_unescape $escaped;
Unescape all HTML entities in string.
=head2 md5_bytes
  my $checksum = md5_bytes $bytes;
Generate binary MD5 checksum for bytes.
=head2 md5_sum
  my $checksum = md5_sum $bytes;
Generate MD5 checksum for bytes.
=head2 monkey_patch
  monkey_patch $package, foo => sub {...};
  monkey_patch $package, foo => sub {...}, bar => sub {...};
Monkey patch functions into package.
  monkey_patch 'MyApp',
    one   => sub { say 'One!' },
    two   => sub { say 'Two!' },
    three => sub { say 'Three!' };
=head2 punycode_decode
  my $str = punycode_decode $punycode;
Punycode decode string.
=head2 punycode_encode
  my $punycode = punycode_encode $str;
Punycode encode string.
=head2 quote
  my $quoted = quote $str;
Quote string.
=head2 secure_compare
  my $bool = secure_compare $str1, $str2;
Constant time comparison algorithm to prevent timing attacks.
=head2 sha1_bytes
  my $checksum = sha1_bytes $bytes;
Generate binary SHA1 checksum for bytes.
=head2 sha1_sum
  my $checksum = sha1_sum $bytes;
Generate SHA1 checksum for bytes.
=head2 slurp
  my $bytes = slurp '/etc/passwd';
Read all data at once from file.
=head2 split_header
   my $tree = split_header 'foo="bar baz"; test=123, yada';
Split HTTP header value.
  # "one"
  split_header('one; two="three four", five=six')->[0][0];
  # "three four"
  split_header('one; two="three four", five=six')->[0][3];
  # "five"
  split_header('one; two="three four", five=six')->[1][0];
=head2 spurt
  $bytes = spurt $bytes, '/etc/passwd';
Write all data at once to file.
=head2 squish
  my $squished = squish $str;
Trim whitespace characters from both ends of string and then change all
consecutive groups of whitespace into one space each.
=head2 steady_time
  my $time = steady_time;
High resolution time, resilient to time jumps if a monotonic clock is
available through L<Time::HiRes>.
=head2 trim
  my $trimmed = trim $str;
Trim whitespace characters from both ends of string.
=head2 unquote
  my $str = unquote $quoted;
Unquote string.
=head2 url_escape
  my $escaped = url_escape $str;
  my $escaped = url_escape $str, '^A-Za-z0-9\-._~';
Percent encode unsafe characters in string, the pattern used defaults to
C<^A-Za-z0-9\-._~>.
=head2 url_unescape
  my $str = url_unescape $escaped;
Decode percent encoded characters in string.
=head2 xml_escape
  my $escaped = xml_escape $str;
Escape unsafe characters C<&>, C<E<lt>>, C<E<gt>>, C<"> and C<'> in string.
=head2 xor_encode
  my $encoded = xor_encode $str, $key;
XOR encode string with variable length key.
=head1 SEE ALSO
L<Mojolicious>, L<Mojolicious::Guides>, L<http://mojolicio.us>.
=cut