kimoto/biblesearch at master ・ Gitprep

biblesearch / mojo / lib / Mojo / Util.pm /

Newer ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ Older

659 lines | 13.764kb

add files aa0f2e9 Yuki Kimoto authored on 2014-03-26	1	package Mojo::Util;
	2	use Mojo::Base 'Exporter';
	3
	4	use Carp qw(carp croak);
	5	use Data::Dumper ();
	6	use Digest::MD5 qw(md5 md5_hex);
	7	use Digest::SHA qw(hmac_sha1 sha1 sha1_hex);
	8	use Encode 'find_encoding';
	9	use File::Basename 'dirname';
	10	use File::Spec::Functions 'catfile';
	11	use MIME::Base64 qw(decode_base64 encode_base64);
	12	use Time::HiRes ();
	13
	14	# Check for monotonic clock support
	15	use constant MONOTONIC => eval
	16	'!!Time::HiRes::clock_gettime(Time::HiRes::CLOCK_MONOTONIC())';
	17
	18	# Punycode bootstring parameters
	19	use constant {
	20	PC_BASE => 36,
	21	PC_TMIN => 1,
	22	PC_TMAX => 26,
	23	PC_SKEW => 38,
	24	PC_DAMP => 700,
	25	PC_INITIAL_BIAS => 72,
	26	PC_INITIAL_N => 128
	27	};
	28
	29	# To update HTML entities run this command
	30	# perl examples/entities.pl > lib/Mojo/entities.txt
	31	my %ENTITIES;
	32	for my $line (split "\x0a", slurp(catfile dirname(__FILE__), 'entities.txt')) {
	33	next unless $line =~ /^(\S+)\s+U\+(\S+)(?:\s+U\+(\S+))?/;
	34	$ENTITIES{$1} = defined $3 ? (chr(hex $2) . chr(hex $3)) : chr(hex $2);
	35	}
	36
	37	# Encoding cache
	38	my %CACHE;
	39
	40	our @EXPORT_OK = (
	41	qw(b64_decode b64_encode camelize class_to_file class_to_path decamelize),
	42	qw(decode deprecated dumper encode get_line hmac_sha1_sum html_unescape),
	43	qw(md5_bytes md5_sum monkey_patch punycode_decode punycode_encode quote),
	44	qw(secure_compare sha1_bytes sha1_sum slurp split_header spurt squish),
	45	qw(steady_time trim unquote url_escape url_unescape xml_escape xor_encode)
	46	);
	47
	48	sub b64_decode { decode_base64($_[0]) }
	49	sub b64_encode { encode_base64($_[0], $_[1]) }
	50
	51	sub camelize {
	52	my $str = shift;
	53	return $str if $str =~ /^[A-Z]/;
	54
	55	# CamelCase words
	56	return join '::', map {
	57	join '', map { ucfirst lc } split /_/, $_
	58	} split /-/, $str;
	59	}
	60
	61	sub class_to_file {
	62	my $class = shift;
	63	$class =~ s/::\|'//g;
	64	$class =~ s/([A-Z])([A-Z]*)/$1.lc($2)/ge;
	65	return decamelize($class);
	66	}
	67
	68	sub class_to_path { join '.', join('/', split /::\|'/, shift), 'pm' }
	69
	70	sub decamelize {
	71	my $str = shift;
	72	return $str if $str !~ /^[A-Z]/;
	73
	74	# Module parts
	75	my @parts;
	76	for my $part (split /::/, $str) {
	77
	78	# snake_case words
	79	my @words;
	80	push @words, lc $1 while $part =~ s/([A-Z]{1}[^A-Z]*)//;
	81	push @parts, join '_', @words;
	82	}
	83
	84	return join '-', @parts;
	85	}
	86
	87	sub decode {
	88	my ($encoding, $bytes) = @_;
	89	return undef
	90	unless eval { $bytes = _encoding($encoding)->decode("$bytes", 1); 1 };
	91	return $bytes;
	92	}
	93
	94	sub deprecated {
	95	local $Carp::CarpLevel = 1;
	96	$ENV{MOJO_FATAL_DEPRECATIONS} ? croak(@_) : carp(@_);
	97	}
	98
	99	sub dumper { Data::Dumper->new([@_])->Indent(1)->Sortkeys(1)->Terse(1)->Dump }
	100
	101	sub encode { _encoding($_[0])->encode("$_[1]") }
	102
	103	sub get_line {
	104
	105	# Locate line ending
	106	return undef if (my $pos = index ${$_[0]}, "\x0a") == -1;
	107
	108	# Extract line and ending
	109	my $line = substr ${$_[0]}, 0, $pos + 1, '';
	110	$line =~ s/\x0d?\x0a$//;
	111
	112	return $line;
	113	}
	114
	115	sub hmac_sha1_sum { unpack 'H*', hmac_sha1(@_) }
	116
	117	sub html_unescape {
	118	my $str = shift;
	119	return $str if index($str, '&') == -1;
	120	$str =~ s/&(?:\#((?:\d{1,7}\|x[0-9a-fA-F]{1,6}));\|(\w+;?))/_decode($1, $2)/ge;
	121	return $str;
	122	}
	123
	124	sub md5_bytes { md5(@_) }
	125	sub md5_sum { md5_hex(@_) }
	126
	127	sub monkey_patch {
	128	my ($class, %patch) = @_;
	129	no strict 'refs';
	130	no warnings 'redefine';
	131	*{"${class}::$_"} = $patch{$_} for keys %patch;
	132	}
	133
	134	# Direct translation of RFC 3492
	135	sub punycode_decode {
	136	my $input = shift;
	137	use integer;
	138
	139	my $n = PC_INITIAL_N;
	140	my $i = 0;
	141	my $bias = PC_INITIAL_BIAS;
	142	my @output;
	143
	144	# Consume all code points before the last delimiter
	145	push @output, split //, $1 if $input =~ s/(.*)\x2d//s;
	146
	147	while (length $input) {
	148	my $oldi = $i;
	149	my $w = 1;
	150
	151	# Base to infinity in steps of base
	152	for (my $k = PC_BASE; 1; $k += PC_BASE) {
	153	my $digit = ord substr $input, 0, 1, '';
	154	$digit = $digit < 0x40 ? $digit + (26 - 0x30) : ($digit & 0x1f) - 1;
	155	$i += $digit * $w;
	156	my $t = $k - $bias;
	157	$t = $t < PC_TMIN ? PC_TMIN : $t > PC_TMAX ? PC_TMAX : $t;
	158	last if $digit < $t;
	159	$w *= PC_BASE - $t;
	160	}
	161
	162	$bias = _adapt($i - $oldi, @output + 1, $oldi == 0);
	163	$n += $i / (@output + 1);
	164	$i = $i % (@output + 1);
	165	splice @output, $i++, 0, chr $n;
	166	}
	167
	168	return join '', @output;
	169	}
	170
	171	# Direct translation of RFC 3492
	172	sub punycode_encode {
	173	my $output = shift;
	174	use integer;
	175
	176	my $n = PC_INITIAL_N;
	177	my $delta = 0;
	178	my $bias = PC_INITIAL_BIAS;
	179
	180	# Extract basic code points
	181	my $len = length $output;
	182	my @input = map {ord} split //, $output;
	183	my @chars = sort grep { $_ >= PC_INITIAL_N } @input;
	184	$output =~ s/[^\x00-\x7f]+//gs;
	185	my $h = my $b = length $output;
	186	$output .= "\x2d" if $b > 0;
	187
	188	for my $m (@chars) {
	189	next if $m < $n;
	190	$delta += ($m - $n) * ($h + 1);
	191	$n = $m;
	192
	193	for (my $i = 0; $i < $len; $i++) {
	194	my $c = $input[$i];
	195
	196	if ($c < $n) { $delta++ }
	197	elsif ($c == $n) {
	198	my $q = $delta;
	199
	200	# Base to infinity in steps of base
	201	for (my $k = PC_BASE; 1; $k += PC_BASE) {
	202	my $t = $k - $bias;
	203	$t = $t < PC_TMIN ? PC_TMIN : $t > PC_TMAX ? PC_TMAX : $t;
	204	last if $q < $t;
	205	my $o = $t + (($q - $t) % (PC_BASE - $t));
	206	$output .= chr $o + ($o < 26 ? 0x61 : 0x30 - 26);
	207	$q = ($q - $t) / (PC_BASE - $t);
	208	}
	209
	210	$output .= chr $q + ($q < 26 ? 0x61 : 0x30 - 26);
	211	$bias = _adapt($delta, $h + 1, $h == $b);
	212	$delta = 0;
	213	$h++;
	214	}
	215	}
	216
	217	$delta++;
	218	$n++;
	219	}
	220
	221	return $output;
	222	}
	223
	224	sub quote {
	225	my $str = shift;
	226	$str =~ s/(["\\])/\\$1/g;
	227	return qq{"$str"};
	228	}
	229
	230	sub secure_compare {
	231	my ($a, $b) = @_;
	232	return undef if length $a != length $b;
	233	my $r = 0;
	234	$r \|= ord(substr $a, $_) ^ ord(substr $b, $_) for 0 .. length($a) - 1;
	235	return $r == 0;
	236	}
	237
	238	sub sha1_bytes { sha1(@_) }
	239	sub sha1_sum { sha1_hex(@_) }
	240
	241	sub slurp {
	242	my $path = shift;
	243	croak qq{Can't open file "$path": $!} unless open my $file, '<', $path;
	244	my $content = '';
	245	while ($file->sysread(my $buffer, 131072, 0)) { $content .= $buffer }
	246	return $content;
	247	}
	248
	249	sub split_header {
	250	my $str = shift;
	251
	252	my (@tree, @token);
	253	while ($str =~ s/^[,;\s]([^=;, ]+)\s//) {
	254	push @token, $1, undef;
	255	$token[-1] = unquote($1)
	256	if $str =~ s/^=\s("(?:\\\\\|\\"\|[^"])"\|[^;, ])\s//;
	257
	258	# Separator
	259	$str =~ s/^;\s*//;
	260	next unless $str =~ s/^,\s*//;
	261	push @tree, [@token];
	262	@token = ();
	263	}
	264
	265	# Take care of final token
	266	return [@token ? (@tree, \@token) : @tree];
	267	}
	268
	269	sub spurt {
	270	my ($content, $path) = @_;
	271	croak qq{Can't open file "$path": $!} unless open my $file, '>', $path;
	272	croak qq{Can't write to file "$path": $!}
	273	unless defined $file->syswrite($content);
	274	return $content;
	275	}
	276
	277	sub squish {
	278	my $str = trim(@_);
	279	$str =~ s/\s+/ /g;
	280	return $str;
	281	}
	282
	283	sub steady_time () {
	284	MONOTONIC
	285	? Time::HiRes::clock_gettime(Time::HiRes::CLOCK_MONOTONIC())
	286	: Time::HiRes::time;
	287	}
	288
	289	sub trim {
	290	my $str = shift;
	291	$str =~ s/^\s+\|\s+$//g;
	292	return $str;
	293	}
	294
	295	sub unquote {
	296	my $str = shift;
	297	return $str unless $str =~ s/^"(.*)"$/$1/g;
	298	$str =~ s/\\\\/\\/g;
	299	$str =~ s/\\"/"/g;
	300	return $str;
	301	}
	302
	303	sub url_escape {
	304	my ($str, $pattern) = @_;
	305	$pattern \|\|= '^A-Za-z0-9\-._~';
	306	$str =~ s/([$pattern])/sprintf('%%%02X',ord($1))/ge;
	307	return $str;
	308	}
	309
	310	sub url_unescape {
	311	my $str = shift;
	312	return $str if index($str, '%') == -1;
	313	$str =~ s/%([0-9a-fA-F]{2})/chr(hex($1))/ge;
	314	return $str;
	315	}
	316
	317	sub xml_escape {
	318	my $str = shift;
	319
	320	$str =~ s/&/&/g;
	321	$str =~ s/</</g;
	322	$str =~ s/>/>/g;
	323	$str =~ s/"/"/g;
	324	$str =~ s/'/'/g;
	325
	326	return $str;
	327	}
	328
	329	sub xor_encode {
	330	my ($input, $key) = @_;
	331
	332	# Encode with variable key length
	333	my $len = length $key;
	334	my $buffer = my $output = '';
	335	$output .= $buffer ^ $key
	336	while length($buffer = substr($input, 0, $len, '')) == $len;
	337	return $output .= $buffer ^ substr($key, 0, length $buffer, '');
	338	}
	339
	340	sub _adapt {
	341	my ($delta, $numpoints, $firsttime) = @_;
	342	use integer;
	343
	344	$delta = $firsttime ? $delta / PC_DAMP : $delta / 2;
	345	$delta += $delta / $numpoints;
	346	my $k = 0;
	347	while ($delta > ((PC_BASE - PC_TMIN) * PC_TMAX) / 2) {
	348	$delta /= PC_BASE - PC_TMIN;
	349	$k += PC_BASE;
	350	}
	351
	352	return $k + (((PC_BASE - PC_TMIN + 1) * $delta) / ($delta + PC_SKEW));
	353	}
	354
	355	sub _decode {
	356	my ($point, $name) = @_;
	357
	358	# Code point
	359	return chr($point !~ /^x/ ? $point : hex $point) unless defined $name;
	360
	361	# Find entity name
	362	my $rest = '';
	363	while (length $name) {
	364	return "$ENTITIES{$name}$rest" if exists $ENTITIES{$name};
	365	$rest = chop($name) . $rest;
	366	}
	367	return "&$rest";
	368	}
	369
	370	sub _encoding {
	371	$CACHE{$_[0]} //= find_encoding($_[0]) // croak "Unknown encoding '$_[0]'";
	372	}
	373
	374	1;
	375
	376	=encoding utf8
	377
	378	=head1 NAME
	379
	380	Mojo::Util - Portable utility functions
	381
	382	=head1 SYNOPSIS
	383
	384	use Mojo::Util qw(b64_encode url_escape url_unescape);
	385
	386	my $str = 'test=23';
	387	my $escaped = url_escape $str;
	388	say url_unescape $escaped;
	389	say b64_encode $escaped, '';
	390
	391	=head1 DESCRIPTION
	392
	393	L<Mojo::Util> provides portable utility functions for L<Mojo>.
	394
	395	=head1 FUNCTIONS
	396
	397	L<Mojo::Util> implements the following functions.
	398
	399	=head2 b64_decode
	400
	401	my $bytes = b64_decode $b64;
	402
	403	Base64 decode bytes.
	404
	405	=head2 b64_encode
	406
	407	my $b64 = b64_encode $bytes;
	408	my $b64 = b64_encode $bytes, "\n";
	409
	410	Base64 encode bytes, the line ending defaults to a newline.
	411
	412	=head2 camelize
	413
	414	my $camelcase = camelize $snakecase;
	415
	416	Convert snake_case string to CamelCase and replace C<-> with C<::>.
	417
	418	# "FooBar"
	419	camelize 'foo_bar';
	420
	421	# "FooBar::Baz"
	422	camelize 'foo_bar-baz';
	423
	424	# "FooBar::Baz"
	425	camelize 'FooBar::Baz';
	426
	427	=head2 class_to_file
	428
	429	my $file = class_to_file 'Foo::Bar';
	430
	431	Convert a class name to a file.
	432
	433	# "foo_bar"
	434	class_to_file 'Foo::Bar';
	435
	436	# "foobar"
	437	class_to_file 'FOO::Bar';
	438
	439	# "foo_bar"
	440	class_to_file 'FooBar';
	441
	442	# "foobar"
	443	class_to_file 'FOOBar';
	444
	445	=head2 class_to_path
	446
	447	my $path = class_to_path 'Foo::Bar';
	448
	449	Convert class name to path.
	450
	451	# "Foo/Bar.pm"
	452	class_to_path 'Foo::Bar';
	453
	454	# "FooBar.pm"
	455	class_to_path 'FooBar';
	456
	457	=head2 decamelize
	458
	459	my $snakecase = decamelize $camelcase;
	460
	461	Convert CamelCase string to snake_case and replace C<::> with C<->.
	462
	463	# "foo_bar"
	464	decamelize 'FooBar';
	465
	466	# "foo_bar-baz"
	467	decamelize 'FooBar::Baz';
	468
	469	# "foo_bar-baz"
	470	decamelize 'foo_bar-baz';
	471
	472	=head2 decode
	473
	474	my $chars = decode 'UTF-8', $bytes;
	475
	476	Decode bytes to characters and return C<undef> if decoding failed.
	477
	478	=head2 deprecated
	479
	480	deprecated 'foo is DEPRECATED in favor of bar';
	481
	482	Warn about deprecated feature from perspective of caller. You can also set the
	483	MOJO_FATAL_DEPRECATIONS environment variable to make them die instead.
	484
	485	=head2 dumper
	486
	487	my $perl = dumper {some => 'data'};
	488
	489	Dump a Perl data structure with L<Data::Dumper>.
	490
	491	=head2 encode
	492
	493	my $bytes = encode 'UTF-8', $chars;
	494
	495	Encode characters to bytes.
	496
	497	=head2 get_line
	498
	499	my $line = get_line \$str;
	500
	501	Extract whole line from string or return C<undef>. Lines are expected to end
	502	with C<0x0d 0x0a> or C<0x0a>.
	503
	504	=head2 hmac_sha1_sum
	505
	506	my $checksum = hmac_sha1_sum $bytes, 'passw0rd';
	507
	508	Generate HMAC-SHA1 checksum for bytes.
	509
	510	=head2 html_unescape
	511
	512	my $str = html_unescape $escaped;
	513
	514	Unescape all HTML entities in string.
	515
	516	=head2 md5_bytes
	517
	518	my $checksum = md5_bytes $bytes;
	519
	520	Generate binary MD5 checksum for bytes.
	521
	522	=head2 md5_sum
	523
	524	my $checksum = md5_sum $bytes;
	525
	526	Generate MD5 checksum for bytes.
	527
	528	=head2 monkey_patch
	529
	530	monkey_patch $package, foo => sub {...};
	531	monkey_patch $package, foo => sub {...}, bar => sub {...};
	532
	533	Monkey patch functions into package.
	534
	535	monkey_patch 'MyApp',
	536	one => sub { say 'One!' },
	537	two => sub { say 'Two!' },
	538	three => sub { say 'Three!' };
	539
	540	=head2 punycode_decode
	541
	542	my $str = punycode_decode $punycode;
	543
	544	Punycode decode string.
	545
	546	=head2 punycode_encode
	547
	548	my $punycode = punycode_encode $str;
	549
	550	Punycode encode string.
	551
	552	=head2 quote
	553
	554	my $quoted = quote $str;
	555
	556	Quote string.
	557
	558	=head2 secure_compare
	559
	560	my $bool = secure_compare $str1, $str2;
	561
	562	Constant time comparison algorithm to prevent timing attacks.
	563
	564	=head2 sha1_bytes
	565
	566	my $checksum = sha1_bytes $bytes;
	567
	568	Generate binary SHA1 checksum for bytes.
	569
	570	=head2 sha1_sum
	571
	572	my $checksum = sha1_sum $bytes;
	573
	574	Generate SHA1 checksum for bytes.
	575
	576	=head2 slurp
	577
	578	my $bytes = slurp '/etc/passwd';
	579
	580	Read all data at once from file.
	581
	582	=head2 split_header
	583
	584	my $tree = split_header 'foo="bar baz"; test=123, yada';
	585
	586	Split HTTP header value.
	587
	588	# "one"
	589	split_header('one; two="three four", five=six')->[0][0];
	590
	591	# "three four"
	592	split_header('one; two="three four", five=six')->[0][3];
	593
	594	# "five"
	595	split_header('one; two="three four", five=six')->[1][0];
	596
	597	=head2 spurt
	598
	599	$bytes = spurt $bytes, '/etc/passwd';
	600
	601	Write all data at once to file.
	602
	603	=head2 squish
	604
	605	my $squished = squish $str;
	606
	607	Trim whitespace characters from both ends of string and then change all
	608	consecutive groups of whitespace into one space each.
	609
	610	=head2 steady_time
	611
	612	my $time = steady_time;
	613
	614	High resolution time, resilient to time jumps if a monotonic clock is
	615	available through L<Time::HiRes>.
	616
	617	=head2 trim
	618
	619	my $trimmed = trim $str;
	620
	621	Trim whitespace characters from both ends of string.
	622
	623	=head2 unquote
	624
	625	my $str = unquote $quoted;
	626
	627	Unquote string.
	628
	629	=head2 url_escape
	630
	631	my $escaped = url_escape $str;
	632	my $escaped = url_escape $str, '^A-Za-z0-9\-._~';
	633
	634	Percent encode unsafe characters in string, the pattern used defaults to
	635	C<^A-Za-z0-9\-._~>.
	636
	637	=head2 url_unescape
	638
	639	my $str = url_unescape $escaped;
	640
	641	Decode percent encoded characters in string.
	642
	643	=head2 xml_escape
	644
	645	my $escaped = xml_escape $str;
	646
	647	Escape unsafe characters C<&>, C<E<lt>>, C<E<gt>>, C<"> and C<'> in string.
	648
	649	=head2 xor_encode
	650
	651	my $encoded = xor_encode $str, $key;
	652
	653	XOR encode string with variable length key.
	654
	655	=head1 SEE ALSO
	656
	657	L<Mojolicious>, L<Mojolicious::Guides>, L<http://mojolicio.us>.
	658
	659	=cut