Newer Older
444 lines | 10.012kb
add files
Yuki Kimoto authored on 2014-03-26
1
package Mojo::JSON;
2
use Mojo::Base -base;
3

            
4
use B;
5
use Exporter 'import';
6
use Mojo::Util;
7
use Scalar::Util 'blessed';
8

            
9
has 'error';
10

            
11
our @EXPORT_OK = ('j');
12

            
13
# Literal names
14
my $FALSE = bless \(my $false = 0), 'Mojo::JSON::_Bool';
15
my $TRUE  = bless \(my $true  = 1), 'Mojo::JSON::_Bool';
16

            
17
# Escaped special character map (with u2028 and u2029)
18
my %ESCAPE = (
19
  '"'     => '"',
20
  '\\'    => '\\',
21
  '/'     => '/',
22
  'b'     => "\x08",
23
  'f'     => "\x0c",
24
  'n'     => "\x0a",
25
  'r'     => "\x0d",
26
  't'     => "\x09",
27
  'u2028' => "\x{2028}",
28
  'u2029' => "\x{2029}"
29
);
30
my %REVERSE = map { $ESCAPE{$_} => "\\$_" } keys %ESCAPE;
31
for (0x00 .. 0x1f, 0x7f) { $REVERSE{pack 'C', $_} //= sprintf '\u%.4X', $_ }
32

            
33
# Unicode encoding detection
34
my $UTF_PATTERNS = {
35
  'UTF-32BE' => qr/^\x00{3}[^\x00]/,
36
  'UTF-32LE' => qr/^[^\x00]\x00{3}/,
37
  'UTF-16BE' => qr/^(?:\x00[^\x00]){2}/,
38
  'UTF-16LE' => qr/^(?:[^\x00]\x00){2}/
39
};
40

            
41
my $WHITESPACE_RE = qr/[\x20\x09\x0a\x0d]*/;
42

            
43
sub decode {
44
  my ($self, $bytes) = @_;
45

            
46
  # Clean start
47
  $self->error(undef);
48

            
49
  # Missing input
50
  $self->error('Missing or empty input') and return undef unless $bytes;
51

            
52
  # Remove BOM
53
  $bytes =~ s/^(?:\357\273\277|\377\376\0\0|\0\0\376\377|\376\377|\377\376)//g;
54

            
55
  # Wide characters
56
  $self->error('Wide character in input') and return undef
57
    unless utf8::downgrade($bytes, 1);
58

            
59
  # Detect and decode Unicode
60
  my $encoding = 'UTF-8';
61
  $bytes =~ $UTF_PATTERNS->{$_} and $encoding = $_ for keys %$UTF_PATTERNS;
62
  $bytes = Mojo::Util::decode $encoding, $bytes;
63

            
64
  # Object or array
65
  my $res = eval {
66
    local $_ = $bytes;
67

            
68
    # Leading whitespace
69
    m/\G$WHITESPACE_RE/gc;
70

            
71
    # Array
72
    my $ref;
73
    if (m/\G\[/gc) { $ref = _decode_array() }
74

            
75
    # Object
76
    elsif (m/\G\{/gc) { $ref = _decode_object() }
77

            
78
    # Invalid character
79
    else { _exception('Expected array or object') }
80

            
81
    # Leftover data
82
    unless (m/\G$WHITESPACE_RE\z/gc) {
83
      my $got = ref $ref eq 'ARRAY' ? 'array' : 'object';
84
      _exception("Unexpected data after $got");
85
    }
86

            
87
    $ref;
88
  };
89

            
90
  # Exception
91
  if (!$res && (my $e = $@)) {
92
    chomp $e;
93
    $self->error($e);
94
  }
95

            
96
  return $res;
97
}
98

            
99
sub encode {
100
  my ($self, $ref) = @_;
101
  return Mojo::Util::encode 'UTF-8', _encode_value($ref);
102
}
103

            
104
sub false {$FALSE}
105

            
106
sub j {
107
  my $d = shift;
108
  return __PACKAGE__->new->encode($d) if ref $d eq 'ARRAY' || ref $d eq 'HASH';
109
  return __PACKAGE__->new->decode($d);
110
}
111

            
112
sub true {$TRUE}
113

            
114
sub _decode_array {
115
  my @array;
116
  until (m/\G$WHITESPACE_RE\]/gc) {
117

            
118
    # Value
119
    push @array, _decode_value();
120

            
121
    # Separator
122
    redo if m/\G$WHITESPACE_RE,/gc;
123

            
124
    # End
125
    last if m/\G$WHITESPACE_RE\]/gc;
126

            
127
    # Invalid character
128
    _exception('Expected comma or right square bracket while parsing array');
129
  }
130

            
131
  return \@array;
132
}
133

            
134
sub _decode_object {
135
  my %hash;
136
  until (m/\G$WHITESPACE_RE\}/gc) {
137

            
138
    # Quote
139
    m/\G$WHITESPACE_RE"/gc
140
      or _exception('Expected string while parsing object');
141

            
142
    # Key
143
    my $key = _decode_string();
144

            
145
    # Colon
146
    m/\G$WHITESPACE_RE:/gc
147
      or _exception('Expected colon while parsing object');
148

            
149
    # Value
150
    $hash{$key} = _decode_value();
151

            
152
    # Separator
153
    redo if m/\G$WHITESPACE_RE,/gc;
154

            
155
    # End
156
    last if m/\G$WHITESPACE_RE\}/gc;
157

            
158
    # Invalid character
159
    _exception('Expected comma or right curly bracket while parsing object');
160
  }
161

            
162
  return \%hash;
163
}
164

            
165
sub _decode_string {
166
  my $pos = pos;
167

            
168
  # Extract string with escaped characters
169
  m!\G((?:(?:[^\x00-\x1f\\"]|\\(?:["\\/bfnrt]|u[0-9a-fA-F]{4})){0,32766})*)!gc;
170
  my $str = $1;
171

            
172
  # Invalid character
173
  unless (m/\G"/gc) {
174
    _exception('Unexpected character or invalid escape while parsing string')
175
      if m/\G[\x00-\x1f\\]/;
176
    _exception('Unterminated string');
177
  }
178

            
179
  # Unescape popular characters
180
  if (index($str, '\\u') < 0) {
181
    $str =~ s!\\(["\\/bfnrt])!$ESCAPE{$1}!gs;
182
    return $str;
183
  }
184

            
185
  # Unescape everything else
186
  my $buffer = '';
187
  while ($str =~ m/\G([^\\]*)\\(?:([^u])|u(.{4}))/gc) {
188
    $buffer .= $1;
189

            
190
    # Popular character
191
    if ($2) { $buffer .= $ESCAPE{$2} }
192

            
193
    # Escaped
194
    else {
195
      my $ord = hex $3;
196

            
197
      # Surrogate pair
198
      if (($ord & 0xf800) == 0xd800) {
199

            
200
        # High surrogate
201
        ($ord & 0xfc00) == 0xd800
202
          or pos($_) = $pos + pos($str), _exception('Missing high-surrogate');
203

            
204
        # Low surrogate
205
        $str =~ m/\G\\u([Dd][C-Fc-f]..)/gc
206
          or pos($_) = $pos + pos($str), _exception('Missing low-surrogate');
207

            
208
        $ord = 0x10000 + ($ord - 0xd800) * 0x400 + (hex($1) - 0xdc00);
209
      }
210

            
211
      # Character
212
      $buffer .= pack 'U', $ord;
213
    }
214
  }
215

            
216
  # The rest
217
  return $buffer . substr $str, pos($str), length($str);
218
}
219

            
220
sub _decode_value {
221

            
222
  # Leading whitespace
223
  m/\G$WHITESPACE_RE/gc;
224

            
225
  # String
226
  return _decode_string() if m/\G"/gc;
227

            
228
  # Array
229
  return _decode_array() if m/\G\[/gc;
230

            
231
  # Object
232
  return _decode_object() if m/\G\{/gc;
233

            
234
  # Number
235
  return 0 + $1
236
    if m/\G([-]?(?:0|[1-9][0-9]*)(?:\.[0-9]*)?(?:[eE][+-]?[0-9]+)?)/gc;
237

            
238
  # True
239
  return $TRUE if m/\Gtrue/gc;
240

            
241
  # False
242
  return $FALSE if m/\Gfalse/gc;
243

            
244
  # Null
245
  return undef if m/\Gnull/gc;
246

            
247
  # Invalid character
248
  _exception('Expected string, array, object, number, boolean or null');
249
}
250

            
251
sub _encode_array {
252
  my $array = shift;
253
  return '[' . join(',', map { _encode_value($_) } @$array) . ']';
254
}
255

            
256
sub _encode_object {
257
  my $object = shift;
258
  my @pairs = map { _encode_string($_) . ':' . _encode_value($object->{$_}) }
259
    keys %$object;
260
  return '{' . join(',', @pairs) . '}';
261
}
262

            
263
sub _encode_string {
264
  my $str = shift;
265
  $str =~ s!([\x00-\x1f\x7f\x{2028}\x{2029}\\"/\b\f\n\r\t])!$REVERSE{$1}!gs;
266
  return "\"$str\"";
267
}
268

            
269
sub _encode_value {
270
  my $value = shift;
271

            
272
  # Reference
273
  if (my $ref = ref $value) {
274

            
275
    # Array
276
    return _encode_array($value) if $ref eq 'ARRAY';
277

            
278
    # Object
279
    return _encode_object($value) if $ref eq 'HASH';
280

            
281
    # True or false
282
    return $$value ? 'true' : 'false' if $ref eq 'SCALAR';
283
    return $value  ? 'true' : 'false' if $ref eq 'Mojo::JSON::_Bool';
284

            
285
    # Blessed reference with TO_JSON method
286
    if (blessed $value && (my $sub = $value->can('TO_JSON'))) {
287
      return _encode_value($value->$sub);
288
    }
289
  }
290

            
291
  # Null
292
  return 'null' unless defined $value;
293

            
294
  # Number
295
  my $flags = B::svref_2object(\$value)->FLAGS;
296
  return 0 + $value if $flags & (B::SVp_IOK | B::SVp_NOK) && $value * 0 == 0;
297

            
298
  # String
299
  return _encode_string($value);
300
}
301

            
302
sub _exception {
303

            
304
  # Leading whitespace
305
  m/\G$WHITESPACE_RE/gc;
306

            
307
  # Context
308
  my $context = 'Malformed JSON: ' . shift;
309
  if (m/\G\z/gc) { $context .= ' before end of data' }
310
  else {
311
    my @lines = split /\n/, substr($_, 0, pos);
312
    $context .= ' at line ' . @lines . ', offset ' . length(pop @lines || '');
313
  }
314

            
315
  die "$context\n";
316
}
317

            
318
# Emulate boolean type
319
package Mojo::JSON::_Bool;
320
use overload '0+' => sub { ${$_[0]} }, '""' => sub { ${$_[0]} }, fallback => 1;
321

            
322
1;
323

            
324
=encoding utf8
325

            
326
=head1 NAME
327

            
328
Mojo::JSON - Minimalistic JSON
329

            
330
=head1 SYNOPSIS
331

            
332
  # Encode and decode JSON
333
  use Mojo::JSON;
334
  my $json  = Mojo::JSON->new;
335
  my $bytes = $json->encode({foo => [1, 2], bar => 'hello!', baz => \1});
336
  my $hash  = $json->decode($bytes);
337

            
338
  # Check for errors
339
  my $json = Mojo::JSON->new;
340
  if (defined(my $hash = $json->decode($bytes))) { say $hash->{message} }
341
  else { say 'Error: ', $json->error }
342

            
343
  # Use the alternative interface
344
  use Mojo::JSON 'j';
345
  my $bytes = j({foo => [1, 2], bar => 'hello!', baz => \1});
346
  my $hash  = j($bytes);
347

            
348
=head1 DESCRIPTION
349

            
350
L<Mojo::JSON> is a minimalistic and relaxed implementation of RFC 4627. While
351
it is possibly the fastest pure-Perl JSON parser available, you should not use
352
it for validation.
353

            
354
It supports normal Perl data types like C<Scalar>, C<Array> reference, C<Hash>
355
reference and will try to call the C<TO_JSON> method on blessed references, or
356
stringify them if it doesn't exist. Differentiating between strings and
357
numbers in Perl is hard, depending on how it has been used, a C<Scalar> can be
358
both at the same time. Since numeric comparisons on strings are very unlikely
359
to happen intentionally, the numeric value always gets priority, so any
360
C<Scalar> that has been used in numeric context is considered a number.
361

            
362
  [1, -2, 3]     -> [1, -2, 3]
363
  {"foo": "bar"} -> {foo => 'bar'}
364

            
365
Literal names will be translated to and from L<Mojo::JSON> constants or a
366
similar native Perl value.
367

            
368
  true  -> Mojo::JSON->true
369
  false -> Mojo::JSON->false
370
  null  -> undef
371

            
372
In addition C<Scalar> references will be used to generate booleans, based on
373
if their values are true or false.
374

            
375
  \1 -> true
376
  \0 -> false
377

            
378
Decoding UTF-16 (LE/BE) and UTF-32 (LE/BE) will be handled transparently,
379
encoding will only generate UTF-8. The two Unicode whitespace characters
380
C<u2028> and C<u2029> will always be escaped to make JSONP easier.
381

            
382
=head1 FUNCTIONS
383

            
384
L<Mojo::JSON> implements the following functions.
385

            
386
=head2 j
387

            
388
  my $bytes = j([1, 2, 3]);
389
  my $bytes = j({foo => 'bar'});
390
  my $array = j($bytes);
391
  my $hash  = j($bytes);
392

            
393
Encode Perl data structure or decode JSON and return C<undef> if decoding
394
fails.
395

            
396
=head1 ATTRIBUTES
397

            
398
L<Mojo::JSON> implements the following attributes.
399

            
400
=head2 error
401

            
402
  my $err = $json->error;
403
  $json   = $json->error('Parser error');
404

            
405
Parser errors.
406

            
407
=head1 METHODS
408

            
409
L<Mojo::JSON> inherits all methods from L<Mojo::Base> and implements the
410
following new ones.
411

            
412
=head2 decode
413

            
414
  my $array = $json->decode($bytes);
415
  my $hash  = $json->decode($bytes);
416

            
417
Decode JSON to Perl data structure and return C<undef> if decoding fails.
418

            
419
=head2 encode
420

            
421
  my $bytes = $json->encode([1, 2, 3]);
422
  my $bytes = $json->encode({foo => 'bar'});
423

            
424
Encode Perl data structure to JSON.
425

            
426
=head2 false
427

            
428
  my $false = Mojo::JSON->false;
429
  my $false = $json->false;
430

            
431
False value, used because Perl has no native equivalent.
432

            
433
=head2 true
434

            
435
  my $true = Mojo::JSON->true;
436
  my $true = $json->true;
437

            
438
True value, used because Perl has no native equivalent.
439

            
440
=head1 SEE ALSO
441

            
442
L<Mojolicious>, L<Mojolicious::Guides>, L<http://mojolicio.us>.
443

            
444
=cut