#!/usr/bin/env perl
# $Id: syntaxform.pl 914 2024-01-07 21:48:39Z karl $
# Public domain.  Originally written by Karl Berry, 2016.
# Read pdftex.tex and generate pdftex-syntax.tex
# by looking for the primitive descriptions.

use strict; use warnings;

# classes need to be shown in a specific order,
# and have specific text for the headings.
# sorry for using an array. global for simpler sorting.
my %class_info = (
  "integer"            => [1, "Integer registers"],
  "read-only integer"  => [2, "Read-only integers"],
  "dimen"              => [3, "Dimen registers"],
  "tokens"             => [4, "Token registers"],
  "expandable"         => [5, "Expandable commands"],
  "general"            => [6, "General commands"],
);

exit (&main ());

sub main {
  my %primitive = &read_manual_for_primitives ();

  # group primitives into classes based on their \Whatever construct.
  my %class;
  #warn "\n\f\n sorting\n";
  for my $p (sort keys %primitive) {
    #print STDERR "sorting primitive $p...";
    my ($class) = $primitive{$p} =~ /\\Whatever *\{(.*?)\}/;
    # general commands don't have a \Whatever.
    $class = "general" if ! $class;
    #warn "class $class\n";
    
    # append this primitive, comma-separated.
    $class{$class} .= "$p,"
  }
  
  print <<END_HEADER;
% This is the list of new or extended primitives provided by pdftex.
% Don't edit this file, as it is auto-generated from the
% pdfTeX manual source pdftex.tex by the script syntaxform.pl.
% Syntax rule conventions borrowed from `TeXbook naruby' by Petr Olsak.
END_HEADER
  &print_by_class (\%class, \%primitive);
  return 0;
}

{

sub print_by_class {
  my ($class_ref,$primitive_ref) = @_;
  my %class = %$class_ref;  
  
  #warn "\n\f\n printing by class\n";
  for my $c (sort by_class keys %class) {
    #warn "\f printing class $c\n";
    my $aref = $class_info{$c};
    my $heading_name = $class_info{$c}->[1];
    #warn "  heading_name $heading_name\n";
    print <<END_START_CLASS;
\\section{$heading_name}

END_START_CLASS

    # extract list of primitives for this class.
    my @prims = split (/,/, $class{$c});
    for my $p (@prims) {
      my $val = $primitive_ref->{$p};
      #warn "  primitive $p, val $val\n";
      
      # get rid of the \pdftexprimitive{ and trailing }
      $val =~ s,^\\pdftexprimitive *\{,,;
      $val =~ s, *\} *$,,;
      
      # leading \Syntax and trailing } on their own lines for syntaxform.awk.
      $val =~ s,^ *\\Syntax *\{,\\Syntax\{\n,;
      $val =~ s, *\} *$,\n\},;
      
      # collapse multiple spaces
      $val =~ s,  +, ,g;
      
      print "\n$val\n";
    }
  }
}

sub by_class {
  if (! defined ($class_info{$a}->[0])) {
  warn "undef $a $class_info{$a},",  $class_info{$a}->[0], ".\n"; }
#  if (! $class_info{$b}->[0]) { warn "no class info for $b\n"; }
$class_info{$a}->[0] <=> $class_info{$b}->[0]; }

} # end block for %class_info


# read <> for \pdftexprimitive blocks.  Return hash with keys being the
# primitive name (including the leading \) and values the entire block,
# without newlines.
# 
sub read_manual_for_primitives {
  my $printing = 0;
  my $primitive = "";
  my %primitive;

  while (<>) {
    # \pdftexprimitive block ends at next unindented blank or \... line.
    $printing = 0 if /^($|\\)/;
    if (/^\\pdftexprimitive/) {
      $printing = 1;
      
      my $type;
      ($primitive) = m/\\cs *\{(.*?)\}/;
      #warn "$.: found primitive $primitive\n";
      warn "$ARGV:$.: no primitive found in: $_" if (! $primitive);
      $primitive = "\\$primitive";
      
      # Just one case, \special, has multiple instances of \pdftexprimitive.
      # kludge by appending spaces to the name to make it unique;
      # we later reduce multiple spaces to one, so it's not visible.
      $primitive .= " " until ! exists $primitive{$primitive};
    }

    if ($printing) {
      # concatenate lines of definition; assume spacing is reasonable.
      chomp;
      $primitive{$primitive} .= $_;
      #warn "  concatenated $primitive to $primitive{$primitive}\n";
    }
  }
  
  return %primitive;
}