use strict;

my %op = (
  nop => {
    a => 0,
    b => 0,
  },
  ld => {
    a => 1,
    b => 0,
  },
  st => {
    a => 1,
    b => 1,
  },
  lea => {
    a => 1,
    b => 2,
  },
  add => {
    a => 2,
    b => 0,
  },
  sub => {
    a => 2,
    b => 1,
  },
  addadr => {
    a => 2,
    b => 2,
  },
  subadr => {
    a => 2,
    b => 3,
  },
  and => {
    a => 3,
    b => 0,
  },
  or => {
    a => 3,
    b => 1,
  },
  eor => {
    a => 3,
    b => 2,
  },
  cpa => {
    a => 4,
    b => 0,
  },
  cpl => {
    a => 4,
    b => 1,
  },
  jpz => {
    a => 6,
    b => 0,
  },
  jmi => {
    a => 6,
    b => 1,
  },
  jnz => {
    a => 6,
    b => 2,
  },
  jze => {
    a => 6,
    b => 3,
  },
  jmp => {
    a => 6,
    b => 4,
  },
  halt => {
    a => 0xf,
    b => 0xf,
    c => 0xf,
    d => 0xf,
    e => 0xffff,
  },
);

my @line;

my $addr = 0;
my %label2addr;
while (<>) {

  ## Comment
  s/;.*$//;
  
  ## Label
  if (s/^(\w+)\s*//) {
    my $lname = lc $1;
    if (exists $label2addr{$lname}) {
      die qq<Line $.: Label "$lname" is already defined>;
    }
    $label2addr{$lname} = $addr;
  } elsif (s/^\s+//) {
    #
  }

  if (s/^(\w+)\s*//) {
    my $mnem = lc $1;
    my $op = $op{$mnem};
    unless ($op) {
      die qq<Line $.: Instruction "$mnem" is not defined>;
    }

    my $oline = {
      a => 0,
      b => 0,
      c => 0,
      d => 0,
      e => 0,
      %$op,
    };

    if (s/^[Gg][Rr]([0-9A-Fa-f]+)\s*(?:,\s*)?//) {
      $oline->{c} = hex $1;
    }

    if (s/^(0x[0-9A-Fa-f]+|0b[01]+|\d+)\s*(?:,\s*)?//) {
      my $v = $1;
      if ($v =~ /^0x/) {
        $oline->{e} = hex $v;
      } elsif ($v =~ /^0b/) {
        $oline->{e} = eval $v;
      } else {
        $oline->{e} = $1;
      }
    } elsif (s/^(\w+)\s*(?:,\s*)?//) {
      $oline->{e_label} = lc $1;
    }

    if (s/^[Gg][Rr]([0-9A-Fa-f]+)\s*(?:,\s*)?//) {
      $oline->{d} = hex $1;
    }

    push @line, $oline;

    $addr++;    
  }

  s/[\x0D\x0A]+//g;
  if (length) {
    die qq<Line $.: "$_": Syntax error>;
  }
}

for my $line (@line) {
  my $v = ($line->{a} << 28) + ($line->{b} << 24) + ($line->{c} << 20) + ($line->{d} << 16);
  if (exists $line->{e_label}) {
    unless (exists $label2addr{$line->{e_label}}) {
      die qq!Label "$line->{e_label}" is not defined!;
    }
    $v += $label2addr{$line->{e_label}};
  } else {
    $v += $line->{e};
  }

  printf "%08X\n", $v;
}


__END__

=head1 NAME

casl.pl - An assembler for a COMET / CASL variant

=head1 SYNOPSIS

  $ perl casl.pl input.cas | perl hex2bin.pl > output.bin

=head1 DESCRIPTION

The C<casl.pl> script is an assembler that converts a source
program in CASL variant into a memory image binary for a COMET-like 
processor.

=head1 ASSEMBLY LANGUAGE SYNTAX

  casl-like-program := [line] *(line-seprator line)

  line := [label-line / instruction-line] *s [comment]
            ;; Any empty line is ignored.

  label-line := label
            ;; Its value is set to the address of the first byte of
            ;; the instruction generated from the next |instruction-line|,
            ;; if any, to the address of the byte just after the
            ;; last byte of the last instruction generated from the program,
            ;; if any, or to zero, in this prcedence.

  label := <Perl /\w+/>
            ;; Label must be unique.
            ;; Labels are case-insensitive.

  instruction-line := [label]
                      1*s operation
                      [1*s (three-operands / two-operands)]
            ;; Operands must be valid for the operation

  operation := <Mnemonic for the processor>
            ;; Case-insensitive

  three-operands := register-operand
                    operand-separator address-operand
                    [operand-separator index-operand]

  two-operands := address-operand [operand-separator index-operand]

  register-operand := gr-ref
            ;; General register operand

  address-operand := label-ref / number
            ;; Address operand

  index-operand := gr-ref - "GR0"
            ;; Index register operand

  gr-ref := "GR" HEXDIGIT
            ;; Case-insensitive

  label-ref := label
            ;; Must be a defined label.

  number := dec-number / hex-number / bin-number

  dec-number := 1*(U+0030 / ... / U+0039)

  hex-number := '0x' 1*(U+0030 / ... / U+0039 / "A" / ... / "F")
            ;; '0x' prefix is case-sensitive; numbers are case-insensitive.

  bin-number := '0b' 1*(U+0030 / U+0031)
            ;; Case-insensitive.

  operand-separator := *s "," *s

  line-separator := [U+000D] U+000A

  s := U+0009 / U+0020
  
  comment := U+003B *(<any character> - [U+000D / U+000A])
            ;; Always ignored.

=head1 LICENSE

Copyright 2006 Wakaba <w@suika.fam.cx>.  All rights reserved.

This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=cut
