#!/usr/bin/perl
# (C) 2008  Török Edwin <edwin@clamav.net>
# parse <!ENTITY declarations and output them in the format
# used by generate_entitylist.c
# Format is EntityName,EntityValue.
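# Only entity values up to and including 0xffff are emitted; larger values are reported on stderr as TOOBIG.
# Entities whose value consists of several character references are not emitted either (reported on stderr as MULTIPLECHARS).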
use strict;
use warnings;

# Largest entity value that is emitted; anything above it is reported as TOOBIG.
use constant MAX_ENTITY_VALUE => 0xffff;

# Classify one chomped input line.
#
# Returns an empty list when the line produces no output. Otherwise returns
# the pair (STREAM, TEXT): STREAM is 'out' for stdout or 'err' for stderr, and
# TEXT is the complete line to print, trailing newline included.
#
# All text is built with plain interpolation or with a constant sprintf
# format, so a '%' inside the entity name or the input line is printed
# literally instead of being interpreted as a printf conversion.
sub _classify_entity_line {
	my ($line) = @_;

	# <!ENTITY name "value"> where the value is a quoted string without spaces.
	if ($line =~ /<!ENTITY +([^ \t]+)[ \t]+\" *([^ \"]+) *\" *>/) {
		my ($name, $v) = ($1, $2);

		# Exactly one numeric character reference, optionally written with a
		# double-escaped ampersand (&#38;#NNN;).
		if ($v =~ /^&(?:#38;)?#([^;]+);$/) {
			my $ref = $1;
			my $value;

			if ($ref =~ /^x([0-9a-fA-F]+)$/) {
				my $digits = $1;

				# Reject by digit count first: hex() on a very long digit
				# string overflows (and warns), while anything with more
				# significant digits than the maximum is too big anyway.
				(my $significant = $digits) =~ s/^0+//;
				my $max_digits = length sprintf('%x', MAX_ENTITY_VALUE);
				return ('err', "TOOBIG $line\n")
					if length($significant) > $max_digits;

				$value = hex($digits);
				return ('err', "TOOBIG $line\n")
					if $value > MAX_ENTITY_VALUE;
			} elsif ($ref =~ /^[0-9]+$/) {
				return ('err', "TOOBIG $line\n")
					if $ref > MAX_ENTITY_VALUE;
				$value = $ref;
			} else {
				# NOTE(review): the unknownN diagnostics go to stdout, unlike
				# TOOBIG/MULTIPLECHARS -- kept as is; confirm it is intended.
				return ('out', "unknown1: $line\n");
			}

			return ('out', sprintf("%s,%d\n", $name, $value));
		}

		# A run of hexadecimal character references. $1 holds only the last
		# reference of the run, because a repeated group keeps its final
		# iteration.
		if ($v =~ /^(&#x[0-9a-fA-F]+;)+$/) {
			return ('err', "MULTIPLECHARS $name $1\n");
		}

		return ('out', "unknown2: $line\n");
	}

	# Any other ENTITY declaration is reported, except those mentioning
	# PUBLIC or SYSTEM, which are skipped silently.
	if ($line =~ /<!ENTITY/ && $line !~ /PUBLIC|SYSTEM/) {
		return ('out', "unknown3: $line\n");
	}

	return;
}

# Read every input line (files named on the command line, or stdin) and print
# whatever the classifier produced to the stream it selected.
while (my $line = <>) {
	chomp $line;
	my ($stream, $text) = _classify_entity_line($line);
	next unless defined $stream;
	if ($stream eq 'err') {
		print STDERR $text;
	} else {
		print $text;
	}
}