contrib/entitynorm/entity_decl_parse.pl
b0b8398b
 #!/usr/bin/perl
 # (C) 2008  Török Edwin <edwin@clamav.net>
 # parse <!ENTITY declarations and output them in the format
 # used by generate_entitylist.c
 # Format is EntityName,EntityValue.
 # Only accepts entity values V <= 0xffff (larger values are reported as TOOBIG on stderr),
 # and doesn't accept entities that have multiple values assigned.
 use strict;
 use warnings;

 # Read declarations line by line from the files named on the command line
 # (or STDIN) and print one "EntityName,EntityValue" line on STDOUT for every
 # entity whose value is a single numeric character reference <= 0xffff.
 #
 # Diagnostics:
 #   TOOBIG <line>                (STDERR)  value is above 0xffff
 #   MULTIPLECHARS <name> <value> (STDERR)  value is a run of several hex references
 #   unknown1: / unknown2: / unknown3: <line> (STDOUT)  declaration not understood
 #
 # NOTE(review): the "unknown" diagnostics share STDOUT with the real output,
 # unlike TOOBIG/MULTIPLECHARS -- confirm whether that is intended before
 # moving them to STDERR.
 while (my $line = <>) {
 	chomp $line;
 	if ($line =~ /<!ENTITY +([^ \t]+)[ \t]+\" *([^ \"]+) *\" *>/) {
 		my ($name, $v) = ($1, $2);
 		# A single reference, optionally double-escaped as "&#38;#NNN;".
 		if ($v =~ /^&(?:#38;)?#([^;]+);$/) {
 			my $ref = $1;
 			my $value;
 			if ($ref =~ /^x([0-9a-fA-F]+)$/) {
 				$value = hex($1);
 			} elsif ($ref =~ /^[0-9]+$/) {
 				$value = $ref;
 			} else {
 				print "unknown1: $line\n";
 				next;
 			}
 			if ($value > 0xffff) {
 				# Plain print: the input line must never be used as a
 				# printf format, it may legitimately contain '%'.
 				print STDERR "TOOBIG $line\n";
 			} else {
 				# Pass the name as an argument for the same reason.
 				printf "%s,%d\n", $name, $value;
 			}
 		} elsif ($v =~ /^(?:&#x[0-9a-fA-F]+;)+$/) {
 			# Report the whole value; a capture inside a repeated group
 			# would only hold the last reference.
 			print STDERR "MULTIPLECHARS $name $v\n";
 		} else {
 			print "unknown2: $line\n";
 		}
 	} elsif ($line =~ /<!ENTITY/) {
 		# External entities (PUBLIC/SYSTEM identifiers) are skipped silently.
 		if ($line !~ /PUBLIC|SYSTEM/) {
 			print "unknown3: $line\n";
 		}
 	}
 }