Browse code

use tomsfastmath

Török Edvin authored on 2012/01/10 00:06:07
Showing 9 changed files
... ...
@@ -758,28 +758,6 @@ AC_ARG_ENABLE([milter],
758 758
 [  --enable-milter	  build clamav-milter],
759 759
 have_milter=$enableval, have_milter="no")
760 760
 
761
-have_system_tommath=no
762
-AC_ARG_WITH([system-tommath],
763
-[  --with-system-tommath	link against system libtommath (default = no, use bundled)],
764
-[
765
-	case "$withval" in
766
-	yes)
767
-		AC_CHECK_LIB([tommath], [mp_mul_2d],
768
-			[
769
-				AC_DEFINE([HAVE_SYSTEM_TOMMATH], 1, [link against system-wide libtommath])
770
-				LIBCLAMAV_LIBS="$LIBCLAMAV_LIBS -ltommath"
771
-				have_system_tommath=yes
772
-			],
773
-			[AC_MSG_WARN([****** Option --with-system-tommath was given but libtommath does not seem to be available, using bundled files instead])]
774
-		)
775
-	;;
776
-	no) ;;
777
-	*) AC_MSG_ERROR([--with-system-tommath does not take an argument]) ;;
778
-	esac
779
-])
780
-AM_CONDITIONAL([LINK_TOMMATH], test "x$have_system_tommath" = "xyes")
781
-
782
-
783 761
 dnl we need to try to link with iconv, otherwise there could be a 
784 762
 dnl mismatch between a 32-bit and 64-bit lib. Detect this at configure time.
785 763
 dnl we need to check after zlib/bzip2, because they can change the include path
... ...
@@ -341,7 +341,10 @@ libclamav_la_SOURCES = \
341 341
 	sha256.h\
342 342
 	sha1.c\
343 343
 	sha1.h\
344
+	bignum.c\
344 345
 	bignum.h\
346
+	bignum_fast.h\
347
+	bignum_superclass.h\
345 348
 	bytecode.c\
346 349
 	bytecode.h\
347 350
 	bytecode_vm.c\
... ...
@@ -384,11 +387,6 @@ libclamav_la_SOURCES = \
384 384
 	asn1.c \
385 385
 	asn1.h
386 386
 
387
-if !LINK_TOMMATH
388
-libclamav_la_SOURCES += bignum.c \
389
-		     bignum_class.h
390
-endif
391
-
392 387
 .PHONY: version.h.tmp
393 388
 version.c: version.h
394 389
 version.h: version.h.tmp
... ...
@@ -413,7 +411,7 @@ version.h.tmp:
413 413
 
414 414
 lib_LTLIBRARIES += libclamav.la
415 415
 noinst_LTLIBRARIES = libclamav_internal_utils.la libclamav_internal_utils_nothreads.la libclamav_nocxx.la
416
-EXTRA_DIST += regex/engine.c libclamav.map \
416
+EXTRA_DIST += regex/engine.c fp_sqr_comba.c fp_mul_comba.c libclamav.map \
417 417
 	     jsparse/generated/operators.h jsparse/generated/keywords.h jsparse/future_reserved_words.list\
418 418
 	     jsparse/keywords.list jsparse/special_keywords.list jsparse/operators.gperf
419 419
 COMMON_CLEANFILES=version.h version.h.tmp *.gcda *.gcno
... ...
@@ -63,9 +63,6 @@ target_triplet = @target@
63 63
 @ENABLE_UNRAR_TRUE@	@top_srcdir@/libclamunrar_iface/libclamunrar_iface.map
64 64
 @ENABLE_UNRAR_TRUE@@VERSIONSCRIPT_TRUE@am__append_5 = -Wl,@VERSIONSCRIPTFLAG@,@top_srcdir@/libclamunrar_iface/libclamunrar_iface.map
65 65
 @VERSIONSCRIPT_TRUE@am__append_6 = -Wl,@VERSIONSCRIPTFLAG@,@top_srcdir@/libclamav/libclamav.map
66
-@LINK_TOMMATH_FALSE@am__append_7 = bignum.c \
67
-@LINK_TOMMATH_FALSE@		     bignum_class.h
68
-
69 66
 subdir = libclamav
70 67
 DIST_COMMON = $(include_HEADERS) $(srcdir)/Makefile.am \
71 68
 	$(srcdir)/Makefile.in
... ...
@@ -113,53 +110,6 @@ LTLIBRARIES = $(lib_LTLIBRARIES) $(noinst_LTLIBRARIES)
113 113
 @ENABLE_UNRAR_TRUE@am__DEPENDENCIES_1 = libclamunrar_iface.la
114 114
 @ENABLE_LLVM_FALSE@am__DEPENDENCIES_2 = libclamav_nocxx.la
115 115
 @ENABLE_LLVM_TRUE@am__DEPENDENCIES_2 = c++/libclamavcxx.la
116
-am__libclamav_la_SOURCES_DIST = clamav.h matcher-ac.c matcher-ac.h \
117
-	matcher-bm.c matcher-bm.h matcher-hash.c matcher-hash.h \
118
-	matcher.c matcher.h others.c others.h readdb.c readdb.h cvd.c \
119
-	cvd.h dsig.c dsig.h scanners.c scanners.h textdet.c textdet.h \
120
-	filetypes.c filetypes.h filetypes_int.h rtf.c rtf.h blob.c \
121
-	blob.h mbox.c mbox.h message.c message.h table.c table.h \
122
-	text.c text.h ole2_extract.c ole2_extract.h vba_extract.c \
123
-	vba_extract.h cltypes.h msexpand.c msexpand.h pe.c pe.h \
124
-	pe_icons.c pe_icons.h disasm.c disasm.h disasm-common.h \
125
-	disasmpriv.h upx.c upx.h htmlnorm.c htmlnorm.h chmunpack.c \
126
-	chmunpack.h rebuildpe.c rebuildpe.h petite.c petite.h \
127
-	wwunpack.c wwunpack.h unsp.c unsp.h aspack.c aspack.h \
128
-	packlibs.c packlibs.h fsg.c fsg.h mew.c mew.h upack.c upack.h \
129
-	line.c line.h untar.c untar.h unzip.c unzip.h inflate64.c \
130
-	inflate64.h inffixed64.h inflate64_priv.h special.c special.h \
131
-	binhex.c binhex.h is_tar.c is_tar.h tnef.c tnef.h autoit.c \
132
-	autoit.h unarj.c unarj.h nsis/bzlib.c nsis/bzlib_private.h \
133
-	nsis/nsis_bzlib.h nsis/nulsft.c nsis/nulsft.h nsis/infblock.c \
134
-	nsis/nsis_zconf.h nsis/nsis_zlib.h nsis/nsis_zutil.h pdf.c \
135
-	pdf.h spin.c spin.h yc.c yc.h elf.c elf.h execs.h sis.c sis.h \
136
-	uuencode.c uuencode.h phishcheck.c phishcheck.h \
137
-	phish_domaincheck_db.c phish_domaincheck_db.h \
138
-	phish_whitelist.c phish_whitelist.h iana_cctld.h iana_tld.h \
139
-	regex_list.c regex_list.h regex_suffix.c regex_suffix.h \
140
-	mspack.c mspack.h cab.c cab.h entconv.c entconv.h entitylist.h \
141
-	encoding_aliases.h hashtab.c hashtab.h dconf.c dconf.h \
142
-	lzma_iface.c lzma_iface.h 7z_iface.c 7z_iface.h 7z/7z.h \
143
-	7z/7zAlloc.c 7z/7zAlloc.h 7z/7zBuf.c 7z/7zBuf.h 7z/7zBuf2.c \
144
-	7z/7zCrc.c 7z/7zCrc.h 7z/7zDec.c 7z/7zFile.c 7z/7zFile.h \
145
-	7z/7zIn.c 7z/7zStream.c 7z/7zVersion.h 7z/Bcj2.c 7z/Bcj2.h \
146
-	7z/Bra.c 7z/Bra.h 7z/Bra86.c 7z/CpuArch.h 7z/Lzma2Dec.c \
147
-	7z/Lzma2Dec.h 7z/LzmaDec.c 7z/LzmaDec.h 7z/Ppmd.h 7z/Ppmd7.c \
148
-	7z/Ppmd7.h 7z/Ppmd7Dec.c 7z/Types.h explode.c explode.h \
149
-	textnorm.c textnorm.h dlp.c dlp.h jsparse/js-norm.c \
150
-	jsparse/js-norm.h jsparse/lexglobal.h jsparse/textbuf.h uniq.c \
151
-	uniq.h version.c version.h mpool.c mpool.h filtering.h \
152
-	filtering.c fmap.c fmap.h perflogging.c perflogging.h \
153
-	default.h sha256.c sha256.h sha1.c sha1.h bignum.h bytecode.c \
154
-	bytecode.h bytecode_vm.c bytecode_priv.h clambc.h cpio.c \
155
-	cpio.h macho.c macho.h ishield.c ishield.h type_desc.h \
156
-	bcfeatures.h bytecode_api.c bytecode_api_decl.c bytecode_api.h \
157
-	bytecode_api_impl.h bytecode_hooks.h cache.c cache.h \
158
-	bytecode_detect.c bytecode_detect.h builtin_bytecodes.h \
159
-	events.c events.h swf.c swf.h jpeg.c jpeg.h png.c png.h \
160
-	iso9660.c iso9660.h arc4.c arc4.h rijndael.c rijndael.h \
161
-	crtmgr.c crtmgr.h asn1.c asn1.h bignum.c bignum_class.h
162
-@LINK_TOMMATH_FALSE@am__objects_1 = libclamav_la-bignum.lo
163 116
 am_libclamav_la_OBJECTS = libclamav_la-matcher-ac.lo \
164 117
 	libclamav_la-matcher-bm.lo libclamav_la-matcher-hash.lo \
165 118
 	libclamav_la-matcher.lo libclamav_la-others.lo \
... ...
@@ -206,15 +156,16 @@ am_libclamav_la_OBJECTS = libclamav_la-matcher-ac.lo \
206 206
 	libclamav_la-mpool.lo libclamav_la-filtering.lo \
207 207
 	libclamav_la-fmap.lo libclamav_la-perflogging.lo \
208 208
 	libclamav_la-sha256.lo libclamav_la-sha1.lo \
209
-	libclamav_la-bytecode.lo libclamav_la-bytecode_vm.lo \
210
-	libclamav_la-cpio.lo libclamav_la-macho.lo \
211
-	libclamav_la-ishield.lo libclamav_la-bytecode_api.lo \
212
-	libclamav_la-bytecode_api_decl.lo libclamav_la-cache.lo \
213
-	libclamav_la-bytecode_detect.lo libclamav_la-events.lo \
214
-	libclamav_la-swf.lo libclamav_la-jpeg.lo libclamav_la-png.lo \
209
+	libclamav_la-bignum.lo libclamav_la-bytecode.lo \
210
+	libclamav_la-bytecode_vm.lo libclamav_la-cpio.lo \
211
+	libclamav_la-macho.lo libclamav_la-ishield.lo \
212
+	libclamav_la-bytecode_api.lo libclamav_la-bytecode_api_decl.lo \
213
+	libclamav_la-cache.lo libclamav_la-bytecode_detect.lo \
214
+	libclamav_la-events.lo libclamav_la-swf.lo \
215
+	libclamav_la-jpeg.lo libclamav_la-png.lo \
215 216
 	libclamav_la-iso9660.lo libclamav_la-arc4.lo \
216 217
 	libclamav_la-rijndael.lo libclamav_la-crtmgr.lo \
217
-	libclamav_la-asn1.lo $(am__objects_1)
218
+	libclamav_la-asn1.lo
218 219
 libclamav_la_OBJECTS = $(am_libclamav_la_OBJECTS)
219 220
 AM_V_lt = $(am__v_lt_$(V))
220 221
 am__v_lt_ = $(am__v_lt_$(AM_DEFAULT_VERBOSITY))
... ...
@@ -322,7 +273,7 @@ SOURCES = $(libclamav_la_SOURCES) \
322 322
 	$(libclamav_internal_utils_nothreads_la_SOURCES) \
323 323
 	$(libclamav_nocxx_la_SOURCES) $(libclamunrar_la_SOURCES) \
324 324
 	$(libclamunrar_iface_la_SOURCES)
325
-DIST_SOURCES = $(am__libclamav_la_SOURCES_DIST) \
325
+DIST_SOURCES = $(libclamav_la_SOURCES) \
326 326
 	$(libclamav_internal_utils_la_SOURCES) \
327 327
 	$(libclamav_internal_utils_nothreads_la_SOURCES) \
328 328
 	$(libclamav_nocxx_la_SOURCES) \
... ...
@@ -540,8 +491,8 @@ AM_CPPFLAGS = -I$(top_srcdir) -I@srcdir@/nsis $(LTDLINCL) \
540 540
 AM_CFLAGS = @WERR_CFLAGS@
541 541
 lib_LTLIBRARIES = $(am__append_3) libclamav.la
542 542
 EXTRA_DIST = c++/Makefile.nollvm.in $(am__append_4) regex/engine.c \
543
-	libclamav.map jsparse/generated/operators.h \
544
-	jsparse/generated/keywords.h \
543
+	fp_sqr_comba.c fp_mul_comba.c libclamav.map \
544
+	jsparse/generated/operators.h jsparse/generated/keywords.h \
545 545
 	jsparse/future_reserved_words.list jsparse/keywords.list \
546 546
 	jsparse/special_keywords.list jsparse/operators.gperf
547 547
 @ENABLE_UNRAR_TRUE@libclamunrar_la_LDFLAGS = @TH_SAFE@ -version-info \
... ...
@@ -628,52 +579,256 @@ libclamav_la_CFLAGS = $(AM_CFLAGS) -DSEARCH_LIBDIR=\"$(libdir)\"
628 628
 libclamav_la_LDFLAGS = @TH_SAFE@ -version-info @LIBCLAMAV_VERSION@ \
629 629
 	-no-undefined $(am__append_6)
630 630
 include_HEADERS = clamav.h
631
-libclamav_la_SOURCES = clamav.h matcher-ac.c matcher-ac.h matcher-bm.c \
632
-	matcher-bm.h matcher-hash.c matcher-hash.h matcher.c matcher.h \
633
-	others.c others.h readdb.c readdb.h cvd.c cvd.h dsig.c dsig.h \
634
-	scanners.c scanners.h textdet.c textdet.h filetypes.c \
635
-	filetypes.h filetypes_int.h rtf.c rtf.h blob.c blob.h mbox.c \
636
-	mbox.h message.c message.h table.c table.h text.c text.h \
637
-	ole2_extract.c ole2_extract.h vba_extract.c vba_extract.h \
638
-	cltypes.h msexpand.c msexpand.h pe.c pe.h pe_icons.c \
639
-	pe_icons.h disasm.c disasm.h disasm-common.h disasmpriv.h \
640
-	upx.c upx.h htmlnorm.c htmlnorm.h chmunpack.c chmunpack.h \
641
-	rebuildpe.c rebuildpe.h petite.c petite.h wwunpack.c \
642
-	wwunpack.h unsp.c unsp.h aspack.c aspack.h packlibs.c \
643
-	packlibs.h fsg.c fsg.h mew.c mew.h upack.c upack.h line.c \
644
-	line.h untar.c untar.h unzip.c unzip.h inflate64.c inflate64.h \
645
-	inffixed64.h inflate64_priv.h special.c special.h binhex.c \
646
-	binhex.h is_tar.c is_tar.h tnef.c tnef.h autoit.c autoit.h \
647
-	unarj.c unarj.h nsis/bzlib.c nsis/bzlib_private.h \
648
-	nsis/nsis_bzlib.h nsis/nulsft.c nsis/nulsft.h nsis/infblock.c \
649
-	nsis/nsis_zconf.h nsis/nsis_zlib.h nsis/nsis_zutil.h pdf.c \
650
-	pdf.h spin.c spin.h yc.c yc.h elf.c elf.h execs.h sis.c sis.h \
651
-	uuencode.c uuencode.h phishcheck.c phishcheck.h \
652
-	phish_domaincheck_db.c phish_domaincheck_db.h \
653
-	phish_whitelist.c phish_whitelist.h iana_cctld.h iana_tld.h \
654
-	regex_list.c regex_list.h regex_suffix.c regex_suffix.h \
655
-	mspack.c mspack.h cab.c cab.h entconv.c entconv.h entitylist.h \
656
-	encoding_aliases.h hashtab.c hashtab.h dconf.c dconf.h \
657
-	lzma_iface.c lzma_iface.h 7z_iface.c 7z_iface.h 7z/7z.h \
658
-	7z/7zAlloc.c 7z/7zAlloc.h 7z/7zBuf.c 7z/7zBuf.h 7z/7zBuf2.c \
659
-	7z/7zCrc.c 7z/7zCrc.h 7z/7zDec.c 7z/7zFile.c 7z/7zFile.h \
660
-	7z/7zIn.c 7z/7zStream.c 7z/7zVersion.h 7z/Bcj2.c 7z/Bcj2.h \
661
-	7z/Bra.c 7z/Bra.h 7z/Bra86.c 7z/CpuArch.h 7z/Lzma2Dec.c \
662
-	7z/Lzma2Dec.h 7z/LzmaDec.c 7z/LzmaDec.h 7z/Ppmd.h 7z/Ppmd7.c \
663
-	7z/Ppmd7.h 7z/Ppmd7Dec.c 7z/Types.h explode.c explode.h \
664
-	textnorm.c textnorm.h dlp.c dlp.h jsparse/js-norm.c \
665
-	jsparse/js-norm.h jsparse/lexglobal.h jsparse/textbuf.h uniq.c \
666
-	uniq.h version.c version.h mpool.c mpool.h filtering.h \
667
-	filtering.c fmap.c fmap.h perflogging.c perflogging.h \
668
-	default.h sha256.c sha256.h sha1.c sha1.h bignum.h bytecode.c \
669
-	bytecode.h bytecode_vm.c bytecode_priv.h clambc.h cpio.c \
670
-	cpio.h macho.c macho.h ishield.c ishield.h type_desc.h \
671
-	bcfeatures.h bytecode_api.c bytecode_api_decl.c bytecode_api.h \
672
-	bytecode_api_impl.h bytecode_hooks.h cache.c cache.h \
673
-	bytecode_detect.c bytecode_detect.h builtin_bytecodes.h \
674
-	events.c events.h swf.c swf.h jpeg.c jpeg.h png.c png.h \
675
-	iso9660.c iso9660.h arc4.c arc4.h rijndael.c rijndael.h \
676
-	crtmgr.c crtmgr.h asn1.c asn1.h $(am__append_7)
631
+libclamav_la_SOURCES = \
632
+	clamav.h \
633
+        matcher-ac.c \
634
+        matcher-ac.h \
635
+        matcher-bm.c \
636
+        matcher-bm.h \
637
+        matcher-hash.c \
638
+        matcher-hash.h \
639
+        matcher.c \
640
+        matcher.h \
641
+        others.c \
642
+        others.h \
643
+        readdb.c \
644
+	readdb.h \
645
+	cvd.c \
646
+	cvd.h \
647
+	dsig.c \
648
+	dsig.h \
649
+	scanners.c \
650
+	scanners.h \
651
+	textdet.c \
652
+	textdet.h \
653
+	filetypes.c \
654
+	filetypes.h \
655
+	filetypes_int.h \
656
+	rtf.c \
657
+	rtf.h \
658
+	blob.c \
659
+	blob.h \
660
+	mbox.c \
661
+	mbox.h \
662
+	message.c \
663
+	message.h \
664
+	table.c \
665
+	table.h \
666
+	text.c \
667
+	text.h \
668
+	ole2_extract.c \
669
+	ole2_extract.h \
670
+	vba_extract.c \
671
+	vba_extract.h \
672
+	cltypes.h \
673
+	msexpand.c \
674
+	msexpand.h \
675
+	pe.c \
676
+	pe.h \
677
+	pe_icons.c \
678
+	pe_icons.h \
679
+	disasm.c \
680
+	disasm.h \
681
+	disasm-common.h \
682
+	disasmpriv.h \
683
+	upx.c \
684
+	upx.h \
685
+	htmlnorm.c \
686
+	htmlnorm.h \
687
+	chmunpack.c \
688
+	chmunpack.h \
689
+	rebuildpe.c \
690
+	rebuildpe.h \
691
+	petite.c \
692
+	petite.h \
693
+	wwunpack.c \
694
+	wwunpack.h \
695
+	unsp.c \
696
+	unsp.h \
697
+	aspack.c \
698
+	aspack.h \
699
+	packlibs.c \
700
+	packlibs.h \
701
+	fsg.c \
702
+	fsg.h \
703
+	mew.c \
704
+	mew.h \
705
+	upack.c \
706
+	upack.h \
707
+	line.c \
708
+	line.h \
709
+	untar.c \
710
+	untar.h \
711
+	unzip.c \
712
+	unzip.h \
713
+	inflate64.c \
714
+	inflate64.h \
715
+	inffixed64.h \
716
+	inflate64_priv.h \
717
+	special.c \
718
+	special.h \
719
+	binhex.c \
720
+	binhex.h \
721
+	is_tar.c \
722
+	is_tar.h \
723
+	tnef.c \
724
+	tnef.h \
725
+	autoit.c \
726
+	autoit.h \
727
+	unarj.c \
728
+	unarj.h \
729
+	nsis/bzlib.c \
730
+	nsis/bzlib_private.h \
731
+	nsis/nsis_bzlib.h \
732
+	nsis/nulsft.c \
733
+	nsis/nulsft.h \
734
+	nsis/infblock.c \
735
+	nsis/nsis_zconf.h \
736
+	nsis/nsis_zlib.h \
737
+	nsis/nsis_zutil.h \
738
+	pdf.c \
739
+	pdf.h \
740
+	spin.c \
741
+	spin.h \
742
+	yc.c \
743
+	yc.h \
744
+	elf.c \
745
+	elf.h \
746
+	execs.h \
747
+	sis.c \
748
+	sis.h \
749
+	uuencode.c \
750
+	uuencode.h \
751
+	phishcheck.c \
752
+	phishcheck.h \
753
+	phish_domaincheck_db.c \
754
+	phish_domaincheck_db.h \
755
+	phish_whitelist.c \
756
+	phish_whitelist.h \
757
+	iana_cctld.h \
758
+	iana_tld.h \
759
+	regex_list.c \
760
+	regex_list.h \
761
+	regex_suffix.c \
762
+	regex_suffix.h \
763
+	mspack.c \
764
+	mspack.h \
765
+	cab.c \
766
+	cab.h \
767
+	entconv.c \
768
+	entconv.h \
769
+	entitylist.h \
770
+	encoding_aliases.h \
771
+	hashtab.c \
772
+	hashtab.h \
773
+	dconf.c \
774
+	dconf.h \
775
+	lzma_iface.c \
776
+	lzma_iface.h \
777
+	7z_iface.c \
778
+	7z_iface.h \
779
+	7z/7z.h \
780
+	7z/7zAlloc.c \
781
+	7z/7zAlloc.h \
782
+	7z/7zBuf.c \
783
+	7z/7zBuf.h \
784
+	7z/7zBuf2.c \
785
+	7z/7zCrc.c \
786
+	7z/7zCrc.h \
787
+	7z/7zDec.c \
788
+	7z/7zFile.c \
789
+	7z/7zFile.h \
790
+	7z/7zIn.c \
791
+	7z/7zStream.c \
792
+	7z/7zVersion.h \
793
+	7z/Bcj2.c \
794
+	7z/Bcj2.h \
795
+	7z/Bra.c \
796
+	7z/Bra.h \
797
+	7z/Bra86.c \
798
+	7z/CpuArch.h \
799
+	7z/Lzma2Dec.c \
800
+	7z/Lzma2Dec.h \
801
+	7z/LzmaDec.c \
802
+	7z/LzmaDec.h \
803
+	7z/Ppmd.h \
804
+	7z/Ppmd7.c \
805
+	7z/Ppmd7.h \
806
+	7z/Ppmd7Dec.c \
807
+	7z/Types.h \
808
+	explode.c \
809
+	explode.h \
810
+	textnorm.c \
811
+	textnorm.h \
812
+	dlp.c \
813
+	dlp.h \
814
+	jsparse/js-norm.c \
815
+	jsparse/js-norm.h \
816
+	jsparse/lexglobal.h \
817
+	jsparse/textbuf.h \
818
+	uniq.c \
819
+	uniq.h \
820
+	version.c\
821
+	version.h\
822
+	mpool.c\
823
+	mpool.h \
824
+	filtering.h\
825
+	filtering.c\
826
+	fmap.c \
827
+	fmap.h \
828
+	perflogging.c\
829
+	perflogging.h\
830
+	default.h\
831
+	sha256.c\
832
+	sha256.h\
833
+	sha1.c\
834
+	sha1.h\
835
+	bignum.c\
836
+	bignum.h\
837
+	bignum_fast.h\
838
+	bignum_superclass.h\
839
+	bytecode.c\
840
+	bytecode.h\
841
+	bytecode_vm.c\
842
+	bytecode_priv.h\
843
+	clambc.h \
844
+	cpio.c \
845
+	cpio.h \
846
+	macho.c \
847
+	macho.h \
848
+	ishield.c \
849
+	ishield.h \
850
+	type_desc.h \
851
+	bcfeatures.h \
852
+	bytecode_api.c \
853
+	bytecode_api_decl.c \
854
+	bytecode_api.h \
855
+	bytecode_api_impl.h \
856
+	bytecode_hooks.h \
857
+	cache.c \
858
+	cache.h \
859
+	bytecode_detect.c \
860
+	bytecode_detect.h\
861
+	builtin_bytecodes.h\
862
+	events.c\
863
+	events.h \
864
+	swf.c \
865
+	swf.h \
866
+	jpeg.c \
867
+	jpeg.h \
868
+	png.c \
869
+	png.h \
870
+	iso9660.c \
871
+	iso9660.h \
872
+	arc4.c \
873
+	arc4.h \
874
+	rijndael.c \
875
+	rijndael.h \
876
+	crtmgr.c \
877
+	crtmgr.h \
878
+	asn1.c \
879
+	asn1.h
880
+
677 881
 noinst_LTLIBRARIES = libclamav_internal_utils.la libclamav_internal_utils_nothreads.la libclamav_nocxx.la
678 882
 COMMON_CLEANFILES = version.h version.h.tmp *.gcda *.gcno
679 883
 @MAINTAINER_MODE_TRUE@BUILT_SOURCES = jsparse/generated/operators.h jsparse/generated/keywords.h jsparse-keywords.gperf
... ...
@@ -1683,6 +1838,14 @@ libclamav_la-sha1.lo: sha1.c
1683 1683
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
1684 1684
 @am__fastdepCC_FALSE@	$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -c -o libclamav_la-sha1.lo `test -f 'sha1.c' || echo '$(srcdir)/'`sha1.c
1685 1685
 
1686
+libclamav_la-bignum.lo: bignum.c
1687
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -MT libclamav_la-bignum.lo -MD -MP -MF $(DEPDIR)/libclamav_la-bignum.Tpo -c -o libclamav_la-bignum.lo `test -f 'bignum.c' || echo '$(srcdir)/'`bignum.c
1688
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libclamav_la-bignum.Tpo $(DEPDIR)/libclamav_la-bignum.Plo
1689
+@am__fastdepCC_FALSE@	$(AM_V_CC) @AM_BACKSLASH@
1690
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='bignum.c' object='libclamav_la-bignum.lo' libtool=yes @AMDEPBACKSLASH@
1691
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
1692
+@am__fastdepCC_FALSE@	$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -c -o libclamav_la-bignum.lo `test -f 'bignum.c' || echo '$(srcdir)/'`bignum.c
1693
+
1686 1694
 libclamav_la-bytecode.lo: bytecode.c
1687 1695
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -MT libclamav_la-bytecode.lo -MD -MP -MF $(DEPDIR)/libclamav_la-bytecode.Tpo -c -o libclamav_la-bytecode.lo `test -f 'bytecode.c' || echo '$(srcdir)/'`bytecode.c
1688 1696
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libclamav_la-bytecode.Tpo $(DEPDIR)/libclamav_la-bytecode.Plo
... ...
@@ -1827,14 +1990,6 @@ libclamav_la-asn1.lo: asn1.c
1827 1827
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
1828 1828
 @am__fastdepCC_FALSE@	$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -c -o libclamav_la-asn1.lo `test -f 'asn1.c' || echo '$(srcdir)/'`asn1.c
1829 1829
 
1830
-libclamav_la-bignum.lo: bignum.c
1831
-@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -MT libclamav_la-bignum.lo -MD -MP -MF $(DEPDIR)/libclamav_la-bignum.Tpo -c -o libclamav_la-bignum.lo `test -f 'bignum.c' || echo '$(srcdir)/'`bignum.c
1832
-@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libclamav_la-bignum.Tpo $(DEPDIR)/libclamav_la-bignum.Plo
1833
-@am__fastdepCC_FALSE@	$(AM_V_CC) @AM_BACKSLASH@
1834
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='bignum.c' object='libclamav_la-bignum.lo' libtool=yes @AMDEPBACKSLASH@
1835
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
1836
-@am__fastdepCC_FALSE@	$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -c -o libclamav_la-bignum.lo `test -f 'bignum.c' || echo '$(srcdir)/'`bignum.c
1837
-
1838 1830
 libclamav_internal_utils_la-str.lo: str.c
1839 1831
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_internal_utils_la_CFLAGS) $(CFLAGS) -MT libclamav_internal_utils_la-str.lo -MD -MP -MF $(DEPDIR)/libclamav_internal_utils_la-str.Tpo -c -o libclamav_internal_utils_la-str.lo `test -f 'str.c' || echo '$(srcdir)/'`str.c
1840 1832
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libclamav_internal_utils_la-str.Tpo $(DEPDIR)/libclamav_internal_utils_la-str.Plo
... ...
@@ -1,840 +1,61 @@
1
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
2
- *
3
- * LibTomMath is a library that provides multiple-precision
4
- * integer arithmetic as well as number theoretic functionality.
5
- *
6
- * The library was designed directly after the MPI library by
7
- * Michael Fromberger but has been written from scratch with
8
- * additional optimizations in place.
9
- *
10
- * The library is free for all purposes without any express
11
- * guarantee it works.
12
- *
13
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
14
- */
15
-
16
-#if HAVE_CONFIG_H
17
-#include "clamav-config.h"
18
-#endif
19
-
20
-/* Start: bn_error.c */
21
-#include <bignum.h>
22
-#include "others.h"
23
-
24
-#ifdef BN_ERROR_C
25
-static const struct {
26
-     int code;
27
-     const char *msg;
28
-} msgs[] = {
29
-     { MP_OKAY, "Successful" },
30
-     { MP_MEM,  "Out of heap" },
31
-     { MP_VAL,  "Value out of range" }
32
-};
33
-
34
-/* return a char * string for a given code */
35
-const char *mp_error_to_string(int code)
36
-{
37
-   int x;
38
-
39
-   /* scan the lookup table for the given message */
40
-   for (x = 0; x < (int)(sizeof(msgs) / sizeof(msgs[0])); x++) {
41
-       if (msgs[x].code == code) {
42
-          return msgs[x].msg;
43
-       }
44
-   }
45
-
46
-   /* generic reply for invalid code */
47
-   return "Invalid error code";
48
-}
49
-
50
-#endif
51
-
52
-/* $Source: /cvs/libtom/libtommath/bn_error.c,v $ */
53
-/* $Revision: 1.3 $ */
54
-/* $Date: 2006/03/31 14:18:44 $ */
55
-
56
-/* End: bn_error.c */
57
-
58
-/* Start: bn_fast_mp_invmod.c */
59
-#include <bignum.h>
60
-#ifdef BN_FAST_MP_INVMOD_C
61
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
62
- *
63
- * LibTomMath is a library that provides multiple-precision
64
- * integer arithmetic as well as number theoretic functionality.
65
- *
66
- * The library was designed directly after the MPI library by
67
- * Michael Fromberger but has been written from scratch with
68
- * additional optimizations in place.
69
- *
70
- * The library is free for all purposes without any express
71
- * guarantee it works.
72
- *
73
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
74
- */
75
-
76
-/* computes the modular inverse via binary extended euclidean algorithm, 
77
- * that is c = 1/a mod b 
78
- *
79
- * Based on slow invmod except this is optimized for the case where b is 
80
- * odd as per HAC Note 14.64 on pp. 610
81
- */
82
-int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
83
-{
84
-  mp_int  x, y, u, v, B, D;
85
-  int     res, neg;
86
-
87
-  /* 2. [modified] b must be odd   */
88
-  if (mp_iseven (b) == 1) {
89
-    return MP_VAL;
90
-  }
91
-
92
-  /* init all our temps */
93
-  if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D, NULL)) != MP_OKAY) {
94
-     return res;
95
-  }
96
-
97
-  /* x == modulus, y == value to invert */
98
-  if ((res = mp_copy (b, &x)) != MP_OKAY) {
99
-    goto LBL_ERR;
100
-  }
101
-
102
-  /* we need y = |a| */
103
-  if ((res = mp_mod (a, b, &y)) != MP_OKAY) {
104
-    goto LBL_ERR;
105
-  }
106
-
107
-  /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
108
-  if ((res = mp_copy (&x, &u)) != MP_OKAY) {
109
-    goto LBL_ERR;
110
-  }
111
-  if ((res = mp_copy (&y, &v)) != MP_OKAY) {
112
-    goto LBL_ERR;
113
-  }
114
-  mp_set (&D, 1);
115
-
116
-top:
117
-  /* 4.  while u is even do */
118
-  while (mp_iseven (&u) == 1) {
119
-    /* 4.1 u = u/2 */
120
-    if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
121
-      goto LBL_ERR;
122
-    }
123
-    /* 4.2 if B is odd then */
124
-    if (mp_isodd (&B) == 1) {
125
-      if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
126
-        goto LBL_ERR;
127
-      }
128
-    }
129
-    /* B = B/2 */
130
-    if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
131
-      goto LBL_ERR;
132
-    }
133
-  }
134
-
135
-  /* 5.  while v is even do */
136
-  while (mp_iseven (&v) == 1) {
137
-    /* 5.1 v = v/2 */
138
-    if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
139
-      goto LBL_ERR;
140
-    }
141
-    /* 5.2 if D is odd then */
142
-    if (mp_isodd (&D) == 1) {
143
-      /* D = (D-x)/2 */
144
-      if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
145
-        goto LBL_ERR;
146
-      }
147
-    }
148
-    /* D = D/2 */
149
-    if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
150
-      goto LBL_ERR;
151
-    }
152
-  }
153
-
154
-  /* 6.  if u >= v then */
155
-  if (mp_cmp (&u, &v) != MP_LT) {
156
-    /* u = u - v, B = B - D */
157
-    if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
158
-      goto LBL_ERR;
159
-    }
160
-
161
-    if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
162
-      goto LBL_ERR;
163
-    }
164
-  } else {
165
-    /* v - v - u, D = D - B */
166
-    if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
167
-      goto LBL_ERR;
168
-    }
169
-
170
-    if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
171
-      goto LBL_ERR;
172
-    }
173
-  }
174
-
175
-  /* if not zero goto step 4 */
176
-  if (mp_iszero (&u) == 0) {
177
-    goto top;
178
-  }
179
-
180
-  /* now a = C, b = D, gcd == g*v */
181
-
182
-  /* if v != 1 then there is no inverse */
183
-  if (mp_cmp_d (&v, 1) != MP_EQ) {
184
-    res = MP_VAL;
185
-    goto LBL_ERR;
186
-  }
187
-
188
-  /* b is now the inverse */
189
-  neg = a->sign;
190
-  while (D.sign == MP_NEG) {
191
-    if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
192
-      goto LBL_ERR;
193
-    }
194
-  }
195
-  mp_exch (&D, c);
196
-  c->sign = neg;
197
-  res = MP_OKAY;
198
-
199
-LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL);
200
-  return res;
201
-}
202
-#endif
203
-
204
-/* $Source: /cvs/libtom/libtommath/bn_fast_mp_invmod.c,v $ */
205
-/* $Revision: 1.3 $ */
206
-/* $Date: 2006/03/31 14:18:44 $ */
207
-
208
-/* End: bn_fast_mp_invmod.c */
209
-
210
-/* Start: bn_fast_mp_montgomery_reduce.c */
211
-#include <bignum.h>
212
-#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
213
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
214
- *
215
- * LibTomMath is a library that provides multiple-precision
216
- * integer arithmetic as well as number theoretic functionality.
217
- *
218
- * The library was designed directly after the MPI library by
219
- * Michael Fromberger but has been written from scratch with
220
- * additional optimizations in place.
221
- *
222
- * The library is free for all purposes without any express
223
- * guarantee it works.
224
- *
225
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
226
- */
227
-
228
-/* computes xR**-1 == x (mod N) via Montgomery Reduction
229
- *
230
- * This is an optimized implementation of montgomery_reduce
231
- * which uses the comba method to quickly calculate the columns of the
232
- * reduction.
233
- *
234
- * Based on Algorithm 14.32 on pp.601 of HAC.
235
-*/
236
-int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
237
-{
238
-  int     ix, res, olduse;
239
-  mp_word W[MP_WARRAY];
240
-
241
-  /* get old used count */
242
-  olduse = x->used;
243
-
244
-  /* grow a as required */
245
-  if (x->alloc < n->used + 1) {
246
-    if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
247
-      return res;
248
-    }
249
-  }
250
-
251
-  /* first we have to get the digits of the input into
252
-   * an array of double precision words W[...]
253
-   */
254
-  {
255
-    register mp_word *_W;
256
-    register mp_digit *tmpx;
257
-
258
-    /* alias for the W[] array */
259
-    _W   = W;
260
-
261
-    /* alias for the digits of  x*/
262
-    tmpx = x->dp;
263
-
264
-    /* copy the digits of a into W[0..a->used-1] */
265
-    for (ix = 0; ix < x->used; ix++) {
266
-      *_W++ = *tmpx++;
267
-    }
268
-
269
-    /* zero the high words of W[a->used..m->used*2] */
270
-    for (; ix < n->used * 2 + 1; ix++) {
271
-      *_W++ = 0;
272
-    }
273
-  }
274
-
275
-  /* now we proceed to zero successive digits
276
-   * from the least significant upwards
277
-   */
278
-  for (ix = 0; ix < n->used; ix++) {
279
-    /* mu = ai * m' mod b
280
-     *
281
-     * We avoid a double precision multiplication (which isn't required)
282
-     * by casting the value down to a mp_digit.  Note this requires
283
-     * that W[ix-1] have  the carry cleared (see after the inner loop)
284
-     */
285
-    register mp_digit mu;
286
-    mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
287
-
288
-    /* a = a + mu * m * b**i
289
-     *
290
-     * This is computed in place and on the fly.  The multiplication
291
-     * by b**i is handled by offseting which columns the results
292
-     * are added to.
293
-     *
294
-     * Note the comba method normally doesn't handle carries in the
295
-     * inner loop In this case we fix the carry from the previous
296
-     * column since the Montgomery reduction requires digits of the
297
-     * result (so far) [see above] to work.  This is
298
-     * handled by fixing up one carry after the inner loop.  The
299
-     * carry fixups are done in order so after these loops the
300
-     * first m->used words of W[] have the carries fixed
301
-     */
302
-    {
303
-      register int iy;
304
-      register mp_digit *tmpn;
305
-      register mp_word *_W;
306
-
307
-      /* alias for the digits of the modulus */
308
-      tmpn = n->dp;
309
-
310
-      /* Alias for the columns set by an offset of ix */
311
-      _W = W + ix;
312
-
313
-      /* inner loop */
314
-      for (iy = 0; iy < n->used; iy++) {
315
-          *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
316
-      }
317
-    }
318
-
319
-    /* now fix carry for next digit, W[ix+1] */
320
-    W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
321
-  }
322
-
323
-  /* now we have to propagate the carries and
324
-   * shift the words downward [all those least
325
-   * significant digits we zeroed].
326
-   */
327
-  {
328
-    register mp_digit *tmpx;
329
-    register mp_word *_W, *_W1;
330
-
331
-    /* nox fix rest of carries */
332
-
333
-    /* alias for current word */
334
-    _W1 = W + ix;
335
-
336
-    /* alias for next word, where the carry goes */
337
-    _W = W + ++ix;
338
-
339
-    for (; ix <= n->used * 2 + 1; ix++) {
340
-      *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
341
-    }
342
-
343
-    /* copy out, A = A/b**n
344
-     *
345
-     * The result is A/b**n but instead of converting from an
346
-     * array of mp_word to mp_digit than calling mp_rshd
347
-     * we just copy them in the right order
348
-     */
349
-
350
-    /* alias for destination word */
351
-    tmpx = x->dp;
352
-
353
-    /* alias for shifted double precision result */
354
-    _W = W + n->used;
355
-
356
-    for (ix = 0; ix < n->used + 1; ix++) {
357
-      *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
358
-    }
359
-
360
-    /* zero oldused digits, if the input a was larger than
361
-     * m->used+1 we'll have to clear the digits
362
-     */
363
-    for (; ix < olduse; ix++) {
364
-      *tmpx++ = 0;
365
-    }
366
-  }
367
-
368
-  /* set the max used and clamp */
369
-  x->used = n->used + 1;
370
-  mp_clamp (x);
371
-
372
-  /* if A >= m then A = A - m */
373
-  if (mp_cmp_mag (x, n) != MP_LT) {
374
-    return s_mp_sub (x, n, x);
375
-  }
376
-  return MP_OKAY;
377
-}
378
-#endif
379
-
380
-/* $Source: /cvs/libtom/libtommath/bn_fast_mp_montgomery_reduce.c,v $ */
381
-/* $Revision: 1.3 $ */
382
-/* $Date: 2006/03/31 14:18:44 $ */
383
-
384
-/* End: bn_fast_mp_montgomery_reduce.c */
385
-
386
-/* Start: bn_fast_s_mp_mul_digs.c */
387
-#include <bignum.h>
388
-#ifdef BN_FAST_S_MP_MUL_DIGS_C
389
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
390
- *
391
- * LibTomMath is a library that provides multiple-precision
392
- * integer arithmetic as well as number theoretic functionality.
393
- *
394
- * The library was designed directly after the MPI library by
395
- * Michael Fromberger but has been written from scratch with
396
- * additional optimizations in place.
397
- *
398
- * The library is free for all purposes without any express
399
- * guarantee it works.
400
- *
401
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
402
- */
403
-
404
-/* Fast (comba) multiplier
405
- *
406
- * This is the fast column-array [comba] multiplier.  It is 
407
- * designed to compute the columns of the product first 
408
- * then handle the carries afterwards.  This has the effect 
409
- * of making the nested loops that compute the columns very
410
- * simple and schedulable on super-scalar processors.
411
- *
412
- * This has been modified to produce a variable number of 
413
- * digits of output so if say only a half-product is required 
414
- * you don't have to compute the upper half (a feature 
415
- * required for fast Barrett reduction).
416
- *
417
- * Based on Algorithm 14.12 on pp.595 of HAC.
418
- *
419
- */
420
-int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
421
-{
422
-  int     olduse, res, pa, ix, iz;
423
-  mp_digit W[MP_WARRAY];
424
-  register mp_word  _W;
425
-
426
-  /* grow the destination as required */
427
-  if (c->alloc < digs) {
428
-    if ((res = mp_grow (c, digs)) != MP_OKAY) {
429
-      return res;
430
-    }
431
-  }
432
-
433
-  /* number of output digits to produce */
434
-  pa = MIN(digs, a->used + b->used);
435
-
436
-  /* clear the carry */
437
-  _W = 0;
438
-  for (ix = 0; ix < pa; ix++) { 
439
-      int      tx, ty;
440
-      int      iy;
441
-      mp_digit *tmpx, *tmpy;
442
-
443
-      /* get offsets into the two bignums */
444
-      ty = MIN(b->used-1, ix);
445
-      tx = ix - ty;
446
-
447
-      /* setup temp aliases */
448
-      tmpx = a->dp + tx;
449
-      tmpy = b->dp + ty;
450
-
451
-      /* this is the number of times the loop will iterrate, essentially 
452
-         while (tx++ < a->used && ty-- >= 0) { ... }
453
-       */
454
-      iy = MIN(a->used-tx, ty+1);
455
-
456
-      /* execute loop */
457
-      for (iz = 0; iz < iy; ++iz) {
458
-         _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
459
-
460
-      }
461
-
462
-      /* store term */
463
-      W[ix] = ((mp_digit)_W) & MP_MASK;
464
-
465
-      /* make next carry */
466
-      _W = _W >> ((mp_word)DIGIT_BIT);
467
- }
468
-
469
-  /* setup dest */
470
-  olduse  = c->used;
471
-  c->used = pa;
472
-
473
-  {
474
-    register mp_digit *tmpc;
475
-    tmpc = c->dp;
476
-    for (ix = 0; ix < pa+1; ix++) {
477
-      /* now extract the previous digit [below the carry] */
478
-      *tmpc++ = W[ix];
479
-    }
480
-
481
-    /* clear unused digits [that existed in the old copy of c] */
482
-    for (; ix < olduse; ix++) {
483
-      *tmpc++ = 0;
484
-    }
485
-  }
486
-  mp_clamp (c);
487
-  return MP_OKAY;
488
-}
489
-#endif
490
-
491
-/* $Source: /cvs/libtom/libtommath/bn_fast_s_mp_mul_digs.c,v $ */
492
-/* $Revision: 1.7 $ */
493
-/* $Date: 2006/03/31 14:18:44 $ */
494
-
495
-/* End: bn_fast_s_mp_mul_digs.c */
496
-
497
-/* Start: bn_fast_s_mp_mul_high_digs.c */
498
-#include <bignum.h>
499
-#ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
500
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
501
- *
502
- * LibTomMath is a library that provides multiple-precision
503
- * integer arithmetic as well as number theoretic functionality.
504
- *
505
- * The library was designed directly after the MPI library by
506
- * Michael Fromberger but has been written from scratch with
507
- * additional optimizations in place.
508
- *
509
- * The library is free for all purposes without any express
510
- * guarantee it works.
511
- *
512
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
513
- */
514
-
515
-/* this is a modified version of fast_s_mul_digs that only produces
516
- * output digits *above* digs.  See the comments for fast_s_mul_digs
517
- * to see how it works.
518
- *
519
- * This is used in the Barrett reduction since for one of the multiplications
520
- * only the higher digits were needed.  This essentially halves the work.
521
- *
522
- * Based on Algorithm 14.12 on pp.595 of HAC.
523
- */
524
-int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
525
-{
526
-  int     olduse, res, pa, ix, iz;
527
-  mp_digit W[MP_WARRAY];
528
-  mp_word  _W;
529
-
530
-  /* grow the destination as required */
531
-  pa = a->used + b->used;
532
-  if (c->alloc < pa) {
533
-    if ((res = mp_grow (c, pa)) != MP_OKAY) {
534
-      return res;
535
-    }
536
-  }
537
-
538
-  /* number of output digits to produce */
539
-  pa = a->used + b->used;
540
-  _W = 0;
541
-  for (ix = digs; ix < pa; ix++) { 
542
-      int      tx, ty, iy;
543
-      mp_digit *tmpx, *tmpy;
544
-
545
-      /* get offsets into the two bignums */
546
-      ty = MIN(b->used-1, ix);
547
-      tx = ix - ty;
548
-
549
-      /* setup temp aliases */
550
-      tmpx = a->dp + tx;
551
-      tmpy = b->dp + ty;
552
-
553
-      /* this is the number of times the loop will iterrate, essentially its 
554
-         while (tx++ < a->used && ty-- >= 0) { ... }
555
-       */
556
-      iy = MIN(a->used-tx, ty+1);
557
-
558
-      /* execute loop */
559
-      for (iz = 0; iz < iy; iz++) {
560
-         _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
561
-      }
562
-
563
-      /* store term */
564
-      W[ix] = ((mp_digit)_W) & MP_MASK;
565
-
566
-      /* make next carry */
567
-      _W = _W >> ((mp_word)DIGIT_BIT);
568
-  }
569
-  
570
-  /* setup dest */
571
-  olduse  = c->used;
572
-  c->used = pa;
573
-
574
-  {
575
-    register mp_digit *tmpc;
576
-
577
-    tmpc = c->dp + digs;
578
-    for (ix = digs; ix <= pa; ix++) {
579
-      /* now extract the previous digit [below the carry] */
580
-      *tmpc++ = W[ix];
581
-    }
582
-
583
-    /* clear unused digits [that existed in the old copy of c] */
584
-    for (; ix < olduse; ix++) {
585
-      *tmpc++ = 0;
586
-    }
587
-  }
588
-  mp_clamp (c);
589
-  return MP_OKAY;
590
-}
591
-#endif
592
-
593
-/* $Source: /cvs/libtom/libtommath/bn_fast_s_mp_mul_high_digs.c,v $ */
594
-/* $Revision: 1.4 $ */
595
-/* $Date: 2006/03/31 14:18:44 $ */
596
-
597
-/* End: bn_fast_s_mp_mul_high_digs.c */
598
-
599
-/* Start: bn_fast_s_mp_sqr.c */
600
-#include <bignum.h>
601
-#ifdef BN_FAST_S_MP_SQR_C
602
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
603
- *
604
- * LibTomMath is a library that provides multiple-precision
605
- * integer arithmetic as well as number theoretic functionality.
606
- *
607
- * The library was designed directly after the MPI library by
608
- * Michael Fromberger but has been written from scratch with
609
- * additional optimizations in place.
610
- *
611
- * The library is free for all purposes without any express
612
- * guarantee it works.
1
+/* Start: fp_2expt.c */
2
+/* TomsFastMath, a fast ISO C bignum library.
3
+ * 
4
+ * This project is meant to fill in where LibTomMath
5
+ * falls short.  That is speed ;-)
613 6
  *
614
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
7
+ * This project is public domain and free for all purposes.
8
+ * 
9
+ * Tom St Denis, tomstdenis@gmail.com
615 10
  */
11
+#include "bignum_fast.h"
616 12
 
617
-/* the jist of squaring...
618
- * you do like mult except the offset of the tmpx [one that 
619
- * starts closer to zero] can't equal the offset of tmpy.  
620
- * So basically you set up iy like before then you min it with
621
- * (ty-tx) so that it never happens.  You double all those 
622
- * you add in the inner loop
623
-
624
-After that loop you do the squares and add them in.
625
-*/
626
-
627
-int fast_s_mp_sqr (mp_int * a, mp_int * b)
13
+/* computes a = 2**b */
14
+void fp_2expt(fp_int *a, int b)
628 15
 {
629
-  int       olduse, res, pa, ix, iz;
630
-  mp_digit   W[MP_WARRAY], *tmpx;
631
-  mp_word   W1;
632
-
633
-  /* grow the destination as required */
634
-  pa = a->used + a->used;
635
-  if (b->alloc < pa) {
636
-    if ((res = mp_grow (b, pa)) != MP_OKAY) {
637
-      return res;
638
-    }
639
-  }
640
-
641
-  /* number of output digits to produce */
642
-  W1 = 0;
643
-  for (ix = 0; ix < pa; ix++) { 
644
-      int      tx, ty, iy;
645
-      mp_word  _W;
646
-      mp_digit *tmpy;
647
-
648
-      /* clear counter */
649
-      _W = 0;
650
-
651
-      /* get offsets into the two bignums */
652
-      ty = MIN(a->used-1, ix);
653
-      tx = ix - ty;
654
-
655
-      /* setup temp aliases */
656
-      tmpx = a->dp + tx;
657
-      tmpy = a->dp + ty;
658
-
659
-      /* this is the number of times the loop will iterrate, essentially
660
-         while (tx++ < a->used && ty-- >= 0) { ... }
661
-       */
662
-      iy = MIN(a->used-tx, ty+1);
663
-
664
-      /* now for squaring tx can never equal ty 
665
-       * we halve the distance since they approach at a rate of 2x
666
-       * and we have to round because odd cases need to be executed
667
-       */
668
-      iy = MIN(iy, (ty-tx+1)>>1);
669
-
670
-      /* execute loop */
671
-      for (iz = 0; iz < iy; iz++) {
672
-         _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
673
-      }
674
-
675
-      /* double the inner product and add carry */
676
-      _W = _W + _W + W1;
677
-
678
-      /* even columns have the square term in them */
679
-      if ((ix&1) == 0) {
680
-         _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
681
-      }
682
-
683
-      /* store it */
684
-      W[ix] = (mp_digit)(_W & MP_MASK);
685
-
686
-      /* make next carry */
687
-      W1 = _W >> ((mp_word)DIGIT_BIT);
688
-  }
16
+   int     z;
689 17
 
690
-  /* setup dest */
691
-  olduse  = b->used;
692
-  b->used = a->used+a->used;
693
-
694
-  {
695
-    mp_digit *tmpb;
696
-    tmpb = b->dp;
697
-    for (ix = 0; ix < pa; ix++) {
698
-      *tmpb++ = W[ix] & MP_MASK;
699
-    }
700
-
701
-    /* clear unused digits [that existed in the old copy of c] */
702
-    for (; ix < olduse; ix++) {
703
-      *tmpb++ = 0;
704
-    }
705
-  }
706
-  mp_clamp (b);
707
-  return MP_OKAY;
708
-}
709
-#endif
710
-
711
-/* $Source: /cvs/libtom/libtommath/bn_fast_s_mp_sqr.c,v $ */
712
-/* $Revision: 1.3 $ */
713
-/* $Date: 2006/03/31 14:18:44 $ */
18
+   /* zero a as per default */
19
+   fp_zero (a);
714 20
 
715
-/* End: bn_fast_s_mp_sqr.c */
716
-
717
-/* Start: bn_mp_2expt.c */
718
-#include <bignum.h>
719
-#ifdef BN_MP_2EXPT_C
720
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
721
- *
722
- * LibTomMath is a library that provides multiple-precision
723
- * integer arithmetic as well as number theoretic functionality.
724
- *
725
- * The library was designed directly after the MPI library by
726
- * Michael Fromberger but has been written from scratch with
727
- * additional optimizations in place.
728
- *
729
- * The library is free for all purposes without any express
730
- * guarantee it works.
731
- *
732
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
733
- */
734
-
735
-/* computes a = 2**b 
736
- *
737
- * Simple algorithm which zeroes the int, grows it then just sets one bit
738
- * as required.
739
- */
740
-int
741
-mp_2expt (mp_int * a, int b)
742
-{
743
-  int     res;
744
-
745
-  /* zero a as per default */
746
-  mp_zero (a);
21
+   if (b < 0) { 
22
+      return;
23
+   }
747 24
 
748
-  /* grow a to accomodate the single bit */
749
-  if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
750
-    return res;
751
-  }
25
+   z = b / DIGIT_BIT;
26
+   if (z >= FP_SIZE) {
27
+      return; 
28
+   }
752 29
 
753 30
   /* set the used count of where the bit will go */
754
-  a->used = b / DIGIT_BIT + 1;
31
+  a->used = z + 1;
755 32
 
756 33
   /* put the single bit in its place */
757
-  a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
758
-
759
-  return MP_OKAY;
34
+  a->dp[z] = ((fp_digit)1) << (b % DIGIT_BIT);
760 35
 }
761
-#endif
762 36
 
763
-/* $Source: /cvs/libtom/libtommath/bn_mp_2expt.c,v $ */
764
-/* $Revision: 1.3 $ */
765
-/* $Date: 2006/03/31 14:18:44 $ */
766
-
767
-/* End: bn_mp_2expt.c */
768 37
 
769
-/* Start: bn_mp_abs.c */
770
-#include <bignum.h>
771
-#ifdef BN_MP_ABS_C
772
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
773
- *
774
- * LibTomMath is a library that provides multiple-precision
775
- * integer arithmetic as well as number theoretic functionality.
776
- *
777
- * The library was designed directly after the MPI library by
778
- * Michael Fromberger but has been written from scratch with
779
- * additional optimizations in place.
780
- *
781
- * The library is free for all purposes without any express
782
- * guarantee it works.
783
- *
784
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
785
- */
786
-
787
-/* b = |a| 
788
- *
789
- * Simple function copies the input and fixes the sign to positive
790
- */
791
-int
792
-mp_abs (mp_int * a, mp_int * b)
793
-{
794
-  int     res;
795
-
796
-  /* copy a to b */
797
-  if (a != b) {
798
-     if ((res = mp_copy (a, b)) != MP_OKAY) {
799
-       return res;
800
-     }
801
-  }
802
-
803
-  /* force the sign of b to positive */
804
-  b->sign = MP_ZPOS;
805
-
806
-  return MP_OKAY;
807
-}
808
-#endif
809
-
810
-/* $Source: /cvs/libtom/libtommath/bn_mp_abs.c,v $ */
811
-/* $Revision: 1.3 $ */
812
-/* $Date: 2006/03/31 14:18:44 $ */
38
+/* $Source: /cvs/libtom/tomsfastmath/src/exptmod/fp_2expt.c,v $ */
39
+/* $Revision: 1.1 $ */
40
+/* $Date: 2006/12/31 21:25:53 $ */
813 41
 
814
-/* End: bn_mp_abs.c */
42
+/* End: fp_2expt.c */
815 43
 
816
-/* Start: bn_mp_add.c */
817
-#include <bignum.h>
818
-#ifdef BN_MP_ADD_C
819
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
820
- *
821
- * LibTomMath is a library that provides multiple-precision
822
- * integer arithmetic as well as number theoretic functionality.
823
- *
824
- * The library was designed directly after the MPI library by
825
- * Michael Fromberger but has been written from scratch with
826
- * additional optimizations in place.
827
- *
828
- * The library is free for all purposes without any express
829
- * guarantee it works.
44
+/* Start: fp_add.c */
45
+/* TomsFastMath, a fast ISO C bignum library.
46
+ * 
47
+ * This project is meant to fill in where LibTomMath
48
+ * falls short.  That is speed ;-)
830 49
  *
831
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
50
+ * This project is public domain and free for all purposes.
51
+ * 
52
+ * Tom St Denis, tomstdenis@gmail.com
832 53
  */
54
+#include "bignum_fast.h"
833 55
 
834
-/* high level addition (handles signs) */
835
-int mp_add (mp_int * a, mp_int * b, mp_int * c)
56
+void fp_add(fp_int *a, fp_int *b, fp_int *c)
836 57
 {
837
-  int     sa, sb, res;
58
+  int     sa, sb;
838 59
 
839 60
   /* get sign of both inputs */
840 61
   sa = a->sign;
... ...
@@ -845,571 +66,221 @@ int mp_add (mp_int * a, mp_int * b, mp_int * c)
845 845
     /* both positive or both negative */
846 846
     /* add their magnitudes, copy the sign */
847 847
     c->sign = sa;
848
-    res = s_mp_add (a, b, c);
848
+    s_fp_add (a, b, c);
849 849
   } else {
850 850
     /* one positive, the other negative */
851 851
     /* subtract the one with the greater magnitude from */
852 852
     /* the one of the lesser magnitude.  The result gets */
853 853
     /* the sign of the one with the greater magnitude. */
854
-    if (mp_cmp_mag (a, b) == MP_LT) {
854
+    if (fp_cmp_mag (a, b) == FP_LT) {
855 855
       c->sign = sb;
856
-      res = s_mp_sub (b, a, c);
856
+      s_fp_sub (b, a, c);
857 857
     } else {
858 858
       c->sign = sa;
859
-      res = s_mp_sub (a, b, c);
859
+      s_fp_sub (a, b, c);
860 860
     }
861 861
   }
862
-  return res;
863 862
 }
864 863
 
865
-#endif
864
+/* $Source: /cvs/libtom/tomsfastmath/src/addsub/fp_add.c,v $ */
865
+/* $Revision: 1.1 $ */
866
+/* $Date: 2006/12/31 21:25:53 $ */
866 867
 
867
-/* $Source: /cvs/libtom/libtommath/bn_mp_add.c,v $ */
868
-/* $Revision: 1.3 $ */
869
-/* $Date: 2006/03/31 14:18:44 $ */
868
+/* End: fp_add.c */
870 869
 
871
-/* End: bn_mp_add.c */
872
-
873
-/* Start: bn_mp_add_d.c */
874
-#include <bignum.h>
875
-#ifdef BN_MP_ADD_D_C
876
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
877
- *
878
- * LibTomMath is a library that provides multiple-precision
879
- * integer arithmetic as well as number theoretic functionality.
880
- *
881
- * The library was designed directly after the MPI library by
882
- * Michael Fromberger but has been written from scratch with
883
- * additional optimizations in place.
884
- *
885
- * The library is free for all purposes without any express
886
- * guarantee it works.
870
+/* Start: fp_add_d.c */
871
+/* TomsFastMath, a fast ISO C bignum library.
872
+ * 
873
+ * This project is meant to fill in where LibTomMath
874
+ * falls short.  That is speed ;-)
887 875
  *
888
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
876
+ * This project is public domain and free for all purposes.
877
+ * 
878
+ * Tom St Denis, tomstdenis@gmail.com
889 879
  */
880
+#include "bignum_fast.h"
890 881
 
891
-/* single digit addition */
892
-int
893
-mp_add_d (mp_int * a, mp_digit b, mp_int * c)
882
+/* c = a + b */
883
+void fp_add_d(fp_int *a, fp_digit b, fp_int *c)
894 884
 {
895
-  int     res, ix, oldused;
896
-  mp_digit *tmpa, *tmpc, mu;
897
-
898
-  /* grow c as required */
899
-  if (c->alloc < a->used + 1) {
900
-     if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
901
-        return res;
902
-     }
903
-  }
904
-
905
-  /* if a is negative and |a| >= b, call c = |a| - b */
906
-  if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) {
907
-     /* temporarily fix sign of a */
908
-     a->sign = MP_ZPOS;
909
-
910
-     /* c = |a| - b */
911
-     res = mp_sub_d(a, b, c);
912
-
913
-     /* fix sign  */
914
-     a->sign = c->sign = MP_NEG;
915
-
916
-     /* clamp */
917
-     mp_clamp(c);
918
-
919
-     return res;
920
-  }
921
-
922
-  /* old number of used digits in c */
923
-  oldused = c->used;
924
-
925
-  /* sign always positive */
926
-  c->sign = MP_ZPOS;
927
-
928
-  /* source alias */
929
-  tmpa    = a->dp;
930
-
931
-  /* destination alias */
932
-  tmpc    = c->dp;
933
-
934
-  /* if a is positive */
935
-  if (a->sign == MP_ZPOS) {
936
-     /* add digit, after this we're propagating
937
-      * the carry.
938
-      */
939
-     *tmpc   = *tmpa++ + b;
940
-     mu      = *tmpc >> DIGIT_BIT;
941
-     *tmpc++ &= MP_MASK;
942
-
943
-     /* now handle rest of the digits */
944
-     for (ix = 1; ix < a->used; ix++) {
945
-        *tmpc   = *tmpa++ + mu;
946
-        mu      = *tmpc >> DIGIT_BIT;
947
-        *tmpc++ &= MP_MASK;
948
-     }
949
-     /* set final carry */
950
-     ix++;
951
-     *tmpc++  = mu;
952
-
953
-     /* setup size */
954
-     c->used = a->used + 1;
955
-  } else {
956
-     /* a was negative and |a| < b */
957
-     c->used  = 1;
958
-
959
-     /* the result is a single digit */
960
-     if (a->used == 1) {
961
-        *tmpc++  =  b - a->dp[0];
962
-     } else {
963
-        *tmpc++  =  b;
964
-     }
965
-
966
-     /* setup count so the clearing of oldused
967
-      * can fall through correctly
968
-      */
969
-     ix       = 1;
970
-  }
971
-
972
-  /* now zero to oldused */
973
-  while (ix++ < oldused) {
974
-     *tmpc++ = 0;
975
-  }
976
-  mp_clamp(c);
977
-
978
-  return MP_OKAY;
885
+   fp_int tmp;
886
+   fp_set(&tmp, b);
887
+   fp_add(a,&tmp,c);
979 888
 }
980 889
 
981
-#endif
982
-
983
-/* $Source: /cvs/libtom/libtommath/bn_mp_add_d.c,v $ */
984
-/* $Revision: 1.4 $ */
985
-/* $Date: 2006/03/31 14:18:44 $ */
890
+/* $Source: /cvs/libtom/tomsfastmath/src/addsub/fp_add_d.c,v $ */
891
+/* $Revision: 1.1 $ */
892
+/* $Date: 2006/12/31 21:25:53 $ */
986 893
 
987
-/* End: bn_mp_add_d.c */
894
+/* End: fp_add_d.c */
988 895
 
989
-/* Start: bn_mp_addmod.c */
990
-#include <bignum.h>
991
-#ifdef BN_MP_ADDMOD_C
992
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
993
- *
994
- * LibTomMath is a library that provides multiple-precision
995
- * integer arithmetic as well as number theoretic functionality.
996
- *
997
- * The library was designed directly after the MPI library by
998
- * Michael Fromberger but has been written from scratch with
999
- * additional optimizations in place.
1000
- *
1001
- * The library is free for all purposes without any express
1002
- * guarantee it works.
896
+/* Start: fp_addmod.c */
897
+/* TomsFastMath, a fast ISO C bignum library.
898
+ * 
899
+ * This project is meant to fill in where LibTomMath
900
+ * falls short.  That is speed ;-)
1003 901
  *
1004
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
902
+ * This project is public domain and free for all purposes.
903
+ * 
904
+ * Tom St Denis, tomstdenis@gmail.com
1005 905
  */
906
+#include "bignum_fast.h"
1006 907
 
1007 908
 /* d = a + b (mod c) */
1008
-int
1009
-mp_addmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
1010
-{
1011
-  int     res;
1012
-  mp_int  t;
1013
-
1014
-  if ((res = mp_init (&t)) != MP_OKAY) {
1015
-    return res;
1016
-  }
1017
-
1018
-  if ((res = mp_add (a, b, &t)) != MP_OKAY) {
1019
-    mp_clear (&t);
1020
-    return res;
1021
-  }
1022
-  res = mp_mod (&t, c, d);
1023
-  mp_clear (&t);
1024
-  return res;
1025
-}
1026
-#endif
1027
-
1028
-/* $Source: /cvs/libtom/libtommath/bn_mp_addmod.c,v $ */
1029
-/* $Revision: 1.3 $ */
1030
-/* $Date: 2006/03/31 14:18:44 $ */
1031
-
1032
-/* End: bn_mp_addmod.c */
1033
-
1034
-/* Start: bn_mp_and.c */
1035
-#include <bignum.h>
1036
-#ifdef BN_MP_AND_C
1037
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
1038
- *
1039
- * LibTomMath is a library that provides multiple-precision
1040
- * integer arithmetic as well as number theoretic functionality.
1041
- *
1042
- * The library was designed directly after the MPI library by
1043
- * Michael Fromberger but has been written from scratch with
1044
- * additional optimizations in place.
1045
- *
1046
- * The library is free for all purposes without any express
1047
- * guarantee it works.
1048
- *
1049
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
1050
- */
1051
-
1052
-/* AND two ints together */
1053
-int
1054
-mp_and (mp_int * a, mp_int * b, mp_int * c)
1055
-{
1056
-  int     res, ix, px;
1057
-  mp_int  t, *x;
1058
-
1059
-  if (a->used > b->used) {
1060
-    if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
1061
-      return res;
1062
-    }
1063
-    px = b->used;
1064
-    x = b;
1065
-  } else {
1066
-    if ((res = mp_init_copy (&t, b)) != MP_OKAY) {
1067
-      return res;
1068
-    }
1069
-    px = a->used;
1070
-    x = a;
1071
-  }
1072
-
1073
-  for (ix = 0; ix < px; ix++) {
1074
-    t.dp[ix] &= x->dp[ix];
1075
-  }
1076
-
1077
-  /* zero digits above the last from the smallest mp_int */
1078
-  for (; ix < t.used; ix++) {
1079
-    t.dp[ix] = 0;
1080
-  }
1081
-
1082
-  mp_clamp (&t);
1083
-  mp_exch (c, &t);
1084
-  mp_clear (&t);
1085
-  return MP_OKAY;
1086
-}
1087
-#endif
1088
-
1089
-/* $Source: /cvs/libtom/libtommath/bn_mp_and.c,v $ */
1090
-/* $Revision: 1.3 $ */
1091
-/* $Date: 2006/03/31 14:18:44 $ */
1092
-
1093
-/* End: bn_mp_and.c */
1094
-
1095
-/* Start: bn_mp_clamp.c */
1096
-#include <bignum.h>
1097
-#ifdef BN_MP_CLAMP_C
1098
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
1099
- *
1100
- * LibTomMath is a library that provides multiple-precision
1101
- * integer arithmetic as well as number theoretic functionality.
1102
- *
1103
- * The library was designed directly after the MPI library by
1104
- * Michael Fromberger but has been written from scratch with
1105
- * additional optimizations in place.
1106
- *
1107
- * The library is free for all purposes without any express
1108
- * guarantee it works.
1109
- *
1110
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
1111
- */
1112
-
1113
-/* trim unused digits 
1114
- *
1115
- * This is used to ensure that leading zero digits are
1116
- * trimed and the leading "used" digit will be non-zero
1117
- * Typically very fast.  Also fixes the sign if there
1118
- * are no more leading digits
1119
- */
1120
-void
1121
-mp_clamp (mp_int * a)
1122
-{
1123
-  /* decrease used while the most significant digit is
1124
-   * zero.
1125
-   */
1126
-  while (a->used > 0 && a->dp[a->used - 1] == 0) {
1127
-    --(a->used);
1128
-  }
1129
-
1130
-  /* reset the sign flag if used == 0 */
1131
-  if (a->used == 0) {
1132
-    a->sign = MP_ZPOS;
1133
-  }
1134
-}
1135
-#endif
1136
-
1137
-/* $Source: /cvs/libtom/libtommath/bn_mp_clamp.c,v $ */
1138
-/* $Revision: 1.3 $ */
1139
-/* $Date: 2006/03/31 14:18:44 $ */
1140
-
1141
-/* End: bn_mp_clamp.c */
1142
-
1143
-/* Start: bn_mp_clear.c */
1144
-#include <bignum.h>
1145
-#ifdef BN_MP_CLEAR_C
1146
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
1147
- *
1148
- * LibTomMath is a library that provides multiple-precision
1149
- * integer arithmetic as well as number theoretic functionality.
1150
- *
1151
- * The library was designed directly after the MPI library by
1152
- * Michael Fromberger but has been written from scratch with
1153
- * additional optimizations in place.
1154
- *
1155
- * The library is free for all purposes without any express
1156
- * guarantee it works.
1157
- *
1158
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
1159
- */
1160
-
1161
-/* clear one (frees)  */
1162
-void
1163
-mp_clear (mp_int * a)
1164
-{
1165
-  int i;
1166
-
1167
-  /* only do anything if a hasn't been freed previously */
1168
-  if (a->dp != NULL) {
1169
-    /* first zero the digits */
1170
-    for (i = 0; i < a->used; i++) {
1171
-        a->dp[i] = 0;
1172
-    }
1173
-
1174
-    /* free ram */
1175
-    free(a->dp);
1176
-
1177
-    /* reset members to make debugging easier */
1178
-    a->dp    = NULL;
1179
-    a->alloc = a->used = 0;
1180
-    a->sign  = MP_ZPOS;
1181
-  }
1182
-}
1183
-#endif
1184
-
1185
-/* $Source: /cvs/libtom/libtommath/bn_mp_clear.c,v $ */
1186
-/* $Revision: 1.3 $ */
1187
-/* $Date: 2006/03/31 14:18:44 $ */
1188
-
1189
-/* End: bn_mp_clear.c */
1190
-
1191
-/* Start: bn_mp_clear_multi.c */
1192
-#include <bignum.h>
1193
-#ifdef BN_MP_CLEAR_MULTI_C
1194
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
1195
- *
1196
- * LibTomMath is a library that provides multiple-precision
1197
- * integer arithmetic as well as number theoretic functionality.
1198
- *
1199
- * The library was designed directly after the MPI library by
1200
- * Michael Fromberger but has been written from scratch with
1201
- * additional optimizations in place.
1202
- *
1203
- * The library is free for all purposes without any express
1204
- * guarantee it works.
1205
- *
1206
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
1207
- */
1208
-#include <stdarg.h>
1209
-
1210
-void mp_clear_multi(mp_int *mp, ...) 
909
+int fp_addmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
1211 910
 {
1212
-    mp_int* next_mp = mp;
1213
-    va_list args;
1214
-    va_start(args, mp);
1215
-    while (next_mp != NULL) {
1216
-        mp_clear(next_mp);
1217
-        next_mp = va_arg(args, mp_int*);
1218
-    }
1219
-    va_end(args);
911
+  fp_int tmp;
912
+  fp_zero(&tmp);
913
+  fp_add(a, b, &tmp);
914
+  return fp_mod(&tmp, c, d);
1220 915
 }
1221
-#endif
1222 916
 
1223
-/* $Source: /cvs/libtom/libtommath/bn_mp_clear_multi.c,v $ */
1224
-/* $Revision: 1.3 $ */
1225
-/* $Date: 2006/03/31 14:18:44 $ */
917
+/* $Source: /cvs/libtom/tomsfastmath/src/addsub/fp_addmod.c,v $ */
918
+/* $Revision: 1.1 $ */
919
+/* $Date: 2006/12/31 21:25:53 $ */
1226 920
 
1227
-/* End: bn_mp_clear_multi.c */
921
+/* End: fp_addmod.c */
1228 922
 
1229
-/* Start: bn_mp_cmp.c */
1230
-#include <bignum.h>
1231
-#ifdef BN_MP_CMP_C
1232
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
1233
- *
1234
- * LibTomMath is a library that provides multiple-precision
1235
- * integer arithmetic as well as number theoretic functionality.
1236
- *
1237
- * The library was designed directly after the MPI library by
1238
- * Michael Fromberger but has been written from scratch with
1239
- * additional optimizations in place.
1240
- *
1241
- * The library is free for all purposes without any express
1242
- * guarantee it works.
923
+/* Start: fp_cmp.c */
924
+/* TomsFastMath, a fast ISO C bignum library.
925
+ * 
926
+ * This project is meant to fill in where LibTomMath
927
+ * falls short.  That is speed ;-)
1243 928
  *
1244
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
929
+ * This project is public domain and free for all purposes.
930
+ * 
931
+ * Tom St Denis, tomstdenis@gmail.com
1245 932
  */
933
+#include "bignum_fast.h"
1246 934
 
1247
-/* compare two ints (signed)*/
1248
-int
1249
-mp_cmp (mp_int * a, mp_int * b)
935
+int fp_cmp(fp_int *a, fp_int *b)
1250 936
 {
1251
-  /* compare based on sign */
1252
-  if (a->sign != b->sign) {
1253
-     if (a->sign == MP_NEG) {
1254
-        return MP_LT;
1255
-     } else {
1256
-        return MP_GT;
1257
-     }
1258
-  }
1259
-  
1260
-  /* compare digits */
1261
-  if (a->sign == MP_NEG) {
1262
-     /* if negative compare opposite direction */
1263
-     return mp_cmp_mag(b, a);
1264
-  } else {
1265
-     return mp_cmp_mag(a, b);
1266
-  }
937
+   if (a->sign == FP_NEG && b->sign == FP_ZPOS) {
938
+      return FP_LT;
939
+   } else if (a->sign == FP_ZPOS && b->sign == FP_NEG) {
940
+      return FP_GT;
941
+   } else {
942
+      /* compare digits */
943
+      if (a->sign == FP_NEG) {
944
+         /* if negative compare opposite direction */
945
+         return fp_cmp_mag(b, a);
946
+      } else {
947
+         return fp_cmp_mag(a, b);
948
+      }
949
+   }
1267 950
 }
1268
-#endif
1269 951
 
1270
-/* $Source: /cvs/libtom/libtommath/bn_mp_cmp.c,v $ */
1271
-/* $Revision: 1.3 $ */
1272
-/* $Date: 2006/03/31 14:18:44 $ */
952
+/* $Source: /cvs/libtom/tomsfastmath/src/addsub/fp_cmp.c,v $ */
953
+/* $Revision: 1.1 $ */
954
+/* $Date: 2006/12/31 21:25:53 $ */
1273 955
 
1274
-/* End: bn_mp_cmp.c */
956
+/* End: fp_cmp.c */
1275 957
 
1276
-/* Start: bn_mp_cmp_d.c */
1277
-#include <bignum.h>
1278
-#ifdef BN_MP_CMP_D_C
1279
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
1280
- *
1281
- * LibTomMath is a library that provides multiple-precision
1282
- * integer arithmetic as well as number theoretic functionality.
1283
- *
1284
- * The library was designed directly after the MPI library by
1285
- * Michael Fromberger but has been written from scratch with
1286
- * additional optimizations in place.
1287
- *
1288
- * The library is free for all purposes without any express
1289
- * guarantee it works.
958
+/* Start: fp_cmp_d.c */
959
+/* TomsFastMath, a fast ISO C bignum library.
960
+ * 
961
+ * This project is meant to fill in where LibTomMath
962
+ * falls short.  That is speed ;-)
1290 963
  *
1291
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
964
+ * This project is public domain and free for all purposes.
965
+ * 
966
+ * Tom St Denis, tomstdenis@gmail.com
1292 967
  */
968
+#include "bignum_fast.h"
1293 969
 
1294
-/* compare a digit */
1295
-int mp_cmp_d(mp_int * a, mp_digit b)
970
+/* compare against a single digit */
971
+int fp_cmp_d(fp_int *a, fp_digit b)
1296 972
 {
1297 973
   /* compare based on sign */
1298
-  if (a->sign == MP_NEG) {
1299
-    return MP_LT;
974
+  if ((b && a->used == 0) || a->sign == FP_NEG) {
975
+    return FP_LT;
1300 976
   }
1301 977
 
1302 978
   /* compare based on magnitude */
1303 979
   if (a->used > 1) {
1304
-    return MP_GT;
980
+    return FP_GT;
1305 981
   }
1306 982
 
1307 983
   /* compare the only digit of a to b */
1308 984
   if (a->dp[0] > b) {
1309
-    return MP_GT;
985
+    return FP_GT;
1310 986
   } else if (a->dp[0] < b) {
1311
-    return MP_LT;
987
+    return FP_LT;
1312 988
   } else {
1313
-    return MP_EQ;
989
+    return FP_EQ;
1314 990
   }
991
+
1315 992
 }
1316
-#endif
1317 993
 
1318
-/* $Source: /cvs/libtom/libtommath/bn_mp_cmp_d.c,v $ */
1319
-/* $Revision: 1.3 $ */
1320
-/* $Date: 2006/03/31 14:18:44 $ */
994
+/* $Source: /cvs/libtom/tomsfastmath/src/addsub/fp_cmp_d.c,v $ */
995
+/* $Revision: 1.1 $ */
996
+/* $Date: 2006/12/31 21:25:53 $ */
1321 997
 
1322
-/* End: bn_mp_cmp_d.c */
998
+/* End: fp_cmp_d.c */
1323 999
 
1324
-/* Start: bn_mp_cmp_mag.c */
1325
-#include <bignum.h>
1326
-#ifdef BN_MP_CMP_MAG_C
1327
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
1328
- *
1329
- * LibTomMath is a library that provides multiple-precision
1330
- * integer arithmetic as well as number theoretic functionality.
1331
- *
1332
- * The library was designed directly after the MPI library by
1333
- * Michael Fromberger but has been written from scratch with
1334
- * additional optimizations in place.
1335
- *
1336
- * The library is free for all purposes without any express
1337
- * guarantee it works.
1000
+/* Start: fp_cmp_mag.c */
1001
+/* TomsFastMath, a fast ISO C bignum library.
1002
+ * 
1003
+ * This project is meant to fill in where LibTomMath
1004
+ * falls short.  That is speed ;-)
1338 1005
  *
1339
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
1006
+ * This project is public domain and free for all purposes.
1007
+ * 
1008
+ * Tom St Denis, tomstdenis@gmail.com
1340 1009
  */
1010
+#include "bignum_fast.h"
1341 1011
 
1342
-/* compare maginitude of two ints (unsigned) */
1343
-int mp_cmp_mag (mp_int * a, mp_int * b)
1012
+int fp_cmp_mag(fp_int *a, fp_int *b)
1344 1013
 {
1345
-  int     n;
1346
-  mp_digit *tmpa, *tmpb;
1347
-
1348
-  /* compare based on # of non-zero digits */
1349
-  if (a->used > b->used) {
1350
-    return MP_GT;
1351
-  }
1352
-  
1353
-  if (a->used < b->used) {
1354
-    return MP_LT;
1355
-  }
1356
-
1357
-  /* alias for a */
1358
-  tmpa = a->dp + (a->used - 1);
1359
-
1360
-  /* alias for b */
1361
-  tmpb = b->dp + (a->used - 1);
1362
-
1363
-  /* compare based on digits  */
1364
-  for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
1365
-    if (*tmpa > *tmpb) {
1366
-      return MP_GT;
1367
-    }
1014
+   int x;
1368 1015
 
1369
-    if (*tmpa < *tmpb) {
1370
-      return MP_LT;
1371
-    }
1372
-  }
1373
-  return MP_EQ;
1016
+   if (a->used > b->used) {
1017
+      return FP_GT;
1018
+   } else if (a->used < b->used) {
1019
+      return FP_LT;
1020
+   } else {
1021
+      for (x = a->used - 1; x >= 0; x--) {
1022
+          if (a->dp[x] > b->dp[x]) {
1023
+             return FP_GT;
1024
+          } else if (a->dp[x] < b->dp[x]) {
1025
+             return FP_LT;
1026
+          }
1027
+      }
1028
+   }
1029
+   return FP_EQ;
1374 1030
 }
1375
-#endif
1376 1031
 
1377
-/* $Source: /cvs/libtom/libtommath/bn_mp_cmp_mag.c,v $ */
1378
-/* $Revision: 1.3 $ */
1379
-/* $Date: 2006/03/31 14:18:44 $ */
1380 1032
 
1381
-/* End: bn_mp_cmp_mag.c */
1033
+/* $Source: /cvs/libtom/tomsfastmath/src/addsub/fp_cmp_mag.c,v $ */
1034
+/* $Revision: 1.1 $ */
1035
+/* $Date: 2006/12/31 21:25:53 $ */
1382 1036
 
1383
-/* Start: bn_mp_cnt_lsb.c */
1384
-#include <bignum.h>
1385
-#ifdef BN_MP_CNT_LSB_C
1386
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
1387
- *
1388
- * LibTomMath is a library that provides multiple-precision
1389
- * integer arithmetic as well as number theoretic functionality.
1390
- *
1391
- * The library was designed directly after the MPI library by
1392
- * Michael Fromberger but has been written from scratch with
1393
- * additional optimizations in place.
1394
- *
1395
- * The library is free for all purposes without any express
1396
- * guarantee it works.
1037
+/* End: fp_cmp_mag.c */
1038
+
1039
+/* Start: fp_cnt_lsb.c */
1040
+/* TomsFastMath, a fast ISO C bignum library.
1041
+ * 
1042
+ * This project is meant to fill in where LibTomMath
1043
+ * falls short.  That is speed ;-)
1397 1044
  *
1398
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
1045
+ * This project is public domain and free for all purposes.
1046
+ * 
1047
+ * Tom St Denis, tomstdenis@gmail.com
1399 1048
  */
1049
+#include "bignum_fast.h"
1400 1050
 
1401
-static const int lnz[16] = { 
1051
+static const int lnz[16] = {
1402 1052
    4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
1403 1053
 };
1404 1054
 
1405 1055
 /* Counts the number of lsbs which are zero before the first set bit */
1406
-int mp_cnt_lsb(mp_int *a)
1056
+int fp_cnt_lsb(fp_int *a)
1407 1057
 {
1408 1058
    int x;
1409
-   mp_digit q, qq;
1059
+   fp_digit q, qq;
1410 1060
 
1411 1061
    /* easy out */
1412
-   if (mp_iszero(a) == 1) {
1062
+   if (fp_iszero(a) == 1) {
1413 1063
       return 0;
1414 1064
    }
1415 1065
 
... ...
@@ -1429,110 +300,29 @@ int mp_cnt_lsb(mp_int *a)
1429 1429
    return x;
1430 1430
 }
1431 1431
 
1432
-#endif
1433
-
1434
-/* $Source: /cvs/libtom/libtommath/bn_mp_cnt_lsb.c,v $ */
1435
-/* $Revision: 1.3 $ */
1436
-/* $Date: 2006/03/31 14:18:44 $ */
1437
-
1438
-/* End: bn_mp_cnt_lsb.c */
1439
-
1440
-/* Start: bn_mp_copy.c */
1441
-#include <bignum.h>
1442
-#ifdef BN_MP_COPY_C
1443
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
1444
- *
1445
- * LibTomMath is a library that provides multiple-precision
1446
- * integer arithmetic as well as number theoretic functionality.
1447
- *
1448
- * The library was designed directly after the MPI library by
1449
- * Michael Fromberger but has been written from scratch with
1450
- * additional optimizations in place.
1451
- *
1452
- * The library is free for all purposes without any express
1453
- * guarantee it works.
1454
- *
1455
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
1456
- */
1457
-
1458
-/* copy, b = a */
1459
-int
1460
-mp_copy (mp_int * a, mp_int * b)
1461
-{
1462
-  int     res, n;
1463
-
1464
-  /* if dst == src do nothing */
1465
-  if (a == b) {
1466
-    return MP_OKAY;
1467
-  }
1468
-
1469
-  /* grow dest */
1470
-  if (b->alloc < a->used) {
1471
-     if ((res = mp_grow (b, a->used)) != MP_OKAY) {
1472
-        return res;
1473
-     }
1474
-  }
1475
-
1476
-  /* zero b and copy the parameters over */
1477
-  {
1478
-    register mp_digit *tmpa, *tmpb;
1479
-
1480
-    /* pointer aliases */
1481
-
1482
-    /* source */
1483
-    tmpa = a->dp;
1484
-
1485
-    /* destination */
1486
-    tmpb = b->dp;
1487
-
1488
-    /* copy all the digits */
1489
-    for (n = 0; n < a->used; n++) {
1490
-      *tmpb++ = *tmpa++;
1491
-    }
1492 1432
 
1493
-    /* clear high digits */
1494
-    for (; n < b->used; n++) {
1495
-      *tmpb++ = 0;
1496
-    }
1497
-  }
1433
+/* $Source: /cvs/libtom/tomsfastmath/src/bit/fp_cnt_lsb.c,v $ */
1434
+/* $Revision: 1.1 $ */
1435
+/* $Date: 2006/12/31 21:25:53 $ */
1498 1436
 
1499
-  /* copy used count and sign */
1500
-  b->used = a->used;
1501
-  b->sign = a->sign;
1502
-  return MP_OKAY;
1503
-}
1504
-#endif
1505
-
1506
-/* $Source: /cvs/libtom/libtommath/bn_mp_copy.c,v $ */
1507
-/* $Revision: 1.3 $ */
1508
-/* $Date: 2006/03/31 14:18:44 $ */
1437
+/* End: fp_cnt_lsb.c */
1509 1438
 
1510
-/* End: bn_mp_copy.c */
1511
-
1512
-/* Start: bn_mp_count_bits.c */
1513
-#include <bignum.h>
1514
-#ifdef BN_MP_COUNT_BITS_C
1515
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
1516
- *
1517
- * LibTomMath is a library that provides multiple-precision
1518
- * integer arithmetic as well as number theoretic functionality.
1519
- *
1520
- * The library was designed directly after the MPI library by
1521
- * Michael Fromberger but has been written from scratch with
1522
- * additional optimizations in place.
1523
- *
1524
- * The library is free for all purposes without any express
1525
- * guarantee it works.
1439
+/* Start: fp_count_bits.c */
1440
+/* TomsFastMath, a fast ISO C bignum library.
1441
+ * 
1442
+ * This project is meant to fill in where LibTomMath
1443
+ * falls short.  That is speed ;-)
1526 1444
  *
1527
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
1445
+ * This project is public domain and free for all purposes.
1446
+ * 
1447
+ * Tom St Denis, tomstdenis@gmail.com
1528 1448
  */
1449
+#include "bignum_fast.h"
1529 1450
 
1530
-/* returns the number of bits in an int */
1531
-int
1532
-mp_count_bits (mp_int * a)
1451
+int fp_count_bits (fp_int * a)
1533 1452
 {
1534 1453
   int     r;
1535
-  mp_digit q;
1454
+  fp_digit q;
1536 1455
 
1537 1456
   /* shortcut */
1538 1457
   if (a->used == 0) {
... ...
@@ -1541,184 +331,74 @@ mp_count_bits (mp_int * a)
1541 1541
 
1542 1542
   /* get number of digits and add that */
1543 1543
   r = (a->used - 1) * DIGIT_BIT;
1544
-  
1544
+
1545 1545
   /* take the last digit and count the bits in it */
1546 1546
   q = a->dp[a->used - 1];
1547
-  while (q > ((mp_digit) 0)) {
1547
+  while (q > ((fp_digit) 0)) {
1548 1548
     ++r;
1549
-    q >>= ((mp_digit) 1);
1549
+    q >>= ((fp_digit) 1);
1550 1550
   }
1551 1551
   return r;
1552 1552
 }
1553
-#endif
1554 1553
 
1555
-/* $Source: /cvs/libtom/libtommath/bn_mp_count_bits.c,v $ */
1556
-/* $Revision: 1.3 $ */
1557
-/* $Date: 2006/03/31 14:18:44 $ */
1554
+/* $Source: /cvs/libtom/tomsfastmath/src/bit/fp_count_bits.c,v $ */
1555
+/* $Revision: 1.1 $ */
1556
+/* $Date: 2006/12/31 21:25:53 $ */
1558 1557
 
1559
-/* End: bn_mp_count_bits.c */
1558
+/* End: fp_count_bits.c */
1560 1559
 
1561
-/* Start: bn_mp_div.c */
1562
-#include <bignum.h>
1563
-#ifdef BN_MP_DIV_C
1564
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
1565
- *
1566
- * LibTomMath is a library that provides multiple-precision
1567
- * integer arithmetic as well as number theoretic functionality.
1568
- *
1569
- * The library was designed directly after the MPI library by
1570
- * Michael Fromberger but has been written from scratch with
1571
- * additional optimizations in place.
1572
- *
1573
- * The library is free for all purposes without any express
1574
- * guarantee it works.
1560
+/* Start: fp_div.c */
1561
+/* TomsFastMath, a fast ISO C bignum library.
1562
+ * 
1563
+ * This project is meant to fill in where LibTomMath
1564
+ * falls short.  That is speed ;-)
1575 1565
  *
1576
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
1566
+ * This project is public domain and free for all purposes.
1567
+ * 
1568
+ * Tom St Denis, tomstdenis@gmail.com
1577 1569
  */
1570
+#include "bignum_fast.h"
1578 1571
 
1579
-#ifdef BN_MP_DIV_SMALL
1580
-
1581
-/* slower bit-bang division... also smaller */
1582
-int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
1583
-{
1584
-   mp_int ta, tb, tq, q;
1585
-   int    res, n, n2;
1586
-
1587
-  /* is divisor zero ? */
1588
-  if (mp_iszero (b) == 1) {
1589
-    return MP_VAL;
1590
-  }
1591
-
1592
-  /* if a < b then q=0, r = a */
1593
-  if (mp_cmp_mag (a, b) == MP_LT) {
1594
-    if (d != NULL) {
1595
-      res = mp_copy (a, d);
1596
-    } else {
1597
-      res = MP_OKAY;
1598
-    }
1599
-    if (c != NULL) {
1600
-      mp_zero (c);
1601
-    }
1602
-    return res;
1603
-  }
1604
-	
1605
-  /* init our temps */
1606
-  if ((res = mp_init_multi(&ta, &tb, &tq, &q, NULL) != MP_OKAY)) {
1607
-     return res;
1608
-  }
1609
-
1610
-
1611
-  mp_set(&tq, 1);
1612
-  n = mp_count_bits(a) - mp_count_bits(b);
1613
-  if (((res = mp_abs(a, &ta)) != MP_OKAY) ||
1614
-      ((res = mp_abs(b, &tb)) != MP_OKAY) || 
1615
-      ((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) ||
1616
-      ((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
1617
-      goto LBL_ERR;
1618
-  }
1619
-
1620
-  while (n-- >= 0) {
1621
-     if (mp_cmp(&tb, &ta) != MP_GT) {
1622
-        if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) ||
1623
-            ((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
1624
-           goto LBL_ERR;
1625
-        }
1626
-     }
1627
-     if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) ||
1628
-         ((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
1629
-           goto LBL_ERR;
1630
-     }
1631
-  }
1632
-
1633
-  /* now q == quotient and ta == remainder */
1634
-  n  = a->sign;
1635
-  n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
1636
-  if (c != NULL) {
1637
-     mp_exch(c, &q);
1638
-     c->sign  = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
1639
-  }
1640
-  if (d != NULL) {
1641
-     mp_exch(d, &ta);
1642
-     d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
1643
-  }
1644
-LBL_ERR:
1645
-   mp_clear_multi(&ta, &tb, &tq, &q, NULL);
1646
-   return res;
1647
-}
1648
-
1649
-#else
1650
-
1651
-/* integer signed division. 
1652
- * c*b + d == a [e.g. a/b, c=quotient, d=remainder]
1653
- * HAC pp.598 Algorithm 14.20
1654
- *
1655
- * Note that the description in HAC is horribly 
1656
- * incomplete.  For example, it doesn't consider 
1657
- * the case where digits are removed from 'x' in 
1658
- * the inner loop.  It also doesn't consider the 
1659
- * case that y has fewer than three digits, etc..
1660
- *
1661
- * The overall algorithm is as described as 
1662
- * 14.20 from HAC but fixed to treat these cases.
1663
-*/
1664
-int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
1572
+/* a/b => cb + d == a */
1573
+int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
1665 1574
 {
1666
-  mp_int  q, x, y, t1, t2;
1667
-  int     res, n, t, i, norm, neg;
1575
+  fp_int  q, x, y, t1, t2;
1576
+  int     n, t, i, norm, neg;
1668 1577
 
1669 1578
   /* is divisor zero ? */
1670
-  if (mp_iszero (b) == 1) {
1671
-    return MP_VAL;
1579
+  if (fp_iszero (b) == 1) {
1580
+    return FP_VAL;
1672 1581
   }
1673 1582
 
1674 1583
   /* if a < b then q=0, r = a */
1675
-  if (mp_cmp_mag (a, b) == MP_LT) {
1584
+  if (fp_cmp_mag (a, b) == FP_LT) {
1676 1585
     if (d != NULL) {
1677
-      res = mp_copy (a, d);
1678
-    } else {
1679
-      res = MP_OKAY;
1680
-    }
1586
+      fp_copy (a, d);
1587
+    } 
1681 1588
     if (c != NULL) {
1682
-      mp_zero (c);
1589
+      fp_zero (c);
1683 1590
     }
1684
-    return res;
1591
+    return FP_OKAY;
1685 1592
   }
1686 1593
 
1687
-  if ((res = mp_init_size (&q, a->used + 2)) != MP_OKAY) {
1688
-    return res;
1689
-  }
1594
+  fp_init(&q);
1690 1595
   q.used = a->used + 2;
1691 1596
 
1692
-  if ((res = mp_init (&t1)) != MP_OKAY) {
1693
-    goto LBL_Q;
1694
-  }
1695
-
1696
-  if ((res = mp_init (&t2)) != MP_OKAY) {
1697
-    goto LBL_T1;
1698
-  }
1699
-
1700
-  if ((res = mp_init_copy (&x, a)) != MP_OKAY) {
1701
-    goto LBL_T2;
1702
-  }
1703
-
1704
-  if ((res = mp_init_copy (&y, b)) != MP_OKAY) {
1705
-    goto LBL_X;
1706
-  }
1597
+  fp_init(&t1);
1598
+  fp_init(&t2);
1599
+  fp_init_copy(&x, a);
1600
+  fp_init_copy(&y, b);
1707 1601
 
1708 1602
   /* fix the sign */
1709
-  neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
1710
-  x.sign = y.sign = MP_ZPOS;
1603
+  neg = (a->sign == b->sign) ? FP_ZPOS : FP_NEG;
1604
+  x.sign = y.sign = FP_ZPOS;
1711 1605
 
1712 1606
   /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
1713
-  norm = mp_count_bits(&y) % DIGIT_BIT;
1607
+  norm = fp_count_bits(&y) % DIGIT_BIT;
1714 1608
   if (norm < (int)(DIGIT_BIT-1)) {
1715 1609
      norm = (DIGIT_BIT-1) - norm;
1716
-     if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) {
1717
-       goto LBL_Y;
1718
-     }
1719
-     if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) {
1720
-       goto LBL_Y;
1721
-     }
1610
+     fp_mul_2d (&x, norm, &x);
1611
+     fp_mul_2d (&y, norm, &y);
1722 1612
   } else {
1723 1613
      norm = 0;
1724 1614
   }
... ...
@@ -1728,19 +408,15 @@ int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
1728 1728
   t = y.used - 1;
1729 1729
 
1730 1730
  /* while (x >= y*b**{n-t}) do { q[n-t] += 1; x -= y*b**{n-t} } */
1731
-  if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */
1732
-    goto LBL_Y;
1733
-  }
1731
+  fp_lshd (&y, n - t);                                             /* y = y*b**{n-t} */
1734 1732
 
1735
-  while (mp_cmp (&x, &y) != MP_LT) {
1733
+  while (fp_cmp (&x, &y) != FP_LT) {
1736 1734
     ++(q.dp[n - t]);
1737
-    if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) {
1738
-      goto LBL_Y;
1739
-    }
1735
+    fp_sub (&x, &y, &x);
1740 1736
   }
1741 1737
 
1742 1738
   /* reset y by shifting it back down */
1743
-  mp_rshd (&y, n - t);
1739
+  fp_rshd (&y, n - t);
1744 1740
 
1745 1741
   /* step 3. for i from n down to (t + 1) */
1746 1742
   for (i = n; i >= (t + 1); i--) {
... ...
@@ -1751,15 +427,13 @@ int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
1751 1751
     /* step 3.1 if xi == yt then set q{i-t-1} to b-1, 
1752 1752
      * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
1753 1753
     if (x.dp[i] == y.dp[t]) {
1754
-      q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1);
1754
+      q.dp[i - t - 1] = ((((fp_word)1) << DIGIT_BIT) - 1);
1755 1755
     } else {
1756
-      mp_word tmp;
1757
-      tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT);
1758
-      tmp |= ((mp_word) x.dp[i - 1]);
1759
-      tmp /= ((mp_word) y.dp[t]);
1760
-      if (tmp > (mp_word) MP_MASK)
1761
-        tmp = MP_MASK;
1762
-      q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK));
1756
+      fp_word tmp;
1757
+      tmp = ((fp_word) x.dp[i]) << ((fp_word) DIGIT_BIT);
1758
+      tmp |= ((fp_word) x.dp[i - 1]);
1759
+      tmp /= ((fp_word) y.dp[t]);
1760
+      q.dp[i - t - 1] = (fp_digit) (tmp);
1763 1761
     }
1764 1762
 
1765 1763
     /* while (q{i-t-1} * (yt * b + y{t-1})) > 
... ...
@@ -1767,52 +441,35 @@ int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
1767 1767
      
1768 1768
        do q{i-t-1} -= 1; 
1769 1769
     */
1770
-    q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK;
1770
+    q.dp[i - t - 1] = (q.dp[i - t - 1] + 1);
1771 1771
     do {
1772
-      q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK;
1772
+      q.dp[i - t - 1] = (q.dp[i - t - 1] - 1);
1773 1773
 
1774 1774
       /* find left hand */
1775
-      mp_zero (&t1);
1775
+      fp_zero (&t1);
1776 1776
       t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
1777 1777
       t1.dp[1] = y.dp[t];
1778 1778
       t1.used = 2;
1779
-      if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) {
1780
-        goto LBL_Y;
1781
-      }
1779
+      fp_mul_d (&t1, q.dp[i - t - 1], &t1);
1782 1780
 
1783 1781
       /* find right hand */
1784 1782
       t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
1785 1783
       t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
1786 1784
       t2.dp[2] = x.dp[i];
1787 1785
       t2.used = 3;
1788
-    } while (mp_cmp_mag(&t1, &t2) == MP_GT);
1786
+    } while (fp_cmp_mag(&t1, &t2) == FP_GT);
1789 1787
 
1790 1788
     /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
1791
-    if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
1792
-      goto LBL_Y;
1793
-    }
1794
-
1795
-    if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
1796
-      goto LBL_Y;
1797
-    }
1798
-
1799
-    if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) {
1800
-      goto LBL_Y;
1801
-    }
1789
+    fp_mul_d (&y, q.dp[i - t - 1], &t1);
1790
+    fp_lshd  (&t1, i - t - 1);
1791
+    fp_sub   (&x, &t1, &x);
1802 1792
 
1803 1793
     /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
1804
-    if (x.sign == MP_NEG) {
1805
-      if ((res = mp_copy (&y, &t1)) != MP_OKAY) {
1806
-        goto LBL_Y;
1807
-      }
1808
-      if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
1809
-        goto LBL_Y;
1810
-      }
1811
-      if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) {
1812
-        goto LBL_Y;
1813
-      }
1814
-
1815
-      q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK;
1794
+    if (x.sign == FP_NEG) {
1795
+      fp_copy (&y, &t1);
1796
+      fp_lshd (&t1, i - t - 1);
1797
+      fp_add (&x, &t1, &x);
1798
+      q.dp[i - t - 1] = q.dp[i - t - 1] - 1;
1816 1799
     }
1817 1800
   }
1818 1801
 
... ...
@@ -1821,73 +478,57 @@ int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
1821 1821
    */
1822 1822
   
1823 1823
   /* get sign before writing to c */
1824
-  x.sign = x.used == 0 ? MP_ZPOS : a->sign;
1824
+  x.sign = x.used == 0 ? FP_ZPOS : a->sign;
1825 1825
 
1826 1826
   if (c != NULL) {
1827
-    mp_clamp (&q);
1828
-    mp_exch (&q, c);
1827
+    fp_clamp (&q);
1828
+    fp_copy (&q, c);
1829 1829
     c->sign = neg;
1830 1830
   }
1831 1831
 
1832 1832
   if (d != NULL) {
1833
-    mp_div_2d (&x, norm, &x, NULL);
1834
-    mp_exch (&x, d);
1835
-  }
1833
+    fp_div_2d (&x, norm, &x, NULL);
1836 1834
 
1837
-  res = MP_OKAY;
1835
+/* the following is a kludge, essentially we were seeing the right remainder but 
1836
+   with excess digits that should have been zero
1837
+ */
1838
+    for (i = b->used; i < x.used; i++) {
1839
+        x.dp[i] = 0;
1840
+    }
1841
+    fp_clamp(&x);
1842
+    fp_copy (&x, d);
1843
+  }
1838 1844
 
1839
-LBL_Y:mp_clear (&y);
1840
-LBL_X:mp_clear (&x);
1841
-LBL_T2:mp_clear (&t2);
1842
-LBL_T1:mp_clear (&t1);
1843
-LBL_Q:mp_clear (&q);
1844
-  return res;
1845
+  return FP_OKAY;
1845 1846
 }
1846 1847
 
1847
-#endif
1848
+/* $Source: /cvs/libtom/tomsfastmath/src/divide/fp_div.c,v $ */
1849
+/* $Revision: 1.1 $ */
1850
+/* $Date: 2006/12/31 21:25:53 $ */
1848 1851
 
1849
-#endif
1852
+/* End: fp_div.c */
1850 1853
 
1851
-/* $Source: /cvs/libtom/libtommath/bn_mp_div.c,v $ */
1852
-/* $Revision: 1.3 $ */
1853
-/* $Date: 2006/03/31 14:18:44 $ */
1854
-
1855
-/* End: bn_mp_div.c */
1856
-
1857
-/* Start: bn_mp_div_2.c */
1858
-#include <bignum.h>
1859
-#ifdef BN_MP_DIV_2_C
1860
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
1861
- *
1862
- * LibTomMath is a library that provides multiple-precision
1863
- * integer arithmetic as well as number theoretic functionality.
1864
- *
1865
- * The library was designed directly after the MPI library by
1866
- * Michael Fromberger but has been written from scratch with
1867
- * additional optimizations in place.
1868
- *
1869
- * The library is free for all purposes without any express
1870
- * guarantee it works.
1854
+/* Start: fp_div_2.c */
1855
+/* TomsFastMath, a fast ISO C bignum library.
1856
+ * 
1857
+ * This project is meant to fill in where LibTomMath
1858
+ * falls short.  That is speed ;-)
1871 1859
  *
1872
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
1860
+ * This project is public domain and free for all purposes.
1861
+ * 
1862
+ * Tom St Denis, tomstdenis@gmail.com
1873 1863
  */
1864
+#include "bignum_fast.h"
1874 1865
 
1875 1866
 /* b = a/2 */
1876
-int mp_div_2(mp_int * a, mp_int * b)
1867
+void fp_div_2(fp_int * a, fp_int * b)
1877 1868
 {
1878
-  int     x, res, oldused;
1879
-
1880
-  /* copy */
1881
-  if (b->alloc < a->used) {
1882
-    if ((res = mp_grow (b, a->used)) != MP_OKAY) {
1883
-      return res;
1884
-    }
1885
-  }
1869
+  int     x, oldused;
1886 1870
 
1887 1871
   oldused = b->used;
1888 1872
   b->used = a->used;
1889 1873
   {
1890
-    register mp_digit r, rr, *tmpa, *tmpb;
1874
+    register fp_digit r, rr, *tmpa, *tmpb;
1891 1875
 
1892 1876
     /* source alias */
1893 1877
     tmpa = a->dp + b->used - 1;
... ...
@@ -1915,82 +556,65 @@ int mp_div_2(mp_int * a, mp_int * b)
1915 1915
     }
1916 1916
   }
1917 1917
   b->sign = a->sign;
1918
-  mp_clamp (b);
1919
-  return MP_OKAY;
1918
+  fp_clamp (b);
1920 1919
 }
1921
-#endif
1922 1920
 
1923
-/* $Source: /cvs/libtom/libtommath/bn_mp_div_2.c,v $ */
1924
-/* $Revision: 1.3 $ */
1925
-/* $Date: 2006/03/31 14:18:44 $ */
1921
+/* $Source: /cvs/libtom/tomsfastmath/src/bit/fp_div_2.c,v $ */
1922
+/* $Revision: 1.1 $ */
1923
+/* $Date: 2006/12/31 21:25:53 $ */
1926 1924
 
1927
-/* End: bn_mp_div_2.c */
1925
+/* End: fp_div_2.c */
1928 1926
 
1929
-/* Start: bn_mp_div_2d.c */
1930
-#include <bignum.h>
1931
-#ifdef BN_MP_DIV_2D_C
1932
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
1933
- *
1934
- * LibTomMath is a library that provides multiple-precision
1935
- * integer arithmetic as well as number theoretic functionality.
1936
- *
1937
- * The library was designed directly after the MPI library by
1938
- * Michael Fromberger but has been written from scratch with
1939
- * additional optimizations in place.
1940
- *
1941
- * The library is free for all purposes without any express
1942
- * guarantee it works.
1927
+/* Start: fp_div_2d.c */
1928
+/* TomsFastMath, a fast ISO C bignum library.
1929
+ * 
1930
+ * This project is meant to fill in where LibTomMath
1931
+ * falls short.  That is speed ;-)
1943 1932
  *
1944
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
1933
+ * This project is public domain and free for all purposes.
1934
+ * 
1935
+ * Tom St Denis, tomstdenis@gmail.com
1945 1936
  */
1937
+#include "bignum_fast.h"
1946 1938
 
1947
-/* shift right by a certain bit count (store quotient in c, optional remainder in d) */
1948
-int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
1939
+/* c = a / 2**b */
1940
+void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d)
1949 1941
 {
1950
-  mp_digit D, r, rr;
1951
-  int     x, res;
1952
-  mp_int  t;
1953
-
1942
+  fp_digit D, r, rr;
1943
+  int      x;
1944
+  fp_int   t;
1954 1945
 
1955 1946
   /* if the shift count is <= 0 then we do no work */
1956 1947
   if (b <= 0) {
1957
-    res = mp_copy (a, c);
1948
+    fp_copy (a, c);
1958 1949
     if (d != NULL) {
1959
-      mp_zero (d);
1950
+      fp_zero (d);
1960 1951
     }
1961
-    return res;
1952
+    return;
1962 1953
   }
1963 1954
 
1964
-  if ((res = mp_init (&t)) != MP_OKAY) {
1965
-    return res;
1966
-  }
1955
+  fp_init(&t);
1967 1956
 
1968 1957
   /* get the remainder */
1969 1958
   if (d != NULL) {
1970
-    if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
1971
-      mp_clear (&t);
1972
-      return res;
1973
-    }
1959
+    fp_mod_2d (a, b, &t);
1974 1960
   }
1975 1961
 
1976 1962
   /* copy */
1977
-  if ((res = mp_copy (a, c)) != MP_OKAY) {
1978
-    mp_clear (&t);
1979
-    return res;
1980
-  }
1963
+  fp_copy(a, c);
1981 1964
 
1982 1965
   /* shift by as many digits in the bit count */
1983 1966
   if (b >= (int)DIGIT_BIT) {
1984
-    mp_rshd (c, b / DIGIT_BIT);
1967
+    fp_rshd (c, b / DIGIT_BIT);
1985 1968
   }
1986 1969
 
1987 1970
   /* shift any bit count < DIGIT_BIT */
1988
-  D = (mp_digit) (b % DIGIT_BIT);
1971
+  D = (fp_digit) (b % DIGIT_BIT);
1989 1972
   if (D != 0) {
1990
-    register mp_digit *tmpc, mask, shift;
1973
+    register fp_digit *tmpc, mask, shift;
1991 1974
 
1992 1975
     /* mask */
1993
-    mask = (((mp_digit)1) << D) - 1;
1976
+    mask = (((fp_digit)1) << D) - 1;
1994 1977
 
1995 1978
     /* shift for lsb */
1996 1979
     shift = DIGIT_BIT - D;
... ...
@@ -2012,128 +636,41 @@ int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
2012 2012
       r = rr;
2013 2013
     }
2014 2014
   }
2015
-  mp_clamp (c);
2015
+  fp_clamp (c);
2016 2016
   if (d != NULL) {
2017
-    mp_exch (&t, d);
2017
+    fp_copy (&t, d);
2018 2018
   }
2019
-  mp_clear (&t);
2020
-  return MP_OKAY;
2021 2019
 }
2022
-#endif
2023
-
2024
-/* $Source: /cvs/libtom/libtommath/bn_mp_div_2d.c,v $ */
2025
-/* $Revision: 1.3 $ */
2026
-/* $Date: 2006/03/31 14:18:44 $ */
2027
-
2028
-/* End: bn_mp_div_2d.c */
2029
-
2030
-/* Start: bn_mp_div_3.c */
2031
-#include <bignum.h>
2032
-#ifdef BN_MP_DIV_3_C
2033
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
2034
- *
2035
- * LibTomMath is a library that provides multiple-precision
2036
- * integer arithmetic as well as number theoretic functionality.
2037
- *
2038
- * The library was designed directly after the MPI library by
2039
- * Michael Fromberger but has been written from scratch with
2040
- * additional optimizations in place.
2041
- *
2042
- * The library is free for all purposes without any express
2043
- * guarantee it works.
2044
- *
2045
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
2046
- */
2047
-
2048
-/* divide by three (based on routine from MPI and the GMP manual) */
2049
-int
2050
-mp_div_3 (mp_int * a, mp_int *c, mp_digit * d)
2051
-{
2052
-  mp_int   q;
2053
-  mp_word  w, t;
2054
-  mp_digit b;
2055
-  int      res, ix;
2056
-  
2057
-  /* b = 2**DIGIT_BIT / 3 */
2058
-  b = (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3);
2059
-
2060
-  if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
2061
-     return res;
2062
-  }
2063
-  
2064
-  q.used = a->used;
2065
-  q.sign = a->sign;
2066
-  w = 0;
2067
-  for (ix = a->used - 1; ix >= 0; ix--) {
2068
-     w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
2069
-
2070
-     if (w >= 3) {
2071
-        /* multiply w by [1/3] */
2072
-        t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT);
2073
-
2074
-        /* now subtract 3 * [w/3] from w, to get the remainder */
2075
-        w -= t+t+t;
2076
-
2077
-        /* fixup the remainder as required since
2078
-         * the optimization is not exact.
2079
-         */
2080
-        while (w >= 3) {
2081
-           t += 1;
2082
-           w -= 3;
2083
-        }
2084
-      } else {
2085
-        t = 0;
2086
-      }
2087
-      q.dp[ix] = (mp_digit)t;
2088
-  }
2089
-
2090
-  /* [optional] store the remainder */
2091
-  if (d != NULL) {
2092
-     *d = (mp_digit)w;
2093
-  }
2094
-
2095
-  /* [optional] store the quotient */
2096
-  if (c != NULL) {
2097
-     mp_clamp(&q);
2098
-     mp_exch(&q, c);
2099
-  }
2100
-  mp_clear(&q);
2101
-  
2102
-  return res;
2103
-}
2104
-
2105
-#endif
2106 2020
 
2107
-/* $Source: /cvs/libtom/libtommath/bn_mp_div_3.c,v $ */
2108
-/* $Revision: 1.3 $ */
2109
-/* $Date: 2006/03/31 14:18:44 $ */
2021
+/* $Source: /cvs/libtom/tomsfastmath/src/bit/fp_div_2d.c,v $ */
2022
+/* $Revision: 1.1 $ */
2023
+/* $Date: 2006/12/31 21:25:53 $ */
2110 2024
 
2111
-/* End: bn_mp_div_3.c */
2025
+/* End: fp_div_2d.c */
2112 2026
 
2113
-/* Start: bn_mp_div_d.c */
2114
-#include <bignum.h>
2115
-#ifdef BN_MP_DIV_D_C
2116
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
2117
- *
2118
- * LibTomMath is a library that provides multiple-precision
2119
- * integer arithmetic as well as number theoretic functionality.
2120
- *
2121
- * The library was designed directly after the MPI library by
2122
- * Michael Fromberger but has been written from scratch with
2123
- * additional optimizations in place.
2124
- *
2125
- * The library is free for all purposes without any express
2126
- * guarantee it works.
2027
+/* Start: fp_div_d.c */
2028
+/* TomsFastMath, a fast ISO C bignum library.
2029
+ * 
2030
+ * This project is meant to fill in where LibTomMath
2031
+ * falls short.  That is speed ;-)
2127 2032
  *
2128
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
2033
+ * This project is public domain and free for all purposes.
2034
+ * 
2035
+ * Tom St Denis, tomstdenis@gmail.com
2129 2036
  */
2037
+#include "bignum_fast.h"
2130 2038
 
2131
-static int s_is_power_of_two(mp_digit b, int *p)
2039
+static int s_is_power_of_two(fp_digit b, int *p)
2132 2040
 {
2133 2041
    int x;
2134 2042
 
2135
-   for (x = 1; x < DIGIT_BIT; x++) {
2136
-      if (b == (((mp_digit)1)<<x)) {
2043
+   /* fast return if no power of two */
2044
+   if ((b==0) || (b & (b-1))) {
2045
+      return 0;
2046
+   }
2047
+
2048
+   for (x = 0; x < DIGIT_BIT; x++) {
2049
+      if (b == (((fp_digit)1)<<x)) {
2137 2050
          *p = x;
2138 2051
          return 1;
2139 2052
       }
... ...
@@ -2141,676 +678,223 @@ static int s_is_power_of_two(mp_digit b, int *p)
2141 2141
    return 0;
2142 2142
 }
2143 2143
 
2144
-/* single digit division (based on routine from MPI) */
2145
-int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
2144
+/* a/b => cb + d == a */
2145
+int fp_div_d(fp_int *a, fp_digit b, fp_int *c, fp_digit *d)
2146 2146
 {
2147
-  mp_int  q;
2148
-  mp_word w;
2149
-  mp_digit t;
2150
-  int     res, ix;
2147
+  fp_int   q;
2148
+  fp_word  w;
2149
+  fp_digit t;
2150
+  int      ix;
2151 2151
 
2152 2152
   /* cannot divide by zero */
2153 2153
   if (b == 0) {
2154
-     return MP_VAL;
2154
+     return FP_VAL;
2155 2155
   }
2156 2156
 
2157 2157
   /* quick outs */
2158
-  if (b == 1 || mp_iszero(a) == 1) {
2158
+  if (b == 1 || fp_iszero(a) == 1) {
2159 2159
      if (d != NULL) {
2160 2160
         *d = 0;
2161 2161
      }
2162 2162
      if (c != NULL) {
2163
-        return mp_copy(a, c);
2163
+        fp_copy(a, c);
2164 2164
      }
2165
-     return MP_OKAY;
2165
+     return FP_OKAY;
2166 2166
   }
2167 2167
 
2168 2168
   /* power of two ? */
2169 2169
   if (s_is_power_of_two(b, &ix) == 1) {
2170 2170
      if (d != NULL) {
2171
-        *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
2171
+        *d = a->dp[0] & ((((fp_digit)1)<<ix) - 1);
2172 2172
      }
2173 2173
      if (c != NULL) {
2174
-        return mp_div_2d(a, ix, c, NULL);
2174
+        fp_div_2d(a, ix, c, NULL);
2175 2175
      }
2176
-     return MP_OKAY;
2176
+     return FP_OKAY;
2177 2177
   }
2178 2178
 
2179
-#ifdef BN_MP_DIV_3_C
2180
-  /* three? */
2181
-  if (b == 3) {
2182
-     return mp_div_3(a, c, d);
2183
-  }
2184
-#endif
2185
-
2186 2179
   /* no easy answer [c'est la vie].  Just division */
2187
-  if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
2188
-     return res;
2189
-  }
2180
+  fp_init(&q);
2190 2181
   
2191 2182
   q.used = a->used;
2192 2183
   q.sign = a->sign;
2193 2184
   w = 0;
2194 2185
   for (ix = a->used - 1; ix >= 0; ix--) {
2195
-     w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
2186
+     w = (w << ((fp_word)DIGIT_BIT)) | ((fp_word)a->dp[ix]);
2196 2187
      
2197 2188
      if (w >= b) {
2198
-        t = (mp_digit)(w / b);
2199
-        w -= ((mp_word)t) * ((mp_word)b);
2189
+        t = (fp_digit)(w / b);
2190
+        w -= ((fp_word)t) * ((fp_word)b);
2200 2191
       } else {
2201 2192
         t = 0;
2202 2193
       }
2203
-      q.dp[ix] = (mp_digit)t;
2194
+      q.dp[ix] = (fp_digit)t;
2204 2195
   }
2205 2196
   
2206 2197
   if (d != NULL) {
2207
-     *d = (mp_digit)w;
2198
+     *d = (fp_digit)w;
2208 2199
   }
2209 2200
   
2210 2201
   if (c != NULL) {
2211
-     mp_clamp(&q);
2212
-     mp_exch(&q, c);
2202
+     fp_clamp(&q);
2203
+     fp_copy(&q, c);
2213 2204
   }
2214
-  mp_clear(&q);
2215
-  
2216
-  return res;
2205
+ 
2206
+  return FP_OKAY;
2217 2207
 }
2218 2208
 
2219
-#endif
2220 2209
 
2221
-/* $Source: /cvs/libtom/libtommath/bn_mp_div_d.c,v $ */
2222
-/* $Revision: 1.3 $ */
2223
-/* $Date: 2006/03/31 14:18:44 $ */
2210
+/* $Source: /cvs/libtom/tomsfastmath/src/divide/fp_div_d.c,v $ */
2211
+/* $Revision: 1.2 $ */
2212
+/* $Date: 2007/01/12 15:13:54 $ */
2224 2213
 
2225
-/* End: bn_mp_div_d.c */
2214
+/* End: fp_div_d.c */
2226 2215
 
2227
-/* Start: bn_mp_dr_is_modulus.c */
2228
-#include <bignum.h>
2229
-#ifdef BN_MP_DR_IS_MODULUS_C
2230
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
2231
- *
2232
- * LibTomMath is a library that provides multiple-precision
2233
- * integer arithmetic as well as number theoretic functionality.
2234
- *
2235
- * The library was designed directly after the MPI library by
2236
- * Michael Fromberger but has been written from scratch with
2237
- * additional optimizations in place.
2238
- *
2239
- * The library is free for all purposes without any express
2240
- * guarantee it works.
2216
+/* Start: fp_exptmod.c */
2217
+/* TomsFastMath, a fast ISO C bignum library.
2218
+ * 
2219
+ * This project is meant to fill in where LibTomMath
2220
+ * falls short.  That is speed ;-)
2241 2221
  *
2242
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
2222
+ * This project is public domain and free for all purposes.
2223
+ * 
2224
+ * Tom St Denis, tomstdenis@gmail.com
2243 2225
  */
2226
+#include "bignum_fast.h"
2244 2227
 
2245
-/* determines if a number is a valid DR modulus */
2246
-int mp_dr_is_modulus(mp_int *a)
2247
-{
2248
-   int ix;
2228
+#ifdef TFM_TIMING_RESISTANT
2249 2229
 
2250
-   /* must be at least two digits */
2251
-   if (a->used < 2) {
2252
-      return 0;
2253
-   }
2230
+/* timing resistant montgomery ladder based exptmod 
2254 2231
 
2255
-   /* must be of the form b**k - a [a <= b] so all
2256
-    * but the first digit must be equal to -1 (mod b).
2257
-    */
2258
-   for (ix = 1; ix < a->used; ix++) {
2259
-       if (a->dp[ix] != MP_MASK) {
2260
-          return 0;
2261
-       }
2262
-   }
2263
-   return 1;
2264
-}
2265
-
2266
-#endif
2267
-
2268
-/* $Source: /cvs/libtom/libtommath/bn_mp_dr_is_modulus.c,v $ */
2269
-/* $Revision: 1.3 $ */
2270
-/* $Date: 2006/03/31 14:18:44 $ */
2271
-
2272
-/* End: bn_mp_dr_is_modulus.c */
2273
-
2274
-/* Start: bn_mp_dr_reduce.c */
2275
-#include <bignum.h>
2276
-#ifdef BN_MP_DR_REDUCE_C
2277
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
2278
- *
2279
- * LibTomMath is a library that provides multiple-precision
2280
- * integer arithmetic as well as number theoretic functionality.
2281
- *
2282
- * The library was designed directly after the MPI library by
2283
- * Michael Fromberger but has been written from scratch with
2284
- * additional optimizations in place.
2285
- *
2286
- * The library is free for all purposes without any express
2287
- * guarantee it works.
2288
- *
2289
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
2290
- */
2291
-
2292
-/* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
2293
- *
2294
- * Based on algorithm from the paper
2295
- *
2296
- * "Generating Efficient Primes for Discrete Log Cryptosystems"
2297
- *                 Chae Hoon Lim, Pil Joong Lee,
2298
- *          POSTECH Information Research Laboratories
2299
- *
2300
- * The modulus must be of a special format [see manual]
2301
- *
2302
- * Has been modified to use algorithm 7.10 from the LTM book instead
2303
- *
2304
- * Input x must be in the range 0 <= x <= (n-1)**2
2305
- */
2306
-int
2307
-mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k)
2232
+   Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder", Cryptographic Hardware and Embedded Systems, CHES 2002
2233
+*/
2234
+static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
2308 2235
 {
2309
-  int      err, i, m;
2310
-  mp_word  r;
2311
-  mp_digit mu, *tmpx1, *tmpx2;
2312
-
2313
-  /* m = digits in modulus */
2314
-  m = n->used;
2315
-
2316
-  /* ensure that "x" has at least 2m digits */
2317
-  if (x->alloc < m + m) {
2318
-    if ((err = mp_grow (x, m + m)) != MP_OKAY) {
2319
-      return err;
2320
-    }
2321
-  }
2322
-
2323
-/* top of loop, this is where the code resumes if
2324
- * another reduction pass is required.
2325
- */
2326
-top:
2327
-  /* aliases for digits */
2328
-  /* alias for lower half of x */
2329
-  tmpx1 = x->dp;
2330
-
2331
-  /* alias for upper half of x, or x/B**m */
2332
-  tmpx2 = x->dp + m;
2333
-
2334
-  /* set carry to zero */
2335
-  mu = 0;
2336
-
2337
-  /* compute (x mod B**m) + k * [x/B**m] inline and inplace */
2338
-  for (i = 0; i < m; i++) {
2339
-      r         = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu;
2340
-      *tmpx1++  = (mp_digit)(r & MP_MASK);
2341
-      mu        = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
2342
-  }
2343
-
2344
-  /* set final carry */
2345
-  *tmpx1++ = mu;
2346
-
2347
-  /* zero words above m */
2348
-  for (i = m + 1; i < x->used; i++) {
2349
-      *tmpx1++ = 0;
2350
-  }
2236
+  fp_int   R[2];
2237
+  fp_digit buf, mp;
2238
+  int      err, bitcnt, digidx, y;
2351 2239
 
2352
-  /* clamp, sub and return */
2353
-  mp_clamp (x);
2354
-
2355
-  /* if x >= n then subtract and reduce again
2356
-   * Each successive "recursion" makes the input smaller and smaller.
2357
-   */
2358
-  if (mp_cmp_mag (x, n) != MP_LT) {
2359
-    s_mp_sub(x, n, x);
2360
-    goto top;
2240
+  /* now setup montgomery  */
2241
+  if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) {
2242
+     return err;
2361 2243
   }
2362
-  return MP_OKAY;
2363
-}
2364
-#endif
2365
-
2366
-/* $Source: /cvs/libtom/libtommath/bn_mp_dr_reduce.c,v $ */
2367
-/* $Revision: 1.3 $ */
2368
-/* $Date: 2006/03/31 14:18:44 $ */
2369
-
2370
-/* End: bn_mp_dr_reduce.c */
2371
-
2372
-/* Start: bn_mp_dr_setup.c */
2373
-#include <bignum.h>
2374
-#ifdef BN_MP_DR_SETUP_C
2375
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
2376
- *
2377
- * LibTomMath is a library that provides multiple-precision
2378
- * integer arithmetic as well as number theoretic functionality.
2379
- *
2380
- * The library was designed directly after the MPI library by
2381
- * Michael Fromberger but has been written from scratch with
2382
- * additional optimizations in place.
2383
- *
2384
- * The library is free for all purposes without any express
2385
- * guarantee it works.
2386
- *
2387
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
2388
- */
2389
-
2390
-/* determines the setup value */
2391
-void mp_dr_setup(mp_int *a, mp_digit *d)
2392
-{
2393
-   /* the casts are required if DIGIT_BIT is one less than
2394
-    * the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
2395
-    */
2396
-   *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) - 
2397
-        ((mp_word)a->dp[0]));
2398
-}
2399
-
2400
-#endif
2401
-
2402
-/* $Source: /cvs/libtom/libtommath/bn_mp_dr_setup.c,v $ */
2403
-/* $Revision: 1.3 $ */
2404
-/* $Date: 2006/03/31 14:18:44 $ */
2405
-
2406
-/* End: bn_mp_dr_setup.c */
2407
-
2408
-/* Start: bn_mp_exch.c */
2409
-#include <bignum.h>
2410
-#ifdef BN_MP_EXCH_C
2411
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
2412
- *
2413
- * LibTomMath is a library that provides multiple-precision
2414
- * integer arithmetic as well as number theoretic functionality.
2415
- *
2416
- * The library was designed directly after the MPI library by
2417
- * Michael Fromberger but has been written from scratch with
2418
- * additional optimizations in place.
2419
- *
2420
- * The library is free for all purposes without any express
2421
- * guarantee it works.
2422
- *
2423
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
2424
- */
2425
-
2426
-/* swap the elements of two integers, for cases where you can't simply swap the 
2427
- * mp_int pointers around
2428
- */
2429
-void
2430
-mp_exch (mp_int * a, mp_int * b)
2431
-{
2432
-  mp_int  t;
2433
-
2434
-  t  = *a;
2435
-  *a = *b;
2436
-  *b = t;
2437
-}
2438
-#endif
2439
-
2440
-/* $Source: /cvs/libtom/libtommath/bn_mp_exch.c,v $ */
2441
-/* $Revision: 1.3 $ */
2442
-/* $Date: 2006/03/31 14:18:44 $ */
2443 2244
 
2444
-/* End: bn_mp_exch.c */
2445
-
2446
-/* Start: bn_mp_expt_d.c */
2447
-#include <bignum.h>
2448
-#ifdef BN_MP_EXPT_D_C
2449
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
2450
- *
2451
- * LibTomMath is a library that provides multiple-precision
2452
- * integer arithmetic as well as number theoretic functionality.
2453
- *
2454
- * The library was designed directly after the MPI library by
2455
- * Michael Fromberger but has been written from scratch with
2456
- * additional optimizations in place.
2457
- *
2458
- * The library is free for all purposes without any express
2459
- * guarantee it works.
2460
- *
2461
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
2462
- */
2463
-
2464
-/* calculate c = a**b  using a square-multiply algorithm */
2465
-int mp_expt_d (mp_int * a, mp_digit b, mp_int * c)
2466
-{
2467
-  int     res, x;
2468
-  mp_int  g;
2245
+  fp_init(&R[0]);   
2246
+  fp_init(&R[1]);   
2247
+   
2248
+  /* now we need R mod m */
2249
+  fp_montgomery_calc_normalization (&R[0], P);
2469 2250
 
2470
-  if ((res = mp_init_copy (&g, a)) != MP_OKAY) {
2471
-    return res;
2251
+  /* now set R[0][1] to G * R mod m */
2252
+  if (fp_cmp_mag(P, G) != FP_GT) {
2253
+     /* G > P so we reduce it first */
2254
+     fp_mod(G, P, &R[1]);
2255
+  } else {
2256
+     fp_copy(G, &R[1]);
2472 2257
   }
2258
+  fp_mulmod (&R[1], &R[0], P, &R[1]);
2473 2259
 
2474
-  /* set initial result */
2475
-  mp_set (c, 1);
2476
-
2477
-  for (x = 0; x < (int) DIGIT_BIT; x++) {
2478
-    /* square */
2479
-    if ((res = mp_sqr (c, c)) != MP_OKAY) {
2480
-      mp_clear (&g);
2481
-      return res;
2482
-    }
2260
+  /* for j = t-1 downto 0 do
2261
+        r_!k = R0*R1; r_k = r_k^2
2262
+  */
2263
+  
2264
+  /* set initial mode and bit cnt */
2265
+  bitcnt = 1;
2266
+  buf    = 0;
2267
+  digidx = X->used - 1;
2483 2268
 
2484
-    /* if the bit is set multiply */
2485
-    if ((b & (mp_digit) (((mp_digit)1) << (DIGIT_BIT - 1))) != 0) {
2486
-      if ((res = mp_mul (c, &g, c)) != MP_OKAY) {
2487
-         mp_clear (&g);
2488
-         return res;
2269
+  for (;;) {
2270
+    /* grab next digit as required */
2271
+    if (--bitcnt == 0) {
2272
+      /* if digidx == -1 we are out of digits so break */
2273
+      if (digidx == -1) {
2274
+        break;
2489 2275
       }
2276
+      /* read next digit and reset bitcnt */
2277
+      buf    = X->dp[digidx--];
2278
+      bitcnt = (int)DIGIT_BIT;
2490 2279
     }
2491 2280
 
2492
-    /* shift to next bit */
2493
-    b <<= 1;
2494
-  }
2495
-
2496
-  mp_clear (&g);
2497
-  return MP_OKAY;
2498
-}
2499
-#endif
2500
-
2501
-/* $Source: /cvs/libtom/libtommath/bn_mp_expt_d.c,v $ */
2502
-/* $Revision: 1.3 $ */
2503
-/* $Date: 2006/03/31 14:18:44 $ */
2504
-
2505
-/* End: bn_mp_expt_d.c */
2506
-
2507
-/* Start: bn_mp_exptmod.c */
2508
-#include <bignum.h>
2509
-#ifdef BN_MP_EXPTMOD_C
2510
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
2511
- *
2512
- * LibTomMath is a library that provides multiple-precision
2513
- * integer arithmetic as well as number theoretic functionality.
2514
- *
2515
- * The library was designed directly after the MPI library by
2516
- * Michael Fromberger but has been written from scratch with
2517
- * additional optimizations in place.
2518
- *
2519
- * The library is free for all purposes without any express
2520
- * guarantee it works.
2521
- *
2522
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
2523
- */
2524
-
2525
-
2526
-/* this is a shell function that calls either the normal or Montgomery
2527
- * exptmod functions.  Originally the call to the montgomery code was
2528
- * embedded in the normal function but that wasted alot of stack space
2529
- * for nothing (since 99% of the time the Montgomery code would be called)
2530
- */
2531
-int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
2532
-{
2533
-  int dr;
2534
-
2535
-  /* modulus P must be positive */
2536
-  if (P->sign == MP_NEG) {
2537
-     return MP_VAL;
2538
-  }
2539
-
2540
-  /* if exponent X is negative we have to recurse */
2541
-  if (X->sign == MP_NEG) {
2542
-#ifdef BN_MP_INVMOD_C
2543
-     mp_int tmpG, tmpX;
2544
-     int err;
2545
-
2546
-     /* first compute 1/G mod P */
2547
-     if ((err = mp_init(&tmpG)) != MP_OKAY) {
2548
-        return err;
2549
-     }
2550
-     if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
2551
-        mp_clear(&tmpG);
2552
-        return err;
2553
-     }
2554
-
2555
-     /* now get |X| */
2556
-     if ((err = mp_init(&tmpX)) != MP_OKAY) {
2557
-        mp_clear(&tmpG);
2558
-        return err;
2559
-     }
2560
-     if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
2561
-        mp_clear_multi(&tmpG, &tmpX, NULL);
2562
-        return err;
2563
-     }
2281
+    /* grab the next msb from the exponent */
2282
+    y     = (fp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
2283
+    buf <<= (fp_digit)1;
2564 2284
 
2565
-     /* and now compute (1/G)**|X| instead of G**X [X < 0] */
2566
-     err = mp_exptmod(&tmpG, &tmpX, P, Y);
2567
-     mp_clear_multi(&tmpG, &tmpX, NULL);
2568
-     return err;
2569
-#else 
2570
-     /* no invmod */
2571
-     return MP_VAL;
2572
-#endif
2285
+    /* do ops */
2286
+    fp_mul(&R[0], &R[1], &R[y^1]); fp_montgomery_reduce(&R[y^1], P, mp);
2287
+    fp_sqr(&R[y], &R[y]);          fp_montgomery_reduce(&R[y], P, mp);
2573 2288
   }
2574 2289
 
2575
-/* modified diminished radix reduction */
2576
-#if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && defined(BN_S_MP_EXPTMOD_C)
2577
-  if (mp_reduce_is_2k_l(P) == MP_YES) {
2578
-     return s_mp_exptmod(G, X, P, Y, 1);
2579
-  }
2580
-#endif
2290
+   fp_montgomery_reduce(&R[0], P, mp);
2291
+   fp_copy(&R[0], Y);
2292
+   return FP_OKAY;
2293
+}   
2581 2294
 
2582
-#ifdef BN_MP_DR_IS_MODULUS_C
2583
-  /* is it a DR modulus? */
2584
-  dr = mp_dr_is_modulus(P);
2585
-#else
2586
-  /* default to no */
2587
-  dr = 0;
2588
-#endif
2589
-
2590
-#ifdef BN_MP_REDUCE_IS_2K_C
2591
-  /* if not, is it a unrestricted DR modulus? */
2592
-  if (dr == 0) {
2593
-     dr = mp_reduce_is_2k(P) << 1;
2594
-  }
2595
-#endif
2596
-    
2597
-  /* if the modulus is odd or dr != 0 use the montgomery method */
2598
-#ifdef BN_MP_EXPTMOD_FAST_C
2599
-  if (mp_isodd (P) == 1 || dr !=  0) {
2600
-    return mp_exptmod_fast (G, X, P, Y, dr);
2601
-  } else {
2602
-#endif
2603
-#ifdef BN_S_MP_EXPTMOD_C
2604
-    /* otherwise use the generic Barrett reduction technique */
2605
-    return s_mp_exptmod (G, X, P, Y, 0);
2606 2295
 #else
2607
-    /* no exptmod for evens */
2608
-    return MP_VAL;
2609
-#endif
2610
-#ifdef BN_MP_EXPTMOD_FAST_C
2611
-  }
2612
-#endif
2613
-}
2614 2296
 
2615
-#endif
2616
-
2617
-/* $Source: /cvs/libtom/libtommath/bn_mp_exptmod.c,v $ */
2618
-/* $Revision: 1.4 $ */
2619
-/* $Date: 2006/03/31 14:18:44 $ */
2620
-
2621
-/* End: bn_mp_exptmod.c */
2622
-
2623
-/* Start: bn_mp_exptmod_fast.c */
2624
-#include <bignum.h>
2625
-#ifdef BN_MP_EXPTMOD_FAST_C
2626
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
2627
- *
2628
- * LibTomMath is a library that provides multiple-precision
2629
- * integer arithmetic as well as number theoretic functionality.
2630
- *
2631
- * The library was designed directly after the MPI library by
2632
- * Michael Fromberger but has been written from scratch with
2633
- * additional optimizations in place.
2634
- *
2635
- * The library is free for all purposes without any express
2636
- * guarantee it works.
2637
- *
2638
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
2297
+/* y = g**x (mod b) 
2298
+ * Some restrictions... x must be positive and < b
2639 2299
  */
2640
-
2641
-/* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
2642
- *
2643
- * Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
2644
- * The value of k changes based on the size of the exponent.
2645
- *
2646
- * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
2647
- */
2648
-
2649
-#ifdef MP_LOW_MEM
2650
-   #define TAB_SIZE 32
2651
-#else
2652
-   #define TAB_SIZE 256
2653
-#endif
2654
-
2655
-int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
2300
+static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
2656 2301
 {
2657
-  mp_int  M[TAB_SIZE], res;
2658
-  mp_digit buf, mp;
2659
-  int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
2660
-
2661
-  /* use a pointer to the reduction algorithm.  This allows us to use
2662
-   * one of many reduction algorithms without modding the guts of
2663
-   * the code with if statements everywhere.
2664
-   */
2665
-  int     (*redux)(mp_int*,mp_int*,mp_digit);
2302
+  fp_int   M[64], res;
2303
+  fp_digit buf, mp;
2304
+  int      err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
2666 2305
 
2667 2306
   /* find window size */
2668
-  x = mp_count_bits (X);
2669
-  if (x <= 7) {
2670
-    winsize = 2;
2307
+  x = fp_count_bits (X);
2308
+  if (x <= 21) {
2309
+    winsize = 1;
2671 2310
   } else if (x <= 36) {
2672 2311
     winsize = 3;
2673 2312
   } else if (x <= 140) {
2674 2313
     winsize = 4;
2675 2314
   } else if (x <= 450) {
2676 2315
     winsize = 5;
2677
-  } else if (x <= 1303) {
2678
-    winsize = 6;
2679
-  } else if (x <= 3529) {
2680
-    winsize = 7;
2681 2316
   } else {
2682
-    winsize = 8;
2683
-  }
2684
-
2685
-#ifdef MP_LOW_MEM
2686
-  if (winsize > 5) {
2687
-     winsize = 5;
2688
-  }
2689
-#endif
2317
+    winsize = 6;
2318
+  } 
2690 2319
 
2691 2320
   /* init M array */
2692
-  /* init first cell */
2693
-  if ((err = mp_init(&M[1])) != MP_OKAY) {
2694
-     return err;
2695
-  }
2696
-
2697
-  /* now init the second half of the array */
2698
-  for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
2699
-    if ((err = mp_init(&M[x])) != MP_OKAY) {
2700
-      for (y = 1<<(winsize-1); y < x; y++) {
2701
-        mp_clear (&M[y]);
2702
-      }
2703
-      mp_clear(&M[1]);
2704
-      return err;
2705
-    }
2706
-  }
2321
+  memset(M, 0, sizeof(M)); 
2707 2322
 
2708
-  /* determine and setup reduction code */
2709
-  if (redmode == 0) {
2710
-#ifdef BN_MP_MONTGOMERY_SETUP_C     
2711
-     /* now setup montgomery  */
2712
-     if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
2713
-        goto LBL_M;
2714
-     }
2715
-#else
2716
-     err = MP_VAL;
2717
-     goto LBL_M;
2718
-#endif
2719
-
2720
-     /* automatically pick the comba one if available (saves quite a few calls/ifs) */
2721
-#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
2722
-     if (((P->used * 2 + 1) < MP_WARRAY) &&
2723
-          P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2724
-        redux = fast_mp_montgomery_reduce;
2725
-     } else 
2726
-#endif
2727
-     {
2728
-#ifdef BN_MP_MONTGOMERY_REDUCE_C
2729
-        /* use slower baseline Montgomery method */
2730
-        redux = mp_montgomery_reduce;
2731
-#else
2732
-        err = MP_VAL;
2733
-        goto LBL_M;
2734
-#endif
2735
-     }
2736
-  } else if (redmode == 1) {
2737
-#if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
2738
-     /* setup DR reduction for moduli of the form B**k - b */
2739
-     mp_dr_setup(P, &mp);
2740
-     redux = mp_dr_reduce;
2741
-#else
2742
-     err = MP_VAL;
2743
-     goto LBL_M;
2744
-#endif
2745
-  } else {
2746
-#if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
2747
-     /* setup DR reduction for moduli of the form 2**k - b */
2748
-     if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
2749
-        goto LBL_M;
2750
-     }
2751
-     redux = mp_reduce_2k;
2752
-#else
2753
-     err = MP_VAL;
2754
-     goto LBL_M;
2755
-#endif
2323
+  /* now setup montgomery  */
2324
+  if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) {
2325
+     return err;
2756 2326
   }
2757 2327
 
2758 2328
   /* setup result */
2759
-  if ((err = mp_init (&res)) != MP_OKAY) {
2760
-    goto LBL_M;
2761
-  }
2329
+  fp_init(&res);
2762 2330
 
2763 2331
   /* create M table
2764 2332
    *
2765
-
2333
+   * The M table contains powers of the input base, e.g. M[x] = G^x mod P
2766 2334
    *
2767 2335
    * The first half of the table is not computed though accept for M[0] and M[1]
2768 2336
    */
2769 2337
 
2770
-  if (redmode == 0) {
2771
-#ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
2772
-     /* now we need R mod m */
2773
-     if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
2774
-       goto LBL_RES;
2775
-     }
2776
-#else 
2777
-     err = MP_VAL;
2778
-     goto LBL_RES;
2779
-#endif
2338
+   /* now we need R mod m */
2339
+   fp_montgomery_calc_normalization (&res, P);
2780 2340
 
2781
-     /* now set M[1] to G * R mod m */
2782
-     if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
2783
-       goto LBL_RES;
2784
-     }
2785
-  } else {
2786
-     mp_set(&res, 1);
2787
-     if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
2788
-        goto LBL_RES;
2789
-     }
2790
-  }
2341
+   /* now set M[1] to G * R mod m */
2342
+   if (fp_cmp_mag(P, G) != FP_GT) {
2343
+      /* G > P so we reduce it first */
2344
+      fp_mod(G, P, &M[1]);
2345
+   } else {
2346
+      fp_copy(G, &M[1]);
2347
+   }
2348
+   fp_mulmod (&M[1], &res, P, &M[1]);
2791 2349
 
2792 2350
   /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
2793
-  if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
2794
-    goto LBL_RES;
2795
-  }
2796
-
2351
+  fp_copy (&M[1], &M[1 << (winsize - 1)]);
2797 2352
   for (x = 0; x < (winsize - 1); x++) {
2798
-    if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
2799
-      goto LBL_RES;
2800
-    }
2801
-    if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
2802
-      goto LBL_RES;
2803
-    }
2353
+    fp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)]);
2354
+    fp_montgomery_reduce (&M[1 << (winsize - 1)], P, mp);
2804 2355
   }
2805 2356
 
2806 2357
   /* create upper table */
2807 2358
   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
2808
-    if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
2809
-      goto LBL_RES;
2810
-    }
2811
-    if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
2812
-      goto LBL_RES;
2813
-    }
2359
+    fp_mul(&M[x - 1], &M[1], &M[x]);
2360
+    fp_montgomery_reduce(&M[x], P, mp);
2814 2361
   }
2815 2362
 
2816 2363
   /* set initial mode and bit cnt */
... ...
@@ -2834,8 +918,8 @@ int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode
2834 2834
     }
2835 2835
 
2836 2836
     /* grab the next msb from the exponent */
2837
-    y     = (mp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
2838
-    buf <<= (mp_digit)1;
2837
+    y     = (fp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
2838
+    buf <<= (fp_digit)1;
2839 2839
 
2840 2840
     /* if the bit is zero and mode == 0 then we ignore it
2841 2841
      * These represent the leading zero bits before the first 1 bit
... ...
@@ -2848,12 +932,8 @@ int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode
2848 2848
 
2849 2849
     /* if the bit is zero and mode == 1 then we square */
2850 2850
     if (mode == 1 && y == 0) {
2851
-      if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2852
-        goto LBL_RES;
2853
-      }
2854
-      if ((err = redux (&res, P, mp)) != MP_OKAY) {
2855
-        goto LBL_RES;
2856
-      }
2851
+      fp_sqr(&res, &res);
2852
+      fp_montgomery_reduce(&res, P, mp);
2857 2853
       continue;
2858 2854
     }
2859 2855
 
... ...
@@ -2865,21 +945,13 @@ int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode
2865 2865
       /* ok window is filled so square as required and multiply  */
2866 2866
       /* square first */
2867 2867
       for (x = 0; x < winsize; x++) {
2868
-        if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2869
-          goto LBL_RES;
2870
-        }
2871
-        if ((err = redux (&res, P, mp)) != MP_OKAY) {
2872
-          goto LBL_RES;
2873
-        }
2868
+        fp_sqr(&res, &res);
2869
+        fp_montgomery_reduce(&res, P, mp);
2874 2870
       }
2875 2871
 
2876 2872
       /* then multiply */
2877
-      if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
2878
-        goto LBL_RES;
2879
-      }
2880
-      if ((err = redux (&res, P, mp)) != MP_OKAY) {
2881
-        goto LBL_RES;
2882
-      }
2873
+      fp_mul(&res, &M[bitbuf], &res);
2874
+      fp_montgomery_reduce(&res, P, mp);
2883 2875
 
2884 2876
       /* empty window and reset */
2885 2877
       bitcpy = 0;
... ...
@@ -2892,1992 +964,1331 @@ int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode
2892 2892
   if (mode == 2 && bitcpy > 0) {
2893 2893
     /* square then multiply if the bit is set */
2894 2894
     for (x = 0; x < bitcpy; x++) {
2895
-      if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2896
-        goto LBL_RES;
2897
-      }
2898
-      if ((err = redux (&res, P, mp)) != MP_OKAY) {
2899
-        goto LBL_RES;
2900
-      }
2895
+      fp_sqr(&res, &res);
2896
+      fp_montgomery_reduce(&res, P, mp);
2901 2897
 
2902 2898
       /* get next bit of the window */
2903 2899
       bitbuf <<= 1;
2904 2900
       if ((bitbuf & (1 << winsize)) != 0) {
2905 2901
         /* then multiply */
2906
-        if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
2907
-          goto LBL_RES;
2908
-        }
2909
-        if ((err = redux (&res, P, mp)) != MP_OKAY) {
2910
-          goto LBL_RES;
2911
-        }
2902
+        fp_mul(&res, &M[1], &res);
2903
+        fp_montgomery_reduce(&res, P, mp);
2912 2904
       }
2913 2905
     }
2914 2906
   }
2915 2907
 
2916
-  if (redmode == 0) {
2917
-     /* fixup result if Montgomery reduction is used
2918
-      * recall that any value in a Montgomery system is
2919
-      * actually multiplied by R mod n.  So we have
2920
-      * to reduce one more time to cancel out the factor
2921
-      * of R.
2922
-      */
2923
-     if ((err = redux(&res, P, mp)) != MP_OKAY) {
2924
-       goto LBL_RES;
2925
-     }
2926
-  }
2908
+  /* fixup result if Montgomery reduction is used
2909
+   * recall that any value in a Montgomery system is
2910
+   * actually multiplied by R mod n.  So we have
2911
+   * to reduce one more time to cancel out the factor
2912
+   * of R.
2913
+   */
2914
+  fp_montgomery_reduce(&res, P, mp);
2927 2915
 
2928 2916
   /* swap res with Y */
2929
-  mp_exch (&res, Y);
2930
-  err = MP_OKAY;
2931
-LBL_RES:mp_clear (&res);
2932
-LBL_M:
2933
-  mp_clear(&M[1]);
2934
-  for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
2935
-    mp_clear (&M[x]);
2936
-  }
2937
-  return err;
2917
+  fp_copy (&res, Y);
2918
+  return FP_OKAY;
2938 2919
 }
2939
-#endif
2940
-
2941
-
2942
-/* $Source: /cvs/libtom/libtommath/bn_mp_exptmod_fast.c,v $ */
2943
-/* $Revision: 1.3 $ */
2944
-/* $Date: 2006/03/31 14:18:44 $ */
2945 2920
 
2946
-/* End: bn_mp_exptmod_fast.c */
2921
+#endif
2947 2922
 
2948
-/* Start: bn_mp_exteuclid.c */
2949
-#include <bignum.h>
2950
-#ifdef BN_MP_EXTEUCLID_C
2951
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
2952
- *
2953
- * LibTomMath is a library that provides multiple-precision
2954
- * integer arithmetic as well as number theoretic functionality.
2955
- *
2956
- * The library was designed directly after the MPI library by
2957
- * Michael Fromberger but has been written from scratch with
2958
- * additional optimizations in place.
2959
- *
2960
- * The library is free for all purposes without any express
2961
- * guarantee it works.
2962
- *
2963
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
2964
- */
2965 2923
 
2966
-/* Extended euclidean algorithm of (a, b) produces 
2967
-   a*u1 + b*u2 = u3
2968
- */
2969
-int mp_exteuclid(mp_int *a, mp_int *b, mp_int *U1, mp_int *U2, mp_int *U3)
2924
+int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
2970 2925
 {
2971
-   mp_int u1,u2,u3,v1,v2,v3,t1,t2,t3,q,tmp;
2972
-   int err;
2973
-
2974
-   if ((err = mp_init_multi(&u1, &u2, &u3, &v1, &v2, &v3, &t1, &t2, &t3, &q, &tmp, NULL)) != MP_OKAY) {
2975
-      return err;
2976
-   }
2977
-
2978
-   /* initialize, (u1,u2,u3) = (1,0,a) */
2979
-   mp_set(&u1, 1);
2980
-   if ((err = mp_copy(a, &u3)) != MP_OKAY)                                        { goto _ERR; }
2981
-
2982
-   /* initialize, (v1,v2,v3) = (0,1,b) */
2983
-   mp_set(&v2, 1);
2984
-   if ((err = mp_copy(b, &v3)) != MP_OKAY)                                        { goto _ERR; }
2985
-
2986
-   /* loop while v3 != 0 */
2987
-   while (mp_iszero(&v3) == MP_NO) {
2988
-       /* q = u3/v3 */
2989
-       if ((err = mp_div(&u3, &v3, &q, NULL)) != MP_OKAY)                         { goto _ERR; }
2990
-
2991
-       /* (t1,t2,t3) = (u1,u2,u3) - (v1,v2,v3)q */
2992
-       if ((err = mp_mul(&v1, &q, &tmp)) != MP_OKAY)                              { goto _ERR; }
2993
-       if ((err = mp_sub(&u1, &tmp, &t1)) != MP_OKAY)                             { goto _ERR; }
2994
-       if ((err = mp_mul(&v2, &q, &tmp)) != MP_OKAY)                              { goto _ERR; }
2995
-       if ((err = mp_sub(&u2, &tmp, &t2)) != MP_OKAY)                             { goto _ERR; }
2996
-       if ((err = mp_mul(&v3, &q, &tmp)) != MP_OKAY)                              { goto _ERR; }
2997
-       if ((err = mp_sub(&u3, &tmp, &t3)) != MP_OKAY)                             { goto _ERR; }
2998
-
2999
-       /* (u1,u2,u3) = (v1,v2,v3) */
3000
-       if ((err = mp_copy(&v1, &u1)) != MP_OKAY)                                  { goto _ERR; }
3001
-       if ((err = mp_copy(&v2, &u2)) != MP_OKAY)                                  { goto _ERR; }
3002
-       if ((err = mp_copy(&v3, &u3)) != MP_OKAY)                                  { goto _ERR; }
3003
-
3004
-       /* (v1,v2,v3) = (t1,t2,t3) */
3005
-       if ((err = mp_copy(&t1, &v1)) != MP_OKAY)                                  { goto _ERR; }
3006
-       if ((err = mp_copy(&t2, &v2)) != MP_OKAY)                                  { goto _ERR; }
3007
-       if ((err = mp_copy(&t3, &v3)) != MP_OKAY)                                  { goto _ERR; }
3008
-   }
3009
-
3010
-   /* make sure U3 >= 0 */
3011
-   if (u3.sign == MP_NEG) {
3012
-      mp_neg(&u1, &u1);
3013
-      mp_neg(&u2, &u2);
3014
-      mp_neg(&u3, &u3);
2926
+   fp_int tmp;
2927
+   int    err;
2928
+#define TFM_CHECK
2929
+#ifdef TFM_CHECK
2930
+   /* prevent overflows */
2931
+   if (P->used > (FP_SIZE/2)) {
2932
+      return FP_VAL;
3015 2933
    }
3016
-
3017
-   /* copy result out */
3018
-   if (U1 != NULL) { mp_exch(U1, &u1); }
3019
-   if (U2 != NULL) { mp_exch(U2, &u2); }
3020
-   if (U3 != NULL) { mp_exch(U3, &u3); }
3021
-
3022
-   err = MP_OKAY;
3023
-_ERR: mp_clear_multi(&u1, &u2, &u3, &v1, &v2, &v3, &t1, &t2, &t3, &q, &tmp, NULL);
3024
-   return err;
3025
-}
3026 2934
 #endif
3027 2935
 
3028
-/* $Source: /cvs/libtom/libtommath/bn_mp_exteuclid.c,v $ */
3029
-/* $Revision: 1.3 $ */
3030
-/* $Date: 2006/03/31 14:18:44 $ */
3031
-
3032
-/* End: bn_mp_exteuclid.c */
3033
-
3034
-/* Start: bn_mp_fread.c */
3035
-#include <bignum.h>
3036
-#ifdef BN_MP_FREAD_C
3037
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
3038
- *
3039
- * LibTomMath is a library that provides multiple-precision
3040
- * integer arithmetic as well as number theoretic functionality.
3041
- *
3042
- * The library was designed directly after the MPI library by
3043
- * Michael Fromberger but has been written from scratch with
3044
- * additional optimizations in place.
3045
- *
3046
- * The library is free for all purposes without any express
3047
- * guarantee it works.
3048
- *
3049
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
3050
- */
3051
-
3052
-/* read a bigint from a file stream in ASCII */
3053
-int mp_fread(mp_int *a, int radix, FILE *stream)
3054
-{
3055
-   int err, ch, neg, y;
3056
-   
3057
-   /* clear a */
3058
-   mp_zero(a);
3059
-   
3060
-   /* if first digit is - then set negative */
3061
-   ch = fgetc(stream);
3062
-   if (ch == '-') {
3063
-      neg = MP_NEG;
3064
-      ch = fgetc(stream);
3065
-   } else {
3066
-      neg = MP_ZPOS;
3067
-   }
3068
-   
3069
-   for (;;) {
3070
-      /* find y in the radix map */
3071
-      for (y = 0; y < radix; y++) {
3072
-          if (mp_s_rmap[y] == ch) {
3073
-             break;
3074
-          }
3075
-      }
3076
-      if (y == radix) {
3077
-         break;
3078
-      }
3079
-      
3080
-      /* shift up and add */
3081
-      if ((err = mp_mul_d(a, radix, a)) != MP_OKAY) {
2936
+   /* is X negative?  */
2937
+   if (X->sign == FP_NEG) {
2938
+      /* yes, copy G and invmod it */
2939
+      fp_copy(G, &tmp);
2940
+      if ((err = fp_invmod(&tmp, P, &tmp)) != FP_OKAY) {
3082 2941
          return err;
3083 2942
       }
3084
-      if ((err = mp_add_d(a, y, a)) != MP_OKAY) {
3085
-         return err;
2943
+      X->sign = FP_ZPOS;
2944
+      err =  _fp_exptmod(&tmp, X, P, Y);
2945
+      if (X != Y) {
2946
+         X->sign = FP_NEG;
3086 2947
       }
3087
-      
3088
-      ch = fgetc(stream);
3089
-   }
3090
-   if (mp_cmp_d(a, 0) != MP_EQ) {
3091
-      a->sign = neg;
2948
+      return err;
2949
+   } else {
2950
+      /* Positive exponent so just exptmod */
2951
+      return _fp_exptmod(G, X, P, Y);
3092 2952
    }
3093
-   
3094
-   return MP_OKAY;
3095 2953
 }
3096 2954
 
3097
-#endif
3098
-
3099
-/* $Source: /cvs/libtom/libtommath/bn_mp_fread.c,v $ */
3100
-/* $Revision: 1.3 $ */
3101
-/* $Date: 2006/03/31 14:18:44 $ */
2955
+/* $Source: /cvs/libtom/tomsfastmath/src/exptmod/fp_exptmod.c,v $ */
2956
+/* $Revision: 1.1 $ */
2957
+/* $Date: 2006/12/31 21:25:53 $ */
3102 2958
 
3103
-/* End: bn_mp_fread.c */
2959
+/* End: fp_exptmod.c */
3104 2960
 
3105
-/* Start: bn_mp_fwrite.c */
3106
-#include <bignum.h>
3107
-#ifdef BN_MP_FWRITE_C
3108
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
3109
- *
3110
- * LibTomMath is a library that provides multiple-precision
3111
- * integer arithmetic as well as number theoretic functionality.
3112
- *
3113
- * The library was designed directly after the MPI library by
3114
- * Michael Fromberger but has been written from scratch with
3115
- * additional optimizations in place.
3116
- *
3117
- * The library is free for all purposes without any express
3118
- * guarantee it works.
2961
+/* Start: fp_gcd.c */
2962
+/* TomsFastMath, a fast ISO C bignum library.
2963
+ * 
2964
+ * This project is meant to fill in where LibTomMath
2965
+ * falls short.  That is speed ;-)
3119 2966
  *
3120
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
2967
+ * This project is public domain and free for all purposes.
2968
+ * 
2969
+ * Tom St Denis, tomstdenis@gmail.com
3121 2970
  */
2971
+#include "bignum_fast.h"
3122 2972
 
3123
-int mp_fwrite(mp_int *a, int radix, FILE *stream)
2973
+/* c = (a, b) */
2974
+void fp_gcd(fp_int *a, fp_int *b, fp_int *c)
3124 2975
 {
3125
-   char *buf;
3126
-   int err, len, x;
3127
-   
3128
-   if ((err = mp_radix_size(a, radix, &len)) != MP_OKAY) {
3129
-      return err;
2976
+   fp_int u, v, r;
2977
+
2978
+   /* either zero than gcd is the largest */
2979
+   if (fp_iszero (a) == 1 && fp_iszero (b) == 0) {
2980
+     fp_abs (b, c);
2981
+     return;
2982
+   }
2983
+   if (fp_iszero (a) == 0 && fp_iszero (b) == 1) {
2984
+     fp_abs (a, c);
2985
+     return;
3130 2986
    }
3131 2987
 
3132
-   buf = OPT_CAST(char) cli_malloc (len);
3133
-   if (buf == NULL) {
3134
-      return MP_MEM;
2988
+   /* optimized.  At this point if a == 0 then
2989
+    * b must equal zero too
2990
+    */
2991
+   if (fp_iszero (a) == 1) {
2992
+     fp_zero(c);
2993
+     return;
3135 2994
    }
3136
-   
3137
-   if ((err = mp_toradix(a, buf, radix)) != MP_OKAY) {
3138
-      free (buf);
3139
-      return err;
2995
+
2996
+   /* sort inputs */
2997
+   if (fp_cmp_mag(a, b) != FP_LT) {
2998
+      fp_init_copy(&u, a);
2999
+      fp_init_copy(&v, b);
3000
+   } else {
3001
+      fp_init_copy(&u, b);
3002
+      fp_init_copy(&v, a);
3140 3003
    }
3141
-   
3142
-   for (x = 0; x < len; x++) {
3143
-       if (fputc(buf[x], stream) == EOF) {
3144
-          free (buf);
3145
-          return MP_VAL;
3146
-       }
3004
+ 
3005
+   fp_zero(&r);
3006
+   while (fp_iszero(&v) == FP_NO) {
3007
+      fp_mod(&u, &v, &r);
3008
+      fp_copy(&v, &u);
3009
+      fp_copy(&r, &v);
3147 3010
    }
3148
-   
3149
-   free (buf);
3150
-   return MP_OKAY;
3011
+   fp_copy(&u, c);
3151 3012
 }
3152 3013
 
3153
-#endif
3154
-
3155
-/* $Source: /cvs/libtom/libtommath/bn_mp_fwrite.c,v $ */
3156
-/* $Revision: 1.3 $ */
3157
-/* $Date: 2006/03/31 14:18:44 $ */
3014
+/* $Source: /cvs/libtom/tomsfastmath/src/numtheory/fp_gcd.c,v $ */
3015
+/* $Revision: 1.1 $ */
3016
+/* $Date: 2007/01/24 21:25:19 $ */
3158 3017
 
3159
-/* End: bn_mp_fwrite.c */
3018
+/* End: fp_gcd.c */
3160 3019
 
3161
-/* Start: bn_mp_gcd.c */
3162
-#include <bignum.h>
3163
-#ifdef BN_MP_GCD_C
3164
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
3165
- *
3166
- * LibTomMath is a library that provides multiple-precision
3167
- * integer arithmetic as well as number theoretic functionality.
3168
- *
3169
- * The library was designed directly after the MPI library by
3170
- * Michael Fromberger but has been written from scratch with
3171
- * additional optimizations in place.
3172
- *
3173
- * The library is free for all purposes without any express
3174
- * guarantee it works.
3020
+/* Start: fp_ident.c */
3021
+/* TomsFastMath, a fast ISO C bignum library.
3022
+ * 
3023
+ * This project is meant to fill in where LibTomMath
3024
+ * falls short.  That is speed ;-)
3175 3025
  *
3176
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
3026
+ * This project is public domain and free for all purposes.
3027
+ * 
3028
+ * Tom St Denis, tomstdenis@gmail.com
3177 3029
  */
3030
+#include "tfm.h"
3178 3031
 
3179
-/* Greatest Common Divisor using the binary method */
3180
-int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
3032
+const char *fp_ident(void)
3181 3033
 {
3182
-  mp_int  u, v;
3183
-  int     k, u_lsb, v_lsb, res;
3184
-
3185
-  /* either zero than gcd is the largest */
3186
-  if (mp_iszero (a) == MP_YES) {
3187
-    return mp_abs (b, c);
3188
-  }
3189
-  if (mp_iszero (b) == MP_YES) {
3190
-    return mp_abs (a, c);
3191
-  }
3192
-
3193
-  /* get copies of a and b we can modify */
3194
-  if ((res = mp_init_copy (&u, a)) != MP_OKAY) {
3195
-    return res;
3196
-  }
3034
+   static char buf[1024];
3197 3035
 
3198
-  if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
3199
-    goto LBL_U;
3200
-  }
3201
-
3202
-  /* must be positive for the remainder of the algorithm */
3203
-  u.sign = v.sign = MP_ZPOS;
3204
-
3205
-  /* B1.  Find the common power of two for u and v */
3206
-  u_lsb = mp_cnt_lsb(&u);
3207
-  v_lsb = mp_cnt_lsb(&v);
3208
-  k     = MIN(u_lsb, v_lsb);
3209
-
3210
-  if (k > 0) {
3211
-     /* divide the power of two out */
3212
-     if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
3213
-        goto LBL_V;
3214
-     }
3215
-
3216
-     if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
3217
-        goto LBL_V;
3218
-     }
3219
-  }
3220
-
3221
-  /* divide any remaining factors of two out */
3222
-  if (u_lsb != k) {
3223
-     if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
3224
-        goto LBL_V;
3225
-     }
3226
-  }
3227
-
3228
-  if (v_lsb != k) {
3229
-     if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
3230
-        goto LBL_V;
3231
-     }
3232
-  }
3233
-
3234
-  while (mp_iszero(&v) == 0) {
3235
-     /* make sure v is the largest */
3236
-     if (mp_cmp_mag(&u, &v) == MP_GT) {
3237
-        /* swap u and v to make sure v is >= u */
3238
-        mp_exch(&u, &v);
3239
-     }
3240
-     
3241
-     /* subtract smallest from largest */
3242
-     if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
3243
-        goto LBL_V;
3244
-     }
3245
-     
3246
-     /* Divide out all factors of two */
3247
-     if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
3248
-        goto LBL_V;
3249
-     } 
3250
-  } 
3251
-
3252
-  /* multiply by 2**k which we divided out at the beginning */
3253
-  if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
3254
-     goto LBL_V;
3255
-  }
3256
-  c->sign = MP_ZPOS;
3257
-  res = MP_OKAY;
3258
-LBL_V:mp_clear (&u);
3259
-LBL_U:mp_clear (&v);
3260
-  return res;
3261
-}
3036
+   memset(buf, 0, sizeof(buf));
3037
+   snprintf(buf, sizeof(buf)-1,
3038
+"TomsFastMath (%s)\n"
3039
+"\n"
3040
+"Sizeofs\n"
3041
+"\tfp_digit = %u\n"
3042
+"\tfp_word  = %u\n"
3043
+"\n"
3044
+"FP_MAX_SIZE = %u\n"
3045
+"\n"
3046
+"Defines: \n"
3047
+#ifdef __i386__
3048
+" __i386__ "
3262 3049
 #endif
3263
-
3264
-/* $Source: /cvs/libtom/libtommath/bn_mp_gcd.c,v $ */
3265
-/* $Revision: 1.4 $ */
3266
-/* $Date: 2006/03/31 14:18:44 $ */
3267
-
3268
-/* End: bn_mp_gcd.c */
3269
-
3270
-/* Start: bn_mp_get_int.c */
3271
-#include <bignum.h>
3272
-#ifdef BN_MP_GET_INT_C
3273
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
3274
- *
3275
- * LibTomMath is a library that provides multiple-precision
3276
- * integer arithmetic as well as number theoretic functionality.
3277
- *
3278
- * The library was designed directly after the MPI library by
3279
- * Michael Fromberger but has been written from scratch with
3280
- * additional optimizations in place.
3281
- *
3282
- * The library is free for all purposes without any express
3283
- * guarantee it works.
3284
- *
3285
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
3286
- */
3287
-
3288
-/* get the lower 32-bits of an mp_int */
3289
-unsigned long mp_get_int(mp_int * a) 
3290
-{
3291
-  int i;
3292
-  unsigned long res;
3293
-
3294
-  if (a->used == 0) {
3295
-     return 0;
3296
-  }
3297
-
3298
-  /* get number of digits of the lsb we have to read */
3299
-  i = MIN(a->used,(int)((sizeof(unsigned long)*CHAR_BIT+DIGIT_BIT-1)/DIGIT_BIT))-1;
3300
-
3301
-  /* get most significant digit of result */
3302
-  res = DIGIT(a,i);
3303
-   
3304
-  while (--i >= 0) {
3305
-    res = (res << DIGIT_BIT) | DIGIT(a,i);
3306
-  }
3307
-
3308
-  /* force result to 32-bits always so it is consistent on non 32-bit platforms */
3309
-  return res & 0xFFFFFFFFUL;
3310
-}
3050
+#ifdef __x86_64__
3051
+" __x86_64__ "
3311 3052
 #endif
3312
-
3313
-/* $Source: /cvs/libtom/libtommath/bn_mp_get_int.c,v $ */
3314
-/* $Revision: 1.3 $ */
3315
-/* $Date: 2006/03/31 14:18:44 $ */
3316
-
3317
-/* End: bn_mp_get_int.c */
3318
-
3319
-/* Start: bn_mp_grow.c */
3320
-#include <bignum.h>
3321
-#ifdef BN_MP_GROW_C
3322
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
3323
- *
3324
- * LibTomMath is a library that provides multiple-precision
3325
- * integer arithmetic as well as number theoretic functionality.
3326
- *
3327
- * The library was designed directly after the MPI library by
3328
- * Michael Fromberger but has been written from scratch with
3329
- * additional optimizations in place.
3330
- *
3331
- * The library is free for all purposes without any express
3332
- * guarantee it works.
3333
- *
3334
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
3335
- */
3336
-
3337
-/* grow as required */
3338
-int mp_grow (mp_int * a, int size)
3339
-{
3340
-  int     i;
3341
-  mp_digit *tmp;
3342
-
3343
-  /* if the alloc size is smaller alloc more ram */
3344
-  if (a->alloc < size) {
3345
-    /* ensure there are always at least MP_PREC digits extra on top */
3346
-    size += (MP_PREC * 2) - (size % MP_PREC);
3347
-
3348
-    /* reallocate the array a->dp
3349
-     *
3350
-     * We store the return in a temporary variable
3351
-     * in case the operation failed we don't want
3352
-     * to overwrite the dp member of a.
3353
-     */
3354
-    tmp = OPT_CAST(mp_digit) cli_realloc (a->dp, sizeof (mp_digit) * size);
3355
-    if (tmp == NULL) {
3356
-      /* reallocation failed but "a" is still valid [can be freed] */
3357
-      return MP_MEM;
3358
-    }
3359
-
3360
-    /* reallocation succeeded so set a->dp */
3361
-    a->dp = tmp;
3362
-
3363
-    /* zero excess digits */
3364
-    i        = a->alloc;
3365
-    a->alloc = size;
3366
-    for (; i < a->alloc; i++) {
3367
-      a->dp[i] = 0;
3368
-    }
3369
-  }
3370
-  return MP_OKAY;
3371
-}
3053
+#ifdef TFM_X86
3054
+" TFM_X86 "
3372 3055
 #endif
3373
-
3374
-/* $Source: /cvs/libtom/libtommath/bn_mp_grow.c,v $ */
3375
-/* $Revision: 1.3 $ */
3376
-/* $Date: 2006/03/31 14:18:44 $ */
3377
-
3378
-/* End: bn_mp_grow.c */
3379
-
3380
-/* Start: bn_mp_init.c */
3381
-#include <bignum.h>
3382
-#ifdef BN_MP_INIT_C
3383
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
3384
- *
3385
- * LibTomMath is a library that provides multiple-precision
3386
- * integer arithmetic as well as number theoretic functionality.
3387
- *
3388
- * The library was designed directly after the MPI library by
3389
- * Michael Fromberger but has been written from scratch with
3390
- * additional optimizations in place.
3391
- *
3392
- * The library is free for all purposes without any express
3393
- * guarantee it works.
3394
- *
3395
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
3396
- */
3397
-
3398
-/* init a new mp_int */
3399
-int mp_init (mp_int * a)
3400
-{
3401
-  int i;
3402
-
3403
-  /* allocate memory required and clear it */
3404
-  a->dp = OPT_CAST(mp_digit) cli_malloc (sizeof (mp_digit) * MP_PREC);
3405
-  if (a->dp == NULL) {
3406
-    return MP_MEM;
3407
-  }
3408
-
3409
-  /* set the digits to zero */
3410
-  for (i = 0; i < MP_PREC; i++) {
3411
-      a->dp[i] = 0;
3412
-  }
3413
-
3414
-  /* set the used to zero, allocated digits to the default precision
3415
-   * and sign to positive */
3416
-  a->used  = 0;
3417
-  a->alloc = MP_PREC;
3418
-  a->sign  = MP_ZPOS;
3419
-
3420
-  return MP_OKAY;
3421
-}
3056
+#ifdef TFM_X86_64
3057
+" TFM_X86_64 "
3422 3058
 #endif
3423
-
3424
-/* $Source: /cvs/libtom/libtommath/bn_mp_init.c,v $ */
3425
-/* $Revision: 1.3 $ */
3426
-/* $Date: 2006/03/31 14:18:44 $ */
3427
-
3428
-/* End: bn_mp_init.c */
3429
-
3430
-/* Start: bn_mp_init_copy.c */
3431
-#include <bignum.h>
3432
-#ifdef BN_MP_INIT_COPY_C
3433
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
3434
- *
3435
- * LibTomMath is a library that provides multiple-precision
3436
- * integer arithmetic as well as number theoretic functionality.
3437
- *
3438
- * The library was designed directly after the MPI library by
3439
- * Michael Fromberger but has been written from scratch with
3440
- * additional optimizations in place.
3441
- *
3442
- * The library is free for all purposes without any express
3443
- * guarantee it works.
3444
- *
3445
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
3446
- */
3447
-
3448
-/* creates "a" then copies b into it */
3449
-int mp_init_copy (mp_int * a, mp_int * b)
3450
-{
3451
-  int     res;
3452
-
3453
-  if ((res = mp_init (a)) != MP_OKAY) {
3454
-    return res;
3455
-  }
3456
-  return mp_copy (b, a);
3457
-}
3059
+#ifdef TFM_SSE2
3060
+" TFM_SSE2 "
3458 3061
 #endif
3459
-
3460
-/* $Source: /cvs/libtom/libtommath/bn_mp_init_copy.c,v $ */
3461
-/* $Revision: 1.3 $ */
3462
-/* $Date: 2006/03/31 14:18:44 $ */
3463
-
3464
-/* End: bn_mp_init_copy.c */
3465
-
3466
-/* Start: bn_mp_init_multi.c */
3467
-#include <bignum.h>
3468
-#ifdef BN_MP_INIT_MULTI_C
3469
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
3470
- *
3471
- * LibTomMath is a library that provides multiple-precision
3472
- * integer arithmetic as well as number theoretic functionality.
3473
- *
3474
- * The library was designed directly after the MPI library by
3475
- * Michael Fromberger but has been written from scratch with
3476
- * additional optimizations in place.
3477
- *
3478
- * The library is free for all purposes without any express
3479
- * guarantee it works.
3480
- *
3481
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
3482
- */
3483
-#include <stdarg.h>
3484
-
3485
-int mp_init_multi(mp_int *mp, ...) 
3486
-{
3487
-    mp_err res = MP_OKAY;      /* Assume ok until proven otherwise */
3488
-    int n = 0;                 /* Number of ok inits */
3489
-    mp_int* cur_arg = mp;
3490
-    va_list args;
3491
-
3492
-    va_start(args, mp);        /* init args to next argument from caller */
3493
-    while (cur_arg != NULL) {
3494
-        if (mp_init(cur_arg) != MP_OKAY) {
3495
-            /* Oops - error! Back-track and mp_clear what we already
3496
-               succeeded in init-ing, then return error.
3497
-            */
3498
-            va_list clean_args;
3499
-            
3500
-            /* end the current list */
3501
-            va_end(args);
3502
-            
3503
-            /* now start cleaning up */            
3504
-            cur_arg = mp;
3505
-            va_start(clean_args, mp);
3506
-            while (n--) {
3507
-                mp_clear(cur_arg);
3508
-                cur_arg = va_arg(clean_args, mp_int*);
3509
-            }
3510
-            va_end(clean_args);
3511
-            res = MP_MEM;
3512
-            break;
3513
-        }
3514
-        n++;
3515
-        cur_arg = va_arg(args, mp_int*);
3516
-    }
3517
-    va_end(args);
3518
-    return res;                /* Assumed ok, if error flagged above. */
3519
-}
3520
-
3062
+#ifdef TFM_ARM
3063
+" TFM_ARM "
3521 3064
 #endif
3522
-
3523
-/* $Source: /cvs/libtom/libtommath/bn_mp_init_multi.c,v $ */
3524
-/* $Revision: 1.3 $ */
3525
-/* $Date: 2006/03/31 14:18:44 $ */
3526
-
3527
-/* End: bn_mp_init_multi.c */
3528
-
3529
-/* Start: bn_mp_init_set.c */
3530
-#include <bignum.h>
3531
-#ifdef BN_MP_INIT_SET_C
3532
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
3533
- *
3534
- * LibTomMath is a library that provides multiple-precision
3535
- * integer arithmetic as well as number theoretic functionality.
3536
- *
3537
- * The library was designed directly after the MPI library by
3538
- * Michael Fromberger but has been written from scratch with
3539
- * additional optimizations in place.
3540
- *
3541
- * The library is free for all purposes without any express
3542
- * guarantee it works.
3543
- *
3544
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
3545
- */
3546
-
3547
-/* initialize and set a digit */
3548
-int mp_init_set (mp_int * a, mp_digit b)
3549
-{
3550
-  int err;
3551
-  if ((err = mp_init(a)) != MP_OKAY) {
3552
-     return err;
3553
-  }
3554
-  mp_set(a, b);
3555
-  return err;
3556
-}
3065
+#ifdef TFM_PPC32
3066
+" TFM_PPC32 "
3557 3067
 #endif
3558
-
3559
-/* $Source: /cvs/libtom/libtommath/bn_mp_init_set.c,v $ */
3560
-/* $Revision: 1.3 $ */
3561
-/* $Date: 2006/03/31 14:18:44 $ */
3562
-
3563
-/* End: bn_mp_init_set.c */
3564
-
3565
-/* Start: bn_mp_init_set_int.c */
3566
-#include <bignum.h>
3567
-#ifdef BN_MP_INIT_SET_INT_C
3568
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
3569
- *
3570
- * LibTomMath is a library that provides multiple-precision
3571
- * integer arithmetic as well as number theoretic functionality.
3572
- *
3573
- * The library was designed directly after the MPI library by
3574
- * Michael Fromberger but has been written from scratch with
3575
- * additional optimizations in place.
3576
- *
3577
- * The library is free for all purposes without any express
3578
- * guarantee it works.
3579
- *
3580
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
3581
- */
3582
-
3583
-/* initialize and set a digit */
3584
-int mp_init_set_int (mp_int * a, unsigned long b)
3585
-{
3586
-  int err;
3587
-  if ((err = mp_init(a)) != MP_OKAY) {
3588
-     return err;
3589
-  }
3590
-  return mp_set_int(a, b);
3591
-}
3068
+#ifdef TFM_AVR32
3069
+" TFM_AVR32 "
3070
+#endif
3071
+#ifdef TFM_ECC192
3072
+" TFM_ECC192 "
3073
+#endif
3074
+#ifdef TFM_ECC224
3075
+" TFM_ECC224 "
3076
+#endif
3077
+#ifdef TFM_ECC384
3078
+" TFM_ECC384 "
3079
+#endif
3080
+#ifdef TFM_ECC521
3081
+" TFM_ECC521 "
3592 3082
 #endif
3593 3083
 
3594
-/* $Source: /cvs/libtom/libtommath/bn_mp_init_set_int.c,v $ */
3595
-/* $Revision: 1.3 $ */
3596
-/* $Date: 2006/03/31 14:18:44 $ */
3597
-
3598
-/* End: bn_mp_init_set_int.c */
3599
-
3600
-/* Start: bn_mp_init_size.c */
3601
-#include <bignum.h>
3602
-#ifdef BN_MP_INIT_SIZE_C
3603
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
3604
- *
3605
- * LibTomMath is a library that provides multiple-precision
3606
- * integer arithmetic as well as number theoretic functionality.
3607
- *
3608
- * The library was designed directly after the MPI library by
3609
- * Michael Fromberger but has been written from scratch with
3610
- * additional optimizations in place.
3611
- *
3612
- * The library is free for all purposes without any express
3613
- * guarantee it works.
3614
- *
3615
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
3616
- */
3617
-
3618
-/* init an mp_init for a given size */
3619
-int mp_init_size (mp_int * a, int size)
3620
-{
3621
-  int x;
3622
-
3623
-  /* pad size so there are always extra digits */
3624
-  size += (MP_PREC * 2) - (size % MP_PREC);	
3625
-  
3626
-  /* alloc mem */
3627
-  a->dp = OPT_CAST(mp_digit) cli_malloc (sizeof (mp_digit) * size);
3628
-  if (a->dp == NULL) {
3629
-    return MP_MEM;
3630
-  }
3631
-
3632
-  /* set the members */
3633
-  a->used  = 0;
3634
-  a->alloc = size;
3635
-  a->sign  = MP_ZPOS;
3636
-
3637
-  /* zero the digits */
3638
-  for (x = 0; x < size; x++) {
3639
-      a->dp[x] = 0;
3640
-  }
3641
-
3642
-  return MP_OKAY;
3643
-}
3084
+#ifdef TFM_NO_ASM
3085
+" TFM_NO_ASM "
3644 3086
 #endif
3087
+#ifdef FP_64BIT
3088
+" FP_64BIT "
3089
+#endif
3090
+#ifdef TFM_HUGE
3091
+" TFM_HUGE "
3092
+#endif
3093
+"\n", __DATE__, sizeof(fp_digit), sizeof(fp_word), FP_MAX_SIZE);
3645 3094
 
3646
-/* $Source: /cvs/libtom/libtommath/bn_mp_init_size.c,v $ */
3647
-/* $Revision: 1.3 $ */
3648
-/* $Date: 2006/03/31 14:18:44 $ */
3095
+   if (sizeof(fp_digit) == sizeof(fp_word)) {
3096
+      strncat(buf, "WARNING: sizeof(fp_digit) == sizeof(fp_word), this build is likely to not work properly.\n", 
3097
+              sizeof(buf)-1);
3098
+   }
3099
+   return buf;
3100
+}
3649 3101
 
3650
-/* End: bn_mp_init_size.c */
3102
+#ifdef STANDALONE
3651 3103
 
3652
-/* Start: bn_mp_invmod.c */
3653
-#include <bignum.h>
3654
-#ifdef BN_MP_INVMOD_C
3655
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
3656
- *
3657
- * LibTomMath is a library that provides multiple-precision
3658
- * integer arithmetic as well as number theoretic functionality.
3659
- *
3660
- * The library was designed directly after the MPI library by
3661
- * Michael Fromberger but has been written from scratch with
3662
- * additional optimizations in place.
3663
- *
3664
- * The library is free for all purposes without any express
3665
- * guarantee it works.
3666
- *
3667
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
3668
- */
3669
-
3670
-/* hac 14.61, pp608 */
3671
-int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
3104
+int main(void)
3672 3105
 {
3673
-  /* b cannot be negative */
3674
-  if (b->sign == MP_NEG || mp_iszero(b) == 1) {
3675
-    return MP_VAL;
3676
-  }
3677
-
3678
-#ifdef BN_FAST_MP_INVMOD_C
3679
-  /* if the modulus is odd we can use a faster routine instead */
3680
-  if (mp_isodd (b) == 1) {
3681
-    return fast_mp_invmod (a, b, c);
3682
-  }
3683
-#endif
3106
+   printf("%s\n", fp_ident());
3107
+   return 0;
3108
+}
3684 3109
 
3685
-#ifdef BN_MP_INVMOD_SLOW_C
3686
-  return mp_invmod_slow(a, b, c);
3687 3110
 #endif
3688 3111
 
3689
-  return MP_VAL;
3690
-}
3691
-#endif
3692 3112
 
3693
-/* $Source: /cvs/libtom/libtommath/bn_mp_invmod.c,v $ */
3694
-/* $Revision: 1.3 $ */
3695
-/* $Date: 2006/03/31 14:18:44 $ */
3113
+/* $Source: /cvs/libtom/tomsfastmath/src/misc/fp_ident.c,v $ */
3114
+/* $Revision: 1.1 $ */
3115
+/* $Date: 2006/12/31 21:25:53 $ */
3696 3116
 
3697
-/* End: bn_mp_invmod.c */
3117
+/* End: fp_ident.c */
3698 3118
 
3699
-/* Start: bn_mp_invmod_slow.c */
3700
-#include <bignum.h>
3701
-#ifdef BN_MP_INVMOD_SLOW_C
3702
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
3703
- *
3704
- * LibTomMath is a library that provides multiple-precision
3705
- * integer arithmetic as well as number theoretic functionality.
3706
- *
3707
- * The library was designed directly after the MPI library by
3708
- * Michael Fromberger but has been written from scratch with
3709
- * additional optimizations in place.
3710
- *
3711
- * The library is free for all purposes without any express
3712
- * guarantee it works.
3119
+/* Start: fp_invmod.c */
3120
+/* TomsFastMath, a fast ISO C bignum library.
3121
+ * 
3122
+ * This project is meant to fill in where LibTomMath
3123
+ * falls short.  That is speed ;-)
3713 3124
  *
3714
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
3125
+ * This project is public domain and free for all purposes.
3126
+ * 
3127
+ * Tom St Denis, tomstdenis@gmail.com
3715 3128
  */
3129
+#include "bignum_fast.h"
3716 3130
 
3717
-/* hac 14.61, pp608 */
3718
-int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
3131
+static int fp_invmod_slow (fp_int * a, fp_int * b, fp_int * c)
3719 3132
 {
3720
-  mp_int  x, y, u, v, A, B, C, D;
3133
+  fp_int  x, y, u, v, A, B, C, D;
3721 3134
   int     res;
3722 3135
 
3723 3136
   /* b cannot be negative */
3724
-  if (b->sign == MP_NEG || mp_iszero(b) == 1) {
3725
-    return MP_VAL;
3137
+  if (b->sign == FP_NEG || fp_iszero(b) == 1) {
3138
+    return FP_VAL;
3726 3139
   }
3727 3140
 
3728 3141
   /* init temps */
3729
-  if ((res = mp_init_multi(&x, &y, &u, &v, 
3730
-                           &A, &B, &C, &D, NULL)) != MP_OKAY) {
3731
-     return res;
3732
-  }
3142
+  fp_init(&x);    fp_init(&y);
3143
+  fp_init(&u);    fp_init(&v);
3144
+  fp_init(&A);    fp_init(&B);
3145
+  fp_init(&C);    fp_init(&D);
3733 3146
 
3734 3147
   /* x = a, y = b */
3735
-  if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
3736
-      goto LBL_ERR;
3737
-  }
3738
-  if ((res = mp_copy (b, &y)) != MP_OKAY) {
3739
-    goto LBL_ERR;
3148
+  if ((res = fp_mod(a, b, &x)) != FP_OKAY) {
3149
+      return res;
3740 3150
   }
3151
+  fp_copy(b, &y);
3741 3152
 
3742 3153
   /* 2. [modified] if x,y are both even then return an error! */
3743
-  if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
3744
-    res = MP_VAL;
3745
-    goto LBL_ERR;
3154
+  if (fp_iseven (&x) == 1 && fp_iseven (&y) == 1) {
3155
+    return FP_VAL;
3746 3156
   }
3747 3157
 
3748 3158
   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
3749
-  if ((res = mp_copy (&x, &u)) != MP_OKAY) {
3750
-    goto LBL_ERR;
3751
-  }
3752
-  if ((res = mp_copy (&y, &v)) != MP_OKAY) {
3753
-    goto LBL_ERR;
3754
-  }
3755
-  mp_set (&A, 1);
3756
-  mp_set (&D, 1);
3159
+  fp_copy (&x, &u);
3160
+  fp_copy (&y, &v);
3161
+  fp_set (&A, 1);
3162
+  fp_set (&D, 1);
3757 3163
 
3758 3164
 top:
3759 3165
   /* 4.  while u is even do */
3760
-  while (mp_iseven (&u) == 1) {
3166
+  while (fp_iseven (&u) == 1) {
3761 3167
     /* 4.1 u = u/2 */
3762
-    if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
3763
-      goto LBL_ERR;
3764
-    }
3168
+    fp_div_2 (&u, &u);
3169
+
3765 3170
     /* 4.2 if A or B is odd then */
3766
-    if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) {
3171
+    if (fp_isodd (&A) == 1 || fp_isodd (&B) == 1) {
3767 3172
       /* A = (A+y)/2, B = (B-x)/2 */
3768
-      if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
3769
-         goto LBL_ERR;
3770
-      }
3771
-      if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
3772
-         goto LBL_ERR;
3773
-      }
3173
+      fp_add (&A, &y, &A);
3174
+      fp_sub (&B, &x, &B);
3774 3175
     }
3775 3176
     /* A = A/2, B = B/2 */
3776
-    if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
3777
-      goto LBL_ERR;
3778
-    }
3779
-    if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
3780
-      goto LBL_ERR;
3781
-    }
3177
+    fp_div_2 (&A, &A);
3178
+    fp_div_2 (&B, &B);
3782 3179
   }
3783 3180
 
3784 3181
   /* 5.  while v is even do */
3785
-  while (mp_iseven (&v) == 1) {
3182
+  while (fp_iseven (&v) == 1) {
3786 3183
     /* 5.1 v = v/2 */
3787
-    if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
3788
-      goto LBL_ERR;
3789
-    }
3184
+    fp_div_2 (&v, &v);
3185
+
3790 3186
     /* 5.2 if C or D is odd then */
3791
-    if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) {
3187
+    if (fp_isodd (&C) == 1 || fp_isodd (&D) == 1) {
3792 3188
       /* C = (C+y)/2, D = (D-x)/2 */
3793
-      if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
3794
-         goto LBL_ERR;
3795
-      }
3796
-      if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
3797
-         goto LBL_ERR;
3798
-      }
3189
+      fp_add (&C, &y, &C);
3190
+      fp_sub (&D, &x, &D);
3799 3191
     }
3800 3192
     /* C = C/2, D = D/2 */
3801
-    if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
3802
-      goto LBL_ERR;
3803
-    }
3804
-    if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
3805
-      goto LBL_ERR;
3806
-    }
3193
+    fp_div_2 (&C, &C);
3194
+    fp_div_2 (&D, &D);
3807 3195
   }
3808 3196
 
3809 3197
   /* 6.  if u >= v then */
3810
-  if (mp_cmp (&u, &v) != MP_LT) {
3198
+  if (fp_cmp (&u, &v) != FP_LT) {
3811 3199
     /* u = u - v, A = A - C, B = B - D */
3812
-    if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
3813
-      goto LBL_ERR;
3814
-    }
3815
-
3816
-    if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
3817
-      goto LBL_ERR;
3818
-    }
3819
-
3820
-    if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
3821
-      goto LBL_ERR;
3822
-    }
3200
+    fp_sub (&u, &v, &u);
3201
+    fp_sub (&A, &C, &A);
3202
+    fp_sub (&B, &D, &B);
3823 3203
   } else {
3824 3204
     /* v = v - u, C = C - A, D = D - B */
3825
-    if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
3826
-      goto LBL_ERR;
3827
-    }
3828
-
3829
-    if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
3830
-      goto LBL_ERR;
3831
-    }
3832
-
3833
-    if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
3834
-      goto LBL_ERR;
3835
-    }
3205
+    fp_sub (&v, &u, &v);
3206
+    fp_sub (&C, &A, &C);
3207
+    fp_sub (&D, &B, &D);
3836 3208
   }
3837 3209
 
3838 3210
   /* if not zero goto step 4 */
3839
-  if (mp_iszero (&u) == 0)
3211
+  if (fp_iszero (&u) == 0)
3840 3212
     goto top;
3841 3213
 
3842 3214
   /* now a = C, b = D, gcd == g*v */
3843 3215
 
3844 3216
   /* if v != 1 then there is no inverse */
3845
-  if (mp_cmp_d (&v, 1) != MP_EQ) {
3846
-    res = MP_VAL;
3847
-    goto LBL_ERR;
3217
+  if (fp_cmp_d (&v, 1) != FP_EQ) {
3218
+    return FP_VAL;
3848 3219
   }
3849 3220
 
3850 3221
   /* if C is too low (negative), add b until it is not */
3851
-  while (mp_cmp_d(&C, 0) == MP_LT) {
3852
-      if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
3853
-         goto LBL_ERR;
3854
-      }
3222
+  while (fp_cmp_d(&C, 0) == FP_LT) {
3223
+      fp_add(&C, b, &C);
3855 3224
   }
3856 3225
   
3857 3226
   /* too big */
3858
-  while (mp_cmp_mag(&C, b) != MP_LT) {
3859
-      if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
3860
-         goto LBL_ERR;
3861
-      }
3227
+  while (fp_cmp_mag(&C, b) != FP_LT) {
3228
+      fp_sub(&C, b, &C);
3862 3229
   }
3863 3230
   
3864 3231
   /* C is now the inverse */
3865
-  mp_exch (&C, c);
3866
-  res = MP_OKAY;
3867
-LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL);
3868
-  return res;
3232
+  fp_copy(&C, c);
3233
+  return FP_OKAY;
3869 3234
 }
3870
-#endif
3871
-
3872
-/* $Source: /cvs/libtom/libtommath/bn_mp_invmod_slow.c,v $ */
3873
-/* $Revision: 1.3 $ */
3874
-/* $Date: 2006/03/31 14:18:44 $ */
3875
-
3876
-/* End: bn_mp_invmod_slow.c */
3877
-
3878
-/* Start: bn_mp_is_square.c */
3879
-#include <bignum.h>
3880
-#ifdef BN_MP_IS_SQUARE_C
3881
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
3882
- *
3883
- * LibTomMath is a library that provides multiple-precision
3884
- * integer arithmetic as well as number theoretic functionality.
3885
- *
3886
- * The library was designed directly after the MPI library by
3887
- * Michael Fromberger but has been written from scratch with
3888
- * additional optimizations in place.
3889
- *
3890
- * The library is free for all purposes without any express
3891
- * guarantee it works.
3892
- *
3893
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
3894
- */
3895
-
3896
-/* Check if remainders are possible squares - fast exclude non-squares */
3897
-static const char rem_128[128] = {
3898
- 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
3899
- 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
3900
- 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
3901
- 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
3902
- 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
3903
- 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
3904
- 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
3905
- 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1
3906
-};
3907
-
3908
-static const char rem_105[105] = {
3909
- 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
3910
- 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1,
3911
- 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
3912
- 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3913
- 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
3914
- 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1,
3915
- 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1
3916
-};
3917 3235
 
3918
-/* Store non-zero to ret if arg is square, and zero if not */
3919
-int mp_is_square(mp_int *arg,int *ret) 
3236
+/* c = 1/a (mod b); fast path for odd b, even b is handed to fp_invmod_slow */
3237
+int fp_invmod(fp_int *a, fp_int *b, fp_int *c)
3920 3238
 {
3921
-  int           res;
3922
-  mp_digit      c;
3923
-  mp_int        t;
3924
-  unsigned long r;
3239
+  fp_int  x, y, u, v, B, D;
3240
+  int     neg;
3925 3241
 
3926
-  /* Default to Non-square :) */
3927
-  *ret = MP_NO; 
3928
-
3929
-  if (arg->sign == MP_NEG) {
3930
-    return MP_VAL;
3931
-  }
3932
-
3933
-  /* digits used?  (TSD) */
3934
-  if (arg->used == 0) {
3935
-     return MP_OKAY;
3936
-  }
3937
-
3938
-  /* First check mod 128 (suppose that DIGIT_BIT is at least 7) */
3939
-  if (rem_128[127 & DIGIT(arg,0)] == 1) {
3940
-     return MP_OKAY;
3941
-  }
3942
-
3943
-  /* Next check mod 105 (3*5*7) */
3944
-  if ((res = mp_mod_d(arg,105,&c)) != MP_OKAY) {
3945
-     return res;
3946
-  }
3947
-  if (rem_105[c] == 1) {
3948
-     return MP_OKAY;
3949
-  }
3950
-
3951
-
3952
-  if ((res = mp_init_set_int(&t,11L*13L*17L*19L*23L*29L*31L)) != MP_OKAY) {
3953
-     return res;
3954
-  }
3955
-  if ((res = mp_mod(arg,&t,&t)) != MP_OKAY) {
3956
-     goto ERR;
3957
-  }
3958
-  r = mp_get_int(&t);
3959
-  /* Check for other prime modules, note it's not an ERROR but we must
3960
-   * free "t" so the easiest way is to goto ERR.  We know that res
3961
-   * is already equal to MP_OKAY from the mp_mod call 
3962
-   */ 
3963
-  if ( (1L<<(r%11)) & 0x5C4L )             goto ERR;
3964
-  if ( (1L<<(r%13)) & 0x9E4L )             goto ERR;
3965
-  if ( (1L<<(r%17)) & 0x5CE8L )            goto ERR;
3966
-  if ( (1L<<(r%19)) & 0x4F50CL )           goto ERR;
3967
-  if ( (1L<<(r%23)) & 0x7ACCA0L )          goto ERR;
3968
-  if ( (1L<<(r%29)) & 0xC2EDD0CL )         goto ERR;
3969
-  if ( (1L<<(r%31)) & 0x6DE2B848L )        goto ERR;
3970
-
3971
-  /* Final check - is sqr(sqrt(arg)) == arg ? */
3972
-  if ((res = mp_sqrt(arg,&t)) != MP_OKAY) {
3973
-     goto ERR;
3974
-  }
3975
-  if ((res = mp_sqr(&t,&t)) != MP_OKAY) {
3976
-     goto ERR;
3242
+  /* 2. [modified] b must be odd   */
3243
+  if (fp_iseven (b) == FP_YES) {
3244
+    return fp_invmod_slow(a,b,c);
3977 3245
   }
3978 3246
 
3979
-  *ret = (mp_cmp_mag(&t,arg) == MP_EQ) ? MP_YES : MP_NO;
3980
-ERR:mp_clear(&t);
3981
-  return res;
3982
-}
3983
-#endif
3984
-
3985
-/* $Source: /cvs/libtom/libtommath/bn_mp_is_square.c,v $ */
3986
-/* $Revision: 1.3 $ */
3987
-/* $Date: 2006/03/31 14:18:44 $ */
3988
-
3989
-/* End: bn_mp_is_square.c */
3247
+  /* init all our temps */
3248
+  fp_init(&x);  fp_init(&y);
3249
+  fp_init(&u);  fp_init(&v);
3250
+  fp_init(&B);  fp_init(&D);
3990 3251
 
3991
-/* Start: bn_mp_jacobi.c */
3992
-#include <bignum.h>
3993
-#ifdef BN_MP_JACOBI_C
3994
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
3995
- *
3996
- * LibTomMath is a library that provides multiple-precision
3997
- * integer arithmetic as well as number theoretic functionality.
3998
- *
3999
- * The library was designed directly after the MPI library by
4000
- * Michael Fromberger but has been written from scratch with
4001
- * additional optimizations in place.
4002
- *
4003
- * The library is free for all purposes without any express
4004
- * guarantee it works.
4005
- *
4006
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
4007
- */
3252
+  /* x == modulus, y == value to invert */
3253
+  fp_copy(b, &x);
4008 3254
 
4009
-/* computes the jacobi c = (a | n) (or Legendre if n is prime)
4010
- * HAC pp. 73 Algorithm 2.149
4011
- */
4012
-int mp_jacobi (mp_int * a, mp_int * p, int *c)
4013
-{
4014
-  mp_int  a1, p1;
4015
-  int     k, s, r, res;
4016
-  mp_digit residue;
3255
+  /* we need y = |a| */
3256
+  fp_abs(a, &y);
4017 3257
 
4018
-  /* if p <= 0 return MP_VAL */
4019
-  if (mp_cmp_d(p, 0) != MP_GT) {
4020
-     return MP_VAL;
4021
-  }
3258
+  /* 3. u=x, v=y, B=0, D=1 (this odd-modulus variant keeps no A or C) */
3259
+  fp_copy(&x, &u);
3260
+  fp_copy(&y, &v);
3261
+  fp_set (&D, 1);
4022 3262
 
4023
-  /* step 1.  if a == 0, return 0 */
4024
-  if (mp_iszero (a) == 1) {
4025
-    *c = 0;
4026
-    return MP_OKAY;
4027
-  }
3263
+top:
3264
+  /* 4.  while u is even do */
3265
+  while (fp_iseven (&u) == FP_YES) {
3266
+    /* 4.1 u = u/2 */
3267
+    fp_div_2 (&u, &u);
4028 3268
 
4029
-  /* step 2.  if a == 1, return 1 */
4030
-  if (mp_cmp_d (a, 1) == MP_EQ) {
4031
-    *c = 1;
4032
-    return MP_OKAY;
3269
+    /* 4.2 if B is odd then */
3270
+    if (fp_isodd (&B) == FP_YES) {
3271
+      fp_sub (&B, &x, &B);
3272
+    }
3273
+    /* B = B/2 */
3274
+    fp_div_2 (&B, &B);
4033 3275
   }
4034 3276
 
4035
-  /* default */
4036
-  s = 0;
3277
+  /* 5.  while v is even do */
3278
+  while (fp_iseven (&v) == FP_YES) {
3279
+    /* 5.1 v = v/2 */
3280
+    fp_div_2 (&v, &v);
4037 3281
 
4038
-  /* step 3.  write a = a1 * 2**k  */
4039
-  if ((res = mp_init_copy (&a1, a)) != MP_OKAY) {
4040
-    return res;
3282
+    /* 5.2 if D is odd then */
3283
+    if (fp_isodd (&D) == FP_YES) {
3284
+      /* D = D - x (the halving follows below) */
3285
+      fp_sub (&D, &x, &D);
3286
+    }
3287
+    /* D = D/2 */
3288
+    fp_div_2 (&D, &D);
4041 3289
   }
4042 3290
 
4043
-  if ((res = mp_init (&p1)) != MP_OKAY) {
4044
-    goto LBL_A1;
3291
+  /* 6.  if u >= v then */
3292
+  if (fp_cmp (&u, &v) != FP_LT) {
3293
+    /* u = u - v, B = B - D */
3294
+    fp_sub (&u, &v, &u);
3295
+    fp_sub (&B, &D, &B);
3296
+  } else {
3297
+    /* v = v - u, D = D - B */
3298
+    fp_sub (&v, &u, &v);
3299
+    fp_sub (&D, &B, &D);
4045 3300
   }
4046 3301
 
4047
-  /* divide out larger power of two */
4048
-  k = mp_cnt_lsb(&a1);
4049
-  if ((res = mp_div_2d(&a1, k, &a1, NULL)) != MP_OKAY) {
4050
-     goto LBL_P1;
3302
+  /* if not zero goto step 4 */
3303
+  if (fp_iszero (&u) == FP_NO) {
3304
+    goto top;
4051 3305
   }
4052 3306
 
4053
-  /* step 4.  if e is even set s=1 */
4054
-  if ((k & 1) == 0) {
4055
-    s = 1;
4056
-  } else {
4057
-    /* else set s=1 if p = 1/7 (mod 8) or s=-1 if p = 3/5 (mod 8) */
4058
-    residue = p->dp[0] & 7;
4059
-
4060
-    if (residue == 1 || residue == 7) {
4061
-      s = 1;
4062
-    } else if (residue == 3 || residue == 5) {
4063
-      s = -1;
4064
-    }
4065
-  }
3307
+  /* now a = C, b = D, gcd == g*v */
4066 3308
 
4067
-  /* step 5.  if p == 3 (mod 4) *and* a1 == 3 (mod 4) then s = -s */
4068
-  if ( ((p->dp[0] & 3) == 3) && ((a1.dp[0] & 3) == 3)) {
4069
-    s = -s;
3309
+  /* if v != 1 then there is no inverse */
3310
+  if (fp_cmp_d (&v, 1) != FP_EQ) {
3311
+    return FP_VAL;
4070 3312
   }
4071 3313
 
4072
-  /* if a1 == 1 we're done */
4073
-  if (mp_cmp_d (&a1, 1) == MP_EQ) {
4074
-    *c = s;
4075
-  } else {
4076
-    /* n1 = n mod a1 */
4077
-    if ((res = mp_mod (p, &a1, &p1)) != MP_OKAY) {
4078
-      goto LBL_P1;
4079
-    }
4080
-    if ((res = mp_jacobi (&p1, &a1, &r)) != MP_OKAY) {
4081
-      goto LBL_P1;
4082
-    }
4083
-    *c = s * r;
3314
+  /* D is now the inverse */
3315
+  neg = a->sign;
3316
+  while (D.sign == FP_NEG) {
3317
+    fp_add (&D, b, &D);
4084 3318
   }
4085
-
4086
-  /* done */
4087
-  res = MP_OKAY;
4088
-LBL_P1:mp_clear (&p1);
4089
-LBL_A1:mp_clear (&a1);
4090
-  return res;
3319
+  fp_copy (&D, c);
3320
+  c->sign = neg;
3321
+  return FP_OKAY;
4091 3322
 }
4092
-#endif
4093 3323
 
4094
-/* $Source: /cvs/libtom/libtommath/bn_mp_jacobi.c,v $ */
4095
-/* $Revision: 1.3 $ */
4096
-/* $Date: 2006/03/31 14:18:44 $ */
3324
+/* $Source: /cvs/libtom/tomsfastmath/src/numtheory/fp_invmod.c,v $ */
3325
+/* $Revision: 1.1 $ */
3326
+/* $Date: 2007/01/24 21:25:19 $ */
4097 3327
 
4098
-/* End: bn_mp_jacobi.c */
3328
+/* End: fp_invmod.c */
4099 3329
 
4100
-/* Start: bn_mp_karatsuba_mul.c */
4101
-#include <bignum.h>
4102
-#ifdef BN_MP_KARATSUBA_MUL_C
4103
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
4104
- *
4105
- * LibTomMath is a library that provides multiple-precision
4106
- * integer arithmetic as well as number theoretic functionality.
4107
- *
4108
- * The library was designed directly after the MPI library by
4109
- * Michael Fromberger but has been written from scratch with
4110
- * additional optimizations in place.
4111
- *
4112
- * The library is free for all purposes without any express
4113
- * guarantee it works.
4114
- *
4115
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
4116
- */
4117
-
4118
-/* c = |a| * |b| using Karatsuba Multiplication using 
4119
- * three half size multiplications
4120
- *
4121
- * Let B represent the radix [e.g. 2**DIGIT_BIT] and 
4122
- * let n represent half of the number of digits in 
4123
- * the min(a,b)
4124
- *
4125
- * a = a1 * B**n + a0
4126
- * b = b1 * B**n + b0
4127
- *
4128
- * Then, a * b => 
4129
-   a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0
4130
- *
4131
- * Note that a1b1 and a0b0 are used twice and only need to be 
4132
- * computed once.  So in total three half size (half # of 
4133
- * digit) multiplications are performed, a0b0, a1b1 and 
4134
- * (a1+b1)(a0+b0)
3330
+/* Start: fp_isprime.c */
3331
+/* TomsFastMath, a fast ISO C bignum library.
3332
+ * 
3333
+ * This project is meant to fill in where LibTomMath
3334
+ * falls short.  That is speed ;-)
4135 3335
  *
4136
- * Note that a multiplication of half the digits requires
4137
- * 1/4th the number of single precision multiplications so in 
4138
- * total after one call 25% of the single precision multiplications 
4139
- * are saved.  Note also that the call to mp_mul can end up back 
4140
- * in this function if the a0, a1, b0, or b1 are above the threshold.  
4141
- * This is known as divide-and-conquer and leads to the famous 
4142
- * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than 
4143
- * the standard O(N**2) that the baseline/comba methods use.  
4144
- * Generally though the overhead of this method doesn't pay off 
4145
- * until a certain size (N ~ 80) is reached.
3336
+ * This project is public domain and free for all purposes.
3337
+ * 
3338
+ * Tom St Denis, tomstdenis@gmail.com
4146 3339
  */
4147
-int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c)
4148
-{
4149
-  mp_int  x0, x1, y0, y1, t1, x0y0, x1y1;
4150
-  int     B, err;
4151
-
4152
-  /* default the return code to an error */
4153
-  err = MP_MEM;
4154
-
4155
-  /* min # of digits */
4156
-  B = MIN (a->used, b->used);
4157
-
4158
-  /* now divide in two */
4159
-  B = B >> 1;
4160
-
4161
-  /* init copy all the temps */
4162
-  if (mp_init_size (&x0, B) != MP_OKAY)
4163
-    goto ERR;
4164
-  if (mp_init_size (&x1, a->used - B) != MP_OKAY)
4165
-    goto X0;
4166
-  if (mp_init_size (&y0, B) != MP_OKAY)
4167
-    goto X1;
4168
-  if (mp_init_size (&y1, b->used - B) != MP_OKAY)
4169
-    goto Y0;
3340
+#include "bignum_fast.h"
4170 3341
 
4171
-  /* init temps */
4172
-  if (mp_init_size (&t1, B * 2) != MP_OKAY)
4173
-    goto Y1;
4174
-  if (mp_init_size (&x0y0, B * 2) != MP_OKAY)
4175
-    goto T1;
4176
-  if (mp_init_size (&x1y1, B * 2) != MP_OKAY)
4177
-    goto X0Y0;
4178
-
4179
-  /* now shift the digits */
4180
-  x0.used = y0.used = B;
4181
-  x1.used = a->used - B;
4182
-  y1.used = b->used - B;
4183
-
4184
-  {
4185
-    register int x;
4186
-    register mp_digit *tmpa, *tmpb, *tmpx, *tmpy;
4187
-
4188
-    /* we copy the digits directly instead of using higher level functions
4189
-     * since we also need to shift the digits
4190
-     */
4191
-    tmpa = a->dp;
4192
-    tmpb = b->dp;
4193
-
4194
-    tmpx = x0.dp;
4195
-    tmpy = y0.dp;
4196
-    for (x = 0; x < B; x++) {
4197
-      *tmpx++ = *tmpa++;
4198
-      *tmpy++ = *tmpb++;
4199
-    }
4200
-
4201
-    tmpx = x1.dp;
4202
-    for (x = B; x < a->used; x++) {
4203
-      *tmpx++ = *tmpa++;
4204
-    }
4205
-
4206
-    tmpy = y1.dp;
4207
-    for (x = B; x < b->used; x++) {
4208
-      *tmpy++ = *tmpb++;
4209
-    }
4210
-  }
4211
-
4212
-  /* only need to clamp the lower words since by definition the 
4213
-   * upper words x1/y1 must have a known number of digits
4214
-   */
4215
-  mp_clamp (&x0);
4216
-  mp_clamp (&y0);
4217
-
4218
-  /* now calc the products x0y0 and x1y1 */
4219
-  /* after this x0 is no longer required, free temp [x0==t2]! */
4220
-  if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY)  
4221
-    goto X1Y1;          /* x0y0 = x0*y0 */
4222
-  if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY)
4223
-    goto X1Y1;          /* x1y1 = x1*y1 */
4224
-
4225
-  /* now calc x1+x0 and y1+y0 */
4226
-  if (s_mp_add (&x1, &x0, &t1) != MP_OKAY)
4227
-    goto X1Y1;          /* t1 = x1 - x0 */
4228
-  if (s_mp_add (&y1, &y0, &x0) != MP_OKAY)
4229
-    goto X1Y1;          /* t2 = y1 - y0 */
4230
-  if (mp_mul (&t1, &x0, &t1) != MP_OKAY)
4231
-    goto X1Y1;          /* t1 = (x1 + x0) * (y1 + y0) */
4232
-
4233
-  /* add x0y0 */
4234
-  if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY)
4235
-    goto X1Y1;          /* t2 = x0y0 + x1y1 */
4236
-  if (s_mp_sub (&t1, &x0, &t1) != MP_OKAY)
4237
-    goto X1Y1;          /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */
4238
-
4239
-  /* shift by B */
4240
-  if (mp_lshd (&t1, B) != MP_OKAY)
4241
-    goto X1Y1;          /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
4242
-  if (mp_lshd (&x1y1, B * 2) != MP_OKAY)
4243
-    goto X1Y1;          /* x1y1 = x1y1 << 2*B */
4244
-
4245
-  if (mp_add (&x0y0, &t1, &t1) != MP_OKAY)
4246
-    goto X1Y1;          /* t1 = x0y0 + t1 */
4247
-  if (mp_add (&t1, &x1y1, c) != MP_OKAY)
4248
-    goto X1Y1;          /* t1 = x0y0 + t1 + x1y1 */
4249
-
4250
-  /* Algorithm succeeded set the return code to MP_OKAY */
4251
-  err = MP_OKAY;
4252
-
4253
-X1Y1:mp_clear (&x1y1);
4254
-X0Y0:mp_clear (&x0y0);
4255
-T1:mp_clear (&t1);
4256
-Y1:mp_clear (&y1);
4257
-Y0:mp_clear (&y0);
4258
-X1:mp_clear (&x1);
4259
-X0:mp_clear (&x0);
4260
-ERR:
4261
-  return err;
4262
-}
4263
-#endif
3342
+/* a few primes */
3343
+static const fp_digit primes[256] = {
3344
+  0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
3345
+  0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
3346
+  0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
3347
+  0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
3348
+  0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
3349
+  0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
3350
+  0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
3351
+  0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
4264 3352
 
4265
-/* $Source: /cvs/libtom/libtommath/bn_mp_karatsuba_mul.c,v $ */
4266
-/* $Revision: 1.5 $ */
4267
-/* $Date: 2006/03/31 14:18:44 $ */
3353
+  0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
3354
+  0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
3355
+  0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
3356
+  0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
3357
+  0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
3358
+  0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
3359
+  0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
3360
+  0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
4268 3361
 
4269
-/* End: bn_mp_karatsuba_mul.c */
3362
+  0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
3363
+  0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
3364
+  0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
3365
+  0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
3366
+  0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
3367
+  0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
3368
+  0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
3369
+  0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
4270 3370
 
4271
-/* Start: bn_mp_karatsuba_sqr.c */
4272
-#include <bignum.h>
4273
-#ifdef BN_MP_KARATSUBA_SQR_C
4274
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
4275
- *
4276
- * LibTomMath is a library that provides multiple-precision
4277
- * integer arithmetic as well as number theoretic functionality.
4278
- *
4279
- * The library was designed directly after the MPI library by
4280
- * Michael Fromberger but has been written from scratch with
4281
- * additional optimizations in place.
4282
- *
4283
- * The library is free for all purposes without any express
4284
- * guarantee it works.
4285
- *
4286
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
4287
- */
3371
+  0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
3372
+  0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
3373
+  0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
3374
+  0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
3375
+  0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
3376
+  0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
3377
+  0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
3378
+  0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
3379
+};
4288 3380
 
4289
-/* Karatsuba squaring, computes b = a*a using three 
4290
- * half size squarings
4291
- *
4292
- * See comments of karatsuba_mul for details.  It 
4293
- * is essentially the same algorithm but merely 
4294
- * tuned to perform recursive squarings.
4295
- */
4296
-int mp_karatsuba_sqr (mp_int * a, mp_int * b)
3381
+int fp_isprime(fp_int *a)
4297 3382
 {
4298
-  mp_int  x0, x1, t1, t2, x0x0, x1x1;
4299
-  int     B, err;
4300
-
4301
-  err = MP_MEM;
4302
-
4303
-  /* min # of digits */
4304
-  B = a->used;
4305
-
4306
-  /* now divide in two */
4307
-  B = B >> 1;
4308
-
4309
-  /* init copy all the temps */
4310
-  if (mp_init_size (&x0, B) != MP_OKAY)
4311
-    goto ERR;
4312
-  if (mp_init_size (&x1, a->used - B) != MP_OKAY)
4313
-    goto X0;
4314
-
4315
-  /* init temps */
4316
-  if (mp_init_size (&t1, a->used * 2) != MP_OKAY)
4317
-    goto X1;
4318
-  if (mp_init_size (&t2, a->used * 2) != MP_OKAY)
4319
-    goto T1;
4320
-  if (mp_init_size (&x0x0, B * 2) != MP_OKAY)
4321
-    goto T2;
4322
-  if (mp_init_size (&x1x1, (a->used - B) * 2) != MP_OKAY)
4323
-    goto X0X0;
4324
-
4325
-  {
4326
-    register int x;
4327
-    register mp_digit *dst, *src;
4328
-
4329
-    src = a->dp;
4330
-
4331
-    /* now shift the digits */
4332
-    dst = x0.dp;
4333
-    for (x = 0; x < B; x++) {
4334
-      *dst++ = *src++;
4335
-    }
4336
-
4337
-    dst = x1.dp;
4338
-    for (x = B; x < a->used; x++) {
4339
-      *dst++ = *src++;
4340
-    }
4341
-  }
3383
+   fp_int   b;
3384
+   fp_digit d;
3385
+   int      r, res;
3386
+
3387
+   /* do trial division */
3388
+   for (r = 0; r < 256; r++) {
3389
+       fp_mod_d(a, primes[r], &d);
3390
+       if (d == 0) {
3391
+          return FP_NO;
3392
+       }
3393
+   }
4342 3394
 
4343
-  x0.used = B;
4344
-  x1.used = a->used - B;
4345
-
4346
-  mp_clamp (&x0);
4347
-
4348
-  /* now calc the products x0*x0 and x1*x1 */
4349
-  if (mp_sqr (&x0, &x0x0) != MP_OKAY)
4350
-    goto X1X1;           /* x0x0 = x0*x0 */
4351
-  if (mp_sqr (&x1, &x1x1) != MP_OKAY)
4352
-    goto X1X1;           /* x1x1 = x1*x1 */
4353
-
4354
-  /* now calc (x1+x0)**2 */
4355
-  if (s_mp_add (&x1, &x0, &t1) != MP_OKAY)
4356
-    goto X1X1;           /* t1 = x1 - x0 */
4357
-  if (mp_sqr (&t1, &t1) != MP_OKAY)
4358
-    goto X1X1;           /* t1 = (x1 - x0) * (x1 - x0) */
4359
-
4360
-  /* add x0y0 */
4361
-  if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY)
4362
-    goto X1X1;           /* t2 = x0x0 + x1x1 */
4363
-  if (s_mp_sub (&t1, &t2, &t1) != MP_OKAY)
4364
-    goto X1X1;           /* t1 = (x1+x0)**2 - (x0x0 + x1x1) */
4365
-
4366
-  /* shift by B */
4367
-  if (mp_lshd (&t1, B) != MP_OKAY)
4368
-    goto X1X1;           /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */
4369
-  if (mp_lshd (&x1x1, B * 2) != MP_OKAY)
4370
-    goto X1X1;           /* x1x1 = x1x1 << 2*B */
4371
-
4372
-  if (mp_add (&x0x0, &t1, &t1) != MP_OKAY)
4373
-    goto X1X1;           /* t1 = x0x0 + t1 */
4374
-  if (mp_add (&t1, &x1x1, b) != MP_OKAY)
4375
-    goto X1X1;           /* t1 = x0x0 + t1 + x1x1 */
4376
-
4377
-  err = MP_OKAY;
4378
-
4379
-X1X1:mp_clear (&x1x1);
4380
-X0X0:mp_clear (&x0x0);
4381
-T2:mp_clear (&t2);
4382
-T1:mp_clear (&t1);
4383
-X1:mp_clear (&x1);
4384
-X0:mp_clear (&x0);
4385
-ERR:
4386
-  return err;
3395
+   /* now run 8 Miller-Rabin tests, using the first 8 table primes as bases */
3396
+   fp_init(&b);
3397
+   for (r = 0; r < 8; r++) {
3398
+       fp_set(&b, primes[r]);
3399
+       fp_prime_miller_rabin(a, &b, &res);
3400
+       if (res == FP_NO) {
3401
+          return FP_NO;
3402
+       }
3403
+   }
3404
+   return FP_YES;
4387 3405
 }
4388
-#endif
4389 3406
 
4390
-/* $Source: /cvs/libtom/libtommath/bn_mp_karatsuba_sqr.c,v $ */
4391
-/* $Revision: 1.5 $ */
4392
-/* $Date: 2006/03/31 14:18:44 $ */
3407
+/* $Source: /cvs/libtom/tomsfastmath/src/numtheory/fp_isprime.c,v $ */
3408
+/* $Revision: 1.1 $ */
3409
+/* $Date: 2007/01/24 21:25:19 $ */
4393 3410
 
4394
-/* End: bn_mp_karatsuba_sqr.c */
3411
+/* End: fp_isprime.c */
4395 3412
 
4396
-/* Start: bn_mp_lcm.c */
4397
-#include <bignum.h>
4398
-#ifdef BN_MP_LCM_C
4399
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
4400
- *
4401
- * LibTomMath is a library that provides multiple-precision
4402
- * integer arithmetic as well as number theoretic functionality.
4403
- *
4404
- * The library was designed directly after the MPI library by
4405
- * Michael Fromberger but has been written from scratch with
4406
- * additional optimizations in place.
4407
- *
4408
- * The library is free for all purposes without any express
4409
- * guarantee it works.
3413
+/* Start: fp_lcm.c */
3414
+/* TomsFastMath, a fast ISO C bignum library.
3415
+ * 
3416
+ * This project is meant to fill in where LibTomMath
3417
+ * falls short.  That is speed ;-)
4410 3418
  *
4411
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
3419
+ * This project is public domain and free for all purposes.
3420
+ * 
3421
+ * Tom St Denis, tomstdenis@gmail.com
4412 3422
  */
3423
+#include "bignum_fast.h"
4413 3424
 
4414
-/* computes least common multiple as |a*b|/(a, b) */
4415
-int mp_lcm (mp_int * a, mp_int * b, mp_int * c)
3425
+/* c = lcm(a, b) */
3426
+void fp_lcm(fp_int *a, fp_int *b, fp_int *c)
4416 3427
 {
4417
-  int     res;
4418
-  mp_int  t1, t2;
4419
-
4420
-
4421
-  if ((res = mp_init_multi (&t1, &t2, NULL)) != MP_OKAY) {
4422
-    return res;
4423
-  }
4424
-
4425
-  /* t1 = get the GCD of the two inputs */
4426
-  if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) {
4427
-    goto LBL_T;
4428
-  }
4429
-
4430
-  /* divide the smallest by the GCD */
4431
-  if (mp_cmp_mag(a, b) == MP_LT) {
4432
-     /* store quotient in t2 such that t2 * b is the LCM */
4433
-     if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
4434
-        goto LBL_T;
4435
-     }
4436
-     res = mp_mul(b, &t2, c);
4437
-  } else {
4438
-     /* store quotient in t2 such that t2 * a is the LCM */
4439
-     if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
4440
-        goto LBL_T;
4441
-     }
4442
-     res = mp_mul(a, &t2, c);
4443
-  }
4444
-
4445
-  /* fix the sign to positive */
4446
-  c->sign = MP_ZPOS;
4447
-
4448
-LBL_T:
4449
-  mp_clear_multi (&t1, &t2, NULL);
4450
-  return res;
3428
+   fp_int  t1, t2;
3429
+
3430
+   fp_init(&t1);
3431
+   fp_init(&t2);
3432
+   fp_gcd(a, b, &t1);
3433
+   if (fp_cmp_mag(a, b) == FP_GT) {
3434
+      fp_div(a, &t1, &t2, NULL);
3435
+      fp_mul(b, &t2, c);
3436
+   } else {
3437
+      fp_div(b, &t1, &t2, NULL);
3438
+      fp_mul(a, &t2, c);
3439
+   }   
4451 3440
 }
4452
-#endif
4453 3441
 
4454
-/* $Source: /cvs/libtom/libtommath/bn_mp_lcm.c,v $ */
4455
-/* $Revision: 1.3 $ */
4456
-/* $Date: 2006/03/31 14:18:44 $ */
3442
+/* $Source: /cvs/libtom/tomsfastmath/src/numtheory/fp_lcm.c,v $ */
3443
+/* $Revision: 1.1 $ */
3444
+/* $Date: 2007/01/24 21:25:19 $ */
4457 3445
 
4458
-/* End: bn_mp_lcm.c */
3446
+/* End: fp_lcm.c */
4459 3447
 
4460
-/* Start: bn_mp_lshd.c */
4461
-#include <bignum.h>
4462
-#ifdef BN_MP_LSHD_C
4463
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
4464
- *
4465
- * LibTomMath is a library that provides multiple-precision
4466
- * integer arithmetic as well as number theoretic functionality.
4467
- *
4468
- * The library was designed directly after the MPI library by
4469
- * Michael Fromberger but has been written from scratch with
4470
- * additional optimizations in place.
4471
- *
4472
- * The library is free for all purposes without any express
4473
- * guarantee it works.
3448
+/* Start: fp_lshd.c */
3449
+/* TomsFastMath, a fast ISO C bignum library.
3450
+ * 
3451
+ * This project is meant to fill in where LibTomMath
3452
+ * falls short.  That is speed ;-)
4474 3453
  *
4475
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
3454
+ * This project is public domain and free for all purposes.
3455
+ * 
3456
+ * Tom St Denis, tomstdenis@gmail.com
4476 3457
  */
3458
+#include "bignum_fast.h"
4477 3459
 
4478
-/* shift left a certain amount of digits */
4479
-int mp_lshd (mp_int * a, int b)
3460
+void fp_lshd(fp_int *a, int x)
4480 3461
 {
4481
-  int     x, res;
3462
+   int y;
4482 3463
 
4483
-  /* if its less than zero return */
4484
-  if (b <= 0) {
4485
-    return MP_OKAY;
4486
-  }
4487
-
4488
-  /* grow to fit the new digits */
4489
-  if (a->alloc < a->used + b) {
4490
-     if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
4491
-       return res;
4492
-     }
4493
-  }
3464
+   /* move up and truncate as required */
3465
+   y = MIN(a->used + x - 1, (int)(FP_SIZE-1));
4494 3466
 
4495
-  {
4496
-    register mp_digit *top, *bottom;
3467
+   /* store new size */
3468
+   a->used = y + 1;
4497 3469
 
4498
-    /* increment the used by the shift amount then copy upwards */
4499
-    a->used += b;
4500
-
4501
-    /* top */
4502
-    top = a->dp + a->used - 1;
4503
-
4504
-    /* base */
4505
-    bottom = a->dp + a->used - 1 - b;
4506
-
4507
-    /* much like mp_rshd this is implemented using a sliding window
4508
-     * except the window goes the otherway around.  Copying from
4509
-     * the bottom to the top.  see bn_mp_rshd.c for more info.
4510
-     */
4511
-    for (x = a->used - 1; x >= b; x--) {
4512
-      *top-- = *bottom--;
4513
-    }
3470
+   /* move digits */
3471
+   for (; y >= x; y--) {
3472
+       a->dp[y] = a->dp[y-x];
3473
+   }
3474
+ 
3475
+   /* zero lower digits */
3476
+   for (; y >= 0; y--) {
3477
+       a->dp[y] = 0;
3478
+   }
4514 3479
 
4515
-    /* zero the lower digits */
4516
-    top = a->dp;
4517
-    for (x = 0; x < b; x++) {
4518
-      *top++ = 0;
4519
-    }
4520
-  }
4521
-  return MP_OKAY;
3480
+   /* clamp digits */
3481
+   fp_clamp(a);
4522 3482
 }
4523
-#endif
4524 3483
 
4525
-/* $Source: /cvs/libtom/libtommath/bn_mp_lshd.c,v $ */
4526
-/* $Revision: 1.3 $ */
4527
-/* $Date: 2006/03/31 14:18:44 $ */
3484
+/* $Source: /cvs/libtom/tomsfastmath/src/bit/fp_lshd.c,v $ */
3485
+/* $Revision: 1.1 $ */
3486
+/* $Date: 2006/12/31 21:25:53 $ */
4528 3487
 
4529
-/* End: bn_mp_lshd.c */
3488
+/* End: fp_lshd.c */
4530 3489
 
4531
-/* Start: bn_mp_mod.c */
4532
-#include <bignum.h>
4533
-#ifdef BN_MP_MOD_C
4534
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
4535
- *
4536
- * LibTomMath is a library that provides multiple-precision
4537
- * integer arithmetic as well as number theoretic functionality.
4538
- *
4539
- * The library was designed directly after the MPI library by
4540
- * Michael Fromberger but has been written from scratch with
4541
- * additional optimizations in place.
4542
- *
4543
- * The library is free for all purposes without any express
4544
- * guarantee it works.
3490
+/* Start: fp_mod.c */
3491
+/* TomsFastMath, a fast ISO C bignum library.
3492
+ * 
3493
+ * This project is meant to fill in where LibTomMath
3494
+ * falls short.  That is speed ;-)
4545 3495
  *
4546
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
3496
+ * This project is public domain and free for all purposes.
3497
+ * 
3498
+ * Tom St Denis, tomstdenis@gmail.com
4547 3499
  */
3500
+#include "bignum_fast.h"
4548 3501
 
4549
-/* c = a mod b, 0 <= c < b */
4550
-int
4551
-mp_mod (mp_int * a, mp_int * b, mp_int * c)
3502
+/* c = a mod b, 0 <= c < b  */
3503
+int fp_mod(fp_int *a, fp_int *b, fp_int *c)
4552 3504
 {
4553
-  mp_int  t;
4554
-  int     res;
3505
+   fp_int t;
3506
+   int    err;
4555 3507
 
4556
-  if ((res = mp_init (&t)) != MP_OKAY) {
4557
-    return res;
4558
-  }
4559
-
4560
-  if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
4561
-    mp_clear (&t);
4562
-    return res;
3508
+   fp_zero(&t);
3509
+   if ((err = fp_div(a, b, NULL, &t)) != FP_OKAY) {
3510
+      return err;
3511
+   }
3512
+   if (t.used != 0 && t.sign != b->sign) { /* only fold a nonzero remainder into b's sign range; a zero remainder is already the answer (adding b would return b, not 0) */
3513
+      fp_add(&t, b, c);
3514
+   } else {
3515
+      fp_copy(&t, c);
4563 3516
   }
3517
+  return FP_OKAY;
3518
+}
4564 3519
 
4565
-  if (t.sign != b->sign) {
4566
-    res = mp_add (b, &t, c);
4567
-  } else {
4568
-    res = MP_OKAY;
4569
-    mp_exch (&t, c);
4570
-  }
4571 3520
 
4572
-  mp_clear (&t);
4573
-  return res;
4574
-}
4575
-#endif
4576 3521
 
4577
-/* $Source: /cvs/libtom/libtommath/bn_mp_mod.c,v $ */
4578
-/* $Revision: 1.3 $ */
4579
-/* $Date: 2006/03/31 14:18:44 $ */
3522
+/* $Source: /cvs/libtom/tomsfastmath/src/divide/fp_mod.c,v $ */
3523
+/* $Revision: 1.1 $ */
3524
+/* $Date: 2006/12/31 21:25:53 $ */
4580 3525
 
4581
-/* End: bn_mp_mod.c */
3526
+/* End: fp_mod.c */
4582 3527
 
4583
-/* Start: bn_mp_mod_2d.c */
4584
-#include <bignum.h>
4585
-#ifdef BN_MP_MOD_2D_C
4586
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
4587
- *
4588
- * LibTomMath is a library that provides multiple-precision
4589
- * integer arithmetic as well as number theoretic functionality.
4590
- *
4591
- * The library was designed directly after the MPI library by
4592
- * Michael Fromberger but has been written from scratch with
4593
- * additional optimizations in place.
4594
- *
4595
- * The library is free for all purposes without any express
4596
- * guarantee it works.
3528
+/* Start: fp_mod_2d.c */
3529
+/* TomsFastMath, a fast ISO C bignum library.
3530
+ * 
3531
+ * This project is meant to fill in where LibTomMath
3532
+ * falls short.  That is speed ;-)
4597 3533
  *
4598
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
3534
+ * This project is public domain and free for all purposes.
3535
+ * 
3536
+ * Tom St Denis, tomstdenis@gmail.com
4599 3537
  */
3538
+#include "bignum_fast.h"
4600 3539
 
4601
-/* calc a value mod 2**b */
4602
-int
4603
-mp_mod_2d (mp_int * a, int b, mp_int * c)
3540
+/* c = a mod 2**b */
3541
+void fp_mod_2d(fp_int *a, int b, fp_int *c)
4604 3542
 {
4605
-  int     x, res;
4606
-
4607
-  /* if b is <= 0 then zero the int */
4608
-  if (b <= 0) {
4609
-    mp_zero (c);
4610
-    return MP_OKAY;
4611
-  }
3543
+   int x;
4612 3544
 
4613
-  /* if the modulus is larger than the value than return */
4614
-  if (b >= (int) (a->used * DIGIT_BIT)) {
4615
-    res = mp_copy (a, c);
4616
-    return res;
4617
-  }
3545
+   /* zero if count less than or equal to zero */
3546
+   if (b <= 0) {
3547
+      fp_zero(c);
3548
+      return;
3549
+   }
4618 3550
 
4619
-  /* copy */
4620
-  if ((res = mp_copy (a, c)) != MP_OKAY) {
4621
-    return res;
4622
-  }
3551
+   /* get copy of input */
3552
+   fp_copy(a, c);
3553
+ 
3554
+   /* if 2**b exceeds every bit a can hold, the copy is already the result */
3555
+   if (b >= (DIGIT_BIT * a->used)) {
3556
+      return;
3557
+   }
4623 3558
 
4624 3559
   /* zero digits above the last digit of the modulus */
4625 3560
   for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
4626 3561
     c->dp[x] = 0;
4627 3562
   }
4628 3563
   /* clear the digit that is not completely outside/inside the modulus */
4629
-  c->dp[b / DIGIT_BIT] &=
4630
-    (mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
4631
-  mp_clamp (c);
4632
-  return MP_OKAY;
3564
+  if ((b % DIGIT_BIT) != 0) c->dp[b / DIGIT_BIT] &= ~((fp_digit)0) >> (DIGIT_BIT - (b % DIGIT_BIT)); /* keep only the low (b mod DIGIT_BIT) bits; when b is a whole number of digits this digit was already zeroed by the loop above */
3565
+  fp_clamp (c);
4633 3566
 }
4634
-#endif
4635 3567
 
4636
-/* $Source: /cvs/libtom/libtommath/bn_mp_mod_2d.c,v $ */
4637
-/* $Revision: 1.3 $ */
4638
-/* $Date: 2006/03/31 14:18:44 $ */
3568
+/* $Source: /cvs/libtom/tomsfastmath/src/bit/fp_mod_2d.c,v $ */
3569
+/* $Revision: 1.1 $ */
3570
+/* $Date: 2006/12/31 21:25:53 $ */
4639 3571
 
4640
-/* End: bn_mp_mod_2d.c */
3572
+/* End: fp_mod_2d.c */
4641 3573
 
4642
-/* Start: bn_mp_mod_d.c */
4643
-#include <bignum.h>
4644
-#ifdef BN_MP_MOD_D_C
4645
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
4646
- *
4647
- * LibTomMath is a library that provides multiple-precision
4648
- * integer arithmetic as well as number theoretic functionality.
4649
- *
4650
- * The library was designed directly after the MPI library by
4651
- * Michael Fromberger but has been written from scratch with
4652
- * additional optimizations in place.
4653
- *
4654
- * The library is free for all purposes without any express
4655
- * guarantee it works.
3574
+/* Start: fp_mod_d.c */
3575
+/* TomsFastMath, a fast ISO C bignum library.
3576
+ * 
3577
+ * This project is meant to fill in where LibTomMath
3578
+ * falls short.  That is speed ;-)
4656 3579
  *
4657
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
3580
+ * This project is public domain and free for all purposes.
3581
+ * 
3582
+ * Tom St Denis, tomstdenis@gmail.com
4658 3583
  */
3584
+#include "bignum_fast.h"
4659 3585
 
4660
-int
4661
-mp_mod_d (mp_int * a, mp_digit b, mp_digit * c)
3586
+/* c = a mod b, 0 <= c < b  */
3587
+int fp_mod_d(fp_int *a, fp_digit b, fp_digit *c)
4662 3588
 {
4663
-  return mp_div_d(a, b, NULL, c);
3589
+   return fp_div_d(a, b, NULL, c);
4664 3590
 }
4665
-#endif
4666 3591
 
4667
-/* $Source: /cvs/libtom/libtommath/bn_mp_mod_d.c,v $ */
4668
-/* $Revision: 1.3 $ */
4669
-/* $Date: 2006/03/31 14:18:44 $ */
3592
+/* $Source: /cvs/libtom/tomsfastmath/src/divide/fp_mod_d.c,v $ */
3593
+/* $Revision: 1.1 $ */
3594
+/* $Date: 2006/12/31 21:25:53 $ */
4670 3595
 
4671
-/* End: bn_mp_mod_d.c */
3596
+/* End: fp_mod_d.c */
4672 3597
 
4673
-/* Start: bn_mp_montgomery_calc_normalization.c */
4674
-#include <bignum.h>
4675
-#ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
4676
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
4677
- *
4678
- * LibTomMath is a library that provides multiple-precision
4679
- * integer arithmetic as well as number theoretic functionality.
4680
- *
4681
- * The library was designed directly after the MPI library by
4682
- * Michael Fromberger but has been written from scratch with
4683
- * additional optimizations in place.
4684
- *
4685
- * The library is free for all purposes without any express
4686
- * guarantee it works.
3598
+/* Start: fp_montgomery_calc_normalization.c */
3599
+/* TomsFastMath, a fast ISO C bignum library.
3600
+ * 
3601
+ * This project is meant to fill in where LibTomMath
3602
+ * falls short.  That is speed ;-)
4687 3603
  *
4688
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
3604
+ * This project is public domain and free for all purposes.
3605
+ * 
3606
+ * Tom St Denis, tomstdenis@gmail.com
4689 3607
  */
3608
+#include "bignum_fast.h"
4690 3609
 
4691
-/*
4692
- * shifts with subtractions when the result is greater than b.
4693
- *
4694
- * The method is slightly modified to shift B unconditionally upto just under
4695
- * the leading bit of b.  This saves alot of multiple precision shifting.
3610
+/* computes a = B**n mod b without division or multiplication; useful for
3611
+ * normalizing numbers in a Montgomery system.
4696 3612
  */
4697
-int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
3613
+void fp_montgomery_calc_normalization(fp_int *a, fp_int *b)
4698 3614
 {
4699
-  int     x, bits, res;
3615
+  int     x, bits;
4700 3616
 
4701 3617
   /* how many bits of last digit does b use */
4702
-  bits = mp_count_bits (b) % DIGIT_BIT;
3618
+  bits = fp_count_bits (b) % DIGIT_BIT;
3619
+  if (!bits) bits = DIGIT_BIT;
4703 3620
 
3621
+  /* compute A = B^(n-1) * 2^(bits-1) */
4704 3622
   if (b->used > 1) {
4705
-     if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
4706
-        return res;
4707
-     }
3623
+     fp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1);
4708 3624
   } else {
4709
-     mp_set(a, 1);
3625
+     fp_set(a, 1);
4710 3626
      bits = 1;
4711 3627
   }
4712 3628
 
4713
-
4714 3629
   /* now compute C = A * B mod b */
4715 3630
   for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
4716
-    if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
4717
-      return res;
4718
-    }
4719
-    if (mp_cmp_mag (a, b) != MP_LT) {
4720
-      if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
4721
-        return res;
4722
-      }
3631
+    fp_mul_2 (a, a);
3632
+    if (fp_cmp_mag (a, b) != FP_LT) {
3633
+      s_fp_sub (a, b, a);
4723 3634
     }
4724 3635
   }
4725
-
4726
-  return MP_OKAY;
4727 3636
 }
4728
-#endif
4729 3637
 
4730
-/* $Source: /cvs/libtom/libtommath/bn_mp_montgomery_calc_normalization.c,v $ */
4731
-/* $Revision: 1.3 $ */
4732
-/* $Date: 2006/03/31 14:18:44 $ */
4733 3638
 
4734
-/* End: bn_mp_montgomery_calc_normalization.c */
3639
+/* $Source: /cvs/libtom/tomsfastmath/src/mont/fp_montgomery_calc_normalization.c,v $ */
3640
+/* $Revision: 1.1 $ */
3641
+/* $Date: 2006/12/31 21:25:53 $ */
4735 3642
 
4736
-/* Start: bn_mp_montgomery_reduce.c */
4737
-#include <bignum.h>
4738
-#ifdef BN_MP_MONTGOMERY_REDUCE_C
4739
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
4740
- *
4741
- * LibTomMath is a library that provides multiple-precision
4742
- * integer arithmetic as well as number theoretic functionality.
4743
- *
4744
- * The library was designed directly after the MPI library by
4745
- * Michael Fromberger but has been written from scratch with
4746
- * additional optimizations in place.
4747
- *
4748
- * The library is free for all purposes without any express
4749
- * guarantee it works.
3643
+/* End: fp_montgomery_calc_normalization.c */
3644
+
3645
+/* Start: fp_montgomery_reduce.c */
3646
+/* TomsFastMath, a fast ISO C bignum library.
3647
+ * 
3648
+ * This project is meant to fill in where LibTomMath
3649
+ * falls short.  That is speed ;-)
4750 3650
  *
4751
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
4752
- */
3651
+ * This project is public domain and free for all purposes.
3652
+ * 
3653
+ * Tom St Denis, tomstdenis@gmail.com
3654
+ */
3655
+#include "bignum_fast.h"
3656
+
3657
+/******************************************************************/
3658
+#if defined(TFM_X86) && !defined(TFM_SSE2) 
3659
+/* x86-32 code */
3660
+
3661
+#define MONT_START 
3662
+#define MONT_FINI
3663
+#define LOOP_END
3664
+#define LOOP_START \
3665
+   mu = c[x] * mp
3666
+
3667
+#define INNERMUL                                          \
3668
+asm(                                                      \
3669
+   "movl %5,%%eax \n\t"                                   \
3670
+   "mull %4       \n\t"                                   \
3671
+   "addl %1,%%eax \n\t"                                   \
3672
+   "adcl $0,%%edx \n\t"                                   \
3673
+   "addl %%eax,%0 \n\t"                                   \
3674
+   "adcl $0,%%edx \n\t"                                   \
3675
+   "movl %%edx,%1 \n\t"                                   \
3676
+:"=g"(_c[LO]), "=r"(cy)                                   \
3677
+:"0"(_c[LO]), "1"(cy), "g"(mu), "g"(*tmpm++)              \
3678
+: "%eax", "%edx", "%cc")
3679
+
3680
+#define PROPCARRY                           \
3681
+asm(                                        \
3682
+   "addl   %1,%0    \n\t"                   \
3683
+   "setb   %%al     \n\t"                   \
3684
+   "movzbl %%al,%1 \n\t"                    \
3685
+:"=g"(_c[LO]), "=r"(cy)                     \
3686
+:"0"(_c[LO]), "1"(cy)                       \
3687
+: "%eax", "%cc")
3688
+
3689
+/******************************************************************/
3690
+#elif defined(TFM_X86_64)
3691
+/* x86-64 code */
3692
+#define MONT_START 
3693
+#define MONT_FINI
3694
+#define LOOP_END
3695
+#define LOOP_START \
3696
+   mu = c[x] * mp
3697
+
3698
+#define INNERMUL                                          \
3699
+asm(                                                      \
3700
+   "movq %5,%%rax \n\t"                                   \
3701
+   "mulq %4       \n\t"                                   \
3702
+   "addq %1,%%rax \n\t"                                   \
3703
+   "adcq $0,%%rdx \n\t"                                   \
3704
+   "addq %%rax,%0 \n\t"                                   \
3705
+   "adcq $0,%%rdx \n\t"                                   \
3706
+   "movq %%rdx,%1 \n\t"                                   \
3707
+:"=g"(_c[LO]), "=r"(cy)                                   \
3708
+:"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++)              \
3709
+: "%rax", "%rdx", "%cc")
3710
+
3711
+#define INNERMUL8 \
3712
+ asm(                  \
3713
+ "movq 0(%5),%%rax    \n\t"  \
3714
+ "movq 0(%2),%%r10    \n\t"  \
3715
+ "movq 0x8(%5),%%r11  \n\t"  \
3716
+ "mulq %4             \n\t"  \
3717
+ "addq %%r10,%%rax    \n\t"  \
3718
+ "adcq $0,%%rdx       \n\t"  \
3719
+ "movq 0x8(%2),%%r10  \n\t"  \
3720
+ "addq %3,%%rax       \n\t"  \
3721
+ "adcq $0,%%rdx       \n\t"  \
3722
+ "movq %%rax,0(%0)    \n\t"  \
3723
+ "movq %%rdx,%1       \n\t"  \
3724
+ \
3725
+ "movq %%r11,%%rax    \n\t"  \
3726
+ "movq 0x10(%5),%%r11 \n\t"  \
3727
+ "mulq %4             \n\t"  \
3728
+ "addq %%r10,%%rax    \n\t"  \
3729
+ "adcq $0,%%rdx       \n\t"  \
3730
+ "movq 0x10(%2),%%r10 \n\t"  \
3731
+ "addq %3,%%rax       \n\t"  \
3732
+ "adcq $0,%%rdx       \n\t"  \
3733
+ "movq %%rax,0x8(%0)  \n\t"  \
3734
+ "movq %%rdx,%1       \n\t"  \
3735
+ \
3736
+ "movq %%r11,%%rax    \n\t"  \
3737
+ "movq 0x18(%5),%%r11 \n\t"  \
3738
+ "mulq %4             \n\t"  \
3739
+ "addq %%r10,%%rax    \n\t"  \
3740
+ "adcq $0,%%rdx       \n\t"  \
3741
+ "movq 0x18(%2),%%r10 \n\t"  \
3742
+ "addq %3,%%rax       \n\t"  \
3743
+ "adcq $0,%%rdx       \n\t"  \
3744
+ "movq %%rax,0x10(%0) \n\t"  \
3745
+ "movq %%rdx,%1       \n\t"  \
3746
+ \
3747
+ "movq %%r11,%%rax    \n\t"  \
3748
+ "movq 0x20(%5),%%r11 \n\t"  \
3749
+ "mulq %4             \n\t"  \
3750
+ "addq %%r10,%%rax    \n\t"  \
3751
+ "adcq $0,%%rdx       \n\t"  \
3752
+ "movq 0x20(%2),%%r10 \n\t"  \
3753
+ "addq %3,%%rax       \n\t"  \
3754
+ "adcq $0,%%rdx       \n\t"  \
3755
+ "movq %%rax,0x18(%0) \n\t"  \
3756
+ "movq %%rdx,%1       \n\t"  \
3757
+ \
3758
+ "movq %%r11,%%rax    \n\t"  \
3759
+ "movq 0x28(%5),%%r11 \n\t"  \
3760
+ "mulq %4             \n\t"  \
3761
+ "addq %%r10,%%rax    \n\t"  \
3762
+ "adcq $0,%%rdx       \n\t"  \
3763
+ "movq 0x28(%2),%%r10 \n\t"  \
3764
+ "addq %3,%%rax       \n\t"  \
3765
+ "adcq $0,%%rdx       \n\t"  \
3766
+ "movq %%rax,0x20(%0) \n\t"  \
3767
+ "movq %%rdx,%1       \n\t"  \
3768
+ \
3769
+ "movq %%r11,%%rax    \n\t"  \
3770
+ "movq 0x30(%5),%%r11 \n\t"  \
3771
+ "mulq %4             \n\t"  \
3772
+ "addq %%r10,%%rax    \n\t"  \
3773
+ "adcq $0,%%rdx       \n\t"  \
3774
+ "movq 0x30(%2),%%r10 \n\t"  \
3775
+ "addq %3,%%rax       \n\t"  \
3776
+ "adcq $0,%%rdx       \n\t"  \
3777
+ "movq %%rax,0x28(%0) \n\t"  \
3778
+ "movq %%rdx,%1       \n\t"  \
3779
+ \
3780
+ "movq %%r11,%%rax    \n\t"  \
3781
+ "movq 0x38(%5),%%r11 \n\t"  \
3782
+ "mulq %4             \n\t"  \
3783
+ "addq %%r10,%%rax    \n\t"  \
3784
+ "adcq $0,%%rdx       \n\t"  \
3785
+ "movq 0x38(%2),%%r10 \n\t"  \
3786
+ "addq %3,%%rax       \n\t"  \
3787
+ "adcq $0,%%rdx       \n\t"  \
3788
+ "movq %%rax,0x30(%0) \n\t"  \
3789
+ "movq %%rdx,%1       \n\t"  \
3790
+ \
3791
+ "movq %%r11,%%rax    \n\t"  \
3792
+ "mulq %4             \n\t"  \
3793
+ "addq %%r10,%%rax    \n\t"  \
3794
+ "adcq $0,%%rdx       \n\t"  \
3795
+ "addq %3,%%rax       \n\t"  \
3796
+ "adcq $0,%%rdx       \n\t"  \
3797
+ "movq %%rax,0x38(%0) \n\t"  \
3798
+ "movq %%rdx,%1       \n\t"  \
3799
+ \
3800
+:"=r"(_c), "=r"(cy)                    \
3801
+: "0"(_c),  "1"(cy), "g"(mu), "r"(tmpm)\
3802
+: "%rax", "%rdx", "%r10", "%r11", "%cc")
3803
+
3804
+
3805
+#define PROPCARRY                           \
3806
+asm(                                        \
3807
+   "addq   %1,%0    \n\t"                   \
3808
+   "setb   %%al     \n\t"                   \
3809
+   "movzbq %%al,%1 \n\t"                    \
3810
+:"=g"(_c[LO]), "=r"(cy)                     \
3811
+:"0"(_c[LO]), "1"(cy)                       \
3812
+: "%rax", "%cc")
3813
+
3814
+/******************************************************************/
3815
+#elif defined(TFM_SSE2)  
3816
+/* SSE2 code (assumes 32-bit fp_digits) */
3817
+/* MMX register assignments (pmuludq is used on the 64-bit mm registers):
3818
+ * mm0  *tmpm++, then Mu * (*tmpm++)
3819
+ * mm1  c[x], then Mu
3820
+ * mm2  mp
3821
+ * mm3  cy
3822
+ * mm4  _c[LO]
3823
+ */
3824
+
3825
+#define MONT_START \
3826
+   asm("movd %0,%%mm2"::"g"(mp))
3827
+
3828
+#define MONT_FINI \
3829
+   asm("emms")
3830
+
3831
+#define LOOP_START          \
3832
+asm(                        \
3833
+"movd %0,%%mm1        \n\t" \
3834
+"pxor %%mm3,%%mm3     \n\t" \
3835
+"pmuludq %%mm2,%%mm1  \n\t" \
3836
+:: "g"(c[x]))
3837
+
3838
+/* pmuludq on mmx registers does a 32x32->64 multiply. */
3839
+#define INNERMUL               \
3840
+asm(                           \
3841
+   "movd %1,%%mm4        \n\t" \
3842
+   "movd %2,%%mm0        \n\t" \
3843
+   "paddq %%mm4,%%mm3    \n\t" \
3844
+   "pmuludq %%mm1,%%mm0  \n\t" \
3845
+   "paddq %%mm0,%%mm3    \n\t" \
3846
+   "movd %%mm3,%0        \n\t" \
3847
+   "psrlq $32, %%mm3     \n\t" \
3848
+:"=g"(_c[LO]) : "0"(_c[LO]), "g"(*tmpm++) );
3849
+
3850
+#define INNERMUL8 \
3851
+asm(                           \
3852
+   "movd 0(%1),%%mm4     \n\t" \
3853
+   "movd 0(%2),%%mm0     \n\t" \
3854
+   "paddq %%mm4,%%mm3    \n\t" \
3855
+   "pmuludq %%mm1,%%mm0  \n\t" \
3856
+   "movd 4(%2),%%mm5     \n\t" \
3857
+   "paddq %%mm0,%%mm3    \n\t" \
3858
+   "movd 4(%1),%%mm6     \n\t" \
3859
+   "movd %%mm3,0(%0)     \n\t" \
3860
+   "psrlq $32, %%mm3     \n\t" \
3861
+\
3862
+   "paddq %%mm6,%%mm3    \n\t" \
3863
+   "pmuludq %%mm1,%%mm5  \n\t" \
3864
+   "movd 8(%2),%%mm6     \n\t" \
3865
+   "paddq %%mm5,%%mm3    \n\t" \
3866
+   "movd 8(%1),%%mm7     \n\t" \
3867
+   "movd %%mm3,4(%0)     \n\t" \
3868
+   "psrlq $32, %%mm3     \n\t" \
3869
+\
3870
+   "paddq %%mm7,%%mm3    \n\t" \
3871
+   "pmuludq %%mm1,%%mm6  \n\t" \
3872
+   "movd 12(%2),%%mm7    \n\t" \
3873
+   "paddq %%mm6,%%mm3    \n\t" \
3874
+   "movd 12(%1),%%mm5     \n\t" \
3875
+   "movd %%mm3,8(%0)     \n\t" \
3876
+   "psrlq $32, %%mm3     \n\t" \
3877
+\
3878
+   "paddq %%mm5,%%mm3    \n\t" \
3879
+   "pmuludq %%mm1,%%mm7  \n\t" \
3880
+   "movd 16(%2),%%mm5    \n\t" \
3881
+   "paddq %%mm7,%%mm3    \n\t" \
3882
+   "movd 16(%1),%%mm6    \n\t" \
3883
+   "movd %%mm3,12(%0)    \n\t" \
3884
+   "psrlq $32, %%mm3     \n\t" \
3885
+\
3886
+   "paddq %%mm6,%%mm3    \n\t" \
3887
+   "pmuludq %%mm1,%%mm5  \n\t" \
3888
+   "movd 20(%2),%%mm6    \n\t" \
3889
+   "paddq %%mm5,%%mm3    \n\t" \
3890
+   "movd 20(%1),%%mm7    \n\t" \
3891
+   "movd %%mm3,16(%0)    \n\t" \
3892
+   "psrlq $32, %%mm3     \n\t" \
3893
+\
3894
+   "paddq %%mm7,%%mm3    \n\t" \
3895
+   "pmuludq %%mm1,%%mm6  \n\t" \
3896
+   "movd 24(%2),%%mm7    \n\t" \
3897
+   "paddq %%mm6,%%mm3    \n\t" \
3898
+   "movd 24(%1),%%mm5     \n\t" \
3899
+   "movd %%mm3,20(%0)    \n\t" \
3900
+   "psrlq $32, %%mm3     \n\t" \
3901
+\
3902
+   "paddq %%mm5,%%mm3    \n\t" \
3903
+   "pmuludq %%mm1,%%mm7  \n\t" \
3904
+   "movd 28(%2),%%mm5    \n\t" \
3905
+   "paddq %%mm7,%%mm3    \n\t" \
3906
+   "movd 28(%1),%%mm6    \n\t" \
3907
+   "movd %%mm3,24(%0)    \n\t" \
3908
+   "psrlq $32, %%mm3     \n\t" \
3909
+\
3910
+   "paddq %%mm6,%%mm3    \n\t" \
3911
+   "pmuludq %%mm1,%%mm5  \n\t" \
3912
+   "paddq %%mm5,%%mm3    \n\t" \
3913
+   "movd %%mm3,28(%0)    \n\t" \
3914
+   "psrlq $32, %%mm3     \n\t" \
3915
+:"=r"(_c) : "0"(_c), "g"(tmpm) );
3916
+
3917
+#define LOOP_END \
3918
+asm( "movd %%mm3,%0  \n" :"=r"(cy))
3919
+
3920
+#define PROPCARRY                           \
3921
+asm(                                        \
3922
+   "addl   %1,%0    \n\t"                   \
3923
+   "setb   %%al     \n\t"                   \
3924
+   "movzbl %%al,%1 \n\t"                    \
3925
+:"=g"(_c[LO]), "=r"(cy)                     \
3926
+:"0"(_c[LO]), "1"(cy)                       \
3927
+: "%eax", "%cc")
3928
+
3929
+/******************************************************************/
3930
+#elif defined(TFM_ARM)
3931
+   /* ARMv4 code */
3932
+
3933
+#define MONT_START 
3934
+#define MONT_FINI
3935
+#define LOOP_END
3936
+#define LOOP_START \
3937
+   mu = c[x] * mp
3938
+
3939
+#define INNERMUL                    \
3940
+asm(                                \
3941
+    " LDR    r0,%1            \n\t" \
3942
+    " ADDS   r0,r0,%0         \n\t" \
3943
+    " MOVCS  %0,#1            \n\t" \
3944
+    " MOVCC  %0,#0            \n\t" \
3945
+    " UMLAL  r0,%0,%3,%4      \n\t" \
3946
+    " STR    r0,%1            \n\t" \
3947
+:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","%cc");
3948
+
3949
+#define PROPCARRY                  \
3950
+asm(                               \
3951
+    " LDR   r0,%1            \n\t" \
3952
+    " ADDS  r0,r0,%0         \n\t" \
3953
+    " STR   r0,%1            \n\t" \
3954
+    " MOVCS %0,#1            \n\t" \
3955
+    " MOVCC %0,#0            \n\t" \
3956
+:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r0","%cc");
3957
+
3958
+/******************************************************************/
3959
+#elif defined(TFM_PPC32)
3960
+
3961
+/* PPC32 */
3962
+#define MONT_START 
3963
+#define MONT_FINI
3964
+#define LOOP_END
3965
+#define LOOP_START \
3966
+   mu = c[x] * mp
3967
+
3968
+#define INNERMUL                     \
3969
+asm(                                 \
3970
+   " mullw    16,%3,%4       \n\t"   \
3971
+   " mulhwu   17,%3,%4       \n\t"   \
3972
+   " addc     16,16,%0       \n\t"   \
3973
+   " addze    17,17          \n\t"   \
3974
+   " lwz      18,%1          \n\t"   \
3975
+   " addc     16,16,18       \n\t"   \
3976
+   " addze    %0,17          \n\t"   \
3977
+   " stw      16,%1          \n\t"   \
3978
+:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","%cc"); ++tmpm;
3979
+
3980
+#define PROPCARRY                    \
3981
+asm(                                 \
3982
+   " lwz      16,%1         \n\t"    \
3983
+   " addc     16,16,%0      \n\t"    \
3984
+   " stw      16,%1         \n\t"    \
3985
+   " xor      %0,%0,%0      \n\t"    \
3986
+   " addze    %0,%0         \n\t"    \
3987
+:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","%cc");
3988
+
3989
+/******************************************************************/
3990
+#elif defined(TFM_PPC64)
3991
+
3992
+/* PPC64 */
3993
+#define MONT_START 
3994
+#define MONT_FINI
3995
+#define LOOP_END
3996
+#define LOOP_START \
3997
+   mu = c[x] * mp
3998
+
3999
+#define INNERMUL                     \
4000
+asm(                                 \
4001
+   " mulld    r16,%3,%4       \n\t"   \
4002
+   " mulhdu   r17,%3,%4       \n\t"   \
4003
+   " addc     r16,16,%0       \n\t"   \
4004
+   " addze    r17,r17          \n\t"   \
4005
+   " ldx      r18,0,%1        \n\t"   \
4006
+   " addc     r16,r16,r18       \n\t"   \
4007
+   " addze    %0,r17          \n\t"   \
4008
+   " sdx      r16,0,%1        \n\t"   \
4009
+:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"r16", "r17", "r18","%cc"); ++tmpm;
4010
+
4011
+#define PROPCARRY                    \
4012
+asm(                                 \
4013
+   " ldx      r16,0,%1       \n\t"    \
4014
+   " addc     r16,r16,%0      \n\t"    \
4015
+   " sdx      r16,0,%1       \n\t"    \
4016
+   " xor      %0,%0,%0      \n\t"    \
4017
+   " addze    %0,%0         \n\t"    \
4018
+:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r16","%cc");
4019
+
4020
+/******************************************************************/
4021
+#elif defined(TFM_AVR32)
4022
+
4023
+/* AVR32 */
4024
+#define MONT_START 
4025
+#define MONT_FINI
4026
+#define LOOP_END
4027
+#define LOOP_START \
4028
+   mu = c[x] * mp
4029
+
4030
+#define INNERMUL                    \
4031
+asm(                                \
4032
+    " ld.w   r2,%1            \n\t" \
4033
+    " add    r2,%0            \n\t" \
4034
+    " eor    r3,r3            \n\t" \
4035
+    " acr    r3               \n\t" \
4036
+    " macu.d r2,%3,%4         \n\t" \
4037
+    " st.w   %1,r2            \n\t" \
4038
+    " mov    %0,r3            \n\t" \
4039
+:"=r"(cy),"=r"(_c):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c):"r2","r3");
4040
+
4041
+#define PROPCARRY                    \
4042
+asm(                                 \
4043
+   " ld.w     r2,%1         \n\t"    \
4044
+   " add      r2,%0         \n\t"    \
4045
+   " st.w     %1,r2         \n\t"    \
4046
+   " eor      %0,%0         \n\t"    \
4047
+   " acr      %0            \n\t"    \
4048
+:"=r"(cy),"=r"(&_c[0]):"0"(cy),"1"(&_c[0]):"r2","%cc");
4049
+
4050
+/******************************************************************/
4051
+#elif defined(TFM_MIPS)
4052
+
4053
+/* MIPS */
4054
+#define MONT_START 
4055
+#define MONT_FINI
4056
+#define LOOP_END
4057
+#define LOOP_START \
4058
+   mu = c[x] * mp
4059
+
4060
+#define INNERMUL                     \
4061
+asm(                                 \
4062
+   " multu    %3,%4          \n\t"   \
4063
+   " mflo     $12            \n\t"   \
4064
+   " mfhi     $13            \n\t"   \
4065
+   " addu     $12,$12,%0     \n\t"   \
4066
+   " sltu     $10,$12,%0     \n\t"   \
4067
+   " addu     $13,$13,$10    \n\t"   \
4068
+   " lw       $10,%1         \n\t"   \
4069
+   " addu     $12,$12,$10    \n\t"   \
4070
+   " sltu     $10,$12,$10    \n\t"   \
4071
+   " addu     %0,$13,$10     \n\t"   \
4072
+   " sw       $12,%1         \n\t"   \
4073
+:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"$10","$12","$13"); ++tmpm;
4074
+
4075
+#define PROPCARRY                    \
4076
+asm(                                 \
4077
+   " lw       $10,%1        \n\t"    \
4078
+   " addu     $10,$10,%0    \n\t"    \
4079
+   " sw       $10,%1        \n\t"    \
4080
+   " sltu     %0,$10,%0     \n\t"    \
4081
+:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"$10");
4082
+
4083
+/******************************************************************/
4084
+#else
4753 4085
 
4754
-/* computes xR**-1 == x (mod N) via Montgomery Reduction */
4755
-int
4756
-mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
4757
-{
4758
-  int     ix, res, digs;
4759
-  mp_digit mu;
4086
+/* ISO C code */
4087
+#define MONT_START 
4088
+#define MONT_FINI
4089
+#define LOOP_END
4090
+#define LOOP_START \
4091
+   mu = c[x] * mp
4760 4092
 
4761
-  /* can the fast reduction [comba] method be used?
4762
-   *
4763
-   * Note that unlike in mul you're safely allowed *less*
4764
-   * than the available columns [255 per default] since carries
4765
-   * are fixed up in the inner loop.
4766
-   */
4767
-  digs = n->used * 2 + 1;
4768
-  if ((digs < MP_WARRAY) &&
4769
-      n->used <
4770
-      (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
4771
-    return fast_mp_montgomery_reduce (x, n, rho);
4772
-  }
4093
+#define INNERMUL                                      \
4094
+   do { fp_word t;                                    \
4095
+   _c[0] = t  = ((fp_word)_c[0] + (fp_word)cy) +      \
4096
+                (((fp_word)mu) * ((fp_word)*tmpm++)); \
4097
+   cy = (t >> DIGIT_BIT);                             \
4098
+   } while (0)
4773 4099
 
4774
-  /* grow the input as required */
4775
-  if (x->alloc < digs) {
4776
-    if ((res = mp_grow (x, digs)) != MP_OKAY) {
4777
-      return res;
4778
-    }
4779
-  }
4780
-  x->used = digs;
4781
-
4782
-  for (ix = 0; ix < n->used; ix++) {
4783
-    /* mu = ai * rho mod b
4784
-     *
4785
-     * The value of rho must be precalculated via
4786
-     * montgomery_setup() such that
4787
-     * it equals -1/n0 mod b this allows the
4788
-     * following inner loop to reduce the
4789
-     * input one digit at a time
4790
-     */
4791
-    mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
4100
+#define PROPCARRY \
4101
+   do { fp_digit t = _c[0] += cy; cy = (t < cy); } while (0)
4792 4102
 
4793
-    /* a = a + mu * m * b**i */
4794
-    {
4795
-      register int iy;
4796
-      register mp_digit *tmpn, *tmpx, u;
4797
-      register mp_word r;
4103
+#endif
4104
+/******************************************************************/
4798 4105
 
4799
-      /* alias for digits of the modulus */
4800
-      tmpn = n->dp;
4801 4106
 
4802
-      /* alias for the digits of x [the input] */
4803
-      tmpx = x->dp + ix;
4107
+#define LO  0
4804 4108
 
4805
-      /* set the carry to zero */
4806
-      u = 0;
4109
+#ifdef TFM_SMALL_MONT_SET
4110
+#include "fp_mont_small.i"
4111
+#endif
4807 4112
 
4808
-      /* Multiply and add in place */
4809
-      for (iy = 0; iy < n->used; iy++) {
4810
-        /* compute product and sum */
4811
-        r       = ((mp_word)mu) * ((mp_word)*tmpn++) +
4812
-                  ((mp_word) u) + ((mp_word) * tmpx);
4113
+/* computes x/R (mod N), i.e. x * R**-1 mod N, via Montgomery Reduction */
4114
+void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
4115
+{
4116
+   fp_digit c[FP_SIZE], *_c, *tmpm, mu;
4117
+   int      oldused, x, y, pa;
4813 4118
 
4814
-        /* get carry */
4815
-        u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
4119
+   /* bail if too large */
4120
+   if (m->used > (FP_SIZE/2)) {
4121
+      return;
4122
+   }
4816 4123
 
4817
-        /* fix digit */
4818
-        *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
4819
-      }
4820
-      /* At this point the ix'th digit of x should be zero */
4124
+#ifdef TFM_SMALL_MONT_SET
4125
+   if (m->used <= 16) {
4126
+      fp_montgomery_reduce_small(a, m, mp);
4127
+      return;
4128
+   }
4129
+#endif
4821 4130
 
4131
+#if defined(USE_MEMSET)
4132
+   /* now zero the buff */
4133
+   memset(c, 0, sizeof c);
4134
+#endif
4135
+   pa = m->used;
4822 4136
 
4823
-      /* propagate carries upwards as required*/
4824
-      while (u) {
4825
-        *tmpx   += u;
4826
-        u        = *tmpx >> DIGIT_BIT;
4827
-        *tmpx++ &= MP_MASK;
4828
-      }
4829
-    }
4137
+   /* copy the input */
4138
+   oldused = a->used;
4139
+   for (x = 0; x < oldused; x++) {
4140
+       c[x] = a->dp[x];
4141
+   }
4142
+#if !defined(USE_MEMSET)
4143
+   for (; x < 2*pa+1; x++) {
4144
+       c[x] = 0;
4145
+   }
4146
+#endif
4147
+   MONT_START;
4148
+
4149
+   for (x = 0; x < pa; x++) {
4150
+       fp_digit cy = 0;
4151
+       /* get Mu for this round */
4152
+       LOOP_START;
4153
+       _c   = c + x;
4154
+       tmpm = m->dp;
4155
+       y = 0;
4156
+       #if (defined(TFM_SSE2) || defined(TFM_X86_64))
4157
+        for (; y < (pa & ~7); y += 8) {
4158
+              INNERMUL8;
4159
+              _c   += 8;
4160
+              tmpm += 8;
4161
+           }
4162
+       #endif
4163
+
4164
+       for (; y < pa; y++) {
4165
+          INNERMUL;
4166
+          ++_c;
4167
+       }
4168
+       LOOP_END;
4169
+       while (cy) {
4170
+           PROPCARRY;
4171
+           ++_c;
4172
+       }
4173
+  }         
4174
+
4175
+  /* now copy out */
4176
+  _c   = c + pa;
4177
+  tmpm = a->dp;
4178
+  for (x = 0; x < pa+1; x++) {
4179
+     *tmpm++ = *_c++;
4830 4180
   }
4831 4181
 
4832
-  /* at this point the n.used'th least
4833
-   * significant digits of x are all zero
4834
-   * which means we can shift x to the
4835
-   * right by n.used digits and the
4836
-   * residue is unchanged.
4837
-   */
4182
+  for (; x < oldused; x++)   {
4183
+     *tmpm++ = 0;
4184
+  }
4838 4185
 
4839
-  /* x = x/b**n.used */
4840
-  mp_clamp(x);
4841
-  mp_rshd (x, n->used);
4186
+  MONT_FINI;
4842 4187
 
4843
-  /* if x >= n then x = x - n */
4844
-  if (mp_cmp_mag (x, n) != MP_LT) {
4845
-    return s_mp_sub (x, n, x);
4188
+  a->used = pa+1;
4189
+  fp_clamp(a);
4190
+  
4191
+  /* if A >= m then A = A - m */
4192
+  if (fp_cmp_mag (a, m) != FP_LT) {
4193
+    s_fp_sub (a, m, a);
4846 4194
   }
4847
-
4848
-  return MP_OKAY;
4849 4195
 }
4850
-#endif
4851 4196
 
4852
-/* $Source: /cvs/libtom/libtommath/bn_mp_montgomery_reduce.c,v $ */
4853
-/* $Revision: 1.3 $ */
4854
-/* $Date: 2006/03/31 14:18:44 $ */
4855 4197
 
4856
-/* End: bn_mp_montgomery_reduce.c */
4198
+/* $Source: /cvs/libtom/tomsfastmath/src/mont/fp_montgomery_reduce.c,v $ */
4199
+/* $Revision: 1.2 $ */
4200
+/* $Date: 2007/03/14 23:47:42 $ */
4857 4201
 
4858
-/* Start: bn_mp_montgomery_setup.c */
4859
-#include <bignum.h>
4860
-#ifdef BN_MP_MONTGOMERY_SETUP_C
4861
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
4862
- *
4863
- * LibTomMath is a library that provides multiple-precision
4864
- * integer arithmetic as well as number theoretic functionality.
4865
- *
4866
- * The library was designed directly after the MPI library by
4867
- * Michael Fromberger but has been written from scratch with
4868
- * additional optimizations in place.
4869
- *
4870
- * The library is free for all purposes without any express
4871
- * guarantee it works.
4202
+/* End: fp_montgomery_reduce.c */
4203
+
4204
+/* Start: fp_montgomery_setup.c */
4205
+/* TomsFastMath, a fast ISO C bignum library.
4206
+ * 
4207
+ * This project is meant to fill in where LibTomMath
4208
+ * falls short.  That is speed ;-)
4872 4209
  *
4873
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
4210
+ * This project is public domain and free for all purposes.
4211
+ * 
4212
+ * Tom St Denis, tomstdenis@gmail.com
4874 4213
  */
4214
+#include "bignum_fast.h"
4875 4215
 
4876
-/* setups the montgomery reduction stuff */
4877
-int
4878
-mp_montgomery_setup (mp_int * n, mp_digit * rho)
4216
+/* sets up the Montgomery reduction */
4217
+int fp_montgomery_setup(fp_int *a, fp_digit *rho)
4879 4218
 {
4880
-  mp_digit x, b;
4219
+  fp_digit x, b;
4881 4220
 
4882 4221
 /* fast inversion mod 2**k
4883 4222
  *
... ...
@@ -4887,142 +2298,186 @@ mp_montgomery_setup (mp_int * n, mp_digit * rho)
4887 4887
  *                    =>  2*X*A - X*X*A*A = 1
4888 4888
  *                    =>  2*(1) - (1)     = 1
4889 4889
  */
4890
-  b = n->dp[0];
4890
+  b = a->dp[0];
4891 4891
 
4892 4892
   if ((b & 1) == 0) {
4893
-    return MP_VAL;
4893
+    return FP_VAL;
4894 4894
   }
4895 4895
 
4896 4896
   x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
4897 4897
   x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
4898
-#if !defined(MP_8BIT)
4899 4898
   x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
4900
-#endif
4901
-#if defined(MP_64BIT) || !(defined(MP_8BIT) || defined(MP_16BIT))
4902 4899
   x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
4903
-#endif
4904
-#ifdef MP_64BIT
4900
+#ifdef FP_64BIT
4905 4901
   x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
4906 4902
 #endif
4907 4903
 
4908 4904
   /* rho = -1/m mod b */
4909
-  *rho = (((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK;
4905
+  *rho = (((fp_word) 1 << ((fp_word) DIGIT_BIT)) - ((fp_word)x));
4910 4906
 
4911
-  return MP_OKAY;
4907
+  return FP_OKAY;
4912 4908
 }
4913
-#endif
4914 4909
 
4915
-/* $Source: /cvs/libtom/libtommath/bn_mp_montgomery_setup.c,v $ */
4916
-/* $Revision: 1.3 $ */
4917
-/* $Date: 2006/03/31 14:18:44 $ */
4918 4910
 
4919
-/* End: bn_mp_montgomery_setup.c */
4911
+/* $Source: /cvs/libtom/tomsfastmath/src/mont/fp_montgomery_setup.c,v $ */
4912
+/* $Revision: 1.1 $ */
4913
+/* $Date: 2006/12/31 21:25:53 $ */
4920 4914
 
4921
-/* Start: bn_mp_mul.c */
4922
-#include <bignum.h>
4923
-#ifdef BN_MP_MUL_C
4924
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
4925
- *
4926
- * LibTomMath is a library that provides multiple-precision
4927
- * integer arithmetic as well as number theoretic functionality.
4928
- *
4929
- * The library was designed directly after the MPI library by
4930
- * Michael Fromberger but has been written from scratch with
4931
- * additional optimizations in place.
4932
- *
4933
- * The library is free for all purposes without any express
4934
- * guarantee it works.
4915
+/* End: fp_montgomery_setup.c */
4916
+
4917
+/* Start: fp_mul.c */
4918
+/* TomsFastMath, a fast ISO C bignum library.
4919
+ * 
4920
+ * This project is meant to fill in where LibTomMath
4921
+ * falls short.  That is speed ;-)
4935 4922
  *
4936
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
4923
+ * This project is public domain and free for all purposes.
4924
+ * 
4925
+ * Tom St Denis, tomstdenis@gmail.com
4937 4926
  */
4927
+#include "bignum_fast.h"
4938 4928
 
4939
-/* high level multiplication (handles sign) */
4940
-int mp_mul (mp_int * a, mp_int * b, mp_int * c)
4929
+/* c = a * b */
4930
+void fp_mul(fp_int *A, fp_int *B, fp_int *C)
4941 4931
 {
4942
-  int     res, neg;
4943
-  neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
4944
-
4945
-  /* use Toom-Cook? */
4946
-#ifdef BN_MP_TOOM_MUL_C
4947
-  if (MIN (a->used, b->used) >= TOOM_MUL_CUTOFF) {
4948
-    res = mp_toom_mul(a, b, c);
4949
-  } else 
4932
+    int   y, yy;
4933
+
4934
+    /* call generic if we're out of range */
4935
+    if (A->used + B->used > FP_SIZE) {
4936
+       fp_mul_comba(A, B, C);
4937
+       return ;
4938
+    }
4939
+
4940
+     y  = MAX(A->used, B->used);
4941
+     yy = MIN(A->used, B->used);
4942
+    /* pick a comba (unrolled 4/8/16/32 x or rolled) based on the size
4943
+       of the largest input.  We also want to avoid doing excess mults if the 
4944
+       inputs are not close to the next power of two.  That is, for example,
4945
+       if say y=17 then we would do (32-17)^2 = 225 unneeded multiplications 
4946
+    */
4947
+
4948
+#ifdef TFM_MUL3
4949
+        if (y <= 3) {
4950
+           fp_mul_comba3(A,B,C);
4951
+           return;
4952
+        }
4950 4953
 #endif
4951
-#ifdef BN_MP_KARATSUBA_MUL_C
4952
-  /* use Karatsuba? */
4953
-  if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {
4954
-    res = mp_karatsuba_mul (a, b, c);
4955
-  } else 
4954
+#ifdef TFM_MUL4
4955
+        if (y == 4) {
4956
+           fp_mul_comba4(A,B,C);
4957
+           return;
4958
+        }
4956 4959
 #endif
4957
-  {
4958
-    /* can we use the fast multiplier?
4959
-     *
4960
-     * The fast multiplier can be used if the output will 
4961
-     * have less than MP_WARRAY digits and the number of 
4962
-     * digits won't affect carry propagation
4963
-     */
4964
-    int     digs = a->used + b->used + 1;
4965
-
4966
-#ifdef BN_FAST_S_MP_MUL_DIGS_C
4967
-    if ((digs < MP_WARRAY) &&
4968
-        MIN(a->used, b->used) <= 
4969
-        (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
4970
-      res = fast_s_mp_mul_digs (a, b, c, digs);
4971
-    } else 
4960
+#ifdef TFM_MUL6
4961
+        if (y <= 6) {
4962
+           fp_mul_comba6(A,B,C);
4963
+           return;
4964
+        }
4972 4965
 #endif
4973
-#ifdef BN_S_MP_MUL_DIGS_C
4974
-      res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
4975
-#else
4976
-      res = MP_VAL;
4966
+#ifdef TFM_MUL7
4967
+        if (y == 7) {
4968
+           fp_mul_comba7(A,B,C);
4969
+           return;
4970
+        }
4971
+#endif
4972
+#ifdef TFM_MUL8
4973
+        if (y == 8) {
4974
+           fp_mul_comba8(A,B,C);
4975
+           return;
4976
+        }
4977
+#endif
4978
+#ifdef TFM_MUL9
4979
+        if (y == 9) {
4980
+           fp_mul_comba9(A,B,C);
4981
+           return;
4982
+        }
4983
+#endif
4984
+#ifdef TFM_MUL12
4985
+        if (y <= 12) {
4986
+           fp_mul_comba12(A,B,C);
4987
+           return;
4988
+        }
4989
+#endif
4990
+#ifdef TFM_MUL17
4991
+        if (y <= 17) {
4992
+           fp_mul_comba17(A,B,C);
4993
+           return;
4994
+        }
4977 4995
 #endif
4978 4996
 
4979
-  }
4980
-  c->sign = (c->used > 0) ? neg : MP_ZPOS;
4981
-  return res;
4982
-}
4997
+#ifdef TFM_SMALL_SET
4998
+        if (y <= 16) {
4999
+           fp_mul_comba_small(A,B,C);
5000
+           return;
5001
+        }
5002
+#endif        
5003
+#if defined(TFM_MUL20)
5004
+        if (y <= 20) {
5005
+           fp_mul_comba20(A,B,C);
5006
+           return;
5007
+        }
5008
+#endif
5009
+#if defined(TFM_MUL24)
5010
+        if (yy >= 16 && y <= 24) {
5011
+           fp_mul_comba24(A,B,C);
5012
+           return;
5013
+        }
5014
+#endif
5015
+#if defined(TFM_MUL28)
5016
+        if (yy >= 20 && y <= 28) {
5017
+           fp_mul_comba28(A,B,C);
5018
+           return;
5019
+        }
5020
+#endif
5021
+#if defined(TFM_MUL32)
5022
+        if (yy >= 24 && y <= 32) {
5023
+           fp_mul_comba32(A,B,C);
5024
+           return;
5025
+        }
4983 5026
 #endif
5027
+#if defined(TFM_MUL48)
5028
+        if (yy >= 40 && y <= 48) {
5029
+           fp_mul_comba48(A,B,C);
5030
+           return;
5031
+        }
5032
+#endif        
5033
+#if defined(TFM_MUL64)
5034
+        if (yy >= 56 && y <= 64) {
5035
+           fp_mul_comba64(A,B,C);
5036
+           return;
5037
+        }
5038
+#endif
5039
+        fp_mul_comba(A,B,C);
5040
+}
4984 5041
 
4985
-/* $Source: /cvs/libtom/libtommath/bn_mp_mul.c,v $ */
4986
-/* $Revision: 1.3 $ */
4987
-/* $Date: 2006/03/31 14:18:44 $ */
4988 5042
 
4989
-/* End: bn_mp_mul.c */
5043
+/* $Source: /cvs/libtom/tomsfastmath/src/mul/fp_mul.c,v $ */
5044
+/* $Revision: 1.1 $ */
5045
+/* $Date: 2006/12/31 21:25:53 $ */
4990 5046
 
4991
-/* Start: bn_mp_mul_2.c */
4992
-#include <bignum.h>
4993
-#ifdef BN_MP_MUL_2_C
4994
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
4995
- *
4996
- * LibTomMath is a library that provides multiple-precision
4997
- * integer arithmetic as well as number theoretic functionality.
4998
- *
4999
- * The library was designed directly after the MPI library by
5000
- * Michael Fromberger but has been written from scratch with
5001
- * additional optimizations in place.
5002
- *
5003
- * The library is free for all purposes without any express
5004
- * guarantee it works.
5047
+/* End: fp_mul.c */
5048
+
5049
+/* Start: fp_mul_2.c */
5050
+/* TomsFastMath, a fast ISO C bignum library.
5051
+ * 
5052
+ * This project is meant to fill in where LibTomMath
5053
+ * falls short.  That is speed ;-)
5005 5054
  *
5006
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
5055
+ * This project is public domain and free for all purposes.
5056
+ * 
5057
+ * Tom St Denis, tomstdenis@gmail.com
5007 5058
  */
5059
+#include "bignum_fast.h"
5008 5060
 
5009
-/* b = a*2 */
5010
-int mp_mul_2(mp_int * a, mp_int * b)
5061
+void fp_mul_2(fp_int * a, fp_int * b)
5011 5062
 {
5012
-  int     x, res, oldused;
5013
-
5014
-  /* grow to accomodate result */
5015
-  if (b->alloc < a->used + 1) {
5016
-    if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
5017
-      return res;
5018
-    }
5019
-  }
5020
-
5063
+  int     x, oldused;
5064
+   
5021 5065
   oldused = b->used;
5022 5066
   b->used = a->used;
5023 5067
 
5024 5068
   {
5025
-    register mp_digit r, rr, *tmpa, *tmpb;
5069
+    register fp_digit r, rr, *tmpa, *tmpb;
5026 5070
 
5027 5071
     /* alias for source */
5028 5072
     tmpa = a->dp;
... ...
@@ -5037,10 +2492,10 @@ int mp_mul_2(mp_int * a, mp_int * b)
5037 5037
       /* get what will be the *next* carry bit from the 
5038 5038
        * MSB of the current digit 
5039 5039
        */
5040
-      rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
5040
+      rr = *tmpa >> ((fp_digit)(DIGIT_BIT - 1));
5041 5041
       
5042 5042
       /* now shift up this digit, add in the carry [from the previous] */
5043
-      *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK;
5043
+      *tmpb++ = ((*tmpa++ << ((fp_digit)1)) | r);
5044 5044
       
5045 5045
       /* copy the carry that would be from the source 
5046 5046
        * digit into the next iteration 
... ...
@@ -5049,7 +2504,7 @@ int mp_mul_2(mp_int * a, mp_int * b)
5049 5049
     }
5050 5050
 
5051 5051
     /* new leading digit? */
5052
-    if (r != 0) {
5052
+    if (r != 0 && b->used != (FP_SIZE-1)) {
5053 5053
       /* add a MSB which is always 1 at this point */
5054 5054
       *tmpb = 1;
5055 5055
       ++(b->used);
... ...
@@ -5064,690 +2519,4287 @@ int mp_mul_2(mp_int * a, mp_int * b)
5064 5064
     }
5065 5065
   }
5066 5066
   b->sign = a->sign;
5067
-  return MP_OKAY;
5068 5067
 }
5069
-#endif
5070 5068
 
5071
-/* $Source: /cvs/libtom/libtommath/bn_mp_mul_2.c,v $ */
5072
-/* $Revision: 1.3 $ */
5073
-/* $Date: 2006/03/31 14:18:44 $ */
5074 5069
 
5075
-/* End: bn_mp_mul_2.c */
5070
+/* $Source: /cvs/libtom/tomsfastmath/src/mul/fp_mul_2.c,v $ */
5071
+/* $Revision: 1.1 $ */
5072
+/* $Date: 2006/12/31 21:25:53 $ */
5076 5073
 
5077
-/* Start: bn_mp_mul_2d.c */
5078
-#include <bignum.h>
5079
-#ifdef BN_MP_MUL_2D_C
5080
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
5081
- *
5082
- * LibTomMath is a library that provides multiple-precision
5083
- * integer arithmetic as well as number theoretic functionality.
5084
- *
5085
- * The library was designed directly after the MPI library by
5086
- * Michael Fromberger but has been written from scratch with
5087
- * additional optimizations in place.
5088
- *
5089
- * The library is free for all purposes without any express
5090
- * guarantee it works.
5074
+/* End: fp_mul_2.c */
5075
+
5076
+/* Start: fp_mul_2d.c */
5077
+/* TomsFastMath, a fast ISO C bignum library.
5078
+ * 
5079
+ * This project is meant to fill in where LibTomMath
5080
+ * falls short.  That is speed ;-)
5091 5081
  *
5092
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
5082
+ * This project is public domain and free for all purposes.
5083
+ * 
5084
+ * Tom St Denis, tomstdenis@gmail.com
5093 5085
  */
5086
+#include "bignum_fast.h"
5094 5087
 
5095
-/* shift left by a certain bit count */
5096
-int mp_mul_2d (mp_int * a, int b, mp_int * c)
5088
+/* c = a * 2**b */
5089
+void fp_mul_2d(fp_int *a, int b, fp_int *c)
5097 5090
 {
5098
-  mp_digit d;
5099
-  int      res;
5091
+   fp_digit carry, carrytmp, shift;
5092
+   int x;
5100 5093
 
5101
-  /* copy */
5102
-  if (a != c) {
5103
-     if ((res = mp_copy (a, c)) != MP_OKAY) {
5104
-       return res;
5105
-     }
5106
-  }
5094
+   /* copy it */
5095
+   fp_copy(a, c);
5107 5096
 
5108
-  if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
5109
-     if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
5110
-       return res;
5111
-     }
5112
-  }
5097
+   /* handle whole digits */
5098
+   if (b >= DIGIT_BIT) {
5099
+      fp_lshd(c, b/DIGIT_BIT);
5100
+   }
5101
+   b %= DIGIT_BIT;
5102
+
5103
+   /* shift the digits */
5104
+   if (b != 0) {
5105
+      carry = 0;   
5106
+      shift = DIGIT_BIT - b;
5107
+      for (x = 0; x < c->used; x++) {
5108
+          carrytmp = c->dp[x] >> shift;
5109
+          c->dp[x] = (c->dp[x] << b) + carry;
5110
+          carry = carrytmp;
5111
+      }
5112
+      /* store last carry if room */
5113
+      if (carry && x < FP_SIZE) {
5114
+         c->dp[c->used++] = carry;
5115
+      }
5116
+   }
5117
+   fp_clamp(c);
5118
+}
5113 5119
 
5114
-  /* shift by as many digits in the bit count */
5115
-  if (b >= (int)DIGIT_BIT) {
5116
-    if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
5117
-      return res;
5118
-    }
5119
-  }
5120 5120
 
5121
-  /* shift any bit count < DIGIT_BIT */
5122
-  d = (mp_digit) (b % DIGIT_BIT);
5123
-  if (d != 0) {
5124
-    register mp_digit *tmpc, shift, mask, r, rr;
5125
-    register int x;
5121
+/* $Source: /cvs/libtom/tomsfastmath/src/mul/fp_mul_2d.c,v $ */
5122
+/* $Revision: 1.1 $ */
5123
+/* $Date: 2006/12/31 21:25:53 $ */
5126 5124
 
5127
-    /* bitmask for carries */
5128
-    mask = (((mp_digit)1) << d) - 1;
5125
+/* End: fp_mul_2d.c */
5129 5126
 
5130
-    /* shift for msbs */
5131
-    shift = DIGIT_BIT - d;
5127
+/* Start: fp_mul_comba.c */
5128
+/* TomsFastMath, a fast ISO C bignum library.
5129
+ * 
5130
+ * This project is meant to fill in where LibTomMath
5131
+ * falls short.  That is speed ;-)
5132
+ *
5133
+ * This project is public domain and free for all purposes.
5134
+ * 
5135
+ * Tom St Denis, tomstdenis@gmail.com
5136
+ */
5132 5137
 
5133
-    /* alias */
5134
-    tmpc = c->dp;
5138
+/* About this file...
5135 5139
 
5136
-    /* carry */
5137
-    r    = 0;
5138
-    for (x = 0; x < c->used; x++) {
5139
-      /* get the higher bits of the current word */
5140
-      rr = (*tmpc >> shift) & mask;
5140
+*/
5141 5141
 
5142
-      /* shift the current word and OR in the carry */
5143
-      *tmpc = ((*tmpc << d) | r) & MP_MASK;
5144
-      ++tmpc;
5142
+#include "bignum_fast.h"
5145 5143
 
5146
-      /* set the carry to the carry bits of the current word */
5147
-      r = rr;
5148
-    }
5149
-    
5150
-    /* set final carry */
5151
-    if (r != 0) {
5152
-       c->dp[(c->used)++] = r;
5153
-    }
5154
-  }
5155
-  mp_clamp (c);
5156
-  return MP_OKAY;
5157
-}
5144
+#if defined(TFM_PRESCOTT) && defined(TFM_SSE2)
5145
+   #undef TFM_SSE2
5146
+   #define TFM_X86
5158 5147
 #endif
5159 5148
 
5160
-/* $Source: /cvs/libtom/libtommath/bn_mp_mul_2d.c,v $ */
5161
-/* $Revision: 1.3 $ */
5162
-/* $Date: 2006/03/31 14:18:44 $ */
5149
+/* these are the combas.  Worship them. */
5150
+#if defined(TFM_X86)
5151
+/* Generic x86 optimized code */
5152
+
5153
+/* anything you need at the start */
5154
+#define COMBA_START
5155
+
5156
+/* clear the chaining variables */
5157
+#define COMBA_CLEAR \
5158
+   c0 = c1 = c2 = 0;
5159
+
5160
+/* forward the carry to the next digit */
5161
+#define COMBA_FORWARD \
5162
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
5163
+
5164
+/* store the first sum */
5165
+#define COMBA_STORE(x) \
5166
+   x = c0;
5163 5167
 
5164
-/* End: bn_mp_mul_2d.c */
5168
+/* store the second sum [carry] */
5169
+#define COMBA_STORE2(x) \
5170
+   x = c1;
5165 5171
 
5166
-/* Start: bn_mp_mul_d.c */
5167
-#include <bignum.h>
5168
-#ifdef BN_MP_MUL_D_C
5169
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
5170
- *
5171
- * LibTomMath is a library that provides multiple-precision
5172
- * integer arithmetic as well as number theoretic functionality.
5173
- *
5174
- * The library was designed directly after the MPI library by
5175
- * Michael Fromberger but has been written from scratch with
5176
- * additional optimizations in place.
5177
- *
5178
- * The library is free for all purposes without any express
5179
- * guarantee it works.
5180
- *
5181
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
5182
- */
5172
+/* anything you need at the end */
5173
+#define COMBA_FINI
5183 5174
 
5184
-/* multiply by a digit */
5185
-int
5186
-mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
5187
-{
5188
-  mp_digit u, *tmpa, *tmpc;
5189
-  mp_word  r;
5190
-  int      ix, res, olduse;
5175
+/* this should multiply i and j  */
5176
+#define MULADD(i, j)                                      \
5177
+asm(                                                      \
5178
+     "movl  %6,%%eax     \n\t"                            \
5179
+     "mull  %7           \n\t"                            \
5180
+     "addl  %%eax,%0     \n\t"                            \
5181
+     "adcl  %%edx,%1     \n\t"                            \
5182
+     "adcl  $0,%2        \n\t"                            \
5183
+     :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j)  :"%eax","%edx","%cc");
5184
+
5185
+#elif defined(TFM_X86_64)
5186
+/* x86-64 optimized */
5191 5187
 
5192
-  /* make sure c is big enough to hold a*b */
5193
-  if (c->alloc < a->used + 1) {
5194
-    if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
5195
-      return res;
5196
-    }
5197
-  }
5188
+/* anything you need at the start */
5189
+#define COMBA_START
5198 5190
 
5199
-  /* get the original destinations used count */
5200
-  olduse = c->used;
5191
+/* clear the chaining variables */
5192
+#define COMBA_CLEAR \
5193
+   c0 = c1 = c2 = 0;
5201 5194
 
5202
-  /* set the sign */
5203
-  c->sign = a->sign;
5195
+/* forward the carry to the next digit */
5196
+#define COMBA_FORWARD \
5197
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
5204 5198
 
5205
-  /* alias for a->dp [source] */
5206
-  tmpa = a->dp;
5199
+/* store the first sum */
5200
+#define COMBA_STORE(x) \
5201
+   x = c0;
5207 5202
 
5208
-  /* alias for c->dp [dest] */
5209
-  tmpc = c->dp;
5203
+/* store the second sum [carry] */
5204
+#define COMBA_STORE2(x) \
5205
+   x = c1;
5210 5206
 
5211
-  /* zero carry */
5212
-  u = 0;
5207
+/* anything you need at the end */
5208
+#define COMBA_FINI
5213 5209
 
5214
-  /* compute columns */
5215
-  for (ix = 0; ix < a->used; ix++) {
5216
-    /* compute product and carry sum for this term */
5217
-    r       = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b);
5210
+/* this should multiply i and j  */
5211
+#define MULADD(i, j)                                      \
5212
+asm  (                                                    \
5213
+     "movq  %6,%%rax     \n\t"                            \
5214
+     "mulq  %7           \n\t"                            \
5215
+     "addq  %%rax,%0     \n\t"                            \
5216
+     "adcq  %%rdx,%1     \n\t"                            \
5217
+     "adcq  $0,%2        \n\t"                            \
5218
+     :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j)  :"%rax","%rdx","%cc");
5218 5219
 
5219
-    /* mask off higher bits to get a single digit */
5220
-    *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
5220
+#elif defined(TFM_SSE2)
5221
+/* use SSE2 optimizations */
5221 5222
 
5222
-    /* send carry into next iteration */
5223
-    u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
5224
-  }
5223
+/* anything you need at the start */
5224
+#define COMBA_START
5225 5225
 
5226
-  /* store final carry [if any] and increment ix offset  */
5227
-  *tmpc++ = u;
5228
-  ++ix;
5226
+/* clear the chaining variables */
5227
+#define COMBA_CLEAR \
5228
+   c0 = c1 = c2 = 0;
5229 5229
 
5230
-  /* now zero digits above the top */
5231
-  while (ix++ < olduse) {
5232
-     *tmpc++ = 0;
5233
-  }
5230
+/* forward the carry to the next digit */
5231
+#define COMBA_FORWARD \
5232
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
5234 5233
 
5235
-  /* set used count */
5236
-  c->used = a->used + 1;
5237
-  mp_clamp(c);
5234
+/* store the first sum */
5235
+#define COMBA_STORE(x) \
5236
+   x = c0;
5238 5237
 
5239
-  return MP_OKAY;
5240
-}
5241
-#endif
5238
+/* store the second sum [carry] */
5239
+#define COMBA_STORE2(x) \
5240
+   x = c1;
5242 5241
 
5243
-/* $Source: /cvs/libtom/libtommath/bn_mp_mul_d.c,v $ */
5244
-/* $Revision: 1.3 $ */
5245
-/* $Date: 2006/03/31 14:18:44 $ */
5242
+/* anything you need at the end */
5243
+#define COMBA_FINI \
5244
+   asm("emms");
5246 5245
 
5247
-/* End: bn_mp_mul_d.c */
5246
+/* this should multiply i and j  */
5247
+#define MULADD(i, j)                                     \
5248
+asm(                                                     \
5249
+    "movd  %6,%%mm0     \n\t"                            \
5250
+    "movd  %7,%%mm1     \n\t"                            \
5251
+    "pmuludq %%mm1,%%mm0\n\t"                            \
5252
+    "movd  %%mm0,%%eax  \n\t"                            \
5253
+    "psrlq $32,%%mm0    \n\t"                            \
5254
+    "addl  %%eax,%0     \n\t"                            \
5255
+    "movd  %%mm0,%%eax  \n\t"                            \
5256
+    "adcl  %%eax,%1     \n\t"                            \
5257
+    "adcl  $0,%2        \n\t"                            \
5258
+    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j)  :"%eax","%cc");
5248 5259
 
5249
-/* Start: bn_mp_mulmod.c */
5250
-#include <bignum.h>
5251
-#ifdef BN_MP_MULMOD_C
5252
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
5253
- *
5254
- * LibTomMath is a library that provides multiple-precision
5255
- * integer arithmetic as well as number theoretic functionality.
5256
- *
5257
- * The library was designed directly after the MPI library by
5258
- * Michael Fromberger but has been written from scratch with
5259
- * additional optimizations in place.
5260
- *
5261
- * The library is free for all purposes without any express
5262
- * guarantee it works.
5263
- *
5264
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
5265
- */
5260
+#elif defined(TFM_ARM)
5261
+/* ARM code */
5266 5262
 
5267
-/* d = a * b (mod c) */
5268
-int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
5269
-{
5270
-  int     res;
5271
-  mp_int  t;
5263
+#define COMBA_START 
5264
+
5265
+#define COMBA_CLEAR \
5266
+   c0 = c1 = c2 = 0;
5272 5267
 
5273
-  if ((res = mp_init (&t)) != MP_OKAY) {
5274
-    return res;
5275
-  }
5268
+#define COMBA_FORWARD \
5269
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
5276 5270
 
5277
-  if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
5278
-    mp_clear (&t);
5279
-    return res;
5280
-  }
5281
-  res = mp_mod (&t, c, d);
5282
-  mp_clear (&t);
5283
-  return res;
5284
-}
5285
-#endif
5271
+#define COMBA_STORE(x) \
5272
+   x = c0;
5273
+
5274
+#define COMBA_STORE2(x) \
5275
+   x = c1;
5276
+
5277
+#define COMBA_FINI
5286 5278
 
5287
-/* $Source: /cvs/libtom/libtommath/bn_mp_mulmod.c,v $ */
5288
-/* $Revision: 1.4 $ */
5289
-/* $Date: 2006/03/31 14:18:44 $ */
5279
+#define MULADD(i, j)                                          \
5280
+asm(                                                          \
5281
+"  UMULL  r0,r1,%6,%7           \n\t"                         \
5282
+"  ADDS   %0,%0,r0              \n\t"                         \
5283
+"  ADCS   %1,%1,r1              \n\t"                         \
5284
+"  ADC    %2,%2,#0              \n\t"                         \
5285
+:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc");
5290 5286
 
5291
-/* End: bn_mp_mulmod.c */
5287
+#elif defined(TFM_PPC32)
5288
+/* For 32-bit PPC */
5292 5289
 
5293
-/* Start: bn_mp_n_root.c */
5294
-#include <bignum.h>
5295
-#ifdef BN_MP_N_ROOT_C
5296
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
5297
- *
5298
- * LibTomMath is a library that provides multiple-precision
5299
- * integer arithmetic as well as number theoretic functionality.
5300
- *
5301
- * The library was designed directly after the MPI library by
5302
- * Michael Fromberger but has been written from scratch with
5303
- * additional optimizations in place.
5304
- *
5305
- * The library is free for all purposes without any express
5306
- * guarantee it works.
5307
- *
5308
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
5309
- */
5290
+#define COMBA_START
5310 5291
 
5311
-/* find the n'th root of an integer 
5312
- *
5313
- * Result found such that (c)**b <= a and (c+1)**b > a 
5314
- *
5315
- * This algorithm uses Newton's approximation 
5316
- * x[i+1] = x[i] - f(x[i])/f'(x[i]) 
5317
- * which will find the root in log(N) time where 
5318
- * each step involves a fair bit.  This is not meant to 
5319
- * find huge roots [square and cube, etc].
5320
- */
5321
-int mp_n_root (mp_int * a, mp_digit b, mp_int * c)
5322
-{
5323
-  mp_int  t1, t2, t3;
5324
-  int     res, neg;
5292
+#define COMBA_CLEAR \
5293
+   c0 = c1 = c2 = 0;
5325 5294
 
5326
-  /* input must be positive if b is even */
5327
-  if ((b & 1) == 0 && a->sign == MP_NEG) {
5328
-    return MP_VAL;
5329
-  }
5295
+#define COMBA_FORWARD \
5296
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
5330 5297
 
5331
-  if ((res = mp_init (&t1)) != MP_OKAY) {
5332
-    return res;
5333
-  }
5298
+#define COMBA_STORE(x) \
5299
+   x = c0;
5334 5300
 
5335
-  if ((res = mp_init (&t2)) != MP_OKAY) {
5336
-    goto LBL_T1;
5337
-  }
5301
+#define COMBA_STORE2(x) \
5302
+   x = c1;
5338 5303
 
5339
-  if ((res = mp_init (&t3)) != MP_OKAY) {
5340
-    goto LBL_T2;
5341
-  }
5304
+#define COMBA_FINI 
5305
+   
5306
+/* untested: will mulhwu change the flags?  Docs say no */
5307
+#define MULADD(i, j)              \
5308
+asm(                              \
5309
+   " mullw  16,%6,%7       \n\t" \
5310
+   " addc   %0,%0,16       \n\t" \
5311
+   " mulhwu 16,%6,%7       \n\t" \
5312
+   " adde   %1,%1,16       \n\t" \
5313
+   " addze  %2,%2          \n\t" \
5314
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");
5342 5315
 
5343
-  /* if a is negative fudge the sign but keep track */
5344
-  neg     = a->sign;
5345
-  a->sign = MP_ZPOS;
5316
+#elif defined(TFM_PPC64)
5317
+/* For 64-bit PPC */
5346 5318
 
5347
-  /* t2 = 2 */
5348
-  mp_set (&t2, 2);
5319
+#define COMBA_START
5349 5320
 
5350
-  do {
5351
-    /* t1 = t2 */
5352
-    if ((res = mp_copy (&t2, &t1)) != MP_OKAY) {
5353
-      goto LBL_T3;
5354
-    }
5321
+#define COMBA_CLEAR \
5322
+   c0 = c1 = c2 = 0;
5355 5323
 
5356
-    /* t2 = t1 - ((t1**b - a) / (b * t1**(b-1))) */
5357
-    
5358
-    /* t3 = t1**(b-1) */
5359
-    if ((res = mp_expt_d (&t1, b - 1, &t3)) != MP_OKAY) {   
5360
-      goto LBL_T3;
5361
-    }
5324
+#define COMBA_FORWARD \
5325
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
5362 5326
 
5363
-    /* numerator */
5364
-    /* t2 = t1**b */
5365
-    if ((res = mp_mul (&t3, &t1, &t2)) != MP_OKAY) {    
5366
-      goto LBL_T3;
5367
-    }
5327
+#define COMBA_STORE(x) \
5328
+   x = c0;
5368 5329
 
5369
-    /* t2 = t1**b - a */
5370
-    if ((res = mp_sub (&t2, a, &t2)) != MP_OKAY) {  
5371
-      goto LBL_T3;
5372
-    }
5330
+#define COMBA_STORE2(x) \
5331
+   x = c1;
5373 5332
 
5374
-    /* denominator */
5375
-    /* t3 = t1**(b-1) * b  */
5376
-    if ((res = mp_mul_d (&t3, b, &t3)) != MP_OKAY) {    
5377
-      goto LBL_T3;
5378
-    }
5333
+#define COMBA_FINI 
5334
+   
5335
+/* untested: will mulhdu change the flags?  Docs say no */
5336
+#define MULADD(i, j)              \
5337
+asm(                              \
5338
+   " mulld  16,%6,%7       \n\t" \
5339
+   " addc   %0,%0,16       \n\t" \
5340
+   " mulhdu 16,%6,%7       \n\t" \
5341
+   " adde   %1,%1,16       \n\t" \
5342
+   " addze  %2,%2          \n\t" \
5343
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");
5379 5344
 
5380
-    /* t3 = (t1**b - a)/(b * t1**(b-1)) */
5381
-    if ((res = mp_div (&t2, &t3, &t3, NULL)) != MP_OKAY) {  
5382
-      goto LBL_T3;
5383
-    }
5345
+#elif defined(TFM_AVR32)
5384 5346
 
5385
-    if ((res = mp_sub (&t1, &t3, &t2)) != MP_OKAY) {
5386
-      goto LBL_T3;
5387
-    }
5388
-  }  while (mp_cmp (&t1, &t2) != MP_EQ);
5347
+/* For AVR32 */
5389 5348
 
5390
-  /* result can be off by a few so check */
5391
-  for (;;) {
5392
-    if ((res = mp_expt_d (&t1, b, &t2)) != MP_OKAY) {
5393
-      goto LBL_T3;
5394
-    }
5349
+#define COMBA_START
5395 5350
 
5396
-    if (mp_cmp (&t2, a) == MP_GT) {
5397
-      if ((res = mp_sub_d (&t1, 1, &t1)) != MP_OKAY) {
5398
-         goto LBL_T3;
5399
-      }
5400
-    } else {
5401
-      break;
5402
-    }
5403
-  }
5351
+#define COMBA_CLEAR \
5352
+   c0 = c1 = c2 = 0;
5404 5353
 
5405
-  /* reset the sign of a first */
5406
-  a->sign = neg;
5354
+#define COMBA_FORWARD \
5355
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
5407 5356
 
5408
-  /* set the result */
5409
-  mp_exch (&t1, c);
5357
+#define COMBA_STORE(x) \
5358
+   x = c0;
5410 5359
 
5411
-  /* set the sign of the result */
5412
-  c->sign = neg;
5360
+#define COMBA_STORE2(x) \
5361
+   x = c1;
5413 5362
 
5414
-  res = MP_OKAY;
5363
+#define COMBA_FINI 
5364
+   
5365
+#define MULADD(i, j)             \
5366
+asm(                             \
5367
+   " mulu.d r2,%6,%7        \n\t"\
5368
+   " add    %0,r2           \n\t"\
5369
+   " adc    %1,%1,r3        \n\t"\
5370
+   " acr    %2              \n\t"\
5371
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2","r3");
5372
+
5373
+#elif defined(TFM_MIPS)
5374
+
5375
+#define COMBA_START
5376
+
5377
+#define COMBA_CLEAR \
5378
+   c0 = c1 = c2 = 0;
5379
+
5380
+#define COMBA_FORWARD \
5381
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
5382
+
5383
+#define COMBA_STORE(x) \
5384
+   x = c0;
5385
+
5386
+#define COMBA_STORE2(x) \
5387
+   x = c1;
5388
+
5389
+#define COMBA_FINI 
5390
+   
5391
+#define MULADD(i, j)              \
5392
+asm(                              \
5393
+   " multu  %6,%7          \n\t"  \
5394
+   " mflo   $12            \n\t"  \
5395
+   " mfhi   $13            \n\t"  \
5396
+   " addu    %0,%0,$12     \n\t"  \
5397
+   " sltu   $12,%0,$12     \n\t"  \
5398
+   " addu    %1,%1,$13     \n\t"  \
5399
+   " sltu   $13,%1,$13     \n\t"  \
5400
+   " addu    %1,%1,$12     \n\t"  \
5401
+   " sltu   $12,%1,$12     \n\t"  \
5402
+   " addu    %2,%2,$13     \n\t"  \
5403
+   " addu    %2,%2,$12     \n\t"  \
5404
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12","$13");
5405
+
5406
+#else
5407
+/* ISO C code */
5408
+
5409
+#define COMBA_START
5410
+
5411
+#define COMBA_CLEAR \
5412
+   c0 = c1 = c2 = 0;
5413
+
5414
+#define COMBA_FORWARD \
5415
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
5416
+
5417
+#define COMBA_STORE(x) \
5418
+   x = c0;
5419
+
5420
+#define COMBA_STORE2(x) \
5421
+   x = c1;
5422
+
5423
+#define COMBA_FINI 
5424
+   
5425
+#define MULADD(i, j)                                                              \
5426
+   do { fp_word t;                                                                \
5427
+   t = (fp_word)c0 + ((fp_word)i) * ((fp_word)j); c0 = t;                         \
5428
+   t = (fp_word)c1 + (t >> DIGIT_BIT);            c1 = t; c2 += t >> DIGIT_BIT;   \
5429
+   } while (0);
5430
+
5431
+#endif
5432
+
5433
+#ifndef TFM_DEFINES
5434
+
5435
+/* generic PxQ multiplier */
5436
+void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
5437
+{
5438
+   int       ix, iy, iz, tx, ty, pa;
5439
+   fp_digit  c0, c1, c2, *tmpx, *tmpy;
5440
+   fp_int    tmp, *dst;
5441
+
5442
+   COMBA_START;
5443
+   COMBA_CLEAR;
5444
+   
5445
+   /* get size of output and trim */
5446
+   pa = A->used + B->used;
5447
+   if (pa >= FP_SIZE) {
5448
+      pa = FP_SIZE-1;
5449
+   }
5415 5450
 
5416
-LBL_T3:mp_clear (&t3);
5417
-LBL_T2:mp_clear (&t2);
5418
-LBL_T1:mp_clear (&t1);
5419
-  return res;
5451
+   if (A == C || B == C) {
5452
+      fp_zero(&tmp);
5453
+      dst = &tmp;
5454
+   } else {
5455
+      fp_zero(C);
5456
+      dst = C;
5457
+   }
5458
+
5459
+   for (ix = 0; ix < pa; ix++) {
5460
+      /* get offsets into the two bignums */
5461
+      ty = MIN(ix, B->used-1);
5462
+      tx = ix - ty;
5463
+
5464
+      /* setup temp aliases */
5465
+      tmpx = A->dp + tx;
5466
+      tmpy = B->dp + ty;
5467
+
5468
+      /* this is the number of times the loop will iterate; essentially it is 
5469
+         while (tx++ < a->used && ty-- >= 0) { ... }
5470
+       */
5471
+      iy = MIN(A->used-tx, ty+1);
5472
+
5473
+      /* execute loop */
5474
+      COMBA_FORWARD;
5475
+      for (iz = 0; iz < iy; ++iz) {
5476
+          MULADD(*tmpx++, *tmpy--);
5477
+      }
5478
+
5479
+      /* store term */
5480
+      COMBA_STORE(dst->dp[ix]);
5481
+  }
5482
+  COMBA_FINI;
5483
+
5484
+  dst->used = pa;
5485
+  dst->sign = A->sign ^ B->sign;
5486
+  fp_clamp(dst);
5487
+  fp_copy(dst, C);
5420 5488
 }
5489
+
5421 5490
 #endif
5422 5491
 
5423
-/* $Source: /cvs/libtom/libtommath/bn_mp_n_root.c,v $ */
5424
-/* $Revision: 1.3 $ */
5425
-/* $Date: 2006/03/31 14:18:44 $ */
5492
+/* $Source: /cvs/libtom/tomsfastmath/src/mul/fp_mul_comba.c,v $ */
5493
+/* $Revision: 1.4 $ */
5494
+/* $Date: 2007/03/14 23:47:42 $ */
5426 5495
 
5427
-/* End: bn_mp_n_root.c */
5428 5496
 
5429
-/* Start: bn_mp_neg.c */
5430
-#include <bignum.h>
5431
-#ifdef BN_MP_NEG_C
5432
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
5433
- *
5434
- * LibTomMath is a library that provides multiple-precision
5435
- * integer arithmetic as well as number theoretic functionality.
5436
- *
5437
- * The library was designed directly after the MPI library by
5438
- * Michael Fromberger but has been written from scratch with
5439
- * additional optimizations in place.
5440
- *
5441
- * The library is free for all purposes without any express
5442
- * guarantee it works.
5443
- *
5444
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
5445
- */
5497
+/* End: fp_mul_comba.c */
5446 5498
 
5447
-/* b = -a */
5448
-int mp_neg (mp_int * a, mp_int * b)
5499
+/* Start: fp_mul_comba_12.c */
5500
+#define TFM_DEFINES
5501
+#include "fp_mul_comba.c"
5502
+
5503
+#ifdef TFM_MUL12
5504
+void fp_mul_comba12(fp_int *A, fp_int *B, fp_int *C)
5449 5505
 {
5450
-  int     res;
5451
-  if (a != b) {
5452
-     if ((res = mp_copy (a, b)) != MP_OKAY) {
5453
-        return res;
5454
-     }
5455
-  }
5506
+   fp_digit c0, c1, c2, at[24];
5507
+
5508
+   memcpy(at, A->dp, 12 * sizeof(fp_digit));
5509
+   memcpy(at+12, B->dp, 12 * sizeof(fp_digit));
5510
+   COMBA_START;
5511
+
5512
+   COMBA_CLEAR;
5513
+   /* 0 */
5514
+   MULADD(at[0], at[12]); 
5515
+   COMBA_STORE(C->dp[0]);
5516
+   /* 1 */
5517
+   COMBA_FORWARD;
5518
+   MULADD(at[0], at[13]);    MULADD(at[1], at[12]); 
5519
+   COMBA_STORE(C->dp[1]);
5520
+   /* 2 */
5521
+   COMBA_FORWARD;
5522
+   MULADD(at[0], at[14]);    MULADD(at[1], at[13]);    MULADD(at[2], at[12]); 
5523
+   COMBA_STORE(C->dp[2]);
5524
+   /* 3 */
5525
+   COMBA_FORWARD;
5526
+   MULADD(at[0], at[15]);    MULADD(at[1], at[14]);    MULADD(at[2], at[13]);    MULADD(at[3], at[12]); 
5527
+   COMBA_STORE(C->dp[3]);
5528
+   /* 4 */
5529
+   COMBA_FORWARD;
5530
+   MULADD(at[0], at[16]);    MULADD(at[1], at[15]);    MULADD(at[2], at[14]);    MULADD(at[3], at[13]);    MULADD(at[4], at[12]); 
5531
+   COMBA_STORE(C->dp[4]);
5532
+   /* 5 */
5533
+   COMBA_FORWARD;
5534
+   MULADD(at[0], at[17]);    MULADD(at[1], at[16]);    MULADD(at[2], at[15]);    MULADD(at[3], at[14]);    MULADD(at[4], at[13]);    MULADD(at[5], at[12]); 
5535
+   COMBA_STORE(C->dp[5]);
5536
+   /* 6 */
5537
+   COMBA_FORWARD;
5538
+   MULADD(at[0], at[18]);    MULADD(at[1], at[17]);    MULADD(at[2], at[16]);    MULADD(at[3], at[15]);    MULADD(at[4], at[14]);    MULADD(at[5], at[13]);    MULADD(at[6], at[12]); 
5539
+   COMBA_STORE(C->dp[6]);
5540
+   /* 7 */
5541
+   COMBA_FORWARD;
5542
+   MULADD(at[0], at[19]);    MULADD(at[1], at[18]);    MULADD(at[2], at[17]);    MULADD(at[3], at[16]);    MULADD(at[4], at[15]);    MULADD(at[5], at[14]);    MULADD(at[6], at[13]);    MULADD(at[7], at[12]); 
5543
+   COMBA_STORE(C->dp[7]);
5544
+   /* 8 */
5545
+   COMBA_FORWARD;
5546
+   MULADD(at[0], at[20]);    MULADD(at[1], at[19]);    MULADD(at[2], at[18]);    MULADD(at[3], at[17]);    MULADD(at[4], at[16]);    MULADD(at[5], at[15]);    MULADD(at[6], at[14]);    MULADD(at[7], at[13]);    MULADD(at[8], at[12]); 
5547
+   COMBA_STORE(C->dp[8]);
5548
+   /* 9 */
5549
+   COMBA_FORWARD;
5550
+   MULADD(at[0], at[21]);    MULADD(at[1], at[20]);    MULADD(at[2], at[19]);    MULADD(at[3], at[18]);    MULADD(at[4], at[17]);    MULADD(at[5], at[16]);    MULADD(at[6], at[15]);    MULADD(at[7], at[14]);    MULADD(at[8], at[13]);    MULADD(at[9], at[12]); 
5551
+   COMBA_STORE(C->dp[9]);
5552
+   /* 10 */
5553
+   COMBA_FORWARD;
5554
+   MULADD(at[0], at[22]);    MULADD(at[1], at[21]);    MULADD(at[2], at[20]);    MULADD(at[3], at[19]);    MULADD(at[4], at[18]);    MULADD(at[5], at[17]);    MULADD(at[6], at[16]);    MULADD(at[7], at[15]);    MULADD(at[8], at[14]);    MULADD(at[9], at[13]);    MULADD(at[10], at[12]); 
5555
+   COMBA_STORE(C->dp[10]);
5556
+   /* 11 */
5557
+   COMBA_FORWARD;
5558
+   MULADD(at[0], at[23]);    MULADD(at[1], at[22]);    MULADD(at[2], at[21]);    MULADD(at[3], at[20]);    MULADD(at[4], at[19]);    MULADD(at[5], at[18]);    MULADD(at[6], at[17]);    MULADD(at[7], at[16]);    MULADD(at[8], at[15]);    MULADD(at[9], at[14]);    MULADD(at[10], at[13]);    MULADD(at[11], at[12]); 
5559
+   COMBA_STORE(C->dp[11]);
5560
+   /* 12 */
5561
+   COMBA_FORWARD;
5562
+   MULADD(at[1], at[23]);    MULADD(at[2], at[22]);    MULADD(at[3], at[21]);    MULADD(at[4], at[20]);    MULADD(at[5], at[19]);    MULADD(at[6], at[18]);    MULADD(at[7], at[17]);    MULADD(at[8], at[16]);    MULADD(at[9], at[15]);    MULADD(at[10], at[14]);    MULADD(at[11], at[13]); 
5563
+   COMBA_STORE(C->dp[12]);
5564
+   /* 13 */
5565
+   COMBA_FORWARD;
5566
+   MULADD(at[2], at[23]);    MULADD(at[3], at[22]);    MULADD(at[4], at[21]);    MULADD(at[5], at[20]);    MULADD(at[6], at[19]);    MULADD(at[7], at[18]);    MULADD(at[8], at[17]);    MULADD(at[9], at[16]);    MULADD(at[10], at[15]);    MULADD(at[11], at[14]); 
5567
+   COMBA_STORE(C->dp[13]);
5568
+   /* 14 */
5569
+   COMBA_FORWARD;
5570
+   MULADD(at[3], at[23]);    MULADD(at[4], at[22]);    MULADD(at[5], at[21]);    MULADD(at[6], at[20]);    MULADD(at[7], at[19]);    MULADD(at[8], at[18]);    MULADD(at[9], at[17]);    MULADD(at[10], at[16]);    MULADD(at[11], at[15]); 
5571
+   COMBA_STORE(C->dp[14]);
5572
+   /* 15 */
5573
+   COMBA_FORWARD;
5574
+   MULADD(at[4], at[23]);    MULADD(at[5], at[22]);    MULADD(at[6], at[21]);    MULADD(at[7], at[20]);    MULADD(at[8], at[19]);    MULADD(at[9], at[18]);    MULADD(at[10], at[17]);    MULADD(at[11], at[16]); 
5575
+   COMBA_STORE(C->dp[15]);
5576
+   /* 16 */
5577
+   COMBA_FORWARD;
5578
+   MULADD(at[5], at[23]);    MULADD(at[6], at[22]);    MULADD(at[7], at[21]);    MULADD(at[8], at[20]);    MULADD(at[9], at[19]);    MULADD(at[10], at[18]);    MULADD(at[11], at[17]); 
5579
+   COMBA_STORE(C->dp[16]);
5580
+   /* 17 */
5581
+   COMBA_FORWARD;
5582
+   MULADD(at[6], at[23]);    MULADD(at[7], at[22]);    MULADD(at[8], at[21]);    MULADD(at[9], at[20]);    MULADD(at[10], at[19]);    MULADD(at[11], at[18]); 
5583
+   COMBA_STORE(C->dp[17]);
5584
+   /* 18 */
5585
+   COMBA_FORWARD;
5586
+   MULADD(at[7], at[23]);    MULADD(at[8], at[22]);    MULADD(at[9], at[21]);    MULADD(at[10], at[20]);    MULADD(at[11], at[19]); 
5587
+   COMBA_STORE(C->dp[18]);
5588
+   /* 19 */
5589
+   COMBA_FORWARD;
5590
+   MULADD(at[8], at[23]);    MULADD(at[9], at[22]);    MULADD(at[10], at[21]);    MULADD(at[11], at[20]); 
5591
+   COMBA_STORE(C->dp[19]);
5592
+   /* 20 */
5593
+   COMBA_FORWARD;
5594
+   MULADD(at[9], at[23]);    MULADD(at[10], at[22]);    MULADD(at[11], at[21]); 
5595
+   COMBA_STORE(C->dp[20]);
5596
+   /* 21 */
5597
+   COMBA_FORWARD;
5598
+   MULADD(at[10], at[23]);    MULADD(at[11], at[22]); 
5599
+   COMBA_STORE(C->dp[21]);
5600
+   /* 22 */
5601
+   COMBA_FORWARD;
5602
+   MULADD(at[11], at[23]); 
5603
+   COMBA_STORE(C->dp[22]);
5604
+   COMBA_STORE2(C->dp[23]);
5605
+   C->used = 24;
5606
+   C->sign = A->sign ^ B->sign;
5607
+   fp_clamp(C);
5608
+   COMBA_FINI;
5609
+}
5610
+#endif
5456 5611
 
5457
-  if (mp_iszero(b) != MP_YES) {
5458
-     b->sign = (a->sign == MP_ZPOS) ? MP_NEG : MP_ZPOS;
5459
-  } else {
5460
-     b->sign = MP_ZPOS;
5461
-  }
5612
+/* End: fp_mul_comba_12.c */
5462 5613
 
5463
-  return MP_OKAY;
5614
+/* Start: fp_mul_comba_17.c */
5615
+#define TFM_DEFINES
5616
+#include "fp_mul_comba.c"
5617
+
5618
+#ifdef TFM_MUL17
5619
+void fp_mul_comba17(fp_int *A, fp_int *B, fp_int *C)
5620
+{
5621
+   fp_digit c0, c1, c2, at[34];
5622
+
5623
+   memcpy(at, A->dp, 17 * sizeof(fp_digit));
5624
+   memcpy(at+17, B->dp, 17 * sizeof(fp_digit));
5625
+   COMBA_START;
5626
+
5627
+   COMBA_CLEAR;
5628
+   /* 0 */
5629
+   MULADD(at[0], at[17]); 
5630
+   COMBA_STORE(C->dp[0]);
5631
+   /* 1 */
5632
+   COMBA_FORWARD;
5633
+   MULADD(at[0], at[18]);    MULADD(at[1], at[17]); 
5634
+   COMBA_STORE(C->dp[1]);
5635
+   /* 2 */
5636
+   COMBA_FORWARD;
5637
+   MULADD(at[0], at[19]);    MULADD(at[1], at[18]);    MULADD(at[2], at[17]); 
5638
+   COMBA_STORE(C->dp[2]);
5639
+   /* 3 */
5640
+   COMBA_FORWARD;
5641
+   MULADD(at[0], at[20]);    MULADD(at[1], at[19]);    MULADD(at[2], at[18]);    MULADD(at[3], at[17]); 
5642
+   COMBA_STORE(C->dp[3]);
5643
+   /* 4 */
5644
+   COMBA_FORWARD;
5645
+   MULADD(at[0], at[21]);    MULADD(at[1], at[20]);    MULADD(at[2], at[19]);    MULADD(at[3], at[18]);    MULADD(at[4], at[17]); 
5646
+   COMBA_STORE(C->dp[4]);
5647
+   /* 5 */
5648
+   COMBA_FORWARD;
5649
+   MULADD(at[0], at[22]);    MULADD(at[1], at[21]);    MULADD(at[2], at[20]);    MULADD(at[3], at[19]);    MULADD(at[4], at[18]);    MULADD(at[5], at[17]); 
5650
+   COMBA_STORE(C->dp[5]);
5651
+   /* 6 */
5652
+   COMBA_FORWARD;
5653
+   MULADD(at[0], at[23]);    MULADD(at[1], at[22]);    MULADD(at[2], at[21]);    MULADD(at[3], at[20]);    MULADD(at[4], at[19]);    MULADD(at[5], at[18]);    MULADD(at[6], at[17]); 
5654
+   COMBA_STORE(C->dp[6]);
5655
+   /* 7 */
5656
+   COMBA_FORWARD;
5657
+   MULADD(at[0], at[24]);    MULADD(at[1], at[23]);    MULADD(at[2], at[22]);    MULADD(at[3], at[21]);    MULADD(at[4], at[20]);    MULADD(at[5], at[19]);    MULADD(at[6], at[18]);    MULADD(at[7], at[17]); 
5658
+   COMBA_STORE(C->dp[7]);
5659
+   /* 8 */
5660
+   COMBA_FORWARD;
5661
+   MULADD(at[0], at[25]);    MULADD(at[1], at[24]);    MULADD(at[2], at[23]);    MULADD(at[3], at[22]);    MULADD(at[4], at[21]);    MULADD(at[5], at[20]);    MULADD(at[6], at[19]);    MULADD(at[7], at[18]);    MULADD(at[8], at[17]); 
5662
+   COMBA_STORE(C->dp[8]);
5663
+   /* 9 */
5664
+   COMBA_FORWARD;
5665
+   MULADD(at[0], at[26]);    MULADD(at[1], at[25]);    MULADD(at[2], at[24]);    MULADD(at[3], at[23]);    MULADD(at[4], at[22]);    MULADD(at[5], at[21]);    MULADD(at[6], at[20]);    MULADD(at[7], at[19]);    MULADD(at[8], at[18]);    MULADD(at[9], at[17]); 
5666
+   COMBA_STORE(C->dp[9]);
5667
+   /* 10 */
5668
+   COMBA_FORWARD;
5669
+   MULADD(at[0], at[27]);    MULADD(at[1], at[26]);    MULADD(at[2], at[25]);    MULADD(at[3], at[24]);    MULADD(at[4], at[23]);    MULADD(at[5], at[22]);    MULADD(at[6], at[21]);    MULADD(at[7], at[20]);    MULADD(at[8], at[19]);    MULADD(at[9], at[18]);    MULADD(at[10], at[17]); 
5670
+   COMBA_STORE(C->dp[10]);
5671
+   /* 11 */
5672
+   COMBA_FORWARD;
5673
+   MULADD(at[0], at[28]);    MULADD(at[1], at[27]);    MULADD(at[2], at[26]);    MULADD(at[3], at[25]);    MULADD(at[4], at[24]);    MULADD(at[5], at[23]);    MULADD(at[6], at[22]);    MULADD(at[7], at[21]);    MULADD(at[8], at[20]);    MULADD(at[9], at[19]);    MULADD(at[10], at[18]);    MULADD(at[11], at[17]); 
5674
+   COMBA_STORE(C->dp[11]);
5675
+   /* 12 */
5676
+   COMBA_FORWARD;
5677
+   MULADD(at[0], at[29]);    MULADD(at[1], at[28]);    MULADD(at[2], at[27]);    MULADD(at[3], at[26]);    MULADD(at[4], at[25]);    MULADD(at[5], at[24]);    MULADD(at[6], at[23]);    MULADD(at[7], at[22]);    MULADD(at[8], at[21]);    MULADD(at[9], at[20]);    MULADD(at[10], at[19]);    MULADD(at[11], at[18]);    MULADD(at[12], at[17]); 
5678
+   COMBA_STORE(C->dp[12]);
5679
+   /* 13 */
5680
+   COMBA_FORWARD;
5681
+   MULADD(at[0], at[30]);    MULADD(at[1], at[29]);    MULADD(at[2], at[28]);    MULADD(at[3], at[27]);    MULADD(at[4], at[26]);    MULADD(at[5], at[25]);    MULADD(at[6], at[24]);    MULADD(at[7], at[23]);    MULADD(at[8], at[22]);    MULADD(at[9], at[21]);    MULADD(at[10], at[20]);    MULADD(at[11], at[19]);    MULADD(at[12], at[18]);    MULADD(at[13], at[17]); 
5682
+   COMBA_STORE(C->dp[13]);
5683
+   /* 14 */
5684
+   COMBA_FORWARD;
5685
+   MULADD(at[0], at[31]);    MULADD(at[1], at[30]);    MULADD(at[2], at[29]);    MULADD(at[3], at[28]);    MULADD(at[4], at[27]);    MULADD(at[5], at[26]);    MULADD(at[6], at[25]);    MULADD(at[7], at[24]);    MULADD(at[8], at[23]);    MULADD(at[9], at[22]);    MULADD(at[10], at[21]);    MULADD(at[11], at[20]);    MULADD(at[12], at[19]);    MULADD(at[13], at[18]);    MULADD(at[14], at[17]); 
5686
+   COMBA_STORE(C->dp[14]);
5687
+   /* 15 */
5688
+   COMBA_FORWARD;
5689
+   MULADD(at[0], at[32]);    MULADD(at[1], at[31]);    MULADD(at[2], at[30]);    MULADD(at[3], at[29]);    MULADD(at[4], at[28]);    MULADD(at[5], at[27]);    MULADD(at[6], at[26]);    MULADD(at[7], at[25]);    MULADD(at[8], at[24]);    MULADD(at[9], at[23]);    MULADD(at[10], at[22]);    MULADD(at[11], at[21]);    MULADD(at[12], at[20]);    MULADD(at[13], at[19]);    MULADD(at[14], at[18]);    MULADD(at[15], at[17]); 
5690
+   COMBA_STORE(C->dp[15]);
5691
+   /* 16 */
5692
+   COMBA_FORWARD;
5693
+   MULADD(at[0], at[33]);    MULADD(at[1], at[32]);    MULADD(at[2], at[31]);    MULADD(at[3], at[30]);    MULADD(at[4], at[29]);    MULADD(at[5], at[28]);    MULADD(at[6], at[27]);    MULADD(at[7], at[26]);    MULADD(at[8], at[25]);    MULADD(at[9], at[24]);    MULADD(at[10], at[23]);    MULADD(at[11], at[22]);    MULADD(at[12], at[21]);    MULADD(at[13], at[20]);    MULADD(at[14], at[19]);    MULADD(at[15], at[18]);    MULADD(at[16], at[17]); 
5694
+   COMBA_STORE(C->dp[16]);
5695
+   /* 17 */
5696
+   COMBA_FORWARD;
5697
+   MULADD(at[1], at[33]);    MULADD(at[2], at[32]);    MULADD(at[3], at[31]);    MULADD(at[4], at[30]);    MULADD(at[5], at[29]);    MULADD(at[6], at[28]);    MULADD(at[7], at[27]);    MULADD(at[8], at[26]);    MULADD(at[9], at[25]);    MULADD(at[10], at[24]);    MULADD(at[11], at[23]);    MULADD(at[12], at[22]);    MULADD(at[13], at[21]);    MULADD(at[14], at[20]);    MULADD(at[15], at[19]);    MULADD(at[16], at[18]); 
5698
+   COMBA_STORE(C->dp[17]);
5699
+   /* 18 */
5700
+   COMBA_FORWARD;
5701
+   MULADD(at[2], at[33]);    MULADD(at[3], at[32]);    MULADD(at[4], at[31]);    MULADD(at[5], at[30]);    MULADD(at[6], at[29]);    MULADD(at[7], at[28]);    MULADD(at[8], at[27]);    MULADD(at[9], at[26]);    MULADD(at[10], at[25]);    MULADD(at[11], at[24]);    MULADD(at[12], at[23]);    MULADD(at[13], at[22]);    MULADD(at[14], at[21]);    MULADD(at[15], at[20]);    MULADD(at[16], at[19]); 
5702
+   COMBA_STORE(C->dp[18]);
5703
+   /* 19 */
5704
+   COMBA_FORWARD;
5705
+   MULADD(at[3], at[33]);    MULADD(at[4], at[32]);    MULADD(at[5], at[31]);    MULADD(at[6], at[30]);    MULADD(at[7], at[29]);    MULADD(at[8], at[28]);    MULADD(at[9], at[27]);    MULADD(at[10], at[26]);    MULADD(at[11], at[25]);    MULADD(at[12], at[24]);    MULADD(at[13], at[23]);    MULADD(at[14], at[22]);    MULADD(at[15], at[21]);    MULADD(at[16], at[20]); 
5706
+   COMBA_STORE(C->dp[19]);
5707
+   /* 20 */
5708
+   COMBA_FORWARD;
5709
+   MULADD(at[4], at[33]);    MULADD(at[5], at[32]);    MULADD(at[6], at[31]);    MULADD(at[7], at[30]);    MULADD(at[8], at[29]);    MULADD(at[9], at[28]);    MULADD(at[10], at[27]);    MULADD(at[11], at[26]);    MULADD(at[12], at[25]);    MULADD(at[13], at[24]);    MULADD(at[14], at[23]);    MULADD(at[15], at[22]);    MULADD(at[16], at[21]); 
5710
+   COMBA_STORE(C->dp[20]);
5711
+   /* 21 */
5712
+   COMBA_FORWARD;
5713
+   MULADD(at[5], at[33]);    MULADD(at[6], at[32]);    MULADD(at[7], at[31]);    MULADD(at[8], at[30]);    MULADD(at[9], at[29]);    MULADD(at[10], at[28]);    MULADD(at[11], at[27]);    MULADD(at[12], at[26]);    MULADD(at[13], at[25]);    MULADD(at[14], at[24]);    MULADD(at[15], at[23]);    MULADD(at[16], at[22]); 
5714
+   COMBA_STORE(C->dp[21]);
5715
+   /* 22 */
5716
+   COMBA_FORWARD;
5717
+   MULADD(at[6], at[33]);    MULADD(at[7], at[32]);    MULADD(at[8], at[31]);    MULADD(at[9], at[30]);    MULADD(at[10], at[29]);    MULADD(at[11], at[28]);    MULADD(at[12], at[27]);    MULADD(at[13], at[26]);    MULADD(at[14], at[25]);    MULADD(at[15], at[24]);    MULADD(at[16], at[23]); 
5718
+   COMBA_STORE(C->dp[22]);
5719
+   /* 23 */
5720
+   COMBA_FORWARD;
5721
+   MULADD(at[7], at[33]);    MULADD(at[8], at[32]);    MULADD(at[9], at[31]);    MULADD(at[10], at[30]);    MULADD(at[11], at[29]);    MULADD(at[12], at[28]);    MULADD(at[13], at[27]);    MULADD(at[14], at[26]);    MULADD(at[15], at[25]);    MULADD(at[16], at[24]); 
5722
+   COMBA_STORE(C->dp[23]);
5723
+   /* 24 */
5724
+   COMBA_FORWARD;
5725
+   MULADD(at[8], at[33]);    MULADD(at[9], at[32]);    MULADD(at[10], at[31]);    MULADD(at[11], at[30]);    MULADD(at[12], at[29]);    MULADD(at[13], at[28]);    MULADD(at[14], at[27]);    MULADD(at[15], at[26]);    MULADD(at[16], at[25]); 
5726
+   COMBA_STORE(C->dp[24]);
5727
+   /* 25 */
5728
+   COMBA_FORWARD;
5729
+   MULADD(at[9], at[33]);    MULADD(at[10], at[32]);    MULADD(at[11], at[31]);    MULADD(at[12], at[30]);    MULADD(at[13], at[29]);    MULADD(at[14], at[28]);    MULADD(at[15], at[27]);    MULADD(at[16], at[26]); 
5730
+   COMBA_STORE(C->dp[25]);
5731
+   /* 26 */
5732
+   COMBA_FORWARD;
5733
+   MULADD(at[10], at[33]);    MULADD(at[11], at[32]);    MULADD(at[12], at[31]);    MULADD(at[13], at[30]);    MULADD(at[14], at[29]);    MULADD(at[15], at[28]);    MULADD(at[16], at[27]); 
5734
+   COMBA_STORE(C->dp[26]);
5735
+   /* 27 */
5736
+   COMBA_FORWARD;
5737
+   MULADD(at[11], at[33]);    MULADD(at[12], at[32]);    MULADD(at[13], at[31]);    MULADD(at[14], at[30]);    MULADD(at[15], at[29]);    MULADD(at[16], at[28]); 
5738
+   COMBA_STORE(C->dp[27]);
5739
+   /* 28 */
5740
+   COMBA_FORWARD;
5741
+   MULADD(at[12], at[33]);    MULADD(at[13], at[32]);    MULADD(at[14], at[31]);    MULADD(at[15], at[30]);    MULADD(at[16], at[29]); 
5742
+   COMBA_STORE(C->dp[28]);
5743
+   /* 29 */
5744
+   COMBA_FORWARD;
5745
+   MULADD(at[13], at[33]);    MULADD(at[14], at[32]);    MULADD(at[15], at[31]);    MULADD(at[16], at[30]); 
5746
+   COMBA_STORE(C->dp[29]);
5747
+   /* 30 */
5748
+   COMBA_FORWARD;
5749
+   MULADD(at[14], at[33]);    MULADD(at[15], at[32]);    MULADD(at[16], at[31]); 
5750
+   COMBA_STORE(C->dp[30]);
5751
+   /* 31 */
5752
+   COMBA_FORWARD;
5753
+   MULADD(at[15], at[33]);    MULADD(at[16], at[32]); 
5754
+   COMBA_STORE(C->dp[31]);
5755
+   /* 32 */
5756
+   COMBA_FORWARD;
5757
+   MULADD(at[16], at[33]); 
5758
+   COMBA_STORE(C->dp[32]);
5759
+   COMBA_STORE2(C->dp[33]);
5760
+   C->used = 34;
5761
+   C->sign = A->sign ^ B->sign;
5762
+   fp_clamp(C);
5763
+   COMBA_FINI;
5464 5764
 }
5465 5765
 #endif
5466 5766
 
5467
-/* $Source: /cvs/libtom/libtommath/bn_mp_neg.c,v $ */
5468
-/* $Revision: 1.3 $ */
5469
-/* $Date: 2006/03/31 14:18:44 $ */
5767
+/* End: fp_mul_comba_17.c */
5470 5768
 
5471
-/* End: bn_mp_neg.c */
5769
+/* Start: fp_mul_comba_20.c */
5770
+#define TFM_DEFINES
5771
+#include "fp_mul_comba.c"
5472 5772
 
5473
-/* Start: bn_mp_or.c */
5474
-#include <bignum.h>
5475
-#ifdef BN_MP_OR_C
5476
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
5477
- *
5478
- * LibTomMath is a library that provides multiple-precision
5479
- * integer arithmetic as well as number theoretic functionality.
5480
- *
5481
- * The library was designed directly after the MPI library by
5482
- * Michael Fromberger but has been written from scratch with
5483
- * additional optimizations in place.
5484
- *
5485
- * The library is free for all purposes without any express
5486
- * guarantee it works.
5487
- *
5488
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
5489
- */
5773
+#ifdef TFM_MUL20
5774
+void fp_mul_comba20(fp_int *A, fp_int *B, fp_int *C)
5775
+{
5776
+   fp_digit c0, c1, c2, at[40];
5777
+   
5778
+   memcpy(at, A->dp, 20 * sizeof(fp_digit));
5779
+   memcpy(at+20, B->dp, 20 * sizeof(fp_digit));
5780
+   COMBA_START;
5781
+
5782
+   COMBA_CLEAR;
5783
+   /* 0 */
5784
+   MULADD(at[0], at[20]); 
5785
+   COMBA_STORE(C->dp[0]);
5786
+   /* 1 */
5787
+   COMBA_FORWARD;
5788
+   MULADD(at[0], at[21]);    MULADD(at[1], at[20]); 
5789
+   COMBA_STORE(C->dp[1]);
5790
+   /* 2 */
5791
+   COMBA_FORWARD;
5792
+   MULADD(at[0], at[22]);    MULADD(at[1], at[21]);    MULADD(at[2], at[20]); 
5793
+   COMBA_STORE(C->dp[2]);
5794
+   /* 3 */
5795
+   COMBA_FORWARD;
5796
+   MULADD(at[0], at[23]);    MULADD(at[1], at[22]);    MULADD(at[2], at[21]);    MULADD(at[3], at[20]); 
5797
+   COMBA_STORE(C->dp[3]);
5798
+   /* 4 */
5799
+   COMBA_FORWARD;
5800
+   MULADD(at[0], at[24]);    MULADD(at[1], at[23]);    MULADD(at[2], at[22]);    MULADD(at[3], at[21]);    MULADD(at[4], at[20]); 
5801
+   COMBA_STORE(C->dp[4]);
5802
+   /* 5 */
5803
+   COMBA_FORWARD;
5804
+   MULADD(at[0], at[25]);    MULADD(at[1], at[24]);    MULADD(at[2], at[23]);    MULADD(at[3], at[22]);    MULADD(at[4], at[21]);    MULADD(at[5], at[20]); 
5805
+   COMBA_STORE(C->dp[5]);
5806
+   /* 6 */
5807
+   COMBA_FORWARD;
5808
+   MULADD(at[0], at[26]);    MULADD(at[1], at[25]);    MULADD(at[2], at[24]);    MULADD(at[3], at[23]);    MULADD(at[4], at[22]);    MULADD(at[5], at[21]);    MULADD(at[6], at[20]); 
5809
+   COMBA_STORE(C->dp[6]);
5810
+   /* 7 */
5811
+   COMBA_FORWARD;
5812
+   MULADD(at[0], at[27]);    MULADD(at[1], at[26]);    MULADD(at[2], at[25]);    MULADD(at[3], at[24]);    MULADD(at[4], at[23]);    MULADD(at[5], at[22]);    MULADD(at[6], at[21]);    MULADD(at[7], at[20]); 
5813
+   COMBA_STORE(C->dp[7]);
5814
+   /* 8 */
5815
+   COMBA_FORWARD;
5816
+   MULADD(at[0], at[28]);    MULADD(at[1], at[27]);    MULADD(at[2], at[26]);    MULADD(at[3], at[25]);    MULADD(at[4], at[24]);    MULADD(at[5], at[23]);    MULADD(at[6], at[22]);    MULADD(at[7], at[21]);    MULADD(at[8], at[20]); 
5817
+   COMBA_STORE(C->dp[8]);
5818
+   /* 9 */
5819
+   COMBA_FORWARD;
5820
+   MULADD(at[0], at[29]);    MULADD(at[1], at[28]);    MULADD(at[2], at[27]);    MULADD(at[3], at[26]);    MULADD(at[4], at[25]);    MULADD(at[5], at[24]);    MULADD(at[6], at[23]);    MULADD(at[7], at[22]);    MULADD(at[8], at[21]);    MULADD(at[9], at[20]); 
5821
+   COMBA_STORE(C->dp[9]);
5822
+   /* 10 */
5823
+   COMBA_FORWARD;
5824
+   MULADD(at[0], at[30]);    MULADD(at[1], at[29]);    MULADD(at[2], at[28]);    MULADD(at[3], at[27]);    MULADD(at[4], at[26]);    MULADD(at[5], at[25]);    MULADD(at[6], at[24]);    MULADD(at[7], at[23]);    MULADD(at[8], at[22]);    MULADD(at[9], at[21]);    MULADD(at[10], at[20]); 
5825
+   COMBA_STORE(C->dp[10]);
5826
+   /* 11 */
5827
+   COMBA_FORWARD;
5828
+   MULADD(at[0], at[31]);    MULADD(at[1], at[30]);    MULADD(at[2], at[29]);    MULADD(at[3], at[28]);    MULADD(at[4], at[27]);    MULADD(at[5], at[26]);    MULADD(at[6], at[25]);    MULADD(at[7], at[24]);    MULADD(at[8], at[23]);    MULADD(at[9], at[22]);    MULADD(at[10], at[21]);    MULADD(at[11], at[20]); 
5829
+   COMBA_STORE(C->dp[11]);
5830
+   /* 12 */
5831
+   COMBA_FORWARD;
5832
+   MULADD(at[0], at[32]);    MULADD(at[1], at[31]);    MULADD(at[2], at[30]);    MULADD(at[3], at[29]);    MULADD(at[4], at[28]);    MULADD(at[5], at[27]);    MULADD(at[6], at[26]);    MULADD(at[7], at[25]);    MULADD(at[8], at[24]);    MULADD(at[9], at[23]);    MULADD(at[10], at[22]);    MULADD(at[11], at[21]);    MULADD(at[12], at[20]); 
5833
+   COMBA_STORE(C->dp[12]);
5834
+   /* 13 */
5835
+   COMBA_FORWARD;
5836
+   MULADD(at[0], at[33]);    MULADD(at[1], at[32]);    MULADD(at[2], at[31]);    MULADD(at[3], at[30]);    MULADD(at[4], at[29]);    MULADD(at[5], at[28]);    MULADD(at[6], at[27]);    MULADD(at[7], at[26]);    MULADD(at[8], at[25]);    MULADD(at[9], at[24]);    MULADD(at[10], at[23]);    MULADD(at[11], at[22]);    MULADD(at[12], at[21]);    MULADD(at[13], at[20]); 
5837
+   COMBA_STORE(C->dp[13]);
5838
+   /* 14 */
5839
+   COMBA_FORWARD;
5840
+   MULADD(at[0], at[34]);    MULADD(at[1], at[33]);    MULADD(at[2], at[32]);    MULADD(at[3], at[31]);    MULADD(at[4], at[30]);    MULADD(at[5], at[29]);    MULADD(at[6], at[28]);    MULADD(at[7], at[27]);    MULADD(at[8], at[26]);    MULADD(at[9], at[25]);    MULADD(at[10], at[24]);    MULADD(at[11], at[23]);    MULADD(at[12], at[22]);    MULADD(at[13], at[21]);    MULADD(at[14], at[20]); 
5841
+   COMBA_STORE(C->dp[14]);
5842
+   /* 15 */
5843
+   COMBA_FORWARD;
5844
+   MULADD(at[0], at[35]);    MULADD(at[1], at[34]);    MULADD(at[2], at[33]);    MULADD(at[3], at[32]);    MULADD(at[4], at[31]);    MULADD(at[5], at[30]);    MULADD(at[6], at[29]);    MULADD(at[7], at[28]);    MULADD(at[8], at[27]);    MULADD(at[9], at[26]);    MULADD(at[10], at[25]);    MULADD(at[11], at[24]);    MULADD(at[12], at[23]);    MULADD(at[13], at[22]);    MULADD(at[14], at[21]);    MULADD(at[15], at[20]); 
5845
+   COMBA_STORE(C->dp[15]);
5846
+   /* 16 */
5847
+   COMBA_FORWARD;
5848
+   MULADD(at[0], at[36]);    MULADD(at[1], at[35]);    MULADD(at[2], at[34]);    MULADD(at[3], at[33]);    MULADD(at[4], at[32]);    MULADD(at[5], at[31]);    MULADD(at[6], at[30]);    MULADD(at[7], at[29]);    MULADD(at[8], at[28]);    MULADD(at[9], at[27]);    MULADD(at[10], at[26]);    MULADD(at[11], at[25]);    MULADD(at[12], at[24]);    MULADD(at[13], at[23]);    MULADD(at[14], at[22]);    MULADD(at[15], at[21]);    MULADD(at[16], at[20]); 
5849
+   COMBA_STORE(C->dp[16]);
5850
+   /* 17 */
5851
+   COMBA_FORWARD;
5852
+   MULADD(at[0], at[37]);    MULADD(at[1], at[36]);    MULADD(at[2], at[35]);    MULADD(at[3], at[34]);    MULADD(at[4], at[33]);    MULADD(at[5], at[32]);    MULADD(at[6], at[31]);    MULADD(at[7], at[30]);    MULADD(at[8], at[29]);    MULADD(at[9], at[28]);    MULADD(at[10], at[27]);    MULADD(at[11], at[26]);    MULADD(at[12], at[25]);    MULADD(at[13], at[24]);    MULADD(at[14], at[23]);    MULADD(at[15], at[22]);    MULADD(at[16], at[21]);    MULADD(at[17], at[20]); 
5853
+   COMBA_STORE(C->dp[17]);
5854
+   /* 18 */
5855
+   COMBA_FORWARD;
5856
+   MULADD(at[0], at[38]);    MULADD(at[1], at[37]);    MULADD(at[2], at[36]);    MULADD(at[3], at[35]);    MULADD(at[4], at[34]);    MULADD(at[5], at[33]);    MULADD(at[6], at[32]);    MULADD(at[7], at[31]);    MULADD(at[8], at[30]);    MULADD(at[9], at[29]);    MULADD(at[10], at[28]);    MULADD(at[11], at[27]);    MULADD(at[12], at[26]);    MULADD(at[13], at[25]);    MULADD(at[14], at[24]);    MULADD(at[15], at[23]);    MULADD(at[16], at[22]);    MULADD(at[17], at[21]);    MULADD(at[18], at[20]); 
5857
+   COMBA_STORE(C->dp[18]);
5858
+   /* 19 */
5859
+   COMBA_FORWARD;
5860
+   MULADD(at[0], at[39]);    MULADD(at[1], at[38]);    MULADD(at[2], at[37]);    MULADD(at[3], at[36]);    MULADD(at[4], at[35]);    MULADD(at[5], at[34]);    MULADD(at[6], at[33]);    MULADD(at[7], at[32]);    MULADD(at[8], at[31]);    MULADD(at[9], at[30]);    MULADD(at[10], at[29]);    MULADD(at[11], at[28]);    MULADD(at[12], at[27]);    MULADD(at[13], at[26]);    MULADD(at[14], at[25]);    MULADD(at[15], at[24]);    MULADD(at[16], at[23]);    MULADD(at[17], at[22]);    MULADD(at[18], at[21]);    MULADD(at[19], at[20]); 
5861
+   COMBA_STORE(C->dp[19]);
5862
+   /* 20 */
5863
+   COMBA_FORWARD;
5864
+   MULADD(at[1], at[39]);    MULADD(at[2], at[38]);    MULADD(at[3], at[37]);    MULADD(at[4], at[36]);    MULADD(at[5], at[35]);    MULADD(at[6], at[34]);    MULADD(at[7], at[33]);    MULADD(at[8], at[32]);    MULADD(at[9], at[31]);    MULADD(at[10], at[30]);    MULADD(at[11], at[29]);    MULADD(at[12], at[28]);    MULADD(at[13], at[27]);    MULADD(at[14], at[26]);    MULADD(at[15], at[25]);    MULADD(at[16], at[24]);    MULADD(at[17], at[23]);    MULADD(at[18], at[22]);    MULADD(at[19], at[21]); 
5865
+   COMBA_STORE(C->dp[20]);
5866
+   /* 21 */
5867
+   COMBA_FORWARD;
5868
+   MULADD(at[2], at[39]);    MULADD(at[3], at[38]);    MULADD(at[4], at[37]);    MULADD(at[5], at[36]);    MULADD(at[6], at[35]);    MULADD(at[7], at[34]);    MULADD(at[8], at[33]);    MULADD(at[9], at[32]);    MULADD(at[10], at[31]);    MULADD(at[11], at[30]);    MULADD(at[12], at[29]);    MULADD(at[13], at[28]);    MULADD(at[14], at[27]);    MULADD(at[15], at[26]);    MULADD(at[16], at[25]);    MULADD(at[17], at[24]);    MULADD(at[18], at[23]);    MULADD(at[19], at[22]); 
5869
+   COMBA_STORE(C->dp[21]);
5870
+   /* 22 */
5871
+   COMBA_FORWARD;
5872
+   MULADD(at[3], at[39]);    MULADD(at[4], at[38]);    MULADD(at[5], at[37]);    MULADD(at[6], at[36]);    MULADD(at[7], at[35]);    MULADD(at[8], at[34]);    MULADD(at[9], at[33]);    MULADD(at[10], at[32]);    MULADD(at[11], at[31]);    MULADD(at[12], at[30]);    MULADD(at[13], at[29]);    MULADD(at[14], at[28]);    MULADD(at[15], at[27]);    MULADD(at[16], at[26]);    MULADD(at[17], at[25]);    MULADD(at[18], at[24]);    MULADD(at[19], at[23]); 
5873
+   COMBA_STORE(C->dp[22]);
5874
+   /* 23 */
5875
+   COMBA_FORWARD;
5876
+   MULADD(at[4], at[39]);    MULADD(at[5], at[38]);    MULADD(at[6], at[37]);    MULADD(at[7], at[36]);    MULADD(at[8], at[35]);    MULADD(at[9], at[34]);    MULADD(at[10], at[33]);    MULADD(at[11], at[32]);    MULADD(at[12], at[31]);    MULADD(at[13], at[30]);    MULADD(at[14], at[29]);    MULADD(at[15], at[28]);    MULADD(at[16], at[27]);    MULADD(at[17], at[26]);    MULADD(at[18], at[25]);    MULADD(at[19], at[24]); 
5877
+   COMBA_STORE(C->dp[23]);
5878
+   /* 24 */
5879
+   COMBA_FORWARD;
5880
+   MULADD(at[5], at[39]);    MULADD(at[6], at[38]);    MULADD(at[7], at[37]);    MULADD(at[8], at[36]);    MULADD(at[9], at[35]);    MULADD(at[10], at[34]);    MULADD(at[11], at[33]);    MULADD(at[12], at[32]);    MULADD(at[13], at[31]);    MULADD(at[14], at[30]);    MULADD(at[15], at[29]);    MULADD(at[16], at[28]);    MULADD(at[17], at[27]);    MULADD(at[18], at[26]);    MULADD(at[19], at[25]); 
5881
+   COMBA_STORE(C->dp[24]);
5882
+   /* 25 */
5883
+   COMBA_FORWARD;
5884
+   MULADD(at[6], at[39]);    MULADD(at[7], at[38]);    MULADD(at[8], at[37]);    MULADD(at[9], at[36]);    MULADD(at[10], at[35]);    MULADD(at[11], at[34]);    MULADD(at[12], at[33]);    MULADD(at[13], at[32]);    MULADD(at[14], at[31]);    MULADD(at[15], at[30]);    MULADD(at[16], at[29]);    MULADD(at[17], at[28]);    MULADD(at[18], at[27]);    MULADD(at[19], at[26]); 
5885
+   COMBA_STORE(C->dp[25]);
5886
+   /* 26 */
5887
+   COMBA_FORWARD;
5888
+   MULADD(at[7], at[39]);    MULADD(at[8], at[38]);    MULADD(at[9], at[37]);    MULADD(at[10], at[36]);    MULADD(at[11], at[35]);    MULADD(at[12], at[34]);    MULADD(at[13], at[33]);    MULADD(at[14], at[32]);    MULADD(at[15], at[31]);    MULADD(at[16], at[30]);    MULADD(at[17], at[29]);    MULADD(at[18], at[28]);    MULADD(at[19], at[27]); 
5889
+   COMBA_STORE(C->dp[26]);
5890
+   /* 27 */
5891
+   COMBA_FORWARD;
5892
+   MULADD(at[8], at[39]);    MULADD(at[9], at[38]);    MULADD(at[10], at[37]);    MULADD(at[11], at[36]);    MULADD(at[12], at[35]);    MULADD(at[13], at[34]);    MULADD(at[14], at[33]);    MULADD(at[15], at[32]);    MULADD(at[16], at[31]);    MULADD(at[17], at[30]);    MULADD(at[18], at[29]);    MULADD(at[19], at[28]); 
5893
+   COMBA_STORE(C->dp[27]);
5894
+   /* 28 */
5895
+   COMBA_FORWARD;
5896
+   MULADD(at[9], at[39]);    MULADD(at[10], at[38]);    MULADD(at[11], at[37]);    MULADD(at[12], at[36]);    MULADD(at[13], at[35]);    MULADD(at[14], at[34]);    MULADD(at[15], at[33]);    MULADD(at[16], at[32]);    MULADD(at[17], at[31]);    MULADD(at[18], at[30]);    MULADD(at[19], at[29]); 
5897
+   COMBA_STORE(C->dp[28]);
5898
+   /* 29 */
5899
+   COMBA_FORWARD;
5900
+   MULADD(at[10], at[39]);    MULADD(at[11], at[38]);    MULADD(at[12], at[37]);    MULADD(at[13], at[36]);    MULADD(at[14], at[35]);    MULADD(at[15], at[34]);    MULADD(at[16], at[33]);    MULADD(at[17], at[32]);    MULADD(at[18], at[31]);    MULADD(at[19], at[30]); 
5901
+   COMBA_STORE(C->dp[29]);
5902
+   /* 30 */
5903
+   COMBA_FORWARD;
5904
+   MULADD(at[11], at[39]);    MULADD(at[12], at[38]);    MULADD(at[13], at[37]);    MULADD(at[14], at[36]);    MULADD(at[15], at[35]);    MULADD(at[16], at[34]);    MULADD(at[17], at[33]);    MULADD(at[18], at[32]);    MULADD(at[19], at[31]); 
5905
+   COMBA_STORE(C->dp[30]);
5906
+   /* 31 */
5907
+   COMBA_FORWARD;
5908
+   MULADD(at[12], at[39]);    MULADD(at[13], at[38]);    MULADD(at[14], at[37]);    MULADD(at[15], at[36]);    MULADD(at[16], at[35]);    MULADD(at[17], at[34]);    MULADD(at[18], at[33]);    MULADD(at[19], at[32]); 
5909
+   COMBA_STORE(C->dp[31]);
5910
+   /* 32 */
5911
+   COMBA_FORWARD;
5912
+   MULADD(at[13], at[39]);    MULADD(at[14], at[38]);    MULADD(at[15], at[37]);    MULADD(at[16], at[36]);    MULADD(at[17], at[35]);    MULADD(at[18], at[34]);    MULADD(at[19], at[33]); 
5913
+   COMBA_STORE(C->dp[32]);
5914
+   /* 33 */
5915
+   COMBA_FORWARD;
5916
+   MULADD(at[14], at[39]);    MULADD(at[15], at[38]);    MULADD(at[16], at[37]);    MULADD(at[17], at[36]);    MULADD(at[18], at[35]);    MULADD(at[19], at[34]); 
5917
+   COMBA_STORE(C->dp[33]);
5918
+   /* 34 */
5919
+   COMBA_FORWARD;
5920
+   MULADD(at[15], at[39]);    MULADD(at[16], at[38]);    MULADD(at[17], at[37]);    MULADD(at[18], at[36]);    MULADD(at[19], at[35]); 
5921
+   COMBA_STORE(C->dp[34]);
5922
+   /* 35 */
5923
+   COMBA_FORWARD;
5924
+   MULADD(at[16], at[39]);    MULADD(at[17], at[38]);    MULADD(at[18], at[37]);    MULADD(at[19], at[36]); 
5925
+   COMBA_STORE(C->dp[35]);
5926
+   /* 36 */
5927
+   COMBA_FORWARD;
5928
+   MULADD(at[17], at[39]);    MULADD(at[18], at[38]);    MULADD(at[19], at[37]); 
5929
+   COMBA_STORE(C->dp[36]);
5930
+   /* 37 */
5931
+   COMBA_FORWARD;
5932
+   MULADD(at[18], at[39]);    MULADD(at[19], at[38]); 
5933
+   COMBA_STORE(C->dp[37]);
5934
+   /* 38 */
5935
+   COMBA_FORWARD;
5936
+   MULADD(at[19], at[39]); 
5937
+   COMBA_STORE(C->dp[38]);
5938
+   COMBA_STORE2(C->dp[39]);
5939
+   C->used = 40;
5940
+   C->sign = A->sign ^ B->sign;
5941
+   fp_clamp(C);
5942
+   COMBA_FINI;
5943
+}
5944
+#endif
5945
+
5946
+/* End: fp_mul_comba_20.c */
5490 5947
 
5491
-/* OR two ints together */
5492
-int mp_or (mp_int * a, mp_int * b, mp_int * c)
5948
+/* Start: fp_mul_comba_24.c */
5949
+#define TFM_DEFINES
5950
+#include "fp_mul_comba.c"
5951
+
5952
+#ifdef TFM_MUL24
5953
+void fp_mul_comba24(fp_int *A, fp_int *B, fp_int *C)
5493 5954
 {
5494
-  int     res, ix, px;
5495
-  mp_int  t, *x;
5955
+   fp_digit c0, c1, c2, at[48];
5956
+
5957
+   memcpy(at, A->dp, 24 * sizeof(fp_digit));
5958
+   memcpy(at+24, B->dp, 24 * sizeof(fp_digit));
5959
+   COMBA_START;
5960
+
5961
+   COMBA_CLEAR;
5962
+   /* 0 */
5963
+   MULADD(at[0], at[24]); 
5964
+   COMBA_STORE(C->dp[0]);
5965
+   /* 1 */
5966
+   COMBA_FORWARD;
5967
+   MULADD(at[0], at[25]);    MULADD(at[1], at[24]); 
5968
+   COMBA_STORE(C->dp[1]);
5969
+   /* 2 */
5970
+   COMBA_FORWARD;
5971
+   MULADD(at[0], at[26]);    MULADD(at[1], at[25]);    MULADD(at[2], at[24]); 
5972
+   COMBA_STORE(C->dp[2]);
5973
+   /* 3 */
5974
+   COMBA_FORWARD;
5975
+   MULADD(at[0], at[27]);    MULADD(at[1], at[26]);    MULADD(at[2], at[25]);    MULADD(at[3], at[24]); 
5976
+   COMBA_STORE(C->dp[3]);
5977
+   /* 4 */
5978
+   COMBA_FORWARD;
5979
+   MULADD(at[0], at[28]);    MULADD(at[1], at[27]);    MULADD(at[2], at[26]);    MULADD(at[3], at[25]);    MULADD(at[4], at[24]); 
5980
+   COMBA_STORE(C->dp[4]);
5981
+   /* 5 */
5982
+   COMBA_FORWARD;
5983
+   MULADD(at[0], at[29]);    MULADD(at[1], at[28]);    MULADD(at[2], at[27]);    MULADD(at[3], at[26]);    MULADD(at[4], at[25]);    MULADD(at[5], at[24]); 
5984
+   COMBA_STORE(C->dp[5]);
5985
+   /* 6 */
5986
+   COMBA_FORWARD;
5987
+   MULADD(at[0], at[30]);    MULADD(at[1], at[29]);    MULADD(at[2], at[28]);    MULADD(at[3], at[27]);    MULADD(at[4], at[26]);    MULADD(at[5], at[25]);    MULADD(at[6], at[24]); 
5988
+   COMBA_STORE(C->dp[6]);
5989
+   /* 7 */
5990
+   COMBA_FORWARD;
5991
+   MULADD(at[0], at[31]);    MULADD(at[1], at[30]);    MULADD(at[2], at[29]);    MULADD(at[3], at[28]);    MULADD(at[4], at[27]);    MULADD(at[5], at[26]);    MULADD(at[6], at[25]);    MULADD(at[7], at[24]); 
5992
+   COMBA_STORE(C->dp[7]);
5993
+   /* 8 */
5994
+   COMBA_FORWARD;
5995
+   MULADD(at[0], at[32]);    MULADD(at[1], at[31]);    MULADD(at[2], at[30]);    MULADD(at[3], at[29]);    MULADD(at[4], at[28]);    MULADD(at[5], at[27]);    MULADD(at[6], at[26]);    MULADD(at[7], at[25]);    MULADD(at[8], at[24]); 
5996
+   COMBA_STORE(C->dp[8]);
5997
+   /* 9 */
5998
+   COMBA_FORWARD;
5999
+   MULADD(at[0], at[33]);    MULADD(at[1], at[32]);    MULADD(at[2], at[31]);    MULADD(at[3], at[30]);    MULADD(at[4], at[29]);    MULADD(at[5], at[28]);    MULADD(at[6], at[27]);    MULADD(at[7], at[26]);    MULADD(at[8], at[25]);    MULADD(at[9], at[24]); 
6000
+   COMBA_STORE(C->dp[9]);
6001
+   /* 10 */
6002
+   COMBA_FORWARD;
6003
+   MULADD(at[0], at[34]);    MULADD(at[1], at[33]);    MULADD(at[2], at[32]);    MULADD(at[3], at[31]);    MULADD(at[4], at[30]);    MULADD(at[5], at[29]);    MULADD(at[6], at[28]);    MULADD(at[7], at[27]);    MULADD(at[8], at[26]);    MULADD(at[9], at[25]);    MULADD(at[10], at[24]); 
6004
+   COMBA_STORE(C->dp[10]);
6005
+   /* 11 */
6006
+   COMBA_FORWARD;
6007
+   MULADD(at[0], at[35]);    MULADD(at[1], at[34]);    MULADD(at[2], at[33]);    MULADD(at[3], at[32]);    MULADD(at[4], at[31]);    MULADD(at[5], at[30]);    MULADD(at[6], at[29]);    MULADD(at[7], at[28]);    MULADD(at[8], at[27]);    MULADD(at[9], at[26]);    MULADD(at[10], at[25]);    MULADD(at[11], at[24]); 
6008
+   COMBA_STORE(C->dp[11]);
6009
+   /* 12 */
6010
+   COMBA_FORWARD;
6011
+   MULADD(at[0], at[36]);    MULADD(at[1], at[35]);    MULADD(at[2], at[34]);    MULADD(at[3], at[33]);    MULADD(at[4], at[32]);    MULADD(at[5], at[31]);    MULADD(at[6], at[30]);    MULADD(at[7], at[29]);    MULADD(at[8], at[28]);    MULADD(at[9], at[27]);    MULADD(at[10], at[26]);    MULADD(at[11], at[25]);    MULADD(at[12], at[24]); 
6012
+   COMBA_STORE(C->dp[12]);
6013
+   /* 13 */
6014
+   COMBA_FORWARD;
6015
+   MULADD(at[0], at[37]);    MULADD(at[1], at[36]);    MULADD(at[2], at[35]);    MULADD(at[3], at[34]);    MULADD(at[4], at[33]);    MULADD(at[5], at[32]);    MULADD(at[6], at[31]);    MULADD(at[7], at[30]);    MULADD(at[8], at[29]);    MULADD(at[9], at[28]);    MULADD(at[10], at[27]);    MULADD(at[11], at[26]);    MULADD(at[12], at[25]);    MULADD(at[13], at[24]); 
6016
+   COMBA_STORE(C->dp[13]);
6017
+   /* 14 */
6018
+   COMBA_FORWARD;
6019
+   MULADD(at[0], at[38]);    MULADD(at[1], at[37]);    MULADD(at[2], at[36]);    MULADD(at[3], at[35]);    MULADD(at[4], at[34]);    MULADD(at[5], at[33]);    MULADD(at[6], at[32]);    MULADD(at[7], at[31]);    MULADD(at[8], at[30]);    MULADD(at[9], at[29]);    MULADD(at[10], at[28]);    MULADD(at[11], at[27]);    MULADD(at[12], at[26]);    MULADD(at[13], at[25]);    MULADD(at[14], at[24]); 
6020
+   COMBA_STORE(C->dp[14]);
6021
+   /* 15 */
6022
+   COMBA_FORWARD;
6023
+   MULADD(at[0], at[39]);    MULADD(at[1], at[38]);    MULADD(at[2], at[37]);    MULADD(at[3], at[36]);    MULADD(at[4], at[35]);    MULADD(at[5], at[34]);    MULADD(at[6], at[33]);    MULADD(at[7], at[32]);    MULADD(at[8], at[31]);    MULADD(at[9], at[30]);    MULADD(at[10], at[29]);    MULADD(at[11], at[28]);    MULADD(at[12], at[27]);    MULADD(at[13], at[26]);    MULADD(at[14], at[25]);    MULADD(at[15], at[24]); 
6024
+   COMBA_STORE(C->dp[15]);
6025
+   /* 16 */
6026
+   COMBA_FORWARD;
6027
+   MULADD(at[0], at[40]);    MULADD(at[1], at[39]);    MULADD(at[2], at[38]);    MULADD(at[3], at[37]);    MULADD(at[4], at[36]);    MULADD(at[5], at[35]);    MULADD(at[6], at[34]);    MULADD(at[7], at[33]);    MULADD(at[8], at[32]);    MULADD(at[9], at[31]);    MULADD(at[10], at[30]);    MULADD(at[11], at[29]);    MULADD(at[12], at[28]);    MULADD(at[13], at[27]);    MULADD(at[14], at[26]);    MULADD(at[15], at[25]);    MULADD(at[16], at[24]); 
6028
+   COMBA_STORE(C->dp[16]);
6029
+   /* 17 */
6030
+   COMBA_FORWARD;
6031
+   MULADD(at[0], at[41]);    MULADD(at[1], at[40]);    MULADD(at[2], at[39]);    MULADD(at[3], at[38]);    MULADD(at[4], at[37]);    MULADD(at[5], at[36]);    MULADD(at[6], at[35]);    MULADD(at[7], at[34]);    MULADD(at[8], at[33]);    MULADD(at[9], at[32]);    MULADD(at[10], at[31]);    MULADD(at[11], at[30]);    MULADD(at[12], at[29]);    MULADD(at[13], at[28]);    MULADD(at[14], at[27]);    MULADD(at[15], at[26]);    MULADD(at[16], at[25]);    MULADD(at[17], at[24]); 
6032
+   COMBA_STORE(C->dp[17]);
6033
+   /* 18 */
6034
+   COMBA_FORWARD;
6035
+   MULADD(at[0], at[42]);    MULADD(at[1], at[41]);    MULADD(at[2], at[40]);    MULADD(at[3], at[39]);    MULADD(at[4], at[38]);    MULADD(at[5], at[37]);    MULADD(at[6], at[36]);    MULADD(at[7], at[35]);    MULADD(at[8], at[34]);    MULADD(at[9], at[33]);    MULADD(at[10], at[32]);    MULADD(at[11], at[31]);    MULADD(at[12], at[30]);    MULADD(at[13], at[29]);    MULADD(at[14], at[28]);    MULADD(at[15], at[27]);    MULADD(at[16], at[26]);    MULADD(at[17], at[25]);    MULADD(at[18], at[24]); 
6036
+   COMBA_STORE(C->dp[18]);
6037
+   /* 19 */
6038
+   COMBA_FORWARD;
6039
+   MULADD(at[0], at[43]);    MULADD(at[1], at[42]);    MULADD(at[2], at[41]);    MULADD(at[3], at[40]);    MULADD(at[4], at[39]);    MULADD(at[5], at[38]);    MULADD(at[6], at[37]);    MULADD(at[7], at[36]);    MULADD(at[8], at[35]);    MULADD(at[9], at[34]);    MULADD(at[10], at[33]);    MULADD(at[11], at[32]);    MULADD(at[12], at[31]);    MULADD(at[13], at[30]);    MULADD(at[14], at[29]);    MULADD(at[15], at[28]);    MULADD(at[16], at[27]);    MULADD(at[17], at[26]);    MULADD(at[18], at[25]);    MULADD(at[19], at[24]); 
6040
+   COMBA_STORE(C->dp[19]);
6041
+   /* 20 */
6042
+   COMBA_FORWARD;
6043
+   MULADD(at[0], at[44]);    MULADD(at[1], at[43]);    MULADD(at[2], at[42]);    MULADD(at[3], at[41]);    MULADD(at[4], at[40]);    MULADD(at[5], at[39]);    MULADD(at[6], at[38]);    MULADD(at[7], at[37]);    MULADD(at[8], at[36]);    MULADD(at[9], at[35]);    MULADD(at[10], at[34]);    MULADD(at[11], at[33]);    MULADD(at[12], at[32]);    MULADD(at[13], at[31]);    MULADD(at[14], at[30]);    MULADD(at[15], at[29]);    MULADD(at[16], at[28]);    MULADD(at[17], at[27]);    MULADD(at[18], at[26]);    MULADD(at[19], at[25]);    MULADD(at[20], at[24]); 
6044
+   COMBA_STORE(C->dp[20]);
6045
+   /* 21 */
6046
+   COMBA_FORWARD;
6047
+   MULADD(at[0], at[45]);    MULADD(at[1], at[44]);    MULADD(at[2], at[43]);    MULADD(at[3], at[42]);    MULADD(at[4], at[41]);    MULADD(at[5], at[40]);    MULADD(at[6], at[39]);    MULADD(at[7], at[38]);    MULADD(at[8], at[37]);    MULADD(at[9], at[36]);    MULADD(at[10], at[35]);    MULADD(at[11], at[34]);    MULADD(at[12], at[33]);    MULADD(at[13], at[32]);    MULADD(at[14], at[31]);    MULADD(at[15], at[30]);    MULADD(at[16], at[29]);    MULADD(at[17], at[28]);    MULADD(at[18], at[27]);    MULADD(at[19], at[26]);    MULADD(at[20], at[25]);    MULADD(at[21], at[24]); 
6048
+   COMBA_STORE(C->dp[21]);
6049
+   /* 22 */
6050
+   COMBA_FORWARD;
6051
+   MULADD(at[0], at[46]);    MULADD(at[1], at[45]);    MULADD(at[2], at[44]);    MULADD(at[3], at[43]);    MULADD(at[4], at[42]);    MULADD(at[5], at[41]);    MULADD(at[6], at[40]);    MULADD(at[7], at[39]);    MULADD(at[8], at[38]);    MULADD(at[9], at[37]);    MULADD(at[10], at[36]);    MULADD(at[11], at[35]);    MULADD(at[12], at[34]);    MULADD(at[13], at[33]);    MULADD(at[14], at[32]);    MULADD(at[15], at[31]);    MULADD(at[16], at[30]);    MULADD(at[17], at[29]);    MULADD(at[18], at[28]);    MULADD(at[19], at[27]);    MULADD(at[20], at[26]);    MULADD(at[21], at[25]);    MULADD(at[22], at[24]); 
6052
+   COMBA_STORE(C->dp[22]);
6053
+   /* 23 */
6054
+   COMBA_FORWARD;
6055
+   MULADD(at[0], at[47]);    MULADD(at[1], at[46]);    MULADD(at[2], at[45]);    MULADD(at[3], at[44]);    MULADD(at[4], at[43]);    MULADD(at[5], at[42]);    MULADD(at[6], at[41]);    MULADD(at[7], at[40]);    MULADD(at[8], at[39]);    MULADD(at[9], at[38]);    MULADD(at[10], at[37]);    MULADD(at[11], at[36]);    MULADD(at[12], at[35]);    MULADD(at[13], at[34]);    MULADD(at[14], at[33]);    MULADD(at[15], at[32]);    MULADD(at[16], at[31]);    MULADD(at[17], at[30]);    MULADD(at[18], at[29]);    MULADD(at[19], at[28]);    MULADD(at[20], at[27]);    MULADD(at[21], at[26]);    MULADD(at[22], at[25]);    MULADD(at[23], at[24]); 
6056
+   COMBA_STORE(C->dp[23]);
6057
+   /* 24 */
6058
+   COMBA_FORWARD;
6059
+   MULADD(at[1], at[47]);    MULADD(at[2], at[46]);    MULADD(at[3], at[45]);    MULADD(at[4], at[44]);    MULADD(at[5], at[43]);    MULADD(at[6], at[42]);    MULADD(at[7], at[41]);    MULADD(at[8], at[40]);    MULADD(at[9], at[39]);    MULADD(at[10], at[38]);    MULADD(at[11], at[37]);    MULADD(at[12], at[36]);    MULADD(at[13], at[35]);    MULADD(at[14], at[34]);    MULADD(at[15], at[33]);    MULADD(at[16], at[32]);    MULADD(at[17], at[31]);    MULADD(at[18], at[30]);    MULADD(at[19], at[29]);    MULADD(at[20], at[28]);    MULADD(at[21], at[27]);    MULADD(at[22], at[26]);    MULADD(at[23], at[25]); 
6060
+   COMBA_STORE(C->dp[24]);
6061
+   /* 25 */
6062
+   COMBA_FORWARD;
6063
+   MULADD(at[2], at[47]);    MULADD(at[3], at[46]);    MULADD(at[4], at[45]);    MULADD(at[5], at[44]);    MULADD(at[6], at[43]);    MULADD(at[7], at[42]);    MULADD(at[8], at[41]);    MULADD(at[9], at[40]);    MULADD(at[10], at[39]);    MULADD(at[11], at[38]);    MULADD(at[12], at[37]);    MULADD(at[13], at[36]);    MULADD(at[14], at[35]);    MULADD(at[15], at[34]);    MULADD(at[16], at[33]);    MULADD(at[17], at[32]);    MULADD(at[18], at[31]);    MULADD(at[19], at[30]);    MULADD(at[20], at[29]);    MULADD(at[21], at[28]);    MULADD(at[22], at[27]);    MULADD(at[23], at[26]); 
6064
+   COMBA_STORE(C->dp[25]);
6065
+   /* 26 */
6066
+   COMBA_FORWARD;
6067
+   MULADD(at[3], at[47]);    MULADD(at[4], at[46]);    MULADD(at[5], at[45]);    MULADD(at[6], at[44]);    MULADD(at[7], at[43]);    MULADD(at[8], at[42]);    MULADD(at[9], at[41]);    MULADD(at[10], at[40]);    MULADD(at[11], at[39]);    MULADD(at[12], at[38]);    MULADD(at[13], at[37]);    MULADD(at[14], at[36]);    MULADD(at[15], at[35]);    MULADD(at[16], at[34]);    MULADD(at[17], at[33]);    MULADD(at[18], at[32]);    MULADD(at[19], at[31]);    MULADD(at[20], at[30]);    MULADD(at[21], at[29]);    MULADD(at[22], at[28]);    MULADD(at[23], at[27]); 
6068
+   COMBA_STORE(C->dp[26]);
6069
+   /* 27 */
6070
+   COMBA_FORWARD;
6071
+   MULADD(at[4], at[47]);    MULADD(at[5], at[46]);    MULADD(at[6], at[45]);    MULADD(at[7], at[44]);    MULADD(at[8], at[43]);    MULADD(at[9], at[42]);    MULADD(at[10], at[41]);    MULADD(at[11], at[40]);    MULADD(at[12], at[39]);    MULADD(at[13], at[38]);    MULADD(at[14], at[37]);    MULADD(at[15], at[36]);    MULADD(at[16], at[35]);    MULADD(at[17], at[34]);    MULADD(at[18], at[33]);    MULADD(at[19], at[32]);    MULADD(at[20], at[31]);    MULADD(at[21], at[30]);    MULADD(at[22], at[29]);    MULADD(at[23], at[28]); 
6072
+   COMBA_STORE(C->dp[27]);
6073
+   /* 28 */
6074
+   COMBA_FORWARD;
6075
+   MULADD(at[5], at[47]);    MULADD(at[6], at[46]);    MULADD(at[7], at[45]);    MULADD(at[8], at[44]);    MULADD(at[9], at[43]);    MULADD(at[10], at[42]);    MULADD(at[11], at[41]);    MULADD(at[12], at[40]);    MULADD(at[13], at[39]);    MULADD(at[14], at[38]);    MULADD(at[15], at[37]);    MULADD(at[16], at[36]);    MULADD(at[17], at[35]);    MULADD(at[18], at[34]);    MULADD(at[19], at[33]);    MULADD(at[20], at[32]);    MULADD(at[21], at[31]);    MULADD(at[22], at[30]);    MULADD(at[23], at[29]); 
6076
+   COMBA_STORE(C->dp[28]);
6077
+   /* 29 */
6078
+   COMBA_FORWARD;
6079
+   MULADD(at[6], at[47]);    MULADD(at[7], at[46]);    MULADD(at[8], at[45]);    MULADD(at[9], at[44]);    MULADD(at[10], at[43]);    MULADD(at[11], at[42]);    MULADD(at[12], at[41]);    MULADD(at[13], at[40]);    MULADD(at[14], at[39]);    MULADD(at[15], at[38]);    MULADD(at[16], at[37]);    MULADD(at[17], at[36]);    MULADD(at[18], at[35]);    MULADD(at[19], at[34]);    MULADD(at[20], at[33]);    MULADD(at[21], at[32]);    MULADD(at[22], at[31]);    MULADD(at[23], at[30]); 
6080
+   COMBA_STORE(C->dp[29]);
6081
+   /* 30 */
6082
+   COMBA_FORWARD;
6083
+   MULADD(at[7], at[47]);    MULADD(at[8], at[46]);    MULADD(at[9], at[45]);    MULADD(at[10], at[44]);    MULADD(at[11], at[43]);    MULADD(at[12], at[42]);    MULADD(at[13], at[41]);    MULADD(at[14], at[40]);    MULADD(at[15], at[39]);    MULADD(at[16], at[38]);    MULADD(at[17], at[37]);    MULADD(at[18], at[36]);    MULADD(at[19], at[35]);    MULADD(at[20], at[34]);    MULADD(at[21], at[33]);    MULADD(at[22], at[32]);    MULADD(at[23], at[31]); 
6084
+   COMBA_STORE(C->dp[30]);
6085
+   /* 31 */
6086
+   COMBA_FORWARD;
6087
+   MULADD(at[8], at[47]);    MULADD(at[9], at[46]);    MULADD(at[10], at[45]);    MULADD(at[11], at[44]);    MULADD(at[12], at[43]);    MULADD(at[13], at[42]);    MULADD(at[14], at[41]);    MULADD(at[15], at[40]);    MULADD(at[16], at[39]);    MULADD(at[17], at[38]);    MULADD(at[18], at[37]);    MULADD(at[19], at[36]);    MULADD(at[20], at[35]);    MULADD(at[21], at[34]);    MULADD(at[22], at[33]);    MULADD(at[23], at[32]); 
6088
+   COMBA_STORE(C->dp[31]);
6089
+   /* 32 */
6090
+   COMBA_FORWARD;
6091
+   MULADD(at[9], at[47]);    MULADD(at[10], at[46]);    MULADD(at[11], at[45]);    MULADD(at[12], at[44]);    MULADD(at[13], at[43]);    MULADD(at[14], at[42]);    MULADD(at[15], at[41]);    MULADD(at[16], at[40]);    MULADD(at[17], at[39]);    MULADD(at[18], at[38]);    MULADD(at[19], at[37]);    MULADD(at[20], at[36]);    MULADD(at[21], at[35]);    MULADD(at[22], at[34]);    MULADD(at[23], at[33]); 
6092
+   COMBA_STORE(C->dp[32]);
6093
+   /* 33 */
6094
+   COMBA_FORWARD;
6095
+   MULADD(at[10], at[47]);    MULADD(at[11], at[46]);    MULADD(at[12], at[45]);    MULADD(at[13], at[44]);    MULADD(at[14], at[43]);    MULADD(at[15], at[42]);    MULADD(at[16], at[41]);    MULADD(at[17], at[40]);    MULADD(at[18], at[39]);    MULADD(at[19], at[38]);    MULADD(at[20], at[37]);    MULADD(at[21], at[36]);    MULADD(at[22], at[35]);    MULADD(at[23], at[34]); 
6096
+   COMBA_STORE(C->dp[33]);
6097
+   /* 34 */
6098
+   COMBA_FORWARD;
6099
+   MULADD(at[11], at[47]);    MULADD(at[12], at[46]);    MULADD(at[13], at[45]);    MULADD(at[14], at[44]);    MULADD(at[15], at[43]);    MULADD(at[16], at[42]);    MULADD(at[17], at[41]);    MULADD(at[18], at[40]);    MULADD(at[19], at[39]);    MULADD(at[20], at[38]);    MULADD(at[21], at[37]);    MULADD(at[22], at[36]);    MULADD(at[23], at[35]); 
6100
+   COMBA_STORE(C->dp[34]);
6101
+   /* 35 */
6102
+   COMBA_FORWARD;
6103
+   MULADD(at[12], at[47]);    MULADD(at[13], at[46]);    MULADD(at[14], at[45]);    MULADD(at[15], at[44]);    MULADD(at[16], at[43]);    MULADD(at[17], at[42]);    MULADD(at[18], at[41]);    MULADD(at[19], at[40]);    MULADD(at[20], at[39]);    MULADD(at[21], at[38]);    MULADD(at[22], at[37]);    MULADD(at[23], at[36]); 
6104
+   COMBA_STORE(C->dp[35]);
6105
+   /* 36 */
6106
+   COMBA_FORWARD;
6107
+   MULADD(at[13], at[47]);    MULADD(at[14], at[46]);    MULADD(at[15], at[45]);    MULADD(at[16], at[44]);    MULADD(at[17], at[43]);    MULADD(at[18], at[42]);    MULADD(at[19], at[41]);    MULADD(at[20], at[40]);    MULADD(at[21], at[39]);    MULADD(at[22], at[38]);    MULADD(at[23], at[37]); 
6108
+   COMBA_STORE(C->dp[36]);
6109
+   /* 37 */
6110
+   COMBA_FORWARD;
6111
+   MULADD(at[14], at[47]);    MULADD(at[15], at[46]);    MULADD(at[16], at[45]);    MULADD(at[17], at[44]);    MULADD(at[18], at[43]);    MULADD(at[19], at[42]);    MULADD(at[20], at[41]);    MULADD(at[21], at[40]);    MULADD(at[22], at[39]);    MULADD(at[23], at[38]); 
6112
+   COMBA_STORE(C->dp[37]);
6113
+   /* 38 */
6114
+   COMBA_FORWARD;
6115
+   MULADD(at[15], at[47]);    MULADD(at[16], at[46]);    MULADD(at[17], at[45]);    MULADD(at[18], at[44]);    MULADD(at[19], at[43]);    MULADD(at[20], at[42]);    MULADD(at[21], at[41]);    MULADD(at[22], at[40]);    MULADD(at[23], at[39]); 
6116
+   COMBA_STORE(C->dp[38]);
6117
+   /* 39 */
6118
+   COMBA_FORWARD;
6119
+   MULADD(at[16], at[47]);    MULADD(at[17], at[46]);    MULADD(at[18], at[45]);    MULADD(at[19], at[44]);    MULADD(at[20], at[43]);    MULADD(at[21], at[42]);    MULADD(at[22], at[41]);    MULADD(at[23], at[40]); 
6120
+   COMBA_STORE(C->dp[39]);
6121
+   /* 40 */
6122
+   COMBA_FORWARD;
6123
+   MULADD(at[17], at[47]);    MULADD(at[18], at[46]);    MULADD(at[19], at[45]);    MULADD(at[20], at[44]);    MULADD(at[21], at[43]);    MULADD(at[22], at[42]);    MULADD(at[23], at[41]); 
6124
+   COMBA_STORE(C->dp[40]);
6125
+   /* 41 */
6126
+   COMBA_FORWARD;
6127
+   MULADD(at[18], at[47]);    MULADD(at[19], at[46]);    MULADD(at[20], at[45]);    MULADD(at[21], at[44]);    MULADD(at[22], at[43]);    MULADD(at[23], at[42]); 
6128
+   COMBA_STORE(C->dp[41]);
6129
+   /* 42 */
6130
+   COMBA_FORWARD;
6131
+   MULADD(at[19], at[47]);    MULADD(at[20], at[46]);    MULADD(at[21], at[45]);    MULADD(at[22], at[44]);    MULADD(at[23], at[43]); 
6132
+   COMBA_STORE(C->dp[42]);
6133
+   /* 43 */
6134
+   COMBA_FORWARD;
6135
+   MULADD(at[20], at[47]);    MULADD(at[21], at[46]);    MULADD(at[22], at[45]);    MULADD(at[23], at[44]); 
6136
+   COMBA_STORE(C->dp[43]);
6137
+   /* 44 */
6138
+   COMBA_FORWARD;
6139
+   MULADD(at[21], at[47]);    MULADD(at[22], at[46]);    MULADD(at[23], at[45]); 
6140
+   COMBA_STORE(C->dp[44]);
6141
+   /* 45 */
6142
+   COMBA_FORWARD;
6143
+   MULADD(at[22], at[47]);    MULADD(at[23], at[46]); 
6144
+   COMBA_STORE(C->dp[45]);
6145
+   /* 46 */
6146
+   COMBA_FORWARD;
6147
+   MULADD(at[23], at[47]); 
6148
+   COMBA_STORE(C->dp[46]);
6149
+   COMBA_STORE2(C->dp[47]);
6150
+   C->used = 48;
6151
+   C->sign = A->sign ^ B->sign;
6152
+   fp_clamp(C);
6153
+   COMBA_FINI;
6154
+}
6155
+#endif
5496 6156
 
5497
-  if (a->used > b->used) {
5498
-    if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
5499
-      return res;
5500
-    }
5501
-    px = b->used;
5502
-    x = b;
5503
-  } else {
5504
-    if ((res = mp_init_copy (&t, b)) != MP_OKAY) {
5505
-      return res;
5506
-    }
5507
-    px = a->used;
5508
-    x = a;
5509
-  }
6157
+/* End: fp_mul_comba_24.c */
5510 6158
 
5511
-  for (ix = 0; ix < px; ix++) {
5512
-    t.dp[ix] |= x->dp[ix];
5513
-  }
5514
-  mp_clamp (&t);
5515
-  mp_exch (c, &t);
5516
-  mp_clear (&t);
5517
-  return MP_OKAY;
6159
+/* Start: fp_mul_comba_28.c */
6160
+#define TFM_DEFINES
6161
+#include "fp_mul_comba.c"
6162
+
6163
+#ifdef TFM_MUL28
6164
+void fp_mul_comba28(fp_int *A, fp_int *B, fp_int *C)
6165
+{
6166
+   fp_digit c0, c1, c2, at[56];
6167
+
6168
+   memcpy(at, A->dp, 28 * sizeof(fp_digit));
6169
+   memcpy(at+28, B->dp, 28 * sizeof(fp_digit));
6170
+   COMBA_START;
6171
+
6172
+   COMBA_CLEAR;
6173
+   /* 0 */
6174
+   MULADD(at[0], at[28]); 
6175
+   COMBA_STORE(C->dp[0]);
6176
+   /* 1 */
6177
+   COMBA_FORWARD;
6178
+   MULADD(at[0], at[29]);    MULADD(at[1], at[28]); 
6179
+   COMBA_STORE(C->dp[1]);
6180
+   /* 2 */
6181
+   COMBA_FORWARD;
6182
+   MULADD(at[0], at[30]);    MULADD(at[1], at[29]);    MULADD(at[2], at[28]); 
6183
+   COMBA_STORE(C->dp[2]);
6184
+   /* 3 */
6185
+   COMBA_FORWARD;
6186
+   MULADD(at[0], at[31]);    MULADD(at[1], at[30]);    MULADD(at[2], at[29]);    MULADD(at[3], at[28]); 
6187
+   COMBA_STORE(C->dp[3]);
6188
+   /* 4 */
6189
+   COMBA_FORWARD;
6190
+   MULADD(at[0], at[32]);    MULADD(at[1], at[31]);    MULADD(at[2], at[30]);    MULADD(at[3], at[29]);    MULADD(at[4], at[28]); 
6191
+   COMBA_STORE(C->dp[4]);
6192
+   /* 5 */
6193
+   COMBA_FORWARD;
6194
+   MULADD(at[0], at[33]);    MULADD(at[1], at[32]);    MULADD(at[2], at[31]);    MULADD(at[3], at[30]);    MULADD(at[4], at[29]);    MULADD(at[5], at[28]); 
6195
+   COMBA_STORE(C->dp[5]);
6196
+   /* 6 */
6197
+   COMBA_FORWARD;
6198
+   MULADD(at[0], at[34]);    MULADD(at[1], at[33]);    MULADD(at[2], at[32]);    MULADD(at[3], at[31]);    MULADD(at[4], at[30]);    MULADD(at[5], at[29]);    MULADD(at[6], at[28]); 
6199
+   COMBA_STORE(C->dp[6]);
6200
+   /* 7 */
6201
+   COMBA_FORWARD;
6202
+   MULADD(at[0], at[35]);    MULADD(at[1], at[34]);    MULADD(at[2], at[33]);    MULADD(at[3], at[32]);    MULADD(at[4], at[31]);    MULADD(at[5], at[30]);    MULADD(at[6], at[29]);    MULADD(at[7], at[28]); 
6203
+   COMBA_STORE(C->dp[7]);
6204
+   /* 8 */
6205
+   COMBA_FORWARD;
6206
+   MULADD(at[0], at[36]);    MULADD(at[1], at[35]);    MULADD(at[2], at[34]);    MULADD(at[3], at[33]);    MULADD(at[4], at[32]);    MULADD(at[5], at[31]);    MULADD(at[6], at[30]);    MULADD(at[7], at[29]);    MULADD(at[8], at[28]); 
6207
+   COMBA_STORE(C->dp[8]);
6208
+   /* 9 */
6209
+   COMBA_FORWARD;
6210
+   MULADD(at[0], at[37]);    MULADD(at[1], at[36]);    MULADD(at[2], at[35]);    MULADD(at[3], at[34]);    MULADD(at[4], at[33]);    MULADD(at[5], at[32]);    MULADD(at[6], at[31]);    MULADD(at[7], at[30]);    MULADD(at[8], at[29]);    MULADD(at[9], at[28]); 
6211
+   COMBA_STORE(C->dp[9]);
6212
+   /* 10 */
6213
+   COMBA_FORWARD;
6214
+   MULADD(at[0], at[38]);    MULADD(at[1], at[37]);    MULADD(at[2], at[36]);    MULADD(at[3], at[35]);    MULADD(at[4], at[34]);    MULADD(at[5], at[33]);    MULADD(at[6], at[32]);    MULADD(at[7], at[31]);    MULADD(at[8], at[30]);    MULADD(at[9], at[29]);    MULADD(at[10], at[28]); 
6215
+   COMBA_STORE(C->dp[10]);
6216
+   /* 11 */
6217
+   COMBA_FORWARD;
6218
+   MULADD(at[0], at[39]);    MULADD(at[1], at[38]);    MULADD(at[2], at[37]);    MULADD(at[3], at[36]);    MULADD(at[4], at[35]);    MULADD(at[5], at[34]);    MULADD(at[6], at[33]);    MULADD(at[7], at[32]);    MULADD(at[8], at[31]);    MULADD(at[9], at[30]);    MULADD(at[10], at[29]);    MULADD(at[11], at[28]); 
6219
+   COMBA_STORE(C->dp[11]);
6220
+   /* 12 */
6221
+   COMBA_FORWARD;
6222
+   MULADD(at[0], at[40]);    MULADD(at[1], at[39]);    MULADD(at[2], at[38]);    MULADD(at[3], at[37]);    MULADD(at[4], at[36]);    MULADD(at[5], at[35]);    MULADD(at[6], at[34]);    MULADD(at[7], at[33]);    MULADD(at[8], at[32]);    MULADD(at[9], at[31]);    MULADD(at[10], at[30]);    MULADD(at[11], at[29]);    MULADD(at[12], at[28]); 
6223
+   COMBA_STORE(C->dp[12]);
6224
+   /* 13 */
6225
+   COMBA_FORWARD;
6226
+   MULADD(at[0], at[41]);    MULADD(at[1], at[40]);    MULADD(at[2], at[39]);    MULADD(at[3], at[38]);    MULADD(at[4], at[37]);    MULADD(at[5], at[36]);    MULADD(at[6], at[35]);    MULADD(at[7], at[34]);    MULADD(at[8], at[33]);    MULADD(at[9], at[32]);    MULADD(at[10], at[31]);    MULADD(at[11], at[30]);    MULADD(at[12], at[29]);    MULADD(at[13], at[28]); 
6227
+   COMBA_STORE(C->dp[13]);
6228
+   /* 14 */
6229
+   COMBA_FORWARD;
6230
+   MULADD(at[0], at[42]);    MULADD(at[1], at[41]);    MULADD(at[2], at[40]);    MULADD(at[3], at[39]);    MULADD(at[4], at[38]);    MULADD(at[5], at[37]);    MULADD(at[6], at[36]);    MULADD(at[7], at[35]);    MULADD(at[8], at[34]);    MULADD(at[9], at[33]);    MULADD(at[10], at[32]);    MULADD(at[11], at[31]);    MULADD(at[12], at[30]);    MULADD(at[13], at[29]);    MULADD(at[14], at[28]); 
6231
+   COMBA_STORE(C->dp[14]);
6232
+   /* 15 */
6233
+   COMBA_FORWARD;
6234
+   MULADD(at[0], at[43]);    MULADD(at[1], at[42]);    MULADD(at[2], at[41]);    MULADD(at[3], at[40]);    MULADD(at[4], at[39]);    MULADD(at[5], at[38]);    MULADD(at[6], at[37]);    MULADD(at[7], at[36]);    MULADD(at[8], at[35]);    MULADD(at[9], at[34]);    MULADD(at[10], at[33]);    MULADD(at[11], at[32]);    MULADD(at[12], at[31]);    MULADD(at[13], at[30]);    MULADD(at[14], at[29]);    MULADD(at[15], at[28]); 
6235
+   COMBA_STORE(C->dp[15]);
6236
+   /* 16 */
6237
+   COMBA_FORWARD;
6238
+   MULADD(at[0], at[44]);    MULADD(at[1], at[43]);    MULADD(at[2], at[42]);    MULADD(at[3], at[41]);    MULADD(at[4], at[40]);    MULADD(at[5], at[39]);    MULADD(at[6], at[38]);    MULADD(at[7], at[37]);    MULADD(at[8], at[36]);    MULADD(at[9], at[35]);    MULADD(at[10], at[34]);    MULADD(at[11], at[33]);    MULADD(at[12], at[32]);    MULADD(at[13], at[31]);    MULADD(at[14], at[30]);    MULADD(at[15], at[29]);    MULADD(at[16], at[28]); 
6239
+   COMBA_STORE(C->dp[16]);
6240
+   /* 17 */
6241
+   COMBA_FORWARD;
6242
+   MULADD(at[0], at[45]);    MULADD(at[1], at[44]);    MULADD(at[2], at[43]);    MULADD(at[3], at[42]);    MULADD(at[4], at[41]);    MULADD(at[5], at[40]);    MULADD(at[6], at[39]);    MULADD(at[7], at[38]);    MULADD(at[8], at[37]);    MULADD(at[9], at[36]);    MULADD(at[10], at[35]);    MULADD(at[11], at[34]);    MULADD(at[12], at[33]);    MULADD(at[13], at[32]);    MULADD(at[14], at[31]);    MULADD(at[15], at[30]);    MULADD(at[16], at[29]);    MULADD(at[17], at[28]); 
6243
+   COMBA_STORE(C->dp[17]);
6244
+   /* 18 */
6245
+   COMBA_FORWARD;
6246
+   MULADD(at[0], at[46]);    MULADD(at[1], at[45]);    MULADD(at[2], at[44]);    MULADD(at[3], at[43]);    MULADD(at[4], at[42]);    MULADD(at[5], at[41]);    MULADD(at[6], at[40]);    MULADD(at[7], at[39]);    MULADD(at[8], at[38]);    MULADD(at[9], at[37]);    MULADD(at[10], at[36]);    MULADD(at[11], at[35]);    MULADD(at[12], at[34]);    MULADD(at[13], at[33]);    MULADD(at[14], at[32]);    MULADD(at[15], at[31]);    MULADD(at[16], at[30]);    MULADD(at[17], at[29]);    MULADD(at[18], at[28]); 
6247
+   COMBA_STORE(C->dp[18]);
6248
+   /* 19 */
6249
+   COMBA_FORWARD;
6250
+   MULADD(at[0], at[47]);    MULADD(at[1], at[46]);    MULADD(at[2], at[45]);    MULADD(at[3], at[44]);    MULADD(at[4], at[43]);    MULADD(at[5], at[42]);    MULADD(at[6], at[41]);    MULADD(at[7], at[40]);    MULADD(at[8], at[39]);    MULADD(at[9], at[38]);    MULADD(at[10], at[37]);    MULADD(at[11], at[36]);    MULADD(at[12], at[35]);    MULADD(at[13], at[34]);    MULADD(at[14], at[33]);    MULADD(at[15], at[32]);    MULADD(at[16], at[31]);    MULADD(at[17], at[30]);    MULADD(at[18], at[29]);    MULADD(at[19], at[28]); 
6251
+   COMBA_STORE(C->dp[19]);
6252
+   /* 20 */
6253
+   COMBA_FORWARD;
6254
+   MULADD(at[0], at[48]);    MULADD(at[1], at[47]);    MULADD(at[2], at[46]);    MULADD(at[3], at[45]);    MULADD(at[4], at[44]);    MULADD(at[5], at[43]);    MULADD(at[6], at[42]);    MULADD(at[7], at[41]);    MULADD(at[8], at[40]);    MULADD(at[9], at[39]);    MULADD(at[10], at[38]);    MULADD(at[11], at[37]);    MULADD(at[12], at[36]);    MULADD(at[13], at[35]);    MULADD(at[14], at[34]);    MULADD(at[15], at[33]);    MULADD(at[16], at[32]);    MULADD(at[17], at[31]);    MULADD(at[18], at[30]);    MULADD(at[19], at[29]);    MULADD(at[20], at[28]); 
6255
+   COMBA_STORE(C->dp[20]);
6256
+   /* 21 */
6257
+   COMBA_FORWARD;
6258
+   MULADD(at[0], at[49]);    MULADD(at[1], at[48]);    MULADD(at[2], at[47]);    MULADD(at[3], at[46]);    MULADD(at[4], at[45]);    MULADD(at[5], at[44]);    MULADD(at[6], at[43]);    MULADD(at[7], at[42]);    MULADD(at[8], at[41]);    MULADD(at[9], at[40]);    MULADD(at[10], at[39]);    MULADD(at[11], at[38]);    MULADD(at[12], at[37]);    MULADD(at[13], at[36]);    MULADD(at[14], at[35]);    MULADD(at[15], at[34]);    MULADD(at[16], at[33]);    MULADD(at[17], at[32]);    MULADD(at[18], at[31]);    MULADD(at[19], at[30]);    MULADD(at[20], at[29]);    MULADD(at[21], at[28]); 
6259
+   COMBA_STORE(C->dp[21]);
6260
+   /* 22 */
6261
+   COMBA_FORWARD;
6262
+   MULADD(at[0], at[50]);    MULADD(at[1], at[49]);    MULADD(at[2], at[48]);    MULADD(at[3], at[47]);    MULADD(at[4], at[46]);    MULADD(at[5], at[45]);    MULADD(at[6], at[44]);    MULADD(at[7], at[43]);    MULADD(at[8], at[42]);    MULADD(at[9], at[41]);    MULADD(at[10], at[40]);    MULADD(at[11], at[39]);    MULADD(at[12], at[38]);    MULADD(at[13], at[37]);    MULADD(at[14], at[36]);    MULADD(at[15], at[35]);    MULADD(at[16], at[34]);    MULADD(at[17], at[33]);    MULADD(at[18], at[32]);    MULADD(at[19], at[31]);    MULADD(at[20], at[30]);    MULADD(at[21], at[29]);    MULADD(at[22], at[28]); 
6263
+   COMBA_STORE(C->dp[22]);
6264
+   /* 23 */
6265
+   COMBA_FORWARD;
6266
+   MULADD(at[0], at[51]);    MULADD(at[1], at[50]);    MULADD(at[2], at[49]);    MULADD(at[3], at[48]);    MULADD(at[4], at[47]);    MULADD(at[5], at[46]);    MULADD(at[6], at[45]);    MULADD(at[7], at[44]);    MULADD(at[8], at[43]);    MULADD(at[9], at[42]);    MULADD(at[10], at[41]);    MULADD(at[11], at[40]);    MULADD(at[12], at[39]);    MULADD(at[13], at[38]);    MULADD(at[14], at[37]);    MULADD(at[15], at[36]);    MULADD(at[16], at[35]);    MULADD(at[17], at[34]);    MULADD(at[18], at[33]);    MULADD(at[19], at[32]);    MULADD(at[20], at[31]);    MULADD(at[21], at[30]);    MULADD(at[22], at[29]);    MULADD(at[23], at[28]); 
6267
+   COMBA_STORE(C->dp[23]);
6268
+   /* 24 */
6269
+   COMBA_FORWARD;
6270
+   MULADD(at[0], at[52]);    MULADD(at[1], at[51]);    MULADD(at[2], at[50]);    MULADD(at[3], at[49]);    MULADD(at[4], at[48]);    MULADD(at[5], at[47]);    MULADD(at[6], at[46]);    MULADD(at[7], at[45]);    MULADD(at[8], at[44]);    MULADD(at[9], at[43]);    MULADD(at[10], at[42]);    MULADD(at[11], at[41]);    MULADD(at[12], at[40]);    MULADD(at[13], at[39]);    MULADD(at[14], at[38]);    MULADD(at[15], at[37]);    MULADD(at[16], at[36]);    MULADD(at[17], at[35]);    MULADD(at[18], at[34]);    MULADD(at[19], at[33]);    MULADD(at[20], at[32]);    MULADD(at[21], at[31]);    MULADD(at[22], at[30]);    MULADD(at[23], at[29]);    MULADD(at[24], at[28]); 
6271
+   COMBA_STORE(C->dp[24]);
6272
+   /* 25 */
6273
+   COMBA_FORWARD;
6274
+   MULADD(at[0], at[53]);    MULADD(at[1], at[52]);    MULADD(at[2], at[51]);    MULADD(at[3], at[50]);    MULADD(at[4], at[49]);    MULADD(at[5], at[48]);    MULADD(at[6], at[47]);    MULADD(at[7], at[46]);    MULADD(at[8], at[45]);    MULADD(at[9], at[44]);    MULADD(at[10], at[43]);    MULADD(at[11], at[42]);    MULADD(at[12], at[41]);    MULADD(at[13], at[40]);    MULADD(at[14], at[39]);    MULADD(at[15], at[38]);    MULADD(at[16], at[37]);    MULADD(at[17], at[36]);    MULADD(at[18], at[35]);    MULADD(at[19], at[34]);    MULADD(at[20], at[33]);    MULADD(at[21], at[32]);    MULADD(at[22], at[31]);    MULADD(at[23], at[30]);    MULADD(at[24], at[29]);    MULADD(at[25], at[28]); 
6275
+   COMBA_STORE(C->dp[25]);
6276
+   /* 26 */
6277
+   COMBA_FORWARD;
6278
+   MULADD(at[0], at[54]);    MULADD(at[1], at[53]);    MULADD(at[2], at[52]);    MULADD(at[3], at[51]);    MULADD(at[4], at[50]);    MULADD(at[5], at[49]);    MULADD(at[6], at[48]);    MULADD(at[7], at[47]);    MULADD(at[8], at[46]);    MULADD(at[9], at[45]);    MULADD(at[10], at[44]);    MULADD(at[11], at[43]);    MULADD(at[12], at[42]);    MULADD(at[13], at[41]);    MULADD(at[14], at[40]);    MULADD(at[15], at[39]);    MULADD(at[16], at[38]);    MULADD(at[17], at[37]);    MULADD(at[18], at[36]);    MULADD(at[19], at[35]);    MULADD(at[20], at[34]);    MULADD(at[21], at[33]);    MULADD(at[22], at[32]);    MULADD(at[23], at[31]);    MULADD(at[24], at[30]);    MULADD(at[25], at[29]);    MULADD(at[26], at[28]); 
6279
+   COMBA_STORE(C->dp[26]);
6280
+   /* 27 */
6281
+   COMBA_FORWARD;
6282
+   MULADD(at[0], at[55]);    MULADD(at[1], at[54]);    MULADD(at[2], at[53]);    MULADD(at[3], at[52]);    MULADD(at[4], at[51]);    MULADD(at[5], at[50]);    MULADD(at[6], at[49]);    MULADD(at[7], at[48]);    MULADD(at[8], at[47]);    MULADD(at[9], at[46]);    MULADD(at[10], at[45]);    MULADD(at[11], at[44]);    MULADD(at[12], at[43]);    MULADD(at[13], at[42]);    MULADD(at[14], at[41]);    MULADD(at[15], at[40]);    MULADD(at[16], at[39]);    MULADD(at[17], at[38]);    MULADD(at[18], at[37]);    MULADD(at[19], at[36]);    MULADD(at[20], at[35]);    MULADD(at[21], at[34]);    MULADD(at[22], at[33]);    MULADD(at[23], at[32]);    MULADD(at[24], at[31]);    MULADD(at[25], at[30]);    MULADD(at[26], at[29]);    MULADD(at[27], at[28]); 
6283
+   COMBA_STORE(C->dp[27]);
6284
+   /* 28 */
6285
+   COMBA_FORWARD;
6286
+   MULADD(at[1], at[55]);    MULADD(at[2], at[54]);    MULADD(at[3], at[53]);    MULADD(at[4], at[52]);    MULADD(at[5], at[51]);    MULADD(at[6], at[50]);    MULADD(at[7], at[49]);    MULADD(at[8], at[48]);    MULADD(at[9], at[47]);    MULADD(at[10], at[46]);    MULADD(at[11], at[45]);    MULADD(at[12], at[44]);    MULADD(at[13], at[43]);    MULADD(at[14], at[42]);    MULADD(at[15], at[41]);    MULADD(at[16], at[40]);    MULADD(at[17], at[39]);    MULADD(at[18], at[38]);    MULADD(at[19], at[37]);    MULADD(at[20], at[36]);    MULADD(at[21], at[35]);    MULADD(at[22], at[34]);    MULADD(at[23], at[33]);    MULADD(at[24], at[32]);    MULADD(at[25], at[31]);    MULADD(at[26], at[30]);    MULADD(at[27], at[29]); 
6287
+   COMBA_STORE(C->dp[28]);
6288
+   /* 29 */
6289
+   COMBA_FORWARD;
6290
+   MULADD(at[2], at[55]);    MULADD(at[3], at[54]);    MULADD(at[4], at[53]);    MULADD(at[5], at[52]);    MULADD(at[6], at[51]);    MULADD(at[7], at[50]);    MULADD(at[8], at[49]);    MULADD(at[9], at[48]);    MULADD(at[10], at[47]);    MULADD(at[11], at[46]);    MULADD(at[12], at[45]);    MULADD(at[13], at[44]);    MULADD(at[14], at[43]);    MULADD(at[15], at[42]);    MULADD(at[16], at[41]);    MULADD(at[17], at[40]);    MULADD(at[18], at[39]);    MULADD(at[19], at[38]);    MULADD(at[20], at[37]);    MULADD(at[21], at[36]);    MULADD(at[22], at[35]);    MULADD(at[23], at[34]);    MULADD(at[24], at[33]);    MULADD(at[25], at[32]);    MULADD(at[26], at[31]);    MULADD(at[27], at[30]); 
6291
+   COMBA_STORE(C->dp[29]);
6292
+   /* 30 */
6293
+   COMBA_FORWARD;
6294
+   MULADD(at[3], at[55]);    MULADD(at[4], at[54]);    MULADD(at[5], at[53]);    MULADD(at[6], at[52]);    MULADD(at[7], at[51]);    MULADD(at[8], at[50]);    MULADD(at[9], at[49]);    MULADD(at[10], at[48]);    MULADD(at[11], at[47]);    MULADD(at[12], at[46]);    MULADD(at[13], at[45]);    MULADD(at[14], at[44]);    MULADD(at[15], at[43]);    MULADD(at[16], at[42]);    MULADD(at[17], at[41]);    MULADD(at[18], at[40]);    MULADD(at[19], at[39]);    MULADD(at[20], at[38]);    MULADD(at[21], at[37]);    MULADD(at[22], at[36]);    MULADD(at[23], at[35]);    MULADD(at[24], at[34]);    MULADD(at[25], at[33]);    MULADD(at[26], at[32]);    MULADD(at[27], at[31]); 
6295
+   COMBA_STORE(C->dp[30]);
6296
+   /* 31 */
6297
+   COMBA_FORWARD;
6298
+   MULADD(at[4], at[55]);    MULADD(at[5], at[54]);    MULADD(at[6], at[53]);    MULADD(at[7], at[52]);    MULADD(at[8], at[51]);    MULADD(at[9], at[50]);    MULADD(at[10], at[49]);    MULADD(at[11], at[48]);    MULADD(at[12], at[47]);    MULADD(at[13], at[46]);    MULADD(at[14], at[45]);    MULADD(at[15], at[44]);    MULADD(at[16], at[43]);    MULADD(at[17], at[42]);    MULADD(at[18], at[41]);    MULADD(at[19], at[40]);    MULADD(at[20], at[39]);    MULADD(at[21], at[38]);    MULADD(at[22], at[37]);    MULADD(at[23], at[36]);    MULADD(at[24], at[35]);    MULADD(at[25], at[34]);    MULADD(at[26], at[33]);    MULADD(at[27], at[32]); 
6299
+   COMBA_STORE(C->dp[31]);
6300
+   /* 32 */
6301
+   COMBA_FORWARD;
6302
+   MULADD(at[5], at[55]);    MULADD(at[6], at[54]);    MULADD(at[7], at[53]);    MULADD(at[8], at[52]);    MULADD(at[9], at[51]);    MULADD(at[10], at[50]);    MULADD(at[11], at[49]);    MULADD(at[12], at[48]);    MULADD(at[13], at[47]);    MULADD(at[14], at[46]);    MULADD(at[15], at[45]);    MULADD(at[16], at[44]);    MULADD(at[17], at[43]);    MULADD(at[18], at[42]);    MULADD(at[19], at[41]);    MULADD(at[20], at[40]);    MULADD(at[21], at[39]);    MULADD(at[22], at[38]);    MULADD(at[23], at[37]);    MULADD(at[24], at[36]);    MULADD(at[25], at[35]);    MULADD(at[26], at[34]);    MULADD(at[27], at[33]); 
6303
+   COMBA_STORE(C->dp[32]);
6304
+   /* 33 */
6305
+   COMBA_FORWARD;
6306
+   MULADD(at[6], at[55]);    MULADD(at[7], at[54]);    MULADD(at[8], at[53]);    MULADD(at[9], at[52]);    MULADD(at[10], at[51]);    MULADD(at[11], at[50]);    MULADD(at[12], at[49]);    MULADD(at[13], at[48]);    MULADD(at[14], at[47]);    MULADD(at[15], at[46]);    MULADD(at[16], at[45]);    MULADD(at[17], at[44]);    MULADD(at[18], at[43]);    MULADD(at[19], at[42]);    MULADD(at[20], at[41]);    MULADD(at[21], at[40]);    MULADD(at[22], at[39]);    MULADD(at[23], at[38]);    MULADD(at[24], at[37]);    MULADD(at[25], at[36]);    MULADD(at[26], at[35]);    MULADD(at[27], at[34]); 
6307
+   COMBA_STORE(C->dp[33]);
6308
+   /* 34 */
6309
+   COMBA_FORWARD;
6310
+   MULADD(at[7], at[55]);    MULADD(at[8], at[54]);    MULADD(at[9], at[53]);    MULADD(at[10], at[52]);    MULADD(at[11], at[51]);    MULADD(at[12], at[50]);    MULADD(at[13], at[49]);    MULADD(at[14], at[48]);    MULADD(at[15], at[47]);    MULADD(at[16], at[46]);    MULADD(at[17], at[45]);    MULADD(at[18], at[44]);    MULADD(at[19], at[43]);    MULADD(at[20], at[42]);    MULADD(at[21], at[41]);    MULADD(at[22], at[40]);    MULADD(at[23], at[39]);    MULADD(at[24], at[38]);    MULADD(at[25], at[37]);    MULADD(at[26], at[36]);    MULADD(at[27], at[35]); 
6311
+   COMBA_STORE(C->dp[34]);
6312
+   /* 35 */
6313
+   COMBA_FORWARD;
6314
+   MULADD(at[8], at[55]);    MULADD(at[9], at[54]);    MULADD(at[10], at[53]);    MULADD(at[11], at[52]);    MULADD(at[12], at[51]);    MULADD(at[13], at[50]);    MULADD(at[14], at[49]);    MULADD(at[15], at[48]);    MULADD(at[16], at[47]);    MULADD(at[17], at[46]);    MULADD(at[18], at[45]);    MULADD(at[19], at[44]);    MULADD(at[20], at[43]);    MULADD(at[21], at[42]);    MULADD(at[22], at[41]);    MULADD(at[23], at[40]);    MULADD(at[24], at[39]);    MULADD(at[25], at[38]);    MULADD(at[26], at[37]);    MULADD(at[27], at[36]); 
6315
+   COMBA_STORE(C->dp[35]);
6316
+   /* 36 */
6317
+   COMBA_FORWARD;
6318
+   MULADD(at[9], at[55]);    MULADD(at[10], at[54]);    MULADD(at[11], at[53]);    MULADD(at[12], at[52]);    MULADD(at[13], at[51]);    MULADD(at[14], at[50]);    MULADD(at[15], at[49]);    MULADD(at[16], at[48]);    MULADD(at[17], at[47]);    MULADD(at[18], at[46]);    MULADD(at[19], at[45]);    MULADD(at[20], at[44]);    MULADD(at[21], at[43]);    MULADD(at[22], at[42]);    MULADD(at[23], at[41]);    MULADD(at[24], at[40]);    MULADD(at[25], at[39]);    MULADD(at[26], at[38]);    MULADD(at[27], at[37]); 
6319
+   COMBA_STORE(C->dp[36]);
6320
+   /* 37 */
6321
+   COMBA_FORWARD;
6322
+   MULADD(at[10], at[55]);    MULADD(at[11], at[54]);    MULADD(at[12], at[53]);    MULADD(at[13], at[52]);    MULADD(at[14], at[51]);    MULADD(at[15], at[50]);    MULADD(at[16], at[49]);    MULADD(at[17], at[48]);    MULADD(at[18], at[47]);    MULADD(at[19], at[46]);    MULADD(at[20], at[45]);    MULADD(at[21], at[44]);    MULADD(at[22], at[43]);    MULADD(at[23], at[42]);    MULADD(at[24], at[41]);    MULADD(at[25], at[40]);    MULADD(at[26], at[39]);    MULADD(at[27], at[38]); 
6323
+   COMBA_STORE(C->dp[37]);
6324
+   /* 38 */
6325
+   COMBA_FORWARD;
6326
+   MULADD(at[11], at[55]);    MULADD(at[12], at[54]);    MULADD(at[13], at[53]);    MULADD(at[14], at[52]);    MULADD(at[15], at[51]);    MULADD(at[16], at[50]);    MULADD(at[17], at[49]);    MULADD(at[18], at[48]);    MULADD(at[19], at[47]);    MULADD(at[20], at[46]);    MULADD(at[21], at[45]);    MULADD(at[22], at[44]);    MULADD(at[23], at[43]);    MULADD(at[24], at[42]);    MULADD(at[25], at[41]);    MULADD(at[26], at[40]);    MULADD(at[27], at[39]); 
6327
+   COMBA_STORE(C->dp[38]);
6328
+   /* 39 */
6329
+   COMBA_FORWARD;
6330
+   MULADD(at[12], at[55]);    MULADD(at[13], at[54]);    MULADD(at[14], at[53]);    MULADD(at[15], at[52]);    MULADD(at[16], at[51]);    MULADD(at[17], at[50]);    MULADD(at[18], at[49]);    MULADD(at[19], at[48]);    MULADD(at[20], at[47]);    MULADD(at[21], at[46]);    MULADD(at[22], at[45]);    MULADD(at[23], at[44]);    MULADD(at[24], at[43]);    MULADD(at[25], at[42]);    MULADD(at[26], at[41]);    MULADD(at[27], at[40]); 
6331
+   COMBA_STORE(C->dp[39]);
6332
+   /* 40 */
6333
+   COMBA_FORWARD;
6334
+   MULADD(at[13], at[55]);    MULADD(at[14], at[54]);    MULADD(at[15], at[53]);    MULADD(at[16], at[52]);    MULADD(at[17], at[51]);    MULADD(at[18], at[50]);    MULADD(at[19], at[49]);    MULADD(at[20], at[48]);    MULADD(at[21], at[47]);    MULADD(at[22], at[46]);    MULADD(at[23], at[45]);    MULADD(at[24], at[44]);    MULADD(at[25], at[43]);    MULADD(at[26], at[42]);    MULADD(at[27], at[41]); 
6335
+   COMBA_STORE(C->dp[40]);
6336
+   /* 41 */
6337
+   COMBA_FORWARD;
6338
+   MULADD(at[14], at[55]);    MULADD(at[15], at[54]);    MULADD(at[16], at[53]);    MULADD(at[17], at[52]);    MULADD(at[18], at[51]);    MULADD(at[19], at[50]);    MULADD(at[20], at[49]);    MULADD(at[21], at[48]);    MULADD(at[22], at[47]);    MULADD(at[23], at[46]);    MULADD(at[24], at[45]);    MULADD(at[25], at[44]);    MULADD(at[26], at[43]);    MULADD(at[27], at[42]); 
6339
+   COMBA_STORE(C->dp[41]);
6340
+   /* 42 */
6341
+   COMBA_FORWARD;
6342
+   MULADD(at[15], at[55]);    MULADD(at[16], at[54]);    MULADD(at[17], at[53]);    MULADD(at[18], at[52]);    MULADD(at[19], at[51]);    MULADD(at[20], at[50]);    MULADD(at[21], at[49]);    MULADD(at[22], at[48]);    MULADD(at[23], at[47]);    MULADD(at[24], at[46]);    MULADD(at[25], at[45]);    MULADD(at[26], at[44]);    MULADD(at[27], at[43]); 
6343
+   COMBA_STORE(C->dp[42]);
6344
+   /* 43 */
6345
+   COMBA_FORWARD;
6346
+   MULADD(at[16], at[55]);    MULADD(at[17], at[54]);    MULADD(at[18], at[53]);    MULADD(at[19], at[52]);    MULADD(at[20], at[51]);    MULADD(at[21], at[50]);    MULADD(at[22], at[49]);    MULADD(at[23], at[48]);    MULADD(at[24], at[47]);    MULADD(at[25], at[46]);    MULADD(at[26], at[45]);    MULADD(at[27], at[44]); 
6347
+   COMBA_STORE(C->dp[43]);
6348
+   /* 44 */
6349
+   COMBA_FORWARD;
6350
+   MULADD(at[17], at[55]);    MULADD(at[18], at[54]);    MULADD(at[19], at[53]);    MULADD(at[20], at[52]);    MULADD(at[21], at[51]);    MULADD(at[22], at[50]);    MULADD(at[23], at[49]);    MULADD(at[24], at[48]);    MULADD(at[25], at[47]);    MULADD(at[26], at[46]);    MULADD(at[27], at[45]); 
6351
+   COMBA_STORE(C->dp[44]);
6352
+   /* 45 */
6353
+   COMBA_FORWARD;
6354
+   MULADD(at[18], at[55]);    MULADD(at[19], at[54]);    MULADD(at[20], at[53]);    MULADD(at[21], at[52]);    MULADD(at[22], at[51]);    MULADD(at[23], at[50]);    MULADD(at[24], at[49]);    MULADD(at[25], at[48]);    MULADD(at[26], at[47]);    MULADD(at[27], at[46]); 
6355
+   COMBA_STORE(C->dp[45]);
6356
+   /* 46 */
6357
+   COMBA_FORWARD;
6358
+   MULADD(at[19], at[55]);    MULADD(at[20], at[54]);    MULADD(at[21], at[53]);    MULADD(at[22], at[52]);    MULADD(at[23], at[51]);    MULADD(at[24], at[50]);    MULADD(at[25], at[49]);    MULADD(at[26], at[48]);    MULADD(at[27], at[47]); 
6359
+   COMBA_STORE(C->dp[46]);
6360
+   /* 47 */
6361
+   COMBA_FORWARD;
6362
+   MULADD(at[20], at[55]);    MULADD(at[21], at[54]);    MULADD(at[22], at[53]);    MULADD(at[23], at[52]);    MULADD(at[24], at[51]);    MULADD(at[25], at[50]);    MULADD(at[26], at[49]);    MULADD(at[27], at[48]); 
6363
+   COMBA_STORE(C->dp[47]);
6364
+   /* 48 */
6365
+   COMBA_FORWARD;
6366
+   MULADD(at[21], at[55]);    MULADD(at[22], at[54]);    MULADD(at[23], at[53]);    MULADD(at[24], at[52]);    MULADD(at[25], at[51]);    MULADD(at[26], at[50]);    MULADD(at[27], at[49]); 
6367
+   COMBA_STORE(C->dp[48]);
6368
+   /* 49 */
6369
+   COMBA_FORWARD;
6370
+   MULADD(at[22], at[55]);    MULADD(at[23], at[54]);    MULADD(at[24], at[53]);    MULADD(at[25], at[52]);    MULADD(at[26], at[51]);    MULADD(at[27], at[50]); 
6371
+   COMBA_STORE(C->dp[49]);
6372
+   /* 50 */
6373
+   COMBA_FORWARD;
6374
+   MULADD(at[23], at[55]);    MULADD(at[24], at[54]);    MULADD(at[25], at[53]);    MULADD(at[26], at[52]);    MULADD(at[27], at[51]); 
6375
+   COMBA_STORE(C->dp[50]);
6376
+   /* 51 */
6377
+   COMBA_FORWARD;
6378
+   MULADD(at[24], at[55]);    MULADD(at[25], at[54]);    MULADD(at[26], at[53]);    MULADD(at[27], at[52]); 
6379
+   COMBA_STORE(C->dp[51]);
6380
+   /* 52 */
6381
+   COMBA_FORWARD;
6382
+   MULADD(at[25], at[55]);    MULADD(at[26], at[54]);    MULADD(at[27], at[53]); 
6383
+   COMBA_STORE(C->dp[52]);
6384
+   /* 53 */
6385
+   COMBA_FORWARD;
6386
+   MULADD(at[26], at[55]);    MULADD(at[27], at[54]); 
6387
+   COMBA_STORE(C->dp[53]);
6388
+   /* 54 */
6389
+   COMBA_FORWARD;
6390
+   MULADD(at[27], at[55]); 
6391
+   COMBA_STORE(C->dp[54]);
6392
+   COMBA_STORE2(C->dp[55]);
6393
+   C->used = 56;
6394
+   C->sign = A->sign ^ B->sign;
6395
+   fp_clamp(C);
6396
+   COMBA_FINI;
5518 6397
 }
5519 6398
 #endif
5520 6399
 
5521
-/* $Source: /cvs/libtom/libtommath/bn_mp_or.c,v $ */
5522
-/* $Revision: 1.3 $ */
5523
-/* $Date: 2006/03/31 14:18:44 $ */
6400
+/* End: fp_mul_comba_28.c */
5524 6401
 
5525
-/* End: bn_mp_or.c */
6402
+/* Start: fp_mul_comba_3.c */
6403
+#define TFM_DEFINES
6404
+#include "fp_mul_comba.c"
5526 6405
 
5527
-/* Start: bn_mp_prime_fermat.c */
5528
-#include <bignum.h>
5529
-#ifdef BN_MP_PRIME_FERMAT_C
5530
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
5531
- *
5532
- * LibTomMath is a library that provides multiple-precision
5533
- * integer arithmetic as well as number theoretic functionality.
5534
- *
5535
- * The library was designed directly after the MPI library by
5536
- * Michael Fromberger but has been written from scratch with
5537
- * additional optimizations in place.
5538
- *
5539
- * The library is free for all purposes without any express
5540
- * guarantee it works.
5541
- *
5542
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
5543
- */
6406
+#ifdef TFM_MUL3
6407
+void fp_mul_comba3(fp_int *A, fp_int *B, fp_int *C)
6408
+{
6409
+   fp_digit c0, c1, c2, at[6];
6410
+
6411
+   memcpy(at, A->dp, 3 * sizeof(fp_digit));
6412
+   memcpy(at+3, B->dp, 3 * sizeof(fp_digit));
6413
+   COMBA_START;
6414
+
6415
+   COMBA_CLEAR;
6416
+   /* 0 */
6417
+   MULADD(at[0], at[3]); 
6418
+   COMBA_STORE(C->dp[0]);
6419
+   /* 1 */
6420
+   COMBA_FORWARD;
6421
+   MULADD(at[0], at[4]);    MULADD(at[1], at[3]); 
6422
+   COMBA_STORE(C->dp[1]);
6423
+   /* 2 */
6424
+   COMBA_FORWARD;
6425
+   MULADD(at[0], at[5]);    MULADD(at[1], at[4]);    MULADD(at[2], at[3]); 
6426
+   COMBA_STORE(C->dp[2]);
6427
+   /* 3 */
6428
+   COMBA_FORWARD;
6429
+   MULADD(at[1], at[5]);    MULADD(at[2], at[4]); 
6430
+   COMBA_STORE(C->dp[3]);
6431
+   /* 4 */
6432
+   COMBA_FORWARD;
6433
+   MULADD(at[2], at[5]); 
6434
+   COMBA_STORE(C->dp[4]);
6435
+   COMBA_STORE2(C->dp[5]);
6436
+   C->used = 6;
6437
+   C->sign = A->sign ^ B->sign;
6438
+   fp_clamp(C);
6439
+   COMBA_FINI;
6440
+}
6441
+#endif
5544 6442
 
5545
-/* performs one Fermat test.
5546
- * 
5547
- * If "a" were prime then b**a == b (mod a) since the order of
5548
- * the multiplicative sub-group would be phi(a) = a-1.  That means
5549
- * it would be the same as b**(a mod (a-1)) == b**1 == b (mod a).
5550
- *
5551
- * Sets result to 1 if the congruence holds, or zero otherwise.
5552
- */
5553
-int mp_prime_fermat (mp_int * a, mp_int * b, int *result)
6443
+/* End: fp_mul_comba_3.c */
6444
+
6445
+/* Start: fp_mul_comba_32.c */
6446
+#define TFM_DEFINES
6447
+#include "fp_mul_comba.c"
6448
+
6449
+#ifdef TFM_MUL32
6450
+void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C)
5554 6451
 {
5555
-  mp_int  t;
5556
-  int     err;
6452
+   fp_digit c0, c1, c2, at[64];
6453
+   int out_size;
6454
+
6455
+   out_size = A->used + B->used;
6456
+   memcpy(at, A->dp, 32 * sizeof(fp_digit));
6457
+   memcpy(at+32, B->dp, 32 * sizeof(fp_digit));
6458
+   COMBA_START;
6459
+
6460
+   COMBA_CLEAR;
6461
+   /* 0 */
6462
+   MULADD(at[0], at[32]); 
6463
+   COMBA_STORE(C->dp[0]);
6464
+   /* 1 */
6465
+   COMBA_FORWARD;
6466
+   MULADD(at[0], at[33]);    MULADD(at[1], at[32]); 
6467
+   COMBA_STORE(C->dp[1]);
6468
+   /* 2 */
6469
+   COMBA_FORWARD;
6470
+   MULADD(at[0], at[34]);    MULADD(at[1], at[33]);    MULADD(at[2], at[32]); 
6471
+   COMBA_STORE(C->dp[2]);
6472
+   /* 3 */
6473
+   COMBA_FORWARD;
6474
+   MULADD(at[0], at[35]);    MULADD(at[1], at[34]);    MULADD(at[2], at[33]);    MULADD(at[3], at[32]); 
6475
+   COMBA_STORE(C->dp[3]);
6476
+   /* 4 */
6477
+   COMBA_FORWARD;
6478
+   MULADD(at[0], at[36]);    MULADD(at[1], at[35]);    MULADD(at[2], at[34]);    MULADD(at[3], at[33]);    MULADD(at[4], at[32]); 
6479
+   COMBA_STORE(C->dp[4]);
6480
+   /* 5 */
6481
+   COMBA_FORWARD;
6482
+   MULADD(at[0], at[37]);    MULADD(at[1], at[36]);    MULADD(at[2], at[35]);    MULADD(at[3], at[34]);    MULADD(at[4], at[33]);    MULADD(at[5], at[32]); 
6483
+   COMBA_STORE(C->dp[5]);
6484
+   /* 6 */
6485
+   COMBA_FORWARD;
6486
+   MULADD(at[0], at[38]);    MULADD(at[1], at[37]);    MULADD(at[2], at[36]);    MULADD(at[3], at[35]);    MULADD(at[4], at[34]);    MULADD(at[5], at[33]);    MULADD(at[6], at[32]); 
6487
+   COMBA_STORE(C->dp[6]);
6488
+   /* 7 */
6489
+   COMBA_FORWARD;
6490
+   MULADD(at[0], at[39]);    MULADD(at[1], at[38]);    MULADD(at[2], at[37]);    MULADD(at[3], at[36]);    MULADD(at[4], at[35]);    MULADD(at[5], at[34]);    MULADD(at[6], at[33]);    MULADD(at[7], at[32]); 
6491
+   COMBA_STORE(C->dp[7]);
6492
+   /* 8 */
6493
+   COMBA_FORWARD;
6494
+   MULADD(at[0], at[40]);    MULADD(at[1], at[39]);    MULADD(at[2], at[38]);    MULADD(at[3], at[37]);    MULADD(at[4], at[36]);    MULADD(at[5], at[35]);    MULADD(at[6], at[34]);    MULADD(at[7], at[33]);    MULADD(at[8], at[32]); 
6495
+   COMBA_STORE(C->dp[8]);
6496
+   /* 9 */
6497
+   COMBA_FORWARD;
6498
+   MULADD(at[0], at[41]);    MULADD(at[1], at[40]);    MULADD(at[2], at[39]);    MULADD(at[3], at[38]);    MULADD(at[4], at[37]);    MULADD(at[5], at[36]);    MULADD(at[6], at[35]);    MULADD(at[7], at[34]);    MULADD(at[8], at[33]);    MULADD(at[9], at[32]); 
6499
+   COMBA_STORE(C->dp[9]);
6500
+   /* 10 */
6501
+   COMBA_FORWARD;
6502
+   MULADD(at[0], at[42]);    MULADD(at[1], at[41]);    MULADD(at[2], at[40]);    MULADD(at[3], at[39]);    MULADD(at[4], at[38]);    MULADD(at[5], at[37]);    MULADD(at[6], at[36]);    MULADD(at[7], at[35]);    MULADD(at[8], at[34]);    MULADD(at[9], at[33]);    MULADD(at[10], at[32]); 
6503
+   COMBA_STORE(C->dp[10]);
6504
+   /* 11 */
6505
+   COMBA_FORWARD;
6506
+   MULADD(at[0], at[43]);    MULADD(at[1], at[42]);    MULADD(at[2], at[41]);    MULADD(at[3], at[40]);    MULADD(at[4], at[39]);    MULADD(at[5], at[38]);    MULADD(at[6], at[37]);    MULADD(at[7], at[36]);    MULADD(at[8], at[35]);    MULADD(at[9], at[34]);    MULADD(at[10], at[33]);    MULADD(at[11], at[32]); 
6507
+   COMBA_STORE(C->dp[11]);
6508
+   /* 12 */
6509
+   COMBA_FORWARD;
6510
+   MULADD(at[0], at[44]);    MULADD(at[1], at[43]);    MULADD(at[2], at[42]);    MULADD(at[3], at[41]);    MULADD(at[4], at[40]);    MULADD(at[5], at[39]);    MULADD(at[6], at[38]);    MULADD(at[7], at[37]);    MULADD(at[8], at[36]);    MULADD(at[9], at[35]);    MULADD(at[10], at[34]);    MULADD(at[11], at[33]);    MULADD(at[12], at[32]); 
6511
+   COMBA_STORE(C->dp[12]);
6512
+   /* 13 */
6513
+   COMBA_FORWARD;
6514
+   MULADD(at[0], at[45]);    MULADD(at[1], at[44]);    MULADD(at[2], at[43]);    MULADD(at[3], at[42]);    MULADD(at[4], at[41]);    MULADD(at[5], at[40]);    MULADD(at[6], at[39]);    MULADD(at[7], at[38]);    MULADD(at[8], at[37]);    MULADD(at[9], at[36]);    MULADD(at[10], at[35]);    MULADD(at[11], at[34]);    MULADD(at[12], at[33]);    MULADD(at[13], at[32]); 
6515
+   COMBA_STORE(C->dp[13]);
6516
+   /* 14 */
6517
+   COMBA_FORWARD;
6518
+   MULADD(at[0], at[46]);    MULADD(at[1], at[45]);    MULADD(at[2], at[44]);    MULADD(at[3], at[43]);    MULADD(at[4], at[42]);    MULADD(at[5], at[41]);    MULADD(at[6], at[40]);    MULADD(at[7], at[39]);    MULADD(at[8], at[38]);    MULADD(at[9], at[37]);    MULADD(at[10], at[36]);    MULADD(at[11], at[35]);    MULADD(at[12], at[34]);    MULADD(at[13], at[33]);    MULADD(at[14], at[32]); 
6519
+   COMBA_STORE(C->dp[14]);
6520
+   /* 15 */
6521
+   COMBA_FORWARD;
6522
+   MULADD(at[0], at[47]);    MULADD(at[1], at[46]);    MULADD(at[2], at[45]);    MULADD(at[3], at[44]);    MULADD(at[4], at[43]);    MULADD(at[5], at[42]);    MULADD(at[6], at[41]);    MULADD(at[7], at[40]);    MULADD(at[8], at[39]);    MULADD(at[9], at[38]);    MULADD(at[10], at[37]);    MULADD(at[11], at[36]);    MULADD(at[12], at[35]);    MULADD(at[13], at[34]);    MULADD(at[14], at[33]);    MULADD(at[15], at[32]); 
6523
+   COMBA_STORE(C->dp[15]);
6524
+   /* 16 */
6525
+   COMBA_FORWARD;
6526
+   MULADD(at[0], at[48]);    MULADD(at[1], at[47]);    MULADD(at[2], at[46]);    MULADD(at[3], at[45]);    MULADD(at[4], at[44]);    MULADD(at[5], at[43]);    MULADD(at[6], at[42]);    MULADD(at[7], at[41]);    MULADD(at[8], at[40]);    MULADD(at[9], at[39]);    MULADD(at[10], at[38]);    MULADD(at[11], at[37]);    MULADD(at[12], at[36]);    MULADD(at[13], at[35]);    MULADD(at[14], at[34]);    MULADD(at[15], at[33]);    MULADD(at[16], at[32]); 
6527
+   COMBA_STORE(C->dp[16]);
6528
+   /* 17 */
6529
+   COMBA_FORWARD;
6530
+   MULADD(at[0], at[49]);    MULADD(at[1], at[48]);    MULADD(at[2], at[47]);    MULADD(at[3], at[46]);    MULADD(at[4], at[45]);    MULADD(at[5], at[44]);    MULADD(at[6], at[43]);    MULADD(at[7], at[42]);    MULADD(at[8], at[41]);    MULADD(at[9], at[40]);    MULADD(at[10], at[39]);    MULADD(at[11], at[38]);    MULADD(at[12], at[37]);    MULADD(at[13], at[36]);    MULADD(at[14], at[35]);    MULADD(at[15], at[34]);    MULADD(at[16], at[33]);    MULADD(at[17], at[32]); 
6531
+   COMBA_STORE(C->dp[17]);
6532
+   /* 18 */
6533
+   COMBA_FORWARD;
6534
+   MULADD(at[0], at[50]);    MULADD(at[1], at[49]);    MULADD(at[2], at[48]);    MULADD(at[3], at[47]);    MULADD(at[4], at[46]);    MULADD(at[5], at[45]);    MULADD(at[6], at[44]);    MULADD(at[7], at[43]);    MULADD(at[8], at[42]);    MULADD(at[9], at[41]);    MULADD(at[10], at[40]);    MULADD(at[11], at[39]);    MULADD(at[12], at[38]);    MULADD(at[13], at[37]);    MULADD(at[14], at[36]);    MULADD(at[15], at[35]);    MULADD(at[16], at[34]);    MULADD(at[17], at[33]);    MULADD(at[18], at[32]); 
6535
+   COMBA_STORE(C->dp[18]);
6536
+   /* 19 */
6537
+   COMBA_FORWARD;
6538
+   MULADD(at[0], at[51]);    MULADD(at[1], at[50]);    MULADD(at[2], at[49]);    MULADD(at[3], at[48]);    MULADD(at[4], at[47]);    MULADD(at[5], at[46]);    MULADD(at[6], at[45]);    MULADD(at[7], at[44]);    MULADD(at[8], at[43]);    MULADD(at[9], at[42]);    MULADD(at[10], at[41]);    MULADD(at[11], at[40]);    MULADD(at[12], at[39]);    MULADD(at[13], at[38]);    MULADD(at[14], at[37]);    MULADD(at[15], at[36]);    MULADD(at[16], at[35]);    MULADD(at[17], at[34]);    MULADD(at[18], at[33]);    MULADD(at[19], at[32]); 
6539
+   COMBA_STORE(C->dp[19]);
6540
+   /* 20 */
6541
+   COMBA_FORWARD;
6542
+   MULADD(at[0], at[52]);    MULADD(at[1], at[51]);    MULADD(at[2], at[50]);    MULADD(at[3], at[49]);    MULADD(at[4], at[48]);    MULADD(at[5], at[47]);    MULADD(at[6], at[46]);    MULADD(at[7], at[45]);    MULADD(at[8], at[44]);    MULADD(at[9], at[43]);    MULADD(at[10], at[42]);    MULADD(at[11], at[41]);    MULADD(at[12], at[40]);    MULADD(at[13], at[39]);    MULADD(at[14], at[38]);    MULADD(at[15], at[37]);    MULADD(at[16], at[36]);    MULADD(at[17], at[35]);    MULADD(at[18], at[34]);    MULADD(at[19], at[33]);    MULADD(at[20], at[32]); 
6543
+   COMBA_STORE(C->dp[20]);
6544
+   /* 21 */
6545
+   COMBA_FORWARD;
6546
+   MULADD(at[0], at[53]);    MULADD(at[1], at[52]);    MULADD(at[2], at[51]);    MULADD(at[3], at[50]);    MULADD(at[4], at[49]);    MULADD(at[5], at[48]);    MULADD(at[6], at[47]);    MULADD(at[7], at[46]);    MULADD(at[8], at[45]);    MULADD(at[9], at[44]);    MULADD(at[10], at[43]);    MULADD(at[11], at[42]);    MULADD(at[12], at[41]);    MULADD(at[13], at[40]);    MULADD(at[14], at[39]);    MULADD(at[15], at[38]);    MULADD(at[16], at[37]);    MULADD(at[17], at[36]);    MULADD(at[18], at[35]);    MULADD(at[19], at[34]);    MULADD(at[20], at[33]);    MULADD(at[21], at[32]); 
6547
+   COMBA_STORE(C->dp[21]);
6548
+   /* 22 */
6549
+   COMBA_FORWARD;
6550
+   MULADD(at[0], at[54]);    MULADD(at[1], at[53]);    MULADD(at[2], at[52]);    MULADD(at[3], at[51]);    MULADD(at[4], at[50]);    MULADD(at[5], at[49]);    MULADD(at[6], at[48]);    MULADD(at[7], at[47]);    MULADD(at[8], at[46]);    MULADD(at[9], at[45]);    MULADD(at[10], at[44]);    MULADD(at[11], at[43]);    MULADD(at[12], at[42]);    MULADD(at[13], at[41]);    MULADD(at[14], at[40]);    MULADD(at[15], at[39]);    MULADD(at[16], at[38]);    MULADD(at[17], at[37]);    MULADD(at[18], at[36]);    MULADD(at[19], at[35]);    MULADD(at[20], at[34]);    MULADD(at[21], at[33]);    MULADD(at[22], at[32]); 
6551
+   COMBA_STORE(C->dp[22]);
6552
+   /* 23 */
6553
+   COMBA_FORWARD;
6554
+   MULADD(at[0], at[55]);    MULADD(at[1], at[54]);    MULADD(at[2], at[53]);    MULADD(at[3], at[52]);    MULADD(at[4], at[51]);    MULADD(at[5], at[50]);    MULADD(at[6], at[49]);    MULADD(at[7], at[48]);    MULADD(at[8], at[47]);    MULADD(at[9], at[46]);    MULADD(at[10], at[45]);    MULADD(at[11], at[44]);    MULADD(at[12], at[43]);    MULADD(at[13], at[42]);    MULADD(at[14], at[41]);    MULADD(at[15], at[40]);    MULADD(at[16], at[39]);    MULADD(at[17], at[38]);    MULADD(at[18], at[37]);    MULADD(at[19], at[36]);    MULADD(at[20], at[35]);    MULADD(at[21], at[34]);    MULADD(at[22], at[33]);    MULADD(at[23], at[32]); 
6555
+   COMBA_STORE(C->dp[23]);
6556
+   /* 24 */
6557
+   COMBA_FORWARD;
6558
+   MULADD(at[0], at[56]);    MULADD(at[1], at[55]);    MULADD(at[2], at[54]);    MULADD(at[3], at[53]);    MULADD(at[4], at[52]);    MULADD(at[5], at[51]);    MULADD(at[6], at[50]);    MULADD(at[7], at[49]);    MULADD(at[8], at[48]);    MULADD(at[9], at[47]);    MULADD(at[10], at[46]);    MULADD(at[11], at[45]);    MULADD(at[12], at[44]);    MULADD(at[13], at[43]);    MULADD(at[14], at[42]);    MULADD(at[15], at[41]);    MULADD(at[16], at[40]);    MULADD(at[17], at[39]);    MULADD(at[18], at[38]);    MULADD(at[19], at[37]);    MULADD(at[20], at[36]);    MULADD(at[21], at[35]);    MULADD(at[22], at[34]);    MULADD(at[23], at[33]);    MULADD(at[24], at[32]); 
6559
+   COMBA_STORE(C->dp[24]);
6560
+   /* 25 */
6561
+   COMBA_FORWARD;
6562
+   MULADD(at[0], at[57]);    MULADD(at[1], at[56]);    MULADD(at[2], at[55]);    MULADD(at[3], at[54]);    MULADD(at[4], at[53]);    MULADD(at[5], at[52]);    MULADD(at[6], at[51]);    MULADD(at[7], at[50]);    MULADD(at[8], at[49]);    MULADD(at[9], at[48]);    MULADD(at[10], at[47]);    MULADD(at[11], at[46]);    MULADD(at[12], at[45]);    MULADD(at[13], at[44]);    MULADD(at[14], at[43]);    MULADD(at[15], at[42]);    MULADD(at[16], at[41]);    MULADD(at[17], at[40]);    MULADD(at[18], at[39]);    MULADD(at[19], at[38]);    MULADD(at[20], at[37]);    MULADD(at[21], at[36]);    MULADD(at[22], at[35]);    MULADD(at[23], at[34]);    MULADD(at[24], at[33]);    MULADD(at[25], at[32]); 
6563
+   COMBA_STORE(C->dp[25]);
6564
+   /* 26 */
6565
+   COMBA_FORWARD;
6566
+   MULADD(at[0], at[58]);    MULADD(at[1], at[57]);    MULADD(at[2], at[56]);    MULADD(at[3], at[55]);    MULADD(at[4], at[54]);    MULADD(at[5], at[53]);    MULADD(at[6], at[52]);    MULADD(at[7], at[51]);    MULADD(at[8], at[50]);    MULADD(at[9], at[49]);    MULADD(at[10], at[48]);    MULADD(at[11], at[47]);    MULADD(at[12], at[46]);    MULADD(at[13], at[45]);    MULADD(at[14], at[44]);    MULADD(at[15], at[43]);    MULADD(at[16], at[42]);    MULADD(at[17], at[41]);    MULADD(at[18], at[40]);    MULADD(at[19], at[39]);    MULADD(at[20], at[38]);    MULADD(at[21], at[37]);    MULADD(at[22], at[36]);    MULADD(at[23], at[35]);    MULADD(at[24], at[34]);    MULADD(at[25], at[33]);    MULADD(at[26], at[32]); 
6567
+   COMBA_STORE(C->dp[26]);
6568
+   /* 27 */
6569
+   COMBA_FORWARD;
6570
+   MULADD(at[0], at[59]);    MULADD(at[1], at[58]);    MULADD(at[2], at[57]);    MULADD(at[3], at[56]);    MULADD(at[4], at[55]);    MULADD(at[5], at[54]);    MULADD(at[6], at[53]);    MULADD(at[7], at[52]);    MULADD(at[8], at[51]);    MULADD(at[9], at[50]);    MULADD(at[10], at[49]);    MULADD(at[11], at[48]);    MULADD(at[12], at[47]);    MULADD(at[13], at[46]);    MULADD(at[14], at[45]);    MULADD(at[15], at[44]);    MULADD(at[16], at[43]);    MULADD(at[17], at[42]);    MULADD(at[18], at[41]);    MULADD(at[19], at[40]);    MULADD(at[20], at[39]);    MULADD(at[21], at[38]);    MULADD(at[22], at[37]);    MULADD(at[23], at[36]);    MULADD(at[24], at[35]);    MULADD(at[25], at[34]);    MULADD(at[26], at[33]);    MULADD(at[27], at[32]); 
6571
+   COMBA_STORE(C->dp[27]);
6572
+   /* 28 */
6573
+   COMBA_FORWARD;
6574
+   MULADD(at[0], at[60]);    MULADD(at[1], at[59]);    MULADD(at[2], at[58]);    MULADD(at[3], at[57]);    MULADD(at[4], at[56]);    MULADD(at[5], at[55]);    MULADD(at[6], at[54]);    MULADD(at[7], at[53]);    MULADD(at[8], at[52]);    MULADD(at[9], at[51]);    MULADD(at[10], at[50]);    MULADD(at[11], at[49]);    MULADD(at[12], at[48]);    MULADD(at[13], at[47]);    MULADD(at[14], at[46]);    MULADD(at[15], at[45]);    MULADD(at[16], at[44]);    MULADD(at[17], at[43]);    MULADD(at[18], at[42]);    MULADD(at[19], at[41]);    MULADD(at[20], at[40]);    MULADD(at[21], at[39]);    MULADD(at[22], at[38]);    MULADD(at[23], at[37]);    MULADD(at[24], at[36]);    MULADD(at[25], at[35]);    MULADD(at[26], at[34]);    MULADD(at[27], at[33]);    MULADD(at[28], at[32]); 
6575
+   COMBA_STORE(C->dp[28]);
6576
+   /* 29 */
6577
+   COMBA_FORWARD;
6578
+   MULADD(at[0], at[61]);    MULADD(at[1], at[60]);    MULADD(at[2], at[59]);    MULADD(at[3], at[58]);    MULADD(at[4], at[57]);    MULADD(at[5], at[56]);    MULADD(at[6], at[55]);    MULADD(at[7], at[54]);    MULADD(at[8], at[53]);    MULADD(at[9], at[52]);    MULADD(at[10], at[51]);    MULADD(at[11], at[50]);    MULADD(at[12], at[49]);    MULADD(at[13], at[48]);    MULADD(at[14], at[47]);    MULADD(at[15], at[46]);    MULADD(at[16], at[45]);    MULADD(at[17], at[44]);    MULADD(at[18], at[43]);    MULADD(at[19], at[42]);    MULADD(at[20], at[41]);    MULADD(at[21], at[40]);    MULADD(at[22], at[39]);    MULADD(at[23], at[38]);    MULADD(at[24], at[37]);    MULADD(at[25], at[36]);    MULADD(at[26], at[35]);    MULADD(at[27], at[34]);    MULADD(at[28], at[33]);    MULADD(at[29], at[32]); 
6579
+   COMBA_STORE(C->dp[29]);
6580
+   /* 30 */
6581
+   COMBA_FORWARD;
6582
+   MULADD(at[0], at[62]);    MULADD(at[1], at[61]);    MULADD(at[2], at[60]);    MULADD(at[3], at[59]);    MULADD(at[4], at[58]);    MULADD(at[5], at[57]);    MULADD(at[6], at[56]);    MULADD(at[7], at[55]);    MULADD(at[8], at[54]);    MULADD(at[9], at[53]);    MULADD(at[10], at[52]);    MULADD(at[11], at[51]);    MULADD(at[12], at[50]);    MULADD(at[13], at[49]);    MULADD(at[14], at[48]);    MULADD(at[15], at[47]);    MULADD(at[16], at[46]);    MULADD(at[17], at[45]);    MULADD(at[18], at[44]);    MULADD(at[19], at[43]);    MULADD(at[20], at[42]);    MULADD(at[21], at[41]);    MULADD(at[22], at[40]);    MULADD(at[23], at[39]);    MULADD(at[24], at[38]);    MULADD(at[25], at[37]);    MULADD(at[26], at[36]);    MULADD(at[27], at[35]);    MULADD(at[28], at[34]);    MULADD(at[29], at[33]);    MULADD(at[30], at[32]); 
6583
+   COMBA_STORE(C->dp[30]);
6584
+   /* 31 */
6585
+   COMBA_FORWARD;
6586
+   MULADD(at[0], at[63]);    MULADD(at[1], at[62]);    MULADD(at[2], at[61]);    MULADD(at[3], at[60]);    MULADD(at[4], at[59]);    MULADD(at[5], at[58]);    MULADD(at[6], at[57]);    MULADD(at[7], at[56]);    MULADD(at[8], at[55]);    MULADD(at[9], at[54]);    MULADD(at[10], at[53]);    MULADD(at[11], at[52]);    MULADD(at[12], at[51]);    MULADD(at[13], at[50]);    MULADD(at[14], at[49]);    MULADD(at[15], at[48]);    MULADD(at[16], at[47]);    MULADD(at[17], at[46]);    MULADD(at[18], at[45]);    MULADD(at[19], at[44]);    MULADD(at[20], at[43]);    MULADD(at[21], at[42]);    MULADD(at[22], at[41]);    MULADD(at[23], at[40]);    MULADD(at[24], at[39]);    MULADD(at[25], at[38]);    MULADD(at[26], at[37]);    MULADD(at[27], at[36]);    MULADD(at[28], at[35]);    MULADD(at[29], at[34]);    MULADD(at[30], at[33]);    MULADD(at[31], at[32]); 
6587
+   COMBA_STORE(C->dp[31]);
6588
+   /* 32 */
6589
+   COMBA_FORWARD;
6590
+   MULADD(at[1], at[63]);    MULADD(at[2], at[62]);    MULADD(at[3], at[61]);    MULADD(at[4], at[60]);    MULADD(at[5], at[59]);    MULADD(at[6], at[58]);    MULADD(at[7], at[57]);    MULADD(at[8], at[56]);    MULADD(at[9], at[55]);    MULADD(at[10], at[54]);    MULADD(at[11], at[53]);    MULADD(at[12], at[52]);    MULADD(at[13], at[51]);    MULADD(at[14], at[50]);    MULADD(at[15], at[49]);    MULADD(at[16], at[48]);    MULADD(at[17], at[47]);    MULADD(at[18], at[46]);    MULADD(at[19], at[45]);    MULADD(at[20], at[44]);    MULADD(at[21], at[43]);    MULADD(at[22], at[42]);    MULADD(at[23], at[41]);    MULADD(at[24], at[40]);    MULADD(at[25], at[39]);    MULADD(at[26], at[38]);    MULADD(at[27], at[37]);    MULADD(at[28], at[36]);    MULADD(at[29], at[35]);    MULADD(at[30], at[34]);    MULADD(at[31], at[33]); 
6591
+   COMBA_STORE(C->dp[32]);
6592
+   /* 33 */
6593
+   COMBA_FORWARD;
6594
+   MULADD(at[2], at[63]);    MULADD(at[3], at[62]);    MULADD(at[4], at[61]);    MULADD(at[5], at[60]);    MULADD(at[6], at[59]);    MULADD(at[7], at[58]);    MULADD(at[8], at[57]);    MULADD(at[9], at[56]);    MULADD(at[10], at[55]);    MULADD(at[11], at[54]);    MULADD(at[12], at[53]);    MULADD(at[13], at[52]);    MULADD(at[14], at[51]);    MULADD(at[15], at[50]);    MULADD(at[16], at[49]);    MULADD(at[17], at[48]);    MULADD(at[18], at[47]);    MULADD(at[19], at[46]);    MULADD(at[20], at[45]);    MULADD(at[21], at[44]);    MULADD(at[22], at[43]);    MULADD(at[23], at[42]);    MULADD(at[24], at[41]);    MULADD(at[25], at[40]);    MULADD(at[26], at[39]);    MULADD(at[27], at[38]);    MULADD(at[28], at[37]);    MULADD(at[29], at[36]);    MULADD(at[30], at[35]);    MULADD(at[31], at[34]); 
6595
+   COMBA_STORE(C->dp[33]);
6596
+   /* 34 */
6597
+   COMBA_FORWARD;
6598
+   MULADD(at[3], at[63]);    MULADD(at[4], at[62]);    MULADD(at[5], at[61]);    MULADD(at[6], at[60]);    MULADD(at[7], at[59]);    MULADD(at[8], at[58]);    MULADD(at[9], at[57]);    MULADD(at[10], at[56]);    MULADD(at[11], at[55]);    MULADD(at[12], at[54]);    MULADD(at[13], at[53]);    MULADD(at[14], at[52]);    MULADD(at[15], at[51]);    MULADD(at[16], at[50]);    MULADD(at[17], at[49]);    MULADD(at[18], at[48]);    MULADD(at[19], at[47]);    MULADD(at[20], at[46]);    MULADD(at[21], at[45]);    MULADD(at[22], at[44]);    MULADD(at[23], at[43]);    MULADD(at[24], at[42]);    MULADD(at[25], at[41]);    MULADD(at[26], at[40]);    MULADD(at[27], at[39]);    MULADD(at[28], at[38]);    MULADD(at[29], at[37]);    MULADD(at[30], at[36]);    MULADD(at[31], at[35]); 
6599
+   COMBA_STORE(C->dp[34]);
6600
+   /* 35 */
6601
+   COMBA_FORWARD;
6602
+   MULADD(at[4], at[63]);    MULADD(at[5], at[62]);    MULADD(at[6], at[61]);    MULADD(at[7], at[60]);    MULADD(at[8], at[59]);    MULADD(at[9], at[58]);    MULADD(at[10], at[57]);    MULADD(at[11], at[56]);    MULADD(at[12], at[55]);    MULADD(at[13], at[54]);    MULADD(at[14], at[53]);    MULADD(at[15], at[52]);    MULADD(at[16], at[51]);    MULADD(at[17], at[50]);    MULADD(at[18], at[49]);    MULADD(at[19], at[48]);    MULADD(at[20], at[47]);    MULADD(at[21], at[46]);    MULADD(at[22], at[45]);    MULADD(at[23], at[44]);    MULADD(at[24], at[43]);    MULADD(at[25], at[42]);    MULADD(at[26], at[41]);    MULADD(at[27], at[40]);    MULADD(at[28], at[39]);    MULADD(at[29], at[38]);    MULADD(at[30], at[37]);    MULADD(at[31], at[36]); 
6603
+   COMBA_STORE(C->dp[35]);
6604
+   /* 36 */
6605
+   COMBA_FORWARD;
6606
+   MULADD(at[5], at[63]);    MULADD(at[6], at[62]);    MULADD(at[7], at[61]);    MULADD(at[8], at[60]);    MULADD(at[9], at[59]);    MULADD(at[10], at[58]);    MULADD(at[11], at[57]);    MULADD(at[12], at[56]);    MULADD(at[13], at[55]);    MULADD(at[14], at[54]);    MULADD(at[15], at[53]);    MULADD(at[16], at[52]);    MULADD(at[17], at[51]);    MULADD(at[18], at[50]);    MULADD(at[19], at[49]);    MULADD(at[20], at[48]);    MULADD(at[21], at[47]);    MULADD(at[22], at[46]);    MULADD(at[23], at[45]);    MULADD(at[24], at[44]);    MULADD(at[25], at[43]);    MULADD(at[26], at[42]);    MULADD(at[27], at[41]);    MULADD(at[28], at[40]);    MULADD(at[29], at[39]);    MULADD(at[30], at[38]);    MULADD(at[31], at[37]); 
6607
+   COMBA_STORE(C->dp[36]);
6608
+   /* 37 */
6609
+   COMBA_FORWARD;
6610
+   MULADD(at[6], at[63]);    MULADD(at[7], at[62]);    MULADD(at[8], at[61]);    MULADD(at[9], at[60]);    MULADD(at[10], at[59]);    MULADD(at[11], at[58]);    MULADD(at[12], at[57]);    MULADD(at[13], at[56]);    MULADD(at[14], at[55]);    MULADD(at[15], at[54]);    MULADD(at[16], at[53]);    MULADD(at[17], at[52]);    MULADD(at[18], at[51]);    MULADD(at[19], at[50]);    MULADD(at[20], at[49]);    MULADD(at[21], at[48]);    MULADD(at[22], at[47]);    MULADD(at[23], at[46]);    MULADD(at[24], at[45]);    MULADD(at[25], at[44]);    MULADD(at[26], at[43]);    MULADD(at[27], at[42]);    MULADD(at[28], at[41]);    MULADD(at[29], at[40]);    MULADD(at[30], at[39]);    MULADD(at[31], at[38]); 
6611
+   COMBA_STORE(C->dp[37]);
6612
+   /* 38 */
6613
+   COMBA_FORWARD;
6614
+   MULADD(at[7], at[63]);    MULADD(at[8], at[62]);    MULADD(at[9], at[61]);    MULADD(at[10], at[60]);    MULADD(at[11], at[59]);    MULADD(at[12], at[58]);    MULADD(at[13], at[57]);    MULADD(at[14], at[56]);    MULADD(at[15], at[55]);    MULADD(at[16], at[54]);    MULADD(at[17], at[53]);    MULADD(at[18], at[52]);    MULADD(at[19], at[51]);    MULADD(at[20], at[50]);    MULADD(at[21], at[49]);    MULADD(at[22], at[48]);    MULADD(at[23], at[47]);    MULADD(at[24], at[46]);    MULADD(at[25], at[45]);    MULADD(at[26], at[44]);    MULADD(at[27], at[43]);    MULADD(at[28], at[42]);    MULADD(at[29], at[41]);    MULADD(at[30], at[40]);    MULADD(at[31], at[39]); 
6615
+   COMBA_STORE(C->dp[38]);
6616
+
6617
+   /* early out at 40 digits, 40*32==1280, or two 640 bit operands */
6618
+   if (out_size <= 40) { COMBA_STORE2(C->dp[39]); C->used = 40; C->sign = A->sign ^ B->sign; fp_clamp(C); COMBA_FINI; return; }
6619
+
6620
+   /* 39 */
6621
+   COMBA_FORWARD;
6622
+   MULADD(at[8], at[63]);    MULADD(at[9], at[62]);    MULADD(at[10], at[61]);    MULADD(at[11], at[60]);    MULADD(at[12], at[59]);    MULADD(at[13], at[58]);    MULADD(at[14], at[57]);    MULADD(at[15], at[56]);    MULADD(at[16], at[55]);    MULADD(at[17], at[54]);    MULADD(at[18], at[53]);    MULADD(at[19], at[52]);    MULADD(at[20], at[51]);    MULADD(at[21], at[50]);    MULADD(at[22], at[49]);    MULADD(at[23], at[48]);    MULADD(at[24], at[47]);    MULADD(at[25], at[46]);    MULADD(at[26], at[45]);    MULADD(at[27], at[44]);    MULADD(at[28], at[43]);    MULADD(at[29], at[42]);    MULADD(at[30], at[41]);    MULADD(at[31], at[40]); 
6623
+   COMBA_STORE(C->dp[39]);
6624
+   /* 40 */
6625
+   COMBA_FORWARD;
6626
+   MULADD(at[9], at[63]);    MULADD(at[10], at[62]);    MULADD(at[11], at[61]);    MULADD(at[12], at[60]);    MULADD(at[13], at[59]);    MULADD(at[14], at[58]);    MULADD(at[15], at[57]);    MULADD(at[16], at[56]);    MULADD(at[17], at[55]);    MULADD(at[18], at[54]);    MULADD(at[19], at[53]);    MULADD(at[20], at[52]);    MULADD(at[21], at[51]);    MULADD(at[22], at[50]);    MULADD(at[23], at[49]);    MULADD(at[24], at[48]);    MULADD(at[25], at[47]);    MULADD(at[26], at[46]);    MULADD(at[27], at[45]);    MULADD(at[28], at[44]);    MULADD(at[29], at[43]);    MULADD(at[30], at[42]);    MULADD(at[31], at[41]); 
6627
+   COMBA_STORE(C->dp[40]);
6628
+   /* 41 */
6629
+   COMBA_FORWARD;
6630
+   MULADD(at[10], at[63]);    MULADD(at[11], at[62]);    MULADD(at[12], at[61]);    MULADD(at[13], at[60]);    MULADD(at[14], at[59]);    MULADD(at[15], at[58]);    MULADD(at[16], at[57]);    MULADD(at[17], at[56]);    MULADD(at[18], at[55]);    MULADD(at[19], at[54]);    MULADD(at[20], at[53]);    MULADD(at[21], at[52]);    MULADD(at[22], at[51]);    MULADD(at[23], at[50]);    MULADD(at[24], at[49]);    MULADD(at[25], at[48]);    MULADD(at[26], at[47]);    MULADD(at[27], at[46]);    MULADD(at[28], at[45]);    MULADD(at[29], at[44]);    MULADD(at[30], at[43]);    MULADD(at[31], at[42]); 
6631
+   COMBA_STORE(C->dp[41]);
6632
+   /* 42 */
6633
+   COMBA_FORWARD;
6634
+   MULADD(at[11], at[63]);    MULADD(at[12], at[62]);    MULADD(at[13], at[61]);    MULADD(at[14], at[60]);    MULADD(at[15], at[59]);    MULADD(at[16], at[58]);    MULADD(at[17], at[57]);    MULADD(at[18], at[56]);    MULADD(at[19], at[55]);    MULADD(at[20], at[54]);    MULADD(at[21], at[53]);    MULADD(at[22], at[52]);    MULADD(at[23], at[51]);    MULADD(at[24], at[50]);    MULADD(at[25], at[49]);    MULADD(at[26], at[48]);    MULADD(at[27], at[47]);    MULADD(at[28], at[46]);    MULADD(at[29], at[45]);    MULADD(at[30], at[44]);    MULADD(at[31], at[43]); 
6635
+   COMBA_STORE(C->dp[42]);
6636
+   /* 43 */
6637
+   COMBA_FORWARD;
6638
+   MULADD(at[12], at[63]);    MULADD(at[13], at[62]);    MULADD(at[14], at[61]);    MULADD(at[15], at[60]);    MULADD(at[16], at[59]);    MULADD(at[17], at[58]);    MULADD(at[18], at[57]);    MULADD(at[19], at[56]);    MULADD(at[20], at[55]);    MULADD(at[21], at[54]);    MULADD(at[22], at[53]);    MULADD(at[23], at[52]);    MULADD(at[24], at[51]);    MULADD(at[25], at[50]);    MULADD(at[26], at[49]);    MULADD(at[27], at[48]);    MULADD(at[28], at[47]);    MULADD(at[29], at[46]);    MULADD(at[30], at[45]);    MULADD(at[31], at[44]); 
6639
+   COMBA_STORE(C->dp[43]);
6640
+   /* 44 */
6641
+   COMBA_FORWARD;
6642
+   MULADD(at[13], at[63]);    MULADD(at[14], at[62]);    MULADD(at[15], at[61]);    MULADD(at[16], at[60]);    MULADD(at[17], at[59]);    MULADD(at[18], at[58]);    MULADD(at[19], at[57]);    MULADD(at[20], at[56]);    MULADD(at[21], at[55]);    MULADD(at[22], at[54]);    MULADD(at[23], at[53]);    MULADD(at[24], at[52]);    MULADD(at[25], at[51]);    MULADD(at[26], at[50]);    MULADD(at[27], at[49]);    MULADD(at[28], at[48]);    MULADD(at[29], at[47]);    MULADD(at[30], at[46]);    MULADD(at[31], at[45]); 
6643
+   COMBA_STORE(C->dp[44]);
6644
+   /* 45 */
6645
+   COMBA_FORWARD;
6646
+   MULADD(at[14], at[63]);    MULADD(at[15], at[62]);    MULADD(at[16], at[61]);    MULADD(at[17], at[60]);    MULADD(at[18], at[59]);    MULADD(at[19], at[58]);    MULADD(at[20], at[57]);    MULADD(at[21], at[56]);    MULADD(at[22], at[55]);    MULADD(at[23], at[54]);    MULADD(at[24], at[53]);    MULADD(at[25], at[52]);    MULADD(at[26], at[51]);    MULADD(at[27], at[50]);    MULADD(at[28], at[49]);    MULADD(at[29], at[48]);    MULADD(at[30], at[47]);    MULADD(at[31], at[46]); 
6647
+   COMBA_STORE(C->dp[45]);
6648
+   /* 46 */
6649
+   COMBA_FORWARD;
6650
+   MULADD(at[15], at[63]);    MULADD(at[16], at[62]);    MULADD(at[17], at[61]);    MULADD(at[18], at[60]);    MULADD(at[19], at[59]);    MULADD(at[20], at[58]);    MULADD(at[21], at[57]);    MULADD(at[22], at[56]);    MULADD(at[23], at[55]);    MULADD(at[24], at[54]);    MULADD(at[25], at[53]);    MULADD(at[26], at[52]);    MULADD(at[27], at[51]);    MULADD(at[28], at[50]);    MULADD(at[29], at[49]);    MULADD(at[30], at[48]);    MULADD(at[31], at[47]); 
6651
+   COMBA_STORE(C->dp[46]);
6652
+
6653
+   /* early out at 48 digits, 48*32==1536, or two 768 bit operands */
6654
+   if (out_size <= 48) { COMBA_STORE2(C->dp[47]); C->used = 48; C->sign = A->sign ^ B->sign; fp_clamp(C); COMBA_FINI; return; }
6655
+
6656
+   /* 47 */
6657
+   COMBA_FORWARD;
6658
+   MULADD(at[16], at[63]);    MULADD(at[17], at[62]);    MULADD(at[18], at[61]);    MULADD(at[19], at[60]);    MULADD(at[20], at[59]);    MULADD(at[21], at[58]);    MULADD(at[22], at[57]);    MULADD(at[23], at[56]);    MULADD(at[24], at[55]);    MULADD(at[25], at[54]);    MULADD(at[26], at[53]);    MULADD(at[27], at[52]);    MULADD(at[28], at[51]);    MULADD(at[29], at[50]);    MULADD(at[30], at[49]);    MULADD(at[31], at[48]); 
6659
+   COMBA_STORE(C->dp[47]);
6660
+   /* 48 */
6661
+   COMBA_FORWARD;
6662
+   MULADD(at[17], at[63]);    MULADD(at[18], at[62]);    MULADD(at[19], at[61]);    MULADD(at[20], at[60]);    MULADD(at[21], at[59]);    MULADD(at[22], at[58]);    MULADD(at[23], at[57]);    MULADD(at[24], at[56]);    MULADD(at[25], at[55]);    MULADD(at[26], at[54]);    MULADD(at[27], at[53]);    MULADD(at[28], at[52]);    MULADD(at[29], at[51]);    MULADD(at[30], at[50]);    MULADD(at[31], at[49]); 
6663
+   COMBA_STORE(C->dp[48]);
6664
+   /* 49 */
6665
+   COMBA_FORWARD;
6666
+   MULADD(at[18], at[63]);    MULADD(at[19], at[62]);    MULADD(at[20], at[61]);    MULADD(at[21], at[60]);    MULADD(at[22], at[59]);    MULADD(at[23], at[58]);    MULADD(at[24], at[57]);    MULADD(at[25], at[56]);    MULADD(at[26], at[55]);    MULADD(at[27], at[54]);    MULADD(at[28], at[53]);    MULADD(at[29], at[52]);    MULADD(at[30], at[51]);    MULADD(at[31], at[50]); 
6667
+   COMBA_STORE(C->dp[49]);
6668
+   /* 50 */
6669
+   COMBA_FORWARD;
6670
+   MULADD(at[19], at[63]);    MULADD(at[20], at[62]);    MULADD(at[21], at[61]);    MULADD(at[22], at[60]);    MULADD(at[23], at[59]);    MULADD(at[24], at[58]);    MULADD(at[25], at[57]);    MULADD(at[26], at[56]);    MULADD(at[27], at[55]);    MULADD(at[28], at[54]);    MULADD(at[29], at[53]);    MULADD(at[30], at[52]);    MULADD(at[31], at[51]); 
6671
+   COMBA_STORE(C->dp[50]);
6672
+   /* 51 */
6673
+   COMBA_FORWARD;
6674
+   MULADD(at[20], at[63]);    MULADD(at[21], at[62]);    MULADD(at[22], at[61]);    MULADD(at[23], at[60]);    MULADD(at[24], at[59]);    MULADD(at[25], at[58]);    MULADD(at[26], at[57]);    MULADD(at[27], at[56]);    MULADD(at[28], at[55]);    MULADD(at[29], at[54]);    MULADD(at[30], at[53]);    MULADD(at[31], at[52]); 
6675
+   COMBA_STORE(C->dp[51]);
6676
+   /* 52 */
6677
+   COMBA_FORWARD;
6678
+   MULADD(at[21], at[63]);    MULADD(at[22], at[62]);    MULADD(at[23], at[61]);    MULADD(at[24], at[60]);    MULADD(at[25], at[59]);    MULADD(at[26], at[58]);    MULADD(at[27], at[57]);    MULADD(at[28], at[56]);    MULADD(at[29], at[55]);    MULADD(at[30], at[54]);    MULADD(at[31], at[53]); 
6679
+   COMBA_STORE(C->dp[52]);
6680
+   /* 53 */
6681
+   COMBA_FORWARD;
6682
+   MULADD(at[22], at[63]);    MULADD(at[23], at[62]);    MULADD(at[24], at[61]);    MULADD(at[25], at[60]);    MULADD(at[26], at[59]);    MULADD(at[27], at[58]);    MULADD(at[28], at[57]);    MULADD(at[29], at[56]);    MULADD(at[30], at[55]);    MULADD(at[31], at[54]); 
6683
+   COMBA_STORE(C->dp[53]);
6684
+   /* 54 */
6685
+   COMBA_FORWARD;
6686
+   MULADD(at[23], at[63]);    MULADD(at[24], at[62]);    MULADD(at[25], at[61]);    MULADD(at[26], at[60]);    MULADD(at[27], at[59]);    MULADD(at[28], at[58]);    MULADD(at[29], at[57]);    MULADD(at[30], at[56]);    MULADD(at[31], at[55]); 
6687
+   COMBA_STORE(C->dp[54]);
6688
+
6689
+   /* early out at 56 digits, 56*32==1792, or two 896 bit operands */
6690
+   if (out_size <= 56) { COMBA_STORE2(C->dp[55]); C->used = 56; C->sign = A->sign ^ B->sign; fp_clamp(C); COMBA_FINI; return; }
6691
+
6692
+   /* 55 */
6693
+   COMBA_FORWARD;
6694
+   MULADD(at[24], at[63]);    MULADD(at[25], at[62]);    MULADD(at[26], at[61]);    MULADD(at[27], at[60]);    MULADD(at[28], at[59]);    MULADD(at[29], at[58]);    MULADD(at[30], at[57]);    MULADD(at[31], at[56]); 
6695
+   COMBA_STORE(C->dp[55]);
6696
+   /* 56 */
6697
+   COMBA_FORWARD;
6698
+   MULADD(at[25], at[63]);    MULADD(at[26], at[62]);    MULADD(at[27], at[61]);    MULADD(at[28], at[60]);    MULADD(at[29], at[59]);    MULADD(at[30], at[58]);    MULADD(at[31], at[57]); 
6699
+   COMBA_STORE(C->dp[56]);
6700
+   /* 57 */
6701
+   COMBA_FORWARD;
6702
+   MULADD(at[26], at[63]);    MULADD(at[27], at[62]);    MULADD(at[28], at[61]);    MULADD(at[29], at[60]);    MULADD(at[30], at[59]);    MULADD(at[31], at[58]); 
6703
+   COMBA_STORE(C->dp[57]);
6704
+   /* 58 */
6705
+   COMBA_FORWARD;
6706
+   MULADD(at[27], at[63]);    MULADD(at[28], at[62]);    MULADD(at[29], at[61]);    MULADD(at[30], at[60]);    MULADD(at[31], at[59]); 
6707
+   COMBA_STORE(C->dp[58]);
6708
+   /* 59 */
6709
+   COMBA_FORWARD;
6710
+   MULADD(at[28], at[63]);    MULADD(at[29], at[62]);    MULADD(at[30], at[61]);    MULADD(at[31], at[60]); 
6711
+   COMBA_STORE(C->dp[59]);
6712
+   /* 60 */
6713
+   COMBA_FORWARD;
6714
+   MULADD(at[29], at[63]);    MULADD(at[30], at[62]);    MULADD(at[31], at[61]); 
6715
+   COMBA_STORE(C->dp[60]);
6716
+   /* 61 */
6717
+   COMBA_FORWARD;
6718
+   MULADD(at[30], at[63]);    MULADD(at[31], at[62]); 
6719
+   COMBA_STORE(C->dp[61]);
6720
+   /* 62 */
6721
+   COMBA_FORWARD;
6722
+   MULADD(at[31], at[63]); 
6723
+   COMBA_STORE(C->dp[62]);
6724
+   COMBA_STORE2(C->dp[63]);
6725
+   C->used = 64;
6726
+   C->sign = A->sign ^ B->sign;
6727
+   fp_clamp(C);
6728
+   COMBA_FINI;
6729
+}
6730
+#endif
5557 6731
 
5558
-  /* default to composite  */
5559
-  *result = MP_NO;
6732
+/* End: fp_mul_comba_32.c */
5560 6733
 
5561
-  /* ensure b > 1 */
5562
-  if (mp_cmp_d(b, 1) != MP_GT) {
5563
-     return MP_VAL;
5564
-  }
6734
+/* Start: fp_mul_comba_4.c */
6735
+#define TFM_DEFINES
6736
+#include "fp_mul_comba.c"
5565 6737
 
5566
-  /* init t */
5567
-  if ((err = mp_init (&t)) != MP_OKAY) {
5568
-    return err;
5569
-  }
6738
+#ifdef TFM_MUL4
6739
+void fp_mul_comba4(fp_int *A, fp_int *B, fp_int *C)
6740
+{  /* Fully unrolled 4x4-digit multiply: fills C->dp[0..7] from the first 4 digits of A and of B. */
6741
+   fp_digit c0, c1, c2, at[8];  /* c0..c2 are never named below; presumably the COMBA_*/MULADD macros use them as the column accumulator -- confirm in fp_mul_comba.c */
6742
+
6743
+   memcpy(at, A->dp, 4 * sizeof(fp_digit));  /* at[0..3] = first 4 digits of A (always 4, A->used is not consulted) */
6744
+   memcpy(at+4, B->dp, 4 * sizeof(fp_digit));  /* at[4..7] = first 4 digits of B; every MULADD below reads only at[], never A->dp/B->dp */
6745
+   COMBA_START;
6746
+
6747
+   COMBA_CLEAR;
6748
+   /* column 0: operand pairs at[i], at[4+j] with i + j == 0 */
6749
+   MULADD(at[0], at[4]); 
6750
+   COMBA_STORE(C->dp[0]);
6751
+   /* column 1: pairs with i + j == 1 */
6752
+   COMBA_FORWARD;
6753
+   MULADD(at[0], at[5]);    MULADD(at[1], at[4]); 
6754
+   COMBA_STORE(C->dp[1]);
6755
+   /* column 2: pairs with i + j == 2 */
6756
+   COMBA_FORWARD;
6757
+   MULADD(at[0], at[6]);    MULADD(at[1], at[5]);    MULADD(at[2], at[4]); 
6758
+   COMBA_STORE(C->dp[2]);
6759
+   /* column 3: pairs with i + j == 3 (widest column, all 4 digits of each operand) */
6760
+   COMBA_FORWARD;
6761
+   MULADD(at[0], at[7]);    MULADD(at[1], at[6]);    MULADD(at[2], at[5]);    MULADD(at[3], at[4]); 
6762
+   COMBA_STORE(C->dp[3]);
6763
+   /* column 4: pairs with i + j == 4; i starts at 1 because B contributes only 4 digits */
6764
+   COMBA_FORWARD;
6765
+   MULADD(at[1], at[7]);    MULADD(at[2], at[6]);    MULADD(at[3], at[5]); 
6766
+   COMBA_STORE(C->dp[4]);
6767
+   /* column 5: pairs with i + j == 5 */
6768
+   COMBA_FORWARD;
6769
+   MULADD(at[2], at[7]);    MULADD(at[3], at[6]); 
6770
+   COMBA_STORE(C->dp[5]);
6771
+   /* column 6: pairs with i + j == 6 (single pair: top digit of A with top digit of B) */
6772
+   COMBA_FORWARD;
6773
+   MULADD(at[3], at[7]); 
6774
+   COMBA_STORE(C->dp[6]);
6775
+   COMBA_STORE2(C->dp[7]);  /* top digit: no FORWARD/MULADD precedes it, so presumably this writes the carry left over from column 6 -- confirm macro */
6776
+   C->used = 8;  /* digits 0..7 were all written above */
6777
+   C->sign = A->sign ^ B->sign;  /* result sign field is the XOR of the two operand sign fields */
6778
+   fp_clamp(C);  /* defined elsewhere; presumably trims leading zero digits from C->used -- confirm */
6779
+   COMBA_FINI;
6780
+}
6781
+#endif
5570 6782
 
5571
-  /* compute t = b**a mod a */
5572
-  if ((err = mp_exptmod (b, a, a, &t)) != MP_OKAY) {
5573
-    goto LBL_T;
5574
-  }
6783
+/* End: fp_mul_comba_4.c */
5575 6784
 
5576
-  /* is it equal to b? */
5577
-  if (mp_cmp (&t, b) == MP_EQ) {
5578
-    *result = MP_YES;
5579
-  }
6785
+/* Start: fp_mul_comba_48.c */
6786
+#define TFM_DEFINES
6787
+#include "fp_mul_comba.c"
5580 6788
 
5581
-  err = MP_OKAY;
5582
-LBL_T:mp_clear (&t);
5583
-  return err;
6789
+#ifdef TFM_MUL48
6790
+void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C)
6791
+{
6792
+   fp_digit c0, c1, c2, at[96];
6793
+
6794
+   memcpy(at, A->dp, 48 * sizeof(fp_digit));
6795
+   memcpy(at+48, B->dp, 48 * sizeof(fp_digit));
6796
+   COMBA_START;
6797
+
6798
+   COMBA_CLEAR;
6799
+   /* 0 */
6800
+   MULADD(at[0], at[48]); 
6801
+   COMBA_STORE(C->dp[0]);
6802
+   /* 1 */
6803
+   COMBA_FORWARD;
6804
+   MULADD(at[0], at[49]);    MULADD(at[1], at[48]); 
6805
+   COMBA_STORE(C->dp[1]);
6806
+   /* 2 */
6807
+   COMBA_FORWARD;
6808
+   MULADD(at[0], at[50]);    MULADD(at[1], at[49]);    MULADD(at[2], at[48]); 
6809
+   COMBA_STORE(C->dp[2]);
6810
+   /* 3 */
6811
+   COMBA_FORWARD;
6812
+   MULADD(at[0], at[51]);    MULADD(at[1], at[50]);    MULADD(at[2], at[49]);    MULADD(at[3], at[48]); 
6813
+   COMBA_STORE(C->dp[3]);
6814
+   /* 4 */
6815
+   COMBA_FORWARD;
6816
+   MULADD(at[0], at[52]);    MULADD(at[1], at[51]);    MULADD(at[2], at[50]);    MULADD(at[3], at[49]);    MULADD(at[4], at[48]); 
6817
+   COMBA_STORE(C->dp[4]);
6818
+   /* 5 */
6819
+   COMBA_FORWARD;
6820
+   MULADD(at[0], at[53]);    MULADD(at[1], at[52]);    MULADD(at[2], at[51]);    MULADD(at[3], at[50]);    MULADD(at[4], at[49]);    MULADD(at[5], at[48]); 
6821
+   COMBA_STORE(C->dp[5]);
6822
+   /* 6 */
6823
+   COMBA_FORWARD;
6824
+   MULADD(at[0], at[54]);    MULADD(at[1], at[53]);    MULADD(at[2], at[52]);    MULADD(at[3], at[51]);    MULADD(at[4], at[50]);    MULADD(at[5], at[49]);    MULADD(at[6], at[48]); 
6825
+   COMBA_STORE(C->dp[6]);
6826
+   /* 7 */
6827
+   COMBA_FORWARD;
6828
+   MULADD(at[0], at[55]);    MULADD(at[1], at[54]);    MULADD(at[2], at[53]);    MULADD(at[3], at[52]);    MULADD(at[4], at[51]);    MULADD(at[5], at[50]);    MULADD(at[6], at[49]);    MULADD(at[7], at[48]); 
6829
+   COMBA_STORE(C->dp[7]);
6830
+   /* 8 */
6831
+   COMBA_FORWARD;
6832
+   MULADD(at[0], at[56]);    MULADD(at[1], at[55]);    MULADD(at[2], at[54]);    MULADD(at[3], at[53]);    MULADD(at[4], at[52]);    MULADD(at[5], at[51]);    MULADD(at[6], at[50]);    MULADD(at[7], at[49]);    MULADD(at[8], at[48]); 
6833
+   COMBA_STORE(C->dp[8]);
6834
+   /* 9 */
6835
+   COMBA_FORWARD;
6836
+   MULADD(at[0], at[57]);    MULADD(at[1], at[56]);    MULADD(at[2], at[55]);    MULADD(at[3], at[54]);    MULADD(at[4], at[53]);    MULADD(at[5], at[52]);    MULADD(at[6], at[51]);    MULADD(at[7], at[50]);    MULADD(at[8], at[49]);    MULADD(at[9], at[48]); 
6837
+   COMBA_STORE(C->dp[9]);
6838
+   /* 10 */
6839
+   COMBA_FORWARD;
6840
+   MULADD(at[0], at[58]);    MULADD(at[1], at[57]);    MULADD(at[2], at[56]);    MULADD(at[3], at[55]);    MULADD(at[4], at[54]);    MULADD(at[5], at[53]);    MULADD(at[6], at[52]);    MULADD(at[7], at[51]);    MULADD(at[8], at[50]);    MULADD(at[9], at[49]);    MULADD(at[10], at[48]); 
6841
+   COMBA_STORE(C->dp[10]);
6842
+   /* 11 */
6843
+   COMBA_FORWARD;
6844
+   MULADD(at[0], at[59]);    MULADD(at[1], at[58]);    MULADD(at[2], at[57]);    MULADD(at[3], at[56]);    MULADD(at[4], at[55]);    MULADD(at[5], at[54]);    MULADD(at[6], at[53]);    MULADD(at[7], at[52]);    MULADD(at[8], at[51]);    MULADD(at[9], at[50]);    MULADD(at[10], at[49]);    MULADD(at[11], at[48]); 
6845
+   COMBA_STORE(C->dp[11]);
6846
+   /* 12 */
6847
+   COMBA_FORWARD;
6848
+   MULADD(at[0], at[60]);    MULADD(at[1], at[59]);    MULADD(at[2], at[58]);    MULADD(at[3], at[57]);    MULADD(at[4], at[56]);    MULADD(at[5], at[55]);    MULADD(at[6], at[54]);    MULADD(at[7], at[53]);    MULADD(at[8], at[52]);    MULADD(at[9], at[51]);    MULADD(at[10], at[50]);    MULADD(at[11], at[49]);    MULADD(at[12], at[48]); 
6849
+   COMBA_STORE(C->dp[12]);
6850
+   /* 13 */
6851
+   COMBA_FORWARD;
6852
+   MULADD(at[0], at[61]);    MULADD(at[1], at[60]);    MULADD(at[2], at[59]);    MULADD(at[3], at[58]);    MULADD(at[4], at[57]);    MULADD(at[5], at[56]);    MULADD(at[6], at[55]);    MULADD(at[7], at[54]);    MULADD(at[8], at[53]);    MULADD(at[9], at[52]);    MULADD(at[10], at[51]);    MULADD(at[11], at[50]);    MULADD(at[12], at[49]);    MULADD(at[13], at[48]); 
6853
+   COMBA_STORE(C->dp[13]);
6854
+   /* 14 */
6855
+   COMBA_FORWARD;
6856
+   MULADD(at[0], at[62]);    MULADD(at[1], at[61]);    MULADD(at[2], at[60]);    MULADD(at[3], at[59]);    MULADD(at[4], at[58]);    MULADD(at[5], at[57]);    MULADD(at[6], at[56]);    MULADD(at[7], at[55]);    MULADD(at[8], at[54]);    MULADD(at[9], at[53]);    MULADD(at[10], at[52]);    MULADD(at[11], at[51]);    MULADD(at[12], at[50]);    MULADD(at[13], at[49]);    MULADD(at[14], at[48]); 
6857
+   COMBA_STORE(C->dp[14]);
6858
+   /* 15 */
6859
+   COMBA_FORWARD;
6860
+   MULADD(at[0], at[63]);    MULADD(at[1], at[62]);    MULADD(at[2], at[61]);    MULADD(at[3], at[60]);    MULADD(at[4], at[59]);    MULADD(at[5], at[58]);    MULADD(at[6], at[57]);    MULADD(at[7], at[56]);    MULADD(at[8], at[55]);    MULADD(at[9], at[54]);    MULADD(at[10], at[53]);    MULADD(at[11], at[52]);    MULADD(at[12], at[51]);    MULADD(at[13], at[50]);    MULADD(at[14], at[49]);    MULADD(at[15], at[48]); 
6861
+   COMBA_STORE(C->dp[15]);
6862
+   /* 16 */
6863
+   COMBA_FORWARD;
6864
+   MULADD(at[0], at[64]);    MULADD(at[1], at[63]);    MULADD(at[2], at[62]);    MULADD(at[3], at[61]);    MULADD(at[4], at[60]);    MULADD(at[5], at[59]);    MULADD(at[6], at[58]);    MULADD(at[7], at[57]);    MULADD(at[8], at[56]);    MULADD(at[9], at[55]);    MULADD(at[10], at[54]);    MULADD(at[11], at[53]);    MULADD(at[12], at[52]);    MULADD(at[13], at[51]);    MULADD(at[14], at[50]);    MULADD(at[15], at[49]);    MULADD(at[16], at[48]); 
6865
+   COMBA_STORE(C->dp[16]);
6866
+   /* 17 */
6867
+   COMBA_FORWARD;
6868
+   MULADD(at[0], at[65]);    MULADD(at[1], at[64]);    MULADD(at[2], at[63]);    MULADD(at[3], at[62]);    MULADD(at[4], at[61]);    MULADD(at[5], at[60]);    MULADD(at[6], at[59]);    MULADD(at[7], at[58]);    MULADD(at[8], at[57]);    MULADD(at[9], at[56]);    MULADD(at[10], at[55]);    MULADD(at[11], at[54]);    MULADD(at[12], at[53]);    MULADD(at[13], at[52]);    MULADD(at[14], at[51]);    MULADD(at[15], at[50]);    MULADD(at[16], at[49]);    MULADD(at[17], at[48]); 
6869
+   COMBA_STORE(C->dp[17]);
6870
+   /* 18 */
6871
+   COMBA_FORWARD;
6872
+   MULADD(at[0], at[66]);    MULADD(at[1], at[65]);    MULADD(at[2], at[64]);    MULADD(at[3], at[63]);    MULADD(at[4], at[62]);    MULADD(at[5], at[61]);    MULADD(at[6], at[60]);    MULADD(at[7], at[59]);    MULADD(at[8], at[58]);    MULADD(at[9], at[57]);    MULADD(at[10], at[56]);    MULADD(at[11], at[55]);    MULADD(at[12], at[54]);    MULADD(at[13], at[53]);    MULADD(at[14], at[52]);    MULADD(at[15], at[51]);    MULADD(at[16], at[50]);    MULADD(at[17], at[49]);    MULADD(at[18], at[48]); 
6873
+   COMBA_STORE(C->dp[18]);
6874
+   /* 19 */
6875
+   COMBA_FORWARD;
6876
+   MULADD(at[0], at[67]);    MULADD(at[1], at[66]);    MULADD(at[2], at[65]);    MULADD(at[3], at[64]);    MULADD(at[4], at[63]);    MULADD(at[5], at[62]);    MULADD(at[6], at[61]);    MULADD(at[7], at[60]);    MULADD(at[8], at[59]);    MULADD(at[9], at[58]);    MULADD(at[10], at[57]);    MULADD(at[11], at[56]);    MULADD(at[12], at[55]);    MULADD(at[13], at[54]);    MULADD(at[14], at[53]);    MULADD(at[15], at[52]);    MULADD(at[16], at[51]);    MULADD(at[17], at[50]);    MULADD(at[18], at[49]);    MULADD(at[19], at[48]); 
6877
+   COMBA_STORE(C->dp[19]);
6878
+   /* 20 */
6879
+   COMBA_FORWARD;
6880
+   MULADD(at[0], at[68]);    MULADD(at[1], at[67]);    MULADD(at[2], at[66]);    MULADD(at[3], at[65]);    MULADD(at[4], at[64]);    MULADD(at[5], at[63]);    MULADD(at[6], at[62]);    MULADD(at[7], at[61]);    MULADD(at[8], at[60]);    MULADD(at[9], at[59]);    MULADD(at[10], at[58]);    MULADD(at[11], at[57]);    MULADD(at[12], at[56]);    MULADD(at[13], at[55]);    MULADD(at[14], at[54]);    MULADD(at[15], at[53]);    MULADD(at[16], at[52]);    MULADD(at[17], at[51]);    MULADD(at[18], at[50]);    MULADD(at[19], at[49]);    MULADD(at[20], at[48]); 
6881
+   COMBA_STORE(C->dp[20]);
6882
+   /* 21 */
6883
+   COMBA_FORWARD;
6884
+   MULADD(at[0], at[69]);    MULADD(at[1], at[68]);    MULADD(at[2], at[67]);    MULADD(at[3], at[66]);    MULADD(at[4], at[65]);    MULADD(at[5], at[64]);    MULADD(at[6], at[63]);    MULADD(at[7], at[62]);    MULADD(at[8], at[61]);    MULADD(at[9], at[60]);    MULADD(at[10], at[59]);    MULADD(at[11], at[58]);    MULADD(at[12], at[57]);    MULADD(at[13], at[56]);    MULADD(at[14], at[55]);    MULADD(at[15], at[54]);    MULADD(at[16], at[53]);    MULADD(at[17], at[52]);    MULADD(at[18], at[51]);    MULADD(at[19], at[50]);    MULADD(at[20], at[49]);    MULADD(at[21], at[48]); 
6885
+   COMBA_STORE(C->dp[21]);
6886
+   /* 22 */
6887
+   COMBA_FORWARD;
6888
+   MULADD(at[0], at[70]);    MULADD(at[1], at[69]);    MULADD(at[2], at[68]);    MULADD(at[3], at[67]);    MULADD(at[4], at[66]);    MULADD(at[5], at[65]);    MULADD(at[6], at[64]);    MULADD(at[7], at[63]);    MULADD(at[8], at[62]);    MULADD(at[9], at[61]);    MULADD(at[10], at[60]);    MULADD(at[11], at[59]);    MULADD(at[12], at[58]);    MULADD(at[13], at[57]);    MULADD(at[14], at[56]);    MULADD(at[15], at[55]);    MULADD(at[16], at[54]);    MULADD(at[17], at[53]);    MULADD(at[18], at[52]);    MULADD(at[19], at[51]);    MULADD(at[20], at[50]);    MULADD(at[21], at[49]);    MULADD(at[22], at[48]); 
6889
+   COMBA_STORE(C->dp[22]);
6890
+   /* 23 */
6891
+   COMBA_FORWARD;
6892
+   MULADD(at[0], at[71]);    MULADD(at[1], at[70]);    MULADD(at[2], at[69]);    MULADD(at[3], at[68]);    MULADD(at[4], at[67]);    MULADD(at[5], at[66]);    MULADD(at[6], at[65]);    MULADD(at[7], at[64]);    MULADD(at[8], at[63]);    MULADD(at[9], at[62]);    MULADD(at[10], at[61]);    MULADD(at[11], at[60]);    MULADD(at[12], at[59]);    MULADD(at[13], at[58]);    MULADD(at[14], at[57]);    MULADD(at[15], at[56]);    MULADD(at[16], at[55]);    MULADD(at[17], at[54]);    MULADD(at[18], at[53]);    MULADD(at[19], at[52]);    MULADD(at[20], at[51]);    MULADD(at[21], at[50]);    MULADD(at[22], at[49]);    MULADD(at[23], at[48]); 
6893
+   COMBA_STORE(C->dp[23]);
6894
+   /* 24 */
6895
+   COMBA_FORWARD;
6896
+   MULADD(at[0], at[72]);    MULADD(at[1], at[71]);    MULADD(at[2], at[70]);    MULADD(at[3], at[69]);    MULADD(at[4], at[68]);    MULADD(at[5], at[67]);    MULADD(at[6], at[66]);    MULADD(at[7], at[65]);    MULADD(at[8], at[64]);    MULADD(at[9], at[63]);    MULADD(at[10], at[62]);    MULADD(at[11], at[61]);    MULADD(at[12], at[60]);    MULADD(at[13], at[59]);    MULADD(at[14], at[58]);    MULADD(at[15], at[57]);    MULADD(at[16], at[56]);    MULADD(at[17], at[55]);    MULADD(at[18], at[54]);    MULADD(at[19], at[53]);    MULADD(at[20], at[52]);    MULADD(at[21], at[51]);    MULADD(at[22], at[50]);    MULADD(at[23], at[49]);    MULADD(at[24], at[48]); 
6897
+   COMBA_STORE(C->dp[24]);
6898
+   /* 25 */
6899
+   COMBA_FORWARD;
6900
+   MULADD(at[0], at[73]);    MULADD(at[1], at[72]);    MULADD(at[2], at[71]);    MULADD(at[3], at[70]);    MULADD(at[4], at[69]);    MULADD(at[5], at[68]);    MULADD(at[6], at[67]);    MULADD(at[7], at[66]);    MULADD(at[8], at[65]);    MULADD(at[9], at[64]);    MULADD(at[10], at[63]);    MULADD(at[11], at[62]);    MULADD(at[12], at[61]);    MULADD(at[13], at[60]);    MULADD(at[14], at[59]);    MULADD(at[15], at[58]);    MULADD(at[16], at[57]);    MULADD(at[17], at[56]);    MULADD(at[18], at[55]);    MULADD(at[19], at[54]);    MULADD(at[20], at[53]);    MULADD(at[21], at[52]);    MULADD(at[22], at[51]);    MULADD(at[23], at[50]);    MULADD(at[24], at[49]);    MULADD(at[25], at[48]); 
6901
+   COMBA_STORE(C->dp[25]);
6902
+   /* 26 */
6903
+   COMBA_FORWARD;
6904
+   MULADD(at[0], at[74]);    MULADD(at[1], at[73]);    MULADD(at[2], at[72]);    MULADD(at[3], at[71]);    MULADD(at[4], at[70]);    MULADD(at[5], at[69]);    MULADD(at[6], at[68]);    MULADD(at[7], at[67]);    MULADD(at[8], at[66]);    MULADD(at[9], at[65]);    MULADD(at[10], at[64]);    MULADD(at[11], at[63]);    MULADD(at[12], at[62]);    MULADD(at[13], at[61]);    MULADD(at[14], at[60]);    MULADD(at[15], at[59]);    MULADD(at[16], at[58]);    MULADD(at[17], at[57]);    MULADD(at[18], at[56]);    MULADD(at[19], at[55]);    MULADD(at[20], at[54]);    MULADD(at[21], at[53]);    MULADD(at[22], at[52]);    MULADD(at[23], at[51]);    MULADD(at[24], at[50]);    MULADD(at[25], at[49]);    MULADD(at[26], at[48]); 
6905
+   COMBA_STORE(C->dp[26]);
6906
+   /* 27 */
6907
+   COMBA_FORWARD;
6908
+   MULADD(at[0], at[75]);    MULADD(at[1], at[74]);    MULADD(at[2], at[73]);    MULADD(at[3], at[72]);    MULADD(at[4], at[71]);    MULADD(at[5], at[70]);    MULADD(at[6], at[69]);    MULADD(at[7], at[68]);    MULADD(at[8], at[67]);    MULADD(at[9], at[66]);    MULADD(at[10], at[65]);    MULADD(at[11], at[64]);    MULADD(at[12], at[63]);    MULADD(at[13], at[62]);    MULADD(at[14], at[61]);    MULADD(at[15], at[60]);    MULADD(at[16], at[59]);    MULADD(at[17], at[58]);    MULADD(at[18], at[57]);    MULADD(at[19], at[56]);    MULADD(at[20], at[55]);    MULADD(at[21], at[54]);    MULADD(at[22], at[53]);    MULADD(at[23], at[52]);    MULADD(at[24], at[51]);    MULADD(at[25], at[50]);    MULADD(at[26], at[49]);    MULADD(at[27], at[48]); 
6909
+   COMBA_STORE(C->dp[27]);
6910
+   /* 28 */
6911
+   COMBA_FORWARD;
6912
+   MULADD(at[0], at[76]);    MULADD(at[1], at[75]);    MULADD(at[2], at[74]);    MULADD(at[3], at[73]);    MULADD(at[4], at[72]);    MULADD(at[5], at[71]);    MULADD(at[6], at[70]);    MULADD(at[7], at[69]);    MULADD(at[8], at[68]);    MULADD(at[9], at[67]);    MULADD(at[10], at[66]);    MULADD(at[11], at[65]);    MULADD(at[12], at[64]);    MULADD(at[13], at[63]);    MULADD(at[14], at[62]);    MULADD(at[15], at[61]);    MULADD(at[16], at[60]);    MULADD(at[17], at[59]);    MULADD(at[18], at[58]);    MULADD(at[19], at[57]);    MULADD(at[20], at[56]);    MULADD(at[21], at[55]);    MULADD(at[22], at[54]);    MULADD(at[23], at[53]);    MULADD(at[24], at[52]);    MULADD(at[25], at[51]);    MULADD(at[26], at[50]);    MULADD(at[27], at[49]);    MULADD(at[28], at[48]); 
6913
+   COMBA_STORE(C->dp[28]);
6914
+   /* 29 */
6915
+   COMBA_FORWARD;
6916
+   MULADD(at[0], at[77]);    MULADD(at[1], at[76]);    MULADD(at[2], at[75]);    MULADD(at[3], at[74]);    MULADD(at[4], at[73]);    MULADD(at[5], at[72]);    MULADD(at[6], at[71]);    MULADD(at[7], at[70]);    MULADD(at[8], at[69]);    MULADD(at[9], at[68]);    MULADD(at[10], at[67]);    MULADD(at[11], at[66]);    MULADD(at[12], at[65]);    MULADD(at[13], at[64]);    MULADD(at[14], at[63]);    MULADD(at[15], at[62]);    MULADD(at[16], at[61]);    MULADD(at[17], at[60]);    MULADD(at[18], at[59]);    MULADD(at[19], at[58]);    MULADD(at[20], at[57]);    MULADD(at[21], at[56]);    MULADD(at[22], at[55]);    MULADD(at[23], at[54]);    MULADD(at[24], at[53]);    MULADD(at[25], at[52]);    MULADD(at[26], at[51]);    MULADD(at[27], at[50]);    MULADD(at[28], at[49]);    MULADD(at[29], at[48]); 
6917
+   COMBA_STORE(C->dp[29]);
6918
+   /* 30 */
6919
+   COMBA_FORWARD;
6920
+   MULADD(at[0], at[78]);    MULADD(at[1], at[77]);    MULADD(at[2], at[76]);    MULADD(at[3], at[75]);    MULADD(at[4], at[74]);    MULADD(at[5], at[73]);    MULADD(at[6], at[72]);    MULADD(at[7], at[71]);    MULADD(at[8], at[70]);    MULADD(at[9], at[69]);    MULADD(at[10], at[68]);    MULADD(at[11], at[67]);    MULADD(at[12], at[66]);    MULADD(at[13], at[65]);    MULADD(at[14], at[64]);    MULADD(at[15], at[63]);    MULADD(at[16], at[62]);    MULADD(at[17], at[61]);    MULADD(at[18], at[60]);    MULADD(at[19], at[59]);    MULADD(at[20], at[58]);    MULADD(at[21], at[57]);    MULADD(at[22], at[56]);    MULADD(at[23], at[55]);    MULADD(at[24], at[54]);    MULADD(at[25], at[53]);    MULADD(at[26], at[52]);    MULADD(at[27], at[51]);    MULADD(at[28], at[50]);    MULADD(at[29], at[49]);    MULADD(at[30], at[48]); 
6921
+   COMBA_STORE(C->dp[30]);
6922
+   /* 31 */
6923
+   COMBA_FORWARD;
6924
+   MULADD(at[0], at[79]);    MULADD(at[1], at[78]);    MULADD(at[2], at[77]);    MULADD(at[3], at[76]);    MULADD(at[4], at[75]);    MULADD(at[5], at[74]);    MULADD(at[6], at[73]);    MULADD(at[7], at[72]);    MULADD(at[8], at[71]);    MULADD(at[9], at[70]);    MULADD(at[10], at[69]);    MULADD(at[11], at[68]);    MULADD(at[12], at[67]);    MULADD(at[13], at[66]);    MULADD(at[14], at[65]);    MULADD(at[15], at[64]);    MULADD(at[16], at[63]);    MULADD(at[17], at[62]);    MULADD(at[18], at[61]);    MULADD(at[19], at[60]);    MULADD(at[20], at[59]);    MULADD(at[21], at[58]);    MULADD(at[22], at[57]);    MULADD(at[23], at[56]);    MULADD(at[24], at[55]);    MULADD(at[25], at[54]);    MULADD(at[26], at[53]);    MULADD(at[27], at[52]);    MULADD(at[28], at[51]);    MULADD(at[29], at[50]);    MULADD(at[30], at[49]);    MULADD(at[31], at[48]); 
6925
+   COMBA_STORE(C->dp[31]);
6926
+   /* 32 */
6927
+   COMBA_FORWARD;
6928
+   MULADD(at[0], at[80]);    MULADD(at[1], at[79]);    MULADD(at[2], at[78]);    MULADD(at[3], at[77]);    MULADD(at[4], at[76]);    MULADD(at[5], at[75]);    MULADD(at[6], at[74]);    MULADD(at[7], at[73]);    MULADD(at[8], at[72]);    MULADD(at[9], at[71]);    MULADD(at[10], at[70]);    MULADD(at[11], at[69]);    MULADD(at[12], at[68]);    MULADD(at[13], at[67]);    MULADD(at[14], at[66]);    MULADD(at[15], at[65]);    MULADD(at[16], at[64]);    MULADD(at[17], at[63]);    MULADD(at[18], at[62]);    MULADD(at[19], at[61]);    MULADD(at[20], at[60]);    MULADD(at[21], at[59]);    MULADD(at[22], at[58]);    MULADD(at[23], at[57]);    MULADD(at[24], at[56]);    MULADD(at[25], at[55]);    MULADD(at[26], at[54]);    MULADD(at[27], at[53]);    MULADD(at[28], at[52]);    MULADD(at[29], at[51]);    MULADD(at[30], at[50]);    MULADD(at[31], at[49]);    MULADD(at[32], at[48]); 
6929
+   COMBA_STORE(C->dp[32]);
6930
+   /* 33 */
6931
+   COMBA_FORWARD;
6932
+   MULADD(at[0], at[81]);    MULADD(at[1], at[80]);    MULADD(at[2], at[79]);    MULADD(at[3], at[78]);    MULADD(at[4], at[77]);    MULADD(at[5], at[76]);    MULADD(at[6], at[75]);    MULADD(at[7], at[74]);    MULADD(at[8], at[73]);    MULADD(at[9], at[72]);    MULADD(at[10], at[71]);    MULADD(at[11], at[70]);    MULADD(at[12], at[69]);    MULADD(at[13], at[68]);    MULADD(at[14], at[67]);    MULADD(at[15], at[66]);    MULADD(at[16], at[65]);    MULADD(at[17], at[64]);    MULADD(at[18], at[63]);    MULADD(at[19], at[62]);    MULADD(at[20], at[61]);    MULADD(at[21], at[60]);    MULADD(at[22], at[59]);    MULADD(at[23], at[58]);    MULADD(at[24], at[57]);    MULADD(at[25], at[56]);    MULADD(at[26], at[55]);    MULADD(at[27], at[54]);    MULADD(at[28], at[53]);    MULADD(at[29], at[52]);    MULADD(at[30], at[51]);    MULADD(at[31], at[50]);    MULADD(at[32], at[49]);    MULADD(at[33], at[48]); 
6933
+   COMBA_STORE(C->dp[33]);
6934
+   /* 34 */
6935
+   COMBA_FORWARD;
6936
+   MULADD(at[0], at[82]);    MULADD(at[1], at[81]);    MULADD(at[2], at[80]);    MULADD(at[3], at[79]);    MULADD(at[4], at[78]);    MULADD(at[5], at[77]);    MULADD(at[6], at[76]);    MULADD(at[7], at[75]);    MULADD(at[8], at[74]);    MULADD(at[9], at[73]);    MULADD(at[10], at[72]);    MULADD(at[11], at[71]);    MULADD(at[12], at[70]);    MULADD(at[13], at[69]);    MULADD(at[14], at[68]);    MULADD(at[15], at[67]);    MULADD(at[16], at[66]);    MULADD(at[17], at[65]);    MULADD(at[18], at[64]);    MULADD(at[19], at[63]);    MULADD(at[20], at[62]);    MULADD(at[21], at[61]);    MULADD(at[22], at[60]);    MULADD(at[23], at[59]);    MULADD(at[24], at[58]);    MULADD(at[25], at[57]);    MULADD(at[26], at[56]);    MULADD(at[27], at[55]);    MULADD(at[28], at[54]);    MULADD(at[29], at[53]);    MULADD(at[30], at[52]);    MULADD(at[31], at[51]);    MULADD(at[32], at[50]);    MULADD(at[33], at[49]);    MULADD(at[34], at[48]); 
6937
+   COMBA_STORE(C->dp[34]);
6938
+   /* 35 */
6939
+   COMBA_FORWARD;
6940
+   MULADD(at[0], at[83]);    MULADD(at[1], at[82]);    MULADD(at[2], at[81]);    MULADD(at[3], at[80]);    MULADD(at[4], at[79]);    MULADD(at[5], at[78]);    MULADD(at[6], at[77]);    MULADD(at[7], at[76]);    MULADD(at[8], at[75]);    MULADD(at[9], at[74]);    MULADD(at[10], at[73]);    MULADD(at[11], at[72]);    MULADD(at[12], at[71]);    MULADD(at[13], at[70]);    MULADD(at[14], at[69]);    MULADD(at[15], at[68]);    MULADD(at[16], at[67]);    MULADD(at[17], at[66]);    MULADD(at[18], at[65]);    MULADD(at[19], at[64]);    MULADD(at[20], at[63]);    MULADD(at[21], at[62]);    MULADD(at[22], at[61]);    MULADD(at[23], at[60]);    MULADD(at[24], at[59]);    MULADD(at[25], at[58]);    MULADD(at[26], at[57]);    MULADD(at[27], at[56]);    MULADD(at[28], at[55]);    MULADD(at[29], at[54]);    MULADD(at[30], at[53]);    MULADD(at[31], at[52]);    MULADD(at[32], at[51]);    MULADD(at[33], at[50]);    MULADD(at[34], at[49]);    MULADD(at[35], at[48]); 
6941
+   COMBA_STORE(C->dp[35]);
6942
+   /* 36 */
6943
+   COMBA_FORWARD;
6944
+   MULADD(at[0], at[84]);    MULADD(at[1], at[83]);    MULADD(at[2], at[82]);    MULADD(at[3], at[81]);    MULADD(at[4], at[80]);    MULADD(at[5], at[79]);    MULADD(at[6], at[78]);    MULADD(at[7], at[77]);    MULADD(at[8], at[76]);    MULADD(at[9], at[75]);    MULADD(at[10], at[74]);    MULADD(at[11], at[73]);    MULADD(at[12], at[72]);    MULADD(at[13], at[71]);    MULADD(at[14], at[70]);    MULADD(at[15], at[69]);    MULADD(at[16], at[68]);    MULADD(at[17], at[67]);    MULADD(at[18], at[66]);    MULADD(at[19], at[65]);    MULADD(at[20], at[64]);    MULADD(at[21], at[63]);    MULADD(at[22], at[62]);    MULADD(at[23], at[61]);    MULADD(at[24], at[60]);    MULADD(at[25], at[59]);    MULADD(at[26], at[58]);    MULADD(at[27], at[57]);    MULADD(at[28], at[56]);    MULADD(at[29], at[55]);    MULADD(at[30], at[54]);    MULADD(at[31], at[53]);    MULADD(at[32], at[52]);    MULADD(at[33], at[51]);    MULADD(at[34], at[50]);    MULADD(at[35], at[49]);    MULADD(at[36], at[48]); 
6945
+   COMBA_STORE(C->dp[36]);
6946
+   /* 37 */
6947
+   COMBA_FORWARD;
6948
+   MULADD(at[0], at[85]);    MULADD(at[1], at[84]);    MULADD(at[2], at[83]);    MULADD(at[3], at[82]);    MULADD(at[4], at[81]);    MULADD(at[5], at[80]);    MULADD(at[6], at[79]);    MULADD(at[7], at[78]);    MULADD(at[8], at[77]);    MULADD(at[9], at[76]);    MULADD(at[10], at[75]);    MULADD(at[11], at[74]);    MULADD(at[12], at[73]);    MULADD(at[13], at[72]);    MULADD(at[14], at[71]);    MULADD(at[15], at[70]);    MULADD(at[16], at[69]);    MULADD(at[17], at[68]);    MULADD(at[18], at[67]);    MULADD(at[19], at[66]);    MULADD(at[20], at[65]);    MULADD(at[21], at[64]);    MULADD(at[22], at[63]);    MULADD(at[23], at[62]);    MULADD(at[24], at[61]);    MULADD(at[25], at[60]);    MULADD(at[26], at[59]);    MULADD(at[27], at[58]);    MULADD(at[28], at[57]);    MULADD(at[29], at[56]);    MULADD(at[30], at[55]);    MULADD(at[31], at[54]);    MULADD(at[32], at[53]);    MULADD(at[33], at[52]);    MULADD(at[34], at[51]);    MULADD(at[35], at[50]);    MULADD(at[36], at[49]);    MULADD(at[37], at[48]); 
6949
+   COMBA_STORE(C->dp[37]);
6950
+   /* 38 */
6951
+   COMBA_FORWARD;
6952
+   MULADD(at[0], at[86]);    MULADD(at[1], at[85]);    MULADD(at[2], at[84]);    MULADD(at[3], at[83]);    MULADD(at[4], at[82]);    MULADD(at[5], at[81]);    MULADD(at[6], at[80]);    MULADD(at[7], at[79]);    MULADD(at[8], at[78]);    MULADD(at[9], at[77]);    MULADD(at[10], at[76]);    MULADD(at[11], at[75]);    MULADD(at[12], at[74]);    MULADD(at[13], at[73]);    MULADD(at[14], at[72]);    MULADD(at[15], at[71]);    MULADD(at[16], at[70]);    MULADD(at[17], at[69]);    MULADD(at[18], at[68]);    MULADD(at[19], at[67]);    MULADD(at[20], at[66]);    MULADD(at[21], at[65]);    MULADD(at[22], at[64]);    MULADD(at[23], at[63]);    MULADD(at[24], at[62]);    MULADD(at[25], at[61]);    MULADD(at[26], at[60]);    MULADD(at[27], at[59]);    MULADD(at[28], at[58]);    MULADD(at[29], at[57]);    MULADD(at[30], at[56]);    MULADD(at[31], at[55]);    MULADD(at[32], at[54]);    MULADD(at[33], at[53]);    MULADD(at[34], at[52]);    MULADD(at[35], at[51]);    MULADD(at[36], at[50]);    MULADD(at[37], at[49]);    MULADD(at[38], at[48]); 
6953
+   COMBA_STORE(C->dp[38]);
6954
+   /* 39 */
6955
+   COMBA_FORWARD;
6956
+   MULADD(at[0], at[87]);    MULADD(at[1], at[86]);    MULADD(at[2], at[85]);    MULADD(at[3], at[84]);    MULADD(at[4], at[83]);    MULADD(at[5], at[82]);    MULADD(at[6], at[81]);    MULADD(at[7], at[80]);    MULADD(at[8], at[79]);    MULADD(at[9], at[78]);    MULADD(at[10], at[77]);    MULADD(at[11], at[76]);    MULADD(at[12], at[75]);    MULADD(at[13], at[74]);    MULADD(at[14], at[73]);    MULADD(at[15], at[72]);    MULADD(at[16], at[71]);    MULADD(at[17], at[70]);    MULADD(at[18], at[69]);    MULADD(at[19], at[68]);    MULADD(at[20], at[67]);    MULADD(at[21], at[66]);    MULADD(at[22], at[65]);    MULADD(at[23], at[64]);    MULADD(at[24], at[63]);    MULADD(at[25], at[62]);    MULADD(at[26], at[61]);    MULADD(at[27], at[60]);    MULADD(at[28], at[59]);    MULADD(at[29], at[58]);    MULADD(at[30], at[57]);    MULADD(at[31], at[56]);    MULADD(at[32], at[55]);    MULADD(at[33], at[54]);    MULADD(at[34], at[53]);    MULADD(at[35], at[52]);    MULADD(at[36], at[51]);    MULADD(at[37], at[50]);    MULADD(at[38], at[49]);    MULADD(at[39], at[48]); 
6957
+   COMBA_STORE(C->dp[39]);
6958
+   /* 40 */
6959
+   COMBA_FORWARD;
6960
+   MULADD(at[0], at[88]);    MULADD(at[1], at[87]);    MULADD(at[2], at[86]);    MULADD(at[3], at[85]);    MULADD(at[4], at[84]);    MULADD(at[5], at[83]);    MULADD(at[6], at[82]);    MULADD(at[7], at[81]);    MULADD(at[8], at[80]);    MULADD(at[9], at[79]);    MULADD(at[10], at[78]);    MULADD(at[11], at[77]);    MULADD(at[12], at[76]);    MULADD(at[13], at[75]);    MULADD(at[14], at[74]);    MULADD(at[15], at[73]);    MULADD(at[16], at[72]);    MULADD(at[17], at[71]);    MULADD(at[18], at[70]);    MULADD(at[19], at[69]);    MULADD(at[20], at[68]);    MULADD(at[21], at[67]);    MULADD(at[22], at[66]);    MULADD(at[23], at[65]);    MULADD(at[24], at[64]);    MULADD(at[25], at[63]);    MULADD(at[26], at[62]);    MULADD(at[27], at[61]);    MULADD(at[28], at[60]);    MULADD(at[29], at[59]);    MULADD(at[30], at[58]);    MULADD(at[31], at[57]);    MULADD(at[32], at[56]);    MULADD(at[33], at[55]);    MULADD(at[34], at[54]);    MULADD(at[35], at[53]);    MULADD(at[36], at[52]);    MULADD(at[37], at[51]);    MULADD(at[38], at[50]);    MULADD(at[39], at[49]);    MULADD(at[40], at[48]); 
6961
+   COMBA_STORE(C->dp[40]);
6962
+   /* 41 */
6963
+   COMBA_FORWARD;
6964
+   MULADD(at[0], at[89]);    MULADD(at[1], at[88]);    MULADD(at[2], at[87]);    MULADD(at[3], at[86]);    MULADD(at[4], at[85]);    MULADD(at[5], at[84]);    MULADD(at[6], at[83]);    MULADD(at[7], at[82]);    MULADD(at[8], at[81]);    MULADD(at[9], at[80]);    MULADD(at[10], at[79]);    MULADD(at[11], at[78]);    MULADD(at[12], at[77]);    MULADD(at[13], at[76]);    MULADD(at[14], at[75]);    MULADD(at[15], at[74]);    MULADD(at[16], at[73]);    MULADD(at[17], at[72]);    MULADD(at[18], at[71]);    MULADD(at[19], at[70]);    MULADD(at[20], at[69]);    MULADD(at[21], at[68]);    MULADD(at[22], at[67]);    MULADD(at[23], at[66]);    MULADD(at[24], at[65]);    MULADD(at[25], at[64]);    MULADD(at[26], at[63]);    MULADD(at[27], at[62]);    MULADD(at[28], at[61]);    MULADD(at[29], at[60]);    MULADD(at[30], at[59]);    MULADD(at[31], at[58]);    MULADD(at[32], at[57]);    MULADD(at[33], at[56]);    MULADD(at[34], at[55]);    MULADD(at[35], at[54]);    MULADD(at[36], at[53]);    MULADD(at[37], at[52]);    MULADD(at[38], at[51]);    MULADD(at[39], at[50]);    MULADD(at[40], at[49]);    MULADD(at[41], at[48]); 
6965
+   COMBA_STORE(C->dp[41]);
6966
+   /* 42 */
6967
+   COMBA_FORWARD;
6968
+   MULADD(at[0], at[90]);    MULADD(at[1], at[89]);    MULADD(at[2], at[88]);    MULADD(at[3], at[87]);    MULADD(at[4], at[86]);    MULADD(at[5], at[85]);    MULADD(at[6], at[84]);    MULADD(at[7], at[83]);    MULADD(at[8], at[82]);    MULADD(at[9], at[81]);    MULADD(at[10], at[80]);    MULADD(at[11], at[79]);    MULADD(at[12], at[78]);    MULADD(at[13], at[77]);    MULADD(at[14], at[76]);    MULADD(at[15], at[75]);    MULADD(at[16], at[74]);    MULADD(at[17], at[73]);    MULADD(at[18], at[72]);    MULADD(at[19], at[71]);    MULADD(at[20], at[70]);    MULADD(at[21], at[69]);    MULADD(at[22], at[68]);    MULADD(at[23], at[67]);    MULADD(at[24], at[66]);    MULADD(at[25], at[65]);    MULADD(at[26], at[64]);    MULADD(at[27], at[63]);    MULADD(at[28], at[62]);    MULADD(at[29], at[61]);    MULADD(at[30], at[60]);    MULADD(at[31], at[59]);    MULADD(at[32], at[58]);    MULADD(at[33], at[57]);    MULADD(at[34], at[56]);    MULADD(at[35], at[55]);    MULADD(at[36], at[54]);    MULADD(at[37], at[53]);    MULADD(at[38], at[52]);    MULADD(at[39], at[51]);    MULADD(at[40], at[50]);    MULADD(at[41], at[49]);    MULADD(at[42], at[48]); 
6969
+   COMBA_STORE(C->dp[42]);
6970
+   /* 43 */
6971
+   COMBA_FORWARD;
6972
+   MULADD(at[0], at[91]);    MULADD(at[1], at[90]);    MULADD(at[2], at[89]);    MULADD(at[3], at[88]);    MULADD(at[4], at[87]);    MULADD(at[5], at[86]);    MULADD(at[6], at[85]);    MULADD(at[7], at[84]);    MULADD(at[8], at[83]);    MULADD(at[9], at[82]);    MULADD(at[10], at[81]);    MULADD(at[11], at[80]);    MULADD(at[12], at[79]);    MULADD(at[13], at[78]);    MULADD(at[14], at[77]);    MULADD(at[15], at[76]);    MULADD(at[16], at[75]);    MULADD(at[17], at[74]);    MULADD(at[18], at[73]);    MULADD(at[19], at[72]);    MULADD(at[20], at[71]);    MULADD(at[21], at[70]);    MULADD(at[22], at[69]);    MULADD(at[23], at[68]);    MULADD(at[24], at[67]);    MULADD(at[25], at[66]);    MULADD(at[26], at[65]);    MULADD(at[27], at[64]);    MULADD(at[28], at[63]);    MULADD(at[29], at[62]);    MULADD(at[30], at[61]);    MULADD(at[31], at[60]);    MULADD(at[32], at[59]);    MULADD(at[33], at[58]);    MULADD(at[34], at[57]);    MULADD(at[35], at[56]);    MULADD(at[36], at[55]);    MULADD(at[37], at[54]);    MULADD(at[38], at[53]);    MULADD(at[39], at[52]);    MULADD(at[40], at[51]);    MULADD(at[41], at[50]);    MULADD(at[42], at[49]);    MULADD(at[43], at[48]); 
6973
+   COMBA_STORE(C->dp[43]);
6974
+   /* 44 */
6975
+   COMBA_FORWARD;
6976
+   MULADD(at[0], at[92]);    MULADD(at[1], at[91]);    MULADD(at[2], at[90]);    MULADD(at[3], at[89]);    MULADD(at[4], at[88]);    MULADD(at[5], at[87]);    MULADD(at[6], at[86]);    MULADD(at[7], at[85]);    MULADD(at[8], at[84]);    MULADD(at[9], at[83]);    MULADD(at[10], at[82]);    MULADD(at[11], at[81]);    MULADD(at[12], at[80]);    MULADD(at[13], at[79]);    MULADD(at[14], at[78]);    MULADD(at[15], at[77]);    MULADD(at[16], at[76]);    MULADD(at[17], at[75]);    MULADD(at[18], at[74]);    MULADD(at[19], at[73]);    MULADD(at[20], at[72]);    MULADD(at[21], at[71]);    MULADD(at[22], at[70]);    MULADD(at[23], at[69]);    MULADD(at[24], at[68]);    MULADD(at[25], at[67]);    MULADD(at[26], at[66]);    MULADD(at[27], at[65]);    MULADD(at[28], at[64]);    MULADD(at[29], at[63]);    MULADD(at[30], at[62]);    MULADD(at[31], at[61]);    MULADD(at[32], at[60]);    MULADD(at[33], at[59]);    MULADD(at[34], at[58]);    MULADD(at[35], at[57]);    MULADD(at[36], at[56]);    MULADD(at[37], at[55]);    MULADD(at[38], at[54]);    MULADD(at[39], at[53]);    MULADD(at[40], at[52]);    MULADD(at[41], at[51]);    MULADD(at[42], at[50]);    MULADD(at[43], at[49]);    MULADD(at[44], at[48]); 
6977
+   COMBA_STORE(C->dp[44]);
6978
+   /* 45 */
6979
+   COMBA_FORWARD;
6980
+   MULADD(at[0], at[93]);    MULADD(at[1], at[92]);    MULADD(at[2], at[91]);    MULADD(at[3], at[90]);    MULADD(at[4], at[89]);    MULADD(at[5], at[88]);    MULADD(at[6], at[87]);    MULADD(at[7], at[86]);    MULADD(at[8], at[85]);    MULADD(at[9], at[84]);    MULADD(at[10], at[83]);    MULADD(at[11], at[82]);    MULADD(at[12], at[81]);    MULADD(at[13], at[80]);    MULADD(at[14], at[79]);    MULADD(at[15], at[78]);    MULADD(at[16], at[77]);    MULADD(at[17], at[76]);    MULADD(at[18], at[75]);    MULADD(at[19], at[74]);    MULADD(at[20], at[73]);    MULADD(at[21], at[72]);    MULADD(at[22], at[71]);    MULADD(at[23], at[70]);    MULADD(at[24], at[69]);    MULADD(at[25], at[68]);    MULADD(at[26], at[67]);    MULADD(at[27], at[66]);    MULADD(at[28], at[65]);    MULADD(at[29], at[64]);    MULADD(at[30], at[63]);    MULADD(at[31], at[62]);    MULADD(at[32], at[61]);    MULADD(at[33], at[60]);    MULADD(at[34], at[59]);    MULADD(at[35], at[58]);    MULADD(at[36], at[57]);    MULADD(at[37], at[56]);    MULADD(at[38], at[55]);    MULADD(at[39], at[54]);    MULADD(at[40], at[53]);    MULADD(at[41], at[52]);    MULADD(at[42], at[51]);    MULADD(at[43], at[50]);    MULADD(at[44], at[49]);    MULADD(at[45], at[48]); 
6981
+   COMBA_STORE(C->dp[45]);
6982
+   /* 46 */
6983
+   COMBA_FORWARD;
6984
+   MULADD(at[0], at[94]);    MULADD(at[1], at[93]);    MULADD(at[2], at[92]);    MULADD(at[3], at[91]);    MULADD(at[4], at[90]);    MULADD(at[5], at[89]);    MULADD(at[6], at[88]);    MULADD(at[7], at[87]);    MULADD(at[8], at[86]);    MULADD(at[9], at[85]);    MULADD(at[10], at[84]);    MULADD(at[11], at[83]);    MULADD(at[12], at[82]);    MULADD(at[13], at[81]);    MULADD(at[14], at[80]);    MULADD(at[15], at[79]);    MULADD(at[16], at[78]);    MULADD(at[17], at[77]);    MULADD(at[18], at[76]);    MULADD(at[19], at[75]);    MULADD(at[20], at[74]);    MULADD(at[21], at[73]);    MULADD(at[22], at[72]);    MULADD(at[23], at[71]);    MULADD(at[24], at[70]);    MULADD(at[25], at[69]);    MULADD(at[26], at[68]);    MULADD(at[27], at[67]);    MULADD(at[28], at[66]);    MULADD(at[29], at[65]);    MULADD(at[30], at[64]);    MULADD(at[31], at[63]);    MULADD(at[32], at[62]);    MULADD(at[33], at[61]);    MULADD(at[34], at[60]);    MULADD(at[35], at[59]);    MULADD(at[36], at[58]);    MULADD(at[37], at[57]);    MULADD(at[38], at[56]);    MULADD(at[39], at[55]);    MULADD(at[40], at[54]);    MULADD(at[41], at[53]);    MULADD(at[42], at[52]);    MULADD(at[43], at[51]);    MULADD(at[44], at[50]);    MULADD(at[45], at[49]);    MULADD(at[46], at[48]); 
6985
+   COMBA_STORE(C->dp[46]);
6986
+   /* 47 */
6987
+   COMBA_FORWARD;
6988
+   MULADD(at[0], at[95]);    MULADD(at[1], at[94]);    MULADD(at[2], at[93]);    MULADD(at[3], at[92]);    MULADD(at[4], at[91]);    MULADD(at[5], at[90]);    MULADD(at[6], at[89]);    MULADD(at[7], at[88]);    MULADD(at[8], at[87]);    MULADD(at[9], at[86]);    MULADD(at[10], at[85]);    MULADD(at[11], at[84]);    MULADD(at[12], at[83]);    MULADD(at[13], at[82]);    MULADD(at[14], at[81]);    MULADD(at[15], at[80]);    MULADD(at[16], at[79]);    MULADD(at[17], at[78]);    MULADD(at[18], at[77]);    MULADD(at[19], at[76]);    MULADD(at[20], at[75]);    MULADD(at[21], at[74]);    MULADD(at[22], at[73]);    MULADD(at[23], at[72]);    MULADD(at[24], at[71]);    MULADD(at[25], at[70]);    MULADD(at[26], at[69]);    MULADD(at[27], at[68]);    MULADD(at[28], at[67]);    MULADD(at[29], at[66]);    MULADD(at[30], at[65]);    MULADD(at[31], at[64]);    MULADD(at[32], at[63]);    MULADD(at[33], at[62]);    MULADD(at[34], at[61]);    MULADD(at[35], at[60]);    MULADD(at[36], at[59]);    MULADD(at[37], at[58]);    MULADD(at[38], at[57]);    MULADD(at[39], at[56]);    MULADD(at[40], at[55]);    MULADD(at[41], at[54]);    MULADD(at[42], at[53]);    MULADD(at[43], at[52]);    MULADD(at[44], at[51]);    MULADD(at[45], at[50]);    MULADD(at[46], at[49]);    MULADD(at[47], at[48]); 
6989
+   COMBA_STORE(C->dp[47]);
6990
+   /* 48 */
6991
+   COMBA_FORWARD;
6992
+   MULADD(at[1], at[95]);    MULADD(at[2], at[94]);    MULADD(at[3], at[93]);    MULADD(at[4], at[92]);    MULADD(at[5], at[91]);    MULADD(at[6], at[90]);    MULADD(at[7], at[89]);    MULADD(at[8], at[88]);    MULADD(at[9], at[87]);    MULADD(at[10], at[86]);    MULADD(at[11], at[85]);    MULADD(at[12], at[84]);    MULADD(at[13], at[83]);    MULADD(at[14], at[82]);    MULADD(at[15], at[81]);    MULADD(at[16], at[80]);    MULADD(at[17], at[79]);    MULADD(at[18], at[78]);    MULADD(at[19], at[77]);    MULADD(at[20], at[76]);    MULADD(at[21], at[75]);    MULADD(at[22], at[74]);    MULADD(at[23], at[73]);    MULADD(at[24], at[72]);    MULADD(at[25], at[71]);    MULADD(at[26], at[70]);    MULADD(at[27], at[69]);    MULADD(at[28], at[68]);    MULADD(at[29], at[67]);    MULADD(at[30], at[66]);    MULADD(at[31], at[65]);    MULADD(at[32], at[64]);    MULADD(at[33], at[63]);    MULADD(at[34], at[62]);    MULADD(at[35], at[61]);    MULADD(at[36], at[60]);    MULADD(at[37], at[59]);    MULADD(at[38], at[58]);    MULADD(at[39], at[57]);    MULADD(at[40], at[56]);    MULADD(at[41], at[55]);    MULADD(at[42], at[54]);    MULADD(at[43], at[53]);    MULADD(at[44], at[52]);    MULADD(at[45], at[51]);    MULADD(at[46], at[50]);    MULADD(at[47], at[49]); 
6993
+   COMBA_STORE(C->dp[48]);
6994
+   /* 49 */
6995
+   COMBA_FORWARD;
6996
+   MULADD(at[2], at[95]);    MULADD(at[3], at[94]);    MULADD(at[4], at[93]);    MULADD(at[5], at[92]);    MULADD(at[6], at[91]);    MULADD(at[7], at[90]);    MULADD(at[8], at[89]);    MULADD(at[9], at[88]);    MULADD(at[10], at[87]);    MULADD(at[11], at[86]);    MULADD(at[12], at[85]);    MULADD(at[13], at[84]);    MULADD(at[14], at[83]);    MULADD(at[15], at[82]);    MULADD(at[16], at[81]);    MULADD(at[17], at[80]);    MULADD(at[18], at[79]);    MULADD(at[19], at[78]);    MULADD(at[20], at[77]);    MULADD(at[21], at[76]);    MULADD(at[22], at[75]);    MULADD(at[23], at[74]);    MULADD(at[24], at[73]);    MULADD(at[25], at[72]);    MULADD(at[26], at[71]);    MULADD(at[27], at[70]);    MULADD(at[28], at[69]);    MULADD(at[29], at[68]);    MULADD(at[30], at[67]);    MULADD(at[31], at[66]);    MULADD(at[32], at[65]);    MULADD(at[33], at[64]);    MULADD(at[34], at[63]);    MULADD(at[35], at[62]);    MULADD(at[36], at[61]);    MULADD(at[37], at[60]);    MULADD(at[38], at[59]);    MULADD(at[39], at[58]);    MULADD(at[40], at[57]);    MULADD(at[41], at[56]);    MULADD(at[42], at[55]);    MULADD(at[43], at[54]);    MULADD(at[44], at[53]);    MULADD(at[45], at[52]);    MULADD(at[46], at[51]);    MULADD(at[47], at[50]); 
6997
+   COMBA_STORE(C->dp[49]);
6998
+   /* 50 */
6999
+   COMBA_FORWARD;
7000
+   MULADD(at[3], at[95]);    MULADD(at[4], at[94]);    MULADD(at[5], at[93]);    MULADD(at[6], at[92]);    MULADD(at[7], at[91]);    MULADD(at[8], at[90]);    MULADD(at[9], at[89]);    MULADD(at[10], at[88]);    MULADD(at[11], at[87]);    MULADD(at[12], at[86]);    MULADD(at[13], at[85]);    MULADD(at[14], at[84]);    MULADD(at[15], at[83]);    MULADD(at[16], at[82]);    MULADD(at[17], at[81]);    MULADD(at[18], at[80]);    MULADD(at[19], at[79]);    MULADD(at[20], at[78]);    MULADD(at[21], at[77]);    MULADD(at[22], at[76]);    MULADD(at[23], at[75]);    MULADD(at[24], at[74]);    MULADD(at[25], at[73]);    MULADD(at[26], at[72]);    MULADD(at[27], at[71]);    MULADD(at[28], at[70]);    MULADD(at[29], at[69]);    MULADD(at[30], at[68]);    MULADD(at[31], at[67]);    MULADD(at[32], at[66]);    MULADD(at[33], at[65]);    MULADD(at[34], at[64]);    MULADD(at[35], at[63]);    MULADD(at[36], at[62]);    MULADD(at[37], at[61]);    MULADD(at[38], at[60]);    MULADD(at[39], at[59]);    MULADD(at[40], at[58]);    MULADD(at[41], at[57]);    MULADD(at[42], at[56]);    MULADD(at[43], at[55]);    MULADD(at[44], at[54]);    MULADD(at[45], at[53]);    MULADD(at[46], at[52]);    MULADD(at[47], at[51]); 
7001
+   COMBA_STORE(C->dp[50]);
7002
+   /* 51 */
7003
+   COMBA_FORWARD;
7004
+   MULADD(at[4], at[95]);    MULADD(at[5], at[94]);    MULADD(at[6], at[93]);    MULADD(at[7], at[92]);    MULADD(at[8], at[91]);    MULADD(at[9], at[90]);    MULADD(at[10], at[89]);    MULADD(at[11], at[88]);    MULADD(at[12], at[87]);    MULADD(at[13], at[86]);    MULADD(at[14], at[85]);    MULADD(at[15], at[84]);    MULADD(at[16], at[83]);    MULADD(at[17], at[82]);    MULADD(at[18], at[81]);    MULADD(at[19], at[80]);    MULADD(at[20], at[79]);    MULADD(at[21], at[78]);    MULADD(at[22], at[77]);    MULADD(at[23], at[76]);    MULADD(at[24], at[75]);    MULADD(at[25], at[74]);    MULADD(at[26], at[73]);    MULADD(at[27], at[72]);    MULADD(at[28], at[71]);    MULADD(at[29], at[70]);    MULADD(at[30], at[69]);    MULADD(at[31], at[68]);    MULADD(at[32], at[67]);    MULADD(at[33], at[66]);    MULADD(at[34], at[65]);    MULADD(at[35], at[64]);    MULADD(at[36], at[63]);    MULADD(at[37], at[62]);    MULADD(at[38], at[61]);    MULADD(at[39], at[60]);    MULADD(at[40], at[59]);    MULADD(at[41], at[58]);    MULADD(at[42], at[57]);    MULADD(at[43], at[56]);    MULADD(at[44], at[55]);    MULADD(at[45], at[54]);    MULADD(at[46], at[53]);    MULADD(at[47], at[52]); 
7005
+   COMBA_STORE(C->dp[51]);
7006
+   /* 52 */
7007
+   COMBA_FORWARD;
7008
+   MULADD(at[5], at[95]);    MULADD(at[6], at[94]);    MULADD(at[7], at[93]);    MULADD(at[8], at[92]);    MULADD(at[9], at[91]);    MULADD(at[10], at[90]);    MULADD(at[11], at[89]);    MULADD(at[12], at[88]);    MULADD(at[13], at[87]);    MULADD(at[14], at[86]);    MULADD(at[15], at[85]);    MULADD(at[16], at[84]);    MULADD(at[17], at[83]);    MULADD(at[18], at[82]);    MULADD(at[19], at[81]);    MULADD(at[20], at[80]);    MULADD(at[21], at[79]);    MULADD(at[22], at[78]);    MULADD(at[23], at[77]);    MULADD(at[24], at[76]);    MULADD(at[25], at[75]);    MULADD(at[26], at[74]);    MULADD(at[27], at[73]);    MULADD(at[28], at[72]);    MULADD(at[29], at[71]);    MULADD(at[30], at[70]);    MULADD(at[31], at[69]);    MULADD(at[32], at[68]);    MULADD(at[33], at[67]);    MULADD(at[34], at[66]);    MULADD(at[35], at[65]);    MULADD(at[36], at[64]);    MULADD(at[37], at[63]);    MULADD(at[38], at[62]);    MULADD(at[39], at[61]);    MULADD(at[40], at[60]);    MULADD(at[41], at[59]);    MULADD(at[42], at[58]);    MULADD(at[43], at[57]);    MULADD(at[44], at[56]);    MULADD(at[45], at[55]);    MULADD(at[46], at[54]);    MULADD(at[47], at[53]); 
7009
+   COMBA_STORE(C->dp[52]);
7010
+   /* 53 */
7011
+   COMBA_FORWARD;
7012
+   MULADD(at[6], at[95]);    MULADD(at[7], at[94]);    MULADD(at[8], at[93]);    MULADD(at[9], at[92]);    MULADD(at[10], at[91]);    MULADD(at[11], at[90]);    MULADD(at[12], at[89]);    MULADD(at[13], at[88]);    MULADD(at[14], at[87]);    MULADD(at[15], at[86]);    MULADD(at[16], at[85]);    MULADD(at[17], at[84]);    MULADD(at[18], at[83]);    MULADD(at[19], at[82]);    MULADD(at[20], at[81]);    MULADD(at[21], at[80]);    MULADD(at[22], at[79]);    MULADD(at[23], at[78]);    MULADD(at[24], at[77]);    MULADD(at[25], at[76]);    MULADD(at[26], at[75]);    MULADD(at[27], at[74]);    MULADD(at[28], at[73]);    MULADD(at[29], at[72]);    MULADD(at[30], at[71]);    MULADD(at[31], at[70]);    MULADD(at[32], at[69]);    MULADD(at[33], at[68]);    MULADD(at[34], at[67]);    MULADD(at[35], at[66]);    MULADD(at[36], at[65]);    MULADD(at[37], at[64]);    MULADD(at[38], at[63]);    MULADD(at[39], at[62]);    MULADD(at[40], at[61]);    MULADD(at[41], at[60]);    MULADD(at[42], at[59]);    MULADD(at[43], at[58]);    MULADD(at[44], at[57]);    MULADD(at[45], at[56]);    MULADD(at[46], at[55]);    MULADD(at[47], at[54]); 
7013
+   COMBA_STORE(C->dp[53]);
7014
+   /* 54 */
7015
+   COMBA_FORWARD;
7016
+   MULADD(at[7], at[95]);    MULADD(at[8], at[94]);    MULADD(at[9], at[93]);    MULADD(at[10], at[92]);    MULADD(at[11], at[91]);    MULADD(at[12], at[90]);    MULADD(at[13], at[89]);    MULADD(at[14], at[88]);    MULADD(at[15], at[87]);    MULADD(at[16], at[86]);    MULADD(at[17], at[85]);    MULADD(at[18], at[84]);    MULADD(at[19], at[83]);    MULADD(at[20], at[82]);    MULADD(at[21], at[81]);    MULADD(at[22], at[80]);    MULADD(at[23], at[79]);    MULADD(at[24], at[78]);    MULADD(at[25], at[77]);    MULADD(at[26], at[76]);    MULADD(at[27], at[75]);    MULADD(at[28], at[74]);    MULADD(at[29], at[73]);    MULADD(at[30], at[72]);    MULADD(at[31], at[71]);    MULADD(at[32], at[70]);    MULADD(at[33], at[69]);    MULADD(at[34], at[68]);    MULADD(at[35], at[67]);    MULADD(at[36], at[66]);    MULADD(at[37], at[65]);    MULADD(at[38], at[64]);    MULADD(at[39], at[63]);    MULADD(at[40], at[62]);    MULADD(at[41], at[61]);    MULADD(at[42], at[60]);    MULADD(at[43], at[59]);    MULADD(at[44], at[58]);    MULADD(at[45], at[57]);    MULADD(at[46], at[56]);    MULADD(at[47], at[55]); 
7017
+   COMBA_STORE(C->dp[54]);
7018
+   /* 55 */
7019
+   COMBA_FORWARD;
7020
+   MULADD(at[8], at[95]);    MULADD(at[9], at[94]);    MULADD(at[10], at[93]);    MULADD(at[11], at[92]);    MULADD(at[12], at[91]);    MULADD(at[13], at[90]);    MULADD(at[14], at[89]);    MULADD(at[15], at[88]);    MULADD(at[16], at[87]);    MULADD(at[17], at[86]);    MULADD(at[18], at[85]);    MULADD(at[19], at[84]);    MULADD(at[20], at[83]);    MULADD(at[21], at[82]);    MULADD(at[22], at[81]);    MULADD(at[23], at[80]);    MULADD(at[24], at[79]);    MULADD(at[25], at[78]);    MULADD(at[26], at[77]);    MULADD(at[27], at[76]);    MULADD(at[28], at[75]);    MULADD(at[29], at[74]);    MULADD(at[30], at[73]);    MULADD(at[31], at[72]);    MULADD(at[32], at[71]);    MULADD(at[33], at[70]);    MULADD(at[34], at[69]);    MULADD(at[35], at[68]);    MULADD(at[36], at[67]);    MULADD(at[37], at[66]);    MULADD(at[38], at[65]);    MULADD(at[39], at[64]);    MULADD(at[40], at[63]);    MULADD(at[41], at[62]);    MULADD(at[42], at[61]);    MULADD(at[43], at[60]);    MULADD(at[44], at[59]);    MULADD(at[45], at[58]);    MULADD(at[46], at[57]);    MULADD(at[47], at[56]); 
7021
+   COMBA_STORE(C->dp[55]);
7022
+   /* 56 */
7023
+   COMBA_FORWARD;
7024
+   MULADD(at[9], at[95]);    MULADD(at[10], at[94]);    MULADD(at[11], at[93]);    MULADD(at[12], at[92]);    MULADD(at[13], at[91]);    MULADD(at[14], at[90]);    MULADD(at[15], at[89]);    MULADD(at[16], at[88]);    MULADD(at[17], at[87]);    MULADD(at[18], at[86]);    MULADD(at[19], at[85]);    MULADD(at[20], at[84]);    MULADD(at[21], at[83]);    MULADD(at[22], at[82]);    MULADD(at[23], at[81]);    MULADD(at[24], at[80]);    MULADD(at[25], at[79]);    MULADD(at[26], at[78]);    MULADD(at[27], at[77]);    MULADD(at[28], at[76]);    MULADD(at[29], at[75]);    MULADD(at[30], at[74]);    MULADD(at[31], at[73]);    MULADD(at[32], at[72]);    MULADD(at[33], at[71]);    MULADD(at[34], at[70]);    MULADD(at[35], at[69]);    MULADD(at[36], at[68]);    MULADD(at[37], at[67]);    MULADD(at[38], at[66]);    MULADD(at[39], at[65]);    MULADD(at[40], at[64]);    MULADD(at[41], at[63]);    MULADD(at[42], at[62]);    MULADD(at[43], at[61]);    MULADD(at[44], at[60]);    MULADD(at[45], at[59]);    MULADD(at[46], at[58]);    MULADD(at[47], at[57]); 
7025
+   COMBA_STORE(C->dp[56]);
7026
+   /* 57 */
7027
+   COMBA_FORWARD;
7028
+   MULADD(at[10], at[95]);    MULADD(at[11], at[94]);    MULADD(at[12], at[93]);    MULADD(at[13], at[92]);    MULADD(at[14], at[91]);    MULADD(at[15], at[90]);    MULADD(at[16], at[89]);    MULADD(at[17], at[88]);    MULADD(at[18], at[87]);    MULADD(at[19], at[86]);    MULADD(at[20], at[85]);    MULADD(at[21], at[84]);    MULADD(at[22], at[83]);    MULADD(at[23], at[82]);    MULADD(at[24], at[81]);    MULADD(at[25], at[80]);    MULADD(at[26], at[79]);    MULADD(at[27], at[78]);    MULADD(at[28], at[77]);    MULADD(at[29], at[76]);    MULADD(at[30], at[75]);    MULADD(at[31], at[74]);    MULADD(at[32], at[73]);    MULADD(at[33], at[72]);    MULADD(at[34], at[71]);    MULADD(at[35], at[70]);    MULADD(at[36], at[69]);    MULADD(at[37], at[68]);    MULADD(at[38], at[67]);    MULADD(at[39], at[66]);    MULADD(at[40], at[65]);    MULADD(at[41], at[64]);    MULADD(at[42], at[63]);    MULADD(at[43], at[62]);    MULADD(at[44], at[61]);    MULADD(at[45], at[60]);    MULADD(at[46], at[59]);    MULADD(at[47], at[58]); 
7029
+   COMBA_STORE(C->dp[57]);
7030
+   /* 58 */
7031
+   COMBA_FORWARD;
7032
+   MULADD(at[11], at[95]);    MULADD(at[12], at[94]);    MULADD(at[13], at[93]);    MULADD(at[14], at[92]);    MULADD(at[15], at[91]);    MULADD(at[16], at[90]);    MULADD(at[17], at[89]);    MULADD(at[18], at[88]);    MULADD(at[19], at[87]);    MULADD(at[20], at[86]);    MULADD(at[21], at[85]);    MULADD(at[22], at[84]);    MULADD(at[23], at[83]);    MULADD(at[24], at[82]);    MULADD(at[25], at[81]);    MULADD(at[26], at[80]);    MULADD(at[27], at[79]);    MULADD(at[28], at[78]);    MULADD(at[29], at[77]);    MULADD(at[30], at[76]);    MULADD(at[31], at[75]);    MULADD(at[32], at[74]);    MULADD(at[33], at[73]);    MULADD(at[34], at[72]);    MULADD(at[35], at[71]);    MULADD(at[36], at[70]);    MULADD(at[37], at[69]);    MULADD(at[38], at[68]);    MULADD(at[39], at[67]);    MULADD(at[40], at[66]);    MULADD(at[41], at[65]);    MULADD(at[42], at[64]);    MULADD(at[43], at[63]);    MULADD(at[44], at[62]);    MULADD(at[45], at[61]);    MULADD(at[46], at[60]);    MULADD(at[47], at[59]); 
7033
+   COMBA_STORE(C->dp[58]);
7034
+   /* 59 */
7035
+   COMBA_FORWARD;
7036
+   MULADD(at[12], at[95]);    MULADD(at[13], at[94]);    MULADD(at[14], at[93]);    MULADD(at[15], at[92]);    MULADD(at[16], at[91]);    MULADD(at[17], at[90]);    MULADD(at[18], at[89]);    MULADD(at[19], at[88]);    MULADD(at[20], at[87]);    MULADD(at[21], at[86]);    MULADD(at[22], at[85]);    MULADD(at[23], at[84]);    MULADD(at[24], at[83]);    MULADD(at[25], at[82]);    MULADD(at[26], at[81]);    MULADD(at[27], at[80]);    MULADD(at[28], at[79]);    MULADD(at[29], at[78]);    MULADD(at[30], at[77]);    MULADD(at[31], at[76]);    MULADD(at[32], at[75]);    MULADD(at[33], at[74]);    MULADD(at[34], at[73]);    MULADD(at[35], at[72]);    MULADD(at[36], at[71]);    MULADD(at[37], at[70]);    MULADD(at[38], at[69]);    MULADD(at[39], at[68]);    MULADD(at[40], at[67]);    MULADD(at[41], at[66]);    MULADD(at[42], at[65]);    MULADD(at[43], at[64]);    MULADD(at[44], at[63]);    MULADD(at[45], at[62]);    MULADD(at[46], at[61]);    MULADD(at[47], at[60]); 
7037
+   COMBA_STORE(C->dp[59]);
7038
+   /* 60 */
7039
+   COMBA_FORWARD;
7040
+   MULADD(at[13], at[95]);    MULADD(at[14], at[94]);    MULADD(at[15], at[93]);    MULADD(at[16], at[92]);    MULADD(at[17], at[91]);    MULADD(at[18], at[90]);    MULADD(at[19], at[89]);    MULADD(at[20], at[88]);    MULADD(at[21], at[87]);    MULADD(at[22], at[86]);    MULADD(at[23], at[85]);    MULADD(at[24], at[84]);    MULADD(at[25], at[83]);    MULADD(at[26], at[82]);    MULADD(at[27], at[81]);    MULADD(at[28], at[80]);    MULADD(at[29], at[79]);    MULADD(at[30], at[78]);    MULADD(at[31], at[77]);    MULADD(at[32], at[76]);    MULADD(at[33], at[75]);    MULADD(at[34], at[74]);    MULADD(at[35], at[73]);    MULADD(at[36], at[72]);    MULADD(at[37], at[71]);    MULADD(at[38], at[70]);    MULADD(at[39], at[69]);    MULADD(at[40], at[68]);    MULADD(at[41], at[67]);    MULADD(at[42], at[66]);    MULADD(at[43], at[65]);    MULADD(at[44], at[64]);    MULADD(at[45], at[63]);    MULADD(at[46], at[62]);    MULADD(at[47], at[61]); 
7041
+   COMBA_STORE(C->dp[60]);
7042
+   /* 61 */
7043
+   COMBA_FORWARD;
7044
+   MULADD(at[14], at[95]);    MULADD(at[15], at[94]);    MULADD(at[16], at[93]);    MULADD(at[17], at[92]);    MULADD(at[18], at[91]);    MULADD(at[19], at[90]);    MULADD(at[20], at[89]);    MULADD(at[21], at[88]);    MULADD(at[22], at[87]);    MULADD(at[23], at[86]);    MULADD(at[24], at[85]);    MULADD(at[25], at[84]);    MULADD(at[26], at[83]);    MULADD(at[27], at[82]);    MULADD(at[28], at[81]);    MULADD(at[29], at[80]);    MULADD(at[30], at[79]);    MULADD(at[31], at[78]);    MULADD(at[32], at[77]);    MULADD(at[33], at[76]);    MULADD(at[34], at[75]);    MULADD(at[35], at[74]);    MULADD(at[36], at[73]);    MULADD(at[37], at[72]);    MULADD(at[38], at[71]);    MULADD(at[39], at[70]);    MULADD(at[40], at[69]);    MULADD(at[41], at[68]);    MULADD(at[42], at[67]);    MULADD(at[43], at[66]);    MULADD(at[44], at[65]);    MULADD(at[45], at[64]);    MULADD(at[46], at[63]);    MULADD(at[47], at[62]); 
7045
+   COMBA_STORE(C->dp[61]);
7046
+   /* 62 */
7047
+   COMBA_FORWARD;
7048
+   MULADD(at[15], at[95]);    MULADD(at[16], at[94]);    MULADD(at[17], at[93]);    MULADD(at[18], at[92]);    MULADD(at[19], at[91]);    MULADD(at[20], at[90]);    MULADD(at[21], at[89]);    MULADD(at[22], at[88]);    MULADD(at[23], at[87]);    MULADD(at[24], at[86]);    MULADD(at[25], at[85]);    MULADD(at[26], at[84]);    MULADD(at[27], at[83]);    MULADD(at[28], at[82]);    MULADD(at[29], at[81]);    MULADD(at[30], at[80]);    MULADD(at[31], at[79]);    MULADD(at[32], at[78]);    MULADD(at[33], at[77]);    MULADD(at[34], at[76]);    MULADD(at[35], at[75]);    MULADD(at[36], at[74]);    MULADD(at[37], at[73]);    MULADD(at[38], at[72]);    MULADD(at[39], at[71]);    MULADD(at[40], at[70]);    MULADD(at[41], at[69]);    MULADD(at[42], at[68]);    MULADD(at[43], at[67]);    MULADD(at[44], at[66]);    MULADD(at[45], at[65]);    MULADD(at[46], at[64]);    MULADD(at[47], at[63]); 
7049
+   COMBA_STORE(C->dp[62]);
7050
+   /* 63 */
7051
+   COMBA_FORWARD;
7052
+   MULADD(at[16], at[95]);    MULADD(at[17], at[94]);    MULADD(at[18], at[93]);    MULADD(at[19], at[92]);    MULADD(at[20], at[91]);    MULADD(at[21], at[90]);    MULADD(at[22], at[89]);    MULADD(at[23], at[88]);    MULADD(at[24], at[87]);    MULADD(at[25], at[86]);    MULADD(at[26], at[85]);    MULADD(at[27], at[84]);    MULADD(at[28], at[83]);    MULADD(at[29], at[82]);    MULADD(at[30], at[81]);    MULADD(at[31], at[80]);    MULADD(at[32], at[79]);    MULADD(at[33], at[78]);    MULADD(at[34], at[77]);    MULADD(at[35], at[76]);    MULADD(at[36], at[75]);    MULADD(at[37], at[74]);    MULADD(at[38], at[73]);    MULADD(at[39], at[72]);    MULADD(at[40], at[71]);    MULADD(at[41], at[70]);    MULADD(at[42], at[69]);    MULADD(at[43], at[68]);    MULADD(at[44], at[67]);    MULADD(at[45], at[66]);    MULADD(at[46], at[65]);    MULADD(at[47], at[64]); 
7053
+   COMBA_STORE(C->dp[63]);
7054
+   /* 64 */
7055
+   COMBA_FORWARD;
7056
+   MULADD(at[17], at[95]);    MULADD(at[18], at[94]);    MULADD(at[19], at[93]);    MULADD(at[20], at[92]);    MULADD(at[21], at[91]);    MULADD(at[22], at[90]);    MULADD(at[23], at[89]);    MULADD(at[24], at[88]);    MULADD(at[25], at[87]);    MULADD(at[26], at[86]);    MULADD(at[27], at[85]);    MULADD(at[28], at[84]);    MULADD(at[29], at[83]);    MULADD(at[30], at[82]);    MULADD(at[31], at[81]);    MULADD(at[32], at[80]);    MULADD(at[33], at[79]);    MULADD(at[34], at[78]);    MULADD(at[35], at[77]);    MULADD(at[36], at[76]);    MULADD(at[37], at[75]);    MULADD(at[38], at[74]);    MULADD(at[39], at[73]);    MULADD(at[40], at[72]);    MULADD(at[41], at[71]);    MULADD(at[42], at[70]);    MULADD(at[43], at[69]);    MULADD(at[44], at[68]);    MULADD(at[45], at[67]);    MULADD(at[46], at[66]);    MULADD(at[47], at[65]); 
7057
+   COMBA_STORE(C->dp[64]);
7058
+   /* 65 */
7059
+   COMBA_FORWARD;
7060
+   MULADD(at[18], at[95]);    MULADD(at[19], at[94]);    MULADD(at[20], at[93]);    MULADD(at[21], at[92]);    MULADD(at[22], at[91]);    MULADD(at[23], at[90]);    MULADD(at[24], at[89]);    MULADD(at[25], at[88]);    MULADD(at[26], at[87]);    MULADD(at[27], at[86]);    MULADD(at[28], at[85]);    MULADD(at[29], at[84]);    MULADD(at[30], at[83]);    MULADD(at[31], at[82]);    MULADD(at[32], at[81]);    MULADD(at[33], at[80]);    MULADD(at[34], at[79]);    MULADD(at[35], at[78]);    MULADD(at[36], at[77]);    MULADD(at[37], at[76]);    MULADD(at[38], at[75]);    MULADD(at[39], at[74]);    MULADD(at[40], at[73]);    MULADD(at[41], at[72]);    MULADD(at[42], at[71]);    MULADD(at[43], at[70]);    MULADD(at[44], at[69]);    MULADD(at[45], at[68]);    MULADD(at[46], at[67]);    MULADD(at[47], at[66]); 
7061
+   COMBA_STORE(C->dp[65]);
7062
+   /* 66 */
7063
+   COMBA_FORWARD;
7064
+   MULADD(at[19], at[95]);    MULADD(at[20], at[94]);    MULADD(at[21], at[93]);    MULADD(at[22], at[92]);    MULADD(at[23], at[91]);    MULADD(at[24], at[90]);    MULADD(at[25], at[89]);    MULADD(at[26], at[88]);    MULADD(at[27], at[87]);    MULADD(at[28], at[86]);    MULADD(at[29], at[85]);    MULADD(at[30], at[84]);    MULADD(at[31], at[83]);    MULADD(at[32], at[82]);    MULADD(at[33], at[81]);    MULADD(at[34], at[80]);    MULADD(at[35], at[79]);    MULADD(at[36], at[78]);    MULADD(at[37], at[77]);    MULADD(at[38], at[76]);    MULADD(at[39], at[75]);    MULADD(at[40], at[74]);    MULADD(at[41], at[73]);    MULADD(at[42], at[72]);    MULADD(at[43], at[71]);    MULADD(at[44], at[70]);    MULADD(at[45], at[69]);    MULADD(at[46], at[68]);    MULADD(at[47], at[67]); 
7065
+   COMBA_STORE(C->dp[66]);
7066
+   /* 67 */
7067
+   COMBA_FORWARD;
7068
+   MULADD(at[20], at[95]);    MULADD(at[21], at[94]);    MULADD(at[22], at[93]);    MULADD(at[23], at[92]);    MULADD(at[24], at[91]);    MULADD(at[25], at[90]);    MULADD(at[26], at[89]);    MULADD(at[27], at[88]);    MULADD(at[28], at[87]);    MULADD(at[29], at[86]);    MULADD(at[30], at[85]);    MULADD(at[31], at[84]);    MULADD(at[32], at[83]);    MULADD(at[33], at[82]);    MULADD(at[34], at[81]);    MULADD(at[35], at[80]);    MULADD(at[36], at[79]);    MULADD(at[37], at[78]);    MULADD(at[38], at[77]);    MULADD(at[39], at[76]);    MULADD(at[40], at[75]);    MULADD(at[41], at[74]);    MULADD(at[42], at[73]);    MULADD(at[43], at[72]);    MULADD(at[44], at[71]);    MULADD(at[45], at[70]);    MULADD(at[46], at[69]);    MULADD(at[47], at[68]); 
7069
+   COMBA_STORE(C->dp[67]);
7070
+   /* 68 */
7071
+   COMBA_FORWARD;
7072
+   MULADD(at[21], at[95]);    MULADD(at[22], at[94]);    MULADD(at[23], at[93]);    MULADD(at[24], at[92]);    MULADD(at[25], at[91]);    MULADD(at[26], at[90]);    MULADD(at[27], at[89]);    MULADD(at[28], at[88]);    MULADD(at[29], at[87]);    MULADD(at[30], at[86]);    MULADD(at[31], at[85]);    MULADD(at[32], at[84]);    MULADD(at[33], at[83]);    MULADD(at[34], at[82]);    MULADD(at[35], at[81]);    MULADD(at[36], at[80]);    MULADD(at[37], at[79]);    MULADD(at[38], at[78]);    MULADD(at[39], at[77]);    MULADD(at[40], at[76]);    MULADD(at[41], at[75]);    MULADD(at[42], at[74]);    MULADD(at[43], at[73]);    MULADD(at[44], at[72]);    MULADD(at[45], at[71]);    MULADD(at[46], at[70]);    MULADD(at[47], at[69]); 
7073
+   COMBA_STORE(C->dp[68]);
7074
+   /* 69 */
7075
+   COMBA_FORWARD;
7076
+   MULADD(at[22], at[95]);    MULADD(at[23], at[94]);    MULADD(at[24], at[93]);    MULADD(at[25], at[92]);    MULADD(at[26], at[91]);    MULADD(at[27], at[90]);    MULADD(at[28], at[89]);    MULADD(at[29], at[88]);    MULADD(at[30], at[87]);    MULADD(at[31], at[86]);    MULADD(at[32], at[85]);    MULADD(at[33], at[84]);    MULADD(at[34], at[83]);    MULADD(at[35], at[82]);    MULADD(at[36], at[81]);    MULADD(at[37], at[80]);    MULADD(at[38], at[79]);    MULADD(at[39], at[78]);    MULADD(at[40], at[77]);    MULADD(at[41], at[76]);    MULADD(at[42], at[75]);    MULADD(at[43], at[74]);    MULADD(at[44], at[73]);    MULADD(at[45], at[72]);    MULADD(at[46], at[71]);    MULADD(at[47], at[70]); 
7077
+   COMBA_STORE(C->dp[69]);
7078
+   /* 70 */
7079
+   COMBA_FORWARD;
7080
+   MULADD(at[23], at[95]);    MULADD(at[24], at[94]);    MULADD(at[25], at[93]);    MULADD(at[26], at[92]);    MULADD(at[27], at[91]);    MULADD(at[28], at[90]);    MULADD(at[29], at[89]);    MULADD(at[30], at[88]);    MULADD(at[31], at[87]);    MULADD(at[32], at[86]);    MULADD(at[33], at[85]);    MULADD(at[34], at[84]);    MULADD(at[35], at[83]);    MULADD(at[36], at[82]);    MULADD(at[37], at[81]);    MULADD(at[38], at[80]);    MULADD(at[39], at[79]);    MULADD(at[40], at[78]);    MULADD(at[41], at[77]);    MULADD(at[42], at[76]);    MULADD(at[43], at[75]);    MULADD(at[44], at[74]);    MULADD(at[45], at[73]);    MULADD(at[46], at[72]);    MULADD(at[47], at[71]); 
7081
+   COMBA_STORE(C->dp[70]);
7082
+   /* 71 */
7083
+   COMBA_FORWARD;
7084
+   MULADD(at[24], at[95]);    MULADD(at[25], at[94]);    MULADD(at[26], at[93]);    MULADD(at[27], at[92]);    MULADD(at[28], at[91]);    MULADD(at[29], at[90]);    MULADD(at[30], at[89]);    MULADD(at[31], at[88]);    MULADD(at[32], at[87]);    MULADD(at[33], at[86]);    MULADD(at[34], at[85]);    MULADD(at[35], at[84]);    MULADD(at[36], at[83]);    MULADD(at[37], at[82]);    MULADD(at[38], at[81]);    MULADD(at[39], at[80]);    MULADD(at[40], at[79]);    MULADD(at[41], at[78]);    MULADD(at[42], at[77]);    MULADD(at[43], at[76]);    MULADD(at[44], at[75]);    MULADD(at[45], at[74]);    MULADD(at[46], at[73]);    MULADD(at[47], at[72]); 
7085
+   COMBA_STORE(C->dp[71]);
7086
+   /* 72 */
7087
+   COMBA_FORWARD;
7088
+   MULADD(at[25], at[95]);    MULADD(at[26], at[94]);    MULADD(at[27], at[93]);    MULADD(at[28], at[92]);    MULADD(at[29], at[91]);    MULADD(at[30], at[90]);    MULADD(at[31], at[89]);    MULADD(at[32], at[88]);    MULADD(at[33], at[87]);    MULADD(at[34], at[86]);    MULADD(at[35], at[85]);    MULADD(at[36], at[84]);    MULADD(at[37], at[83]);    MULADD(at[38], at[82]);    MULADD(at[39], at[81]);    MULADD(at[40], at[80]);    MULADD(at[41], at[79]);    MULADD(at[42], at[78]);    MULADD(at[43], at[77]);    MULADD(at[44], at[76]);    MULADD(at[45], at[75]);    MULADD(at[46], at[74]);    MULADD(at[47], at[73]); 
7089
+   COMBA_STORE(C->dp[72]);
7090
+   /* 73 */
7091
+   COMBA_FORWARD;
7092
+   MULADD(at[26], at[95]);    MULADD(at[27], at[94]);    MULADD(at[28], at[93]);    MULADD(at[29], at[92]);    MULADD(at[30], at[91]);    MULADD(at[31], at[90]);    MULADD(at[32], at[89]);    MULADD(at[33], at[88]);    MULADD(at[34], at[87]);    MULADD(at[35], at[86]);    MULADD(at[36], at[85]);    MULADD(at[37], at[84]);    MULADD(at[38], at[83]);    MULADD(at[39], at[82]);    MULADD(at[40], at[81]);    MULADD(at[41], at[80]);    MULADD(at[42], at[79]);    MULADD(at[43], at[78]);    MULADD(at[44], at[77]);    MULADD(at[45], at[76]);    MULADD(at[46], at[75]);    MULADD(at[47], at[74]); 
7093
+   COMBA_STORE(C->dp[73]);
7094
+   /* 74 */
7095
+   COMBA_FORWARD;
7096
+   MULADD(at[27], at[95]);    MULADD(at[28], at[94]);    MULADD(at[29], at[93]);    MULADD(at[30], at[92]);    MULADD(at[31], at[91]);    MULADD(at[32], at[90]);    MULADD(at[33], at[89]);    MULADD(at[34], at[88]);    MULADD(at[35], at[87]);    MULADD(at[36], at[86]);    MULADD(at[37], at[85]);    MULADD(at[38], at[84]);    MULADD(at[39], at[83]);    MULADD(at[40], at[82]);    MULADD(at[41], at[81]);    MULADD(at[42], at[80]);    MULADD(at[43], at[79]);    MULADD(at[44], at[78]);    MULADD(at[45], at[77]);    MULADD(at[46], at[76]);    MULADD(at[47], at[75]); 
7097
+   COMBA_STORE(C->dp[74]);
7098
+   /* 75 */
7099
+   COMBA_FORWARD;
7100
+   MULADD(at[28], at[95]);    MULADD(at[29], at[94]);    MULADD(at[30], at[93]);    MULADD(at[31], at[92]);    MULADD(at[32], at[91]);    MULADD(at[33], at[90]);    MULADD(at[34], at[89]);    MULADD(at[35], at[88]);    MULADD(at[36], at[87]);    MULADD(at[37], at[86]);    MULADD(at[38], at[85]);    MULADD(at[39], at[84]);    MULADD(at[40], at[83]);    MULADD(at[41], at[82]);    MULADD(at[42], at[81]);    MULADD(at[43], at[80]);    MULADD(at[44], at[79]);    MULADD(at[45], at[78]);    MULADD(at[46], at[77]);    MULADD(at[47], at[76]); 
7101
+   COMBA_STORE(C->dp[75]);
7102
+   /* 76 */
7103
+   COMBA_FORWARD;
7104
+   MULADD(at[29], at[95]);    MULADD(at[30], at[94]);    MULADD(at[31], at[93]);    MULADD(at[32], at[92]);    MULADD(at[33], at[91]);    MULADD(at[34], at[90]);    MULADD(at[35], at[89]);    MULADD(at[36], at[88]);    MULADD(at[37], at[87]);    MULADD(at[38], at[86]);    MULADD(at[39], at[85]);    MULADD(at[40], at[84]);    MULADD(at[41], at[83]);    MULADD(at[42], at[82]);    MULADD(at[43], at[81]);    MULADD(at[44], at[80]);    MULADD(at[45], at[79]);    MULADD(at[46], at[78]);    MULADD(at[47], at[77]); 
7105
+   COMBA_STORE(C->dp[76]);
7106
+   /* 77 */
7107
+   COMBA_FORWARD;
7108
+   MULADD(at[30], at[95]);    MULADD(at[31], at[94]);    MULADD(at[32], at[93]);    MULADD(at[33], at[92]);    MULADD(at[34], at[91]);    MULADD(at[35], at[90]);    MULADD(at[36], at[89]);    MULADD(at[37], at[88]);    MULADD(at[38], at[87]);    MULADD(at[39], at[86]);    MULADD(at[40], at[85]);    MULADD(at[41], at[84]);    MULADD(at[42], at[83]);    MULADD(at[43], at[82]);    MULADD(at[44], at[81]);    MULADD(at[45], at[80]);    MULADD(at[46], at[79]);    MULADD(at[47], at[78]); 
7109
+   COMBA_STORE(C->dp[77]);
7110
+   /* 78 */
7111
+   COMBA_FORWARD;
7112
+   MULADD(at[31], at[95]);    MULADD(at[32], at[94]);    MULADD(at[33], at[93]);    MULADD(at[34], at[92]);    MULADD(at[35], at[91]);    MULADD(at[36], at[90]);    MULADD(at[37], at[89]);    MULADD(at[38], at[88]);    MULADD(at[39], at[87]);    MULADD(at[40], at[86]);    MULADD(at[41], at[85]);    MULADD(at[42], at[84]);    MULADD(at[43], at[83]);    MULADD(at[44], at[82]);    MULADD(at[45], at[81]);    MULADD(at[46], at[80]);    MULADD(at[47], at[79]); 
7113
+   COMBA_STORE(C->dp[78]);
7114
+   /* 79 */
7115
+   COMBA_FORWARD;
7116
+   MULADD(at[32], at[95]);    MULADD(at[33], at[94]);    MULADD(at[34], at[93]);    MULADD(at[35], at[92]);    MULADD(at[36], at[91]);    MULADD(at[37], at[90]);    MULADD(at[38], at[89]);    MULADD(at[39], at[88]);    MULADD(at[40], at[87]);    MULADD(at[41], at[86]);    MULADD(at[42], at[85]);    MULADD(at[43], at[84]);    MULADD(at[44], at[83]);    MULADD(at[45], at[82]);    MULADD(at[46], at[81]);    MULADD(at[47], at[80]); 
7117
+   COMBA_STORE(C->dp[79]);
7118
+   /* 80 */
7119
+   COMBA_FORWARD;
7120
+   MULADD(at[33], at[95]);    MULADD(at[34], at[94]);    MULADD(at[35], at[93]);    MULADD(at[36], at[92]);    MULADD(at[37], at[91]);    MULADD(at[38], at[90]);    MULADD(at[39], at[89]);    MULADD(at[40], at[88]);    MULADD(at[41], at[87]);    MULADD(at[42], at[86]);    MULADD(at[43], at[85]);    MULADD(at[44], at[84]);    MULADD(at[45], at[83]);    MULADD(at[46], at[82]);    MULADD(at[47], at[81]); 
7121
+   COMBA_STORE(C->dp[80]);
7122
+   /* 81 */
7123
+   COMBA_FORWARD;
7124
+   MULADD(at[34], at[95]);    MULADD(at[35], at[94]);    MULADD(at[36], at[93]);    MULADD(at[37], at[92]);    MULADD(at[38], at[91]);    MULADD(at[39], at[90]);    MULADD(at[40], at[89]);    MULADD(at[41], at[88]);    MULADD(at[42], at[87]);    MULADD(at[43], at[86]);    MULADD(at[44], at[85]);    MULADD(at[45], at[84]);    MULADD(at[46], at[83]);    MULADD(at[47], at[82]); 
7125
+   COMBA_STORE(C->dp[81]);
7126
+   /* 82 */
7127
+   COMBA_FORWARD;
7128
+   MULADD(at[35], at[95]);    MULADD(at[36], at[94]);    MULADD(at[37], at[93]);    MULADD(at[38], at[92]);    MULADD(at[39], at[91]);    MULADD(at[40], at[90]);    MULADD(at[41], at[89]);    MULADD(at[42], at[88]);    MULADD(at[43], at[87]);    MULADD(at[44], at[86]);    MULADD(at[45], at[85]);    MULADD(at[46], at[84]);    MULADD(at[47], at[83]); 
7129
+   COMBA_STORE(C->dp[82]);
7130
+   /* 83 */
7131
+   COMBA_FORWARD;
7132
+   MULADD(at[36], at[95]);    MULADD(at[37], at[94]);    MULADD(at[38], at[93]);    MULADD(at[39], at[92]);    MULADD(at[40], at[91]);    MULADD(at[41], at[90]);    MULADD(at[42], at[89]);    MULADD(at[43], at[88]);    MULADD(at[44], at[87]);    MULADD(at[45], at[86]);    MULADD(at[46], at[85]);    MULADD(at[47], at[84]); 
7133
+   COMBA_STORE(C->dp[83]);
7134
+   /* 84 */
7135
+   COMBA_FORWARD;
7136
+   MULADD(at[37], at[95]);    MULADD(at[38], at[94]);    MULADD(at[39], at[93]);    MULADD(at[40], at[92]);    MULADD(at[41], at[91]);    MULADD(at[42], at[90]);    MULADD(at[43], at[89]);    MULADD(at[44], at[88]);    MULADD(at[45], at[87]);    MULADD(at[46], at[86]);    MULADD(at[47], at[85]); 
7137
+   COMBA_STORE(C->dp[84]);
7138
+   /* 85 */
7139
+   COMBA_FORWARD;
7140
+   MULADD(at[38], at[95]);    MULADD(at[39], at[94]);    MULADD(at[40], at[93]);    MULADD(at[41], at[92]);    MULADD(at[42], at[91]);    MULADD(at[43], at[90]);    MULADD(at[44], at[89]);    MULADD(at[45], at[88]);    MULADD(at[46], at[87]);    MULADD(at[47], at[86]); 
7141
+   COMBA_STORE(C->dp[85]);
7142
+   /* 86 */
7143
+   COMBA_FORWARD;
7144
+   MULADD(at[39], at[95]);    MULADD(at[40], at[94]);    MULADD(at[41], at[93]);    MULADD(at[42], at[92]);    MULADD(at[43], at[91]);    MULADD(at[44], at[90]);    MULADD(at[45], at[89]);    MULADD(at[46], at[88]);    MULADD(at[47], at[87]); 
7145
+   COMBA_STORE(C->dp[86]);
7146
+   /* 87 */
7147
+   COMBA_FORWARD;
7148
+   MULADD(at[40], at[95]);    MULADD(at[41], at[94]);    MULADD(at[42], at[93]);    MULADD(at[43], at[92]);    MULADD(at[44], at[91]);    MULADD(at[45], at[90]);    MULADD(at[46], at[89]);    MULADD(at[47], at[88]); 
7149
+   COMBA_STORE(C->dp[87]);
7150
+   /* 88 */
7151
+   COMBA_FORWARD;
7152
+   MULADD(at[41], at[95]);    MULADD(at[42], at[94]);    MULADD(at[43], at[93]);    MULADD(at[44], at[92]);    MULADD(at[45], at[91]);    MULADD(at[46], at[90]);    MULADD(at[47], at[89]); 
7153
+   COMBA_STORE(C->dp[88]);
7154
+   /* 89 */
7155
+   COMBA_FORWARD;
7156
+   MULADD(at[42], at[95]);    MULADD(at[43], at[94]);    MULADD(at[44], at[93]);    MULADD(at[45], at[92]);    MULADD(at[46], at[91]);    MULADD(at[47], at[90]); 
7157
+   COMBA_STORE(C->dp[89]);
7158
+   /* 90 */
7159
+   COMBA_FORWARD;
7160
+   MULADD(at[43], at[95]);    MULADD(at[44], at[94]);    MULADD(at[45], at[93]);    MULADD(at[46], at[92]);    MULADD(at[47], at[91]); 
7161
+   COMBA_STORE(C->dp[90]);
7162
+   /* 91 */
7163
+   COMBA_FORWARD;
7164
+   MULADD(at[44], at[95]);    MULADD(at[45], at[94]);    MULADD(at[46], at[93]);    MULADD(at[47], at[92]); 
7165
+   COMBA_STORE(C->dp[91]);
7166
+   /* 92 */
7167
+   COMBA_FORWARD;
7168
+   MULADD(at[45], at[95]);    MULADD(at[46], at[94]);    MULADD(at[47], at[93]); 
7169
+   COMBA_STORE(C->dp[92]);
7170
+   /* 93 */
7171
+   COMBA_FORWARD;
7172
+   MULADD(at[46], at[95]);    MULADD(at[47], at[94]); 
7173
+   COMBA_STORE(C->dp[93]);
7174
+   /* 94 */
7175
+   COMBA_FORWARD;
7176
+   MULADD(at[47], at[95]); 
7177
+   COMBA_STORE(C->dp[94]);
7178
+   COMBA_STORE2(C->dp[95]);
7179
+   C->used = 96;
7180
+   C->sign = A->sign ^ B->sign;
7181
+   fp_clamp(C);
7182
+   COMBA_FINI;
5584 7183
 }
5585 7184
 #endif
5586 7185
 
5587
-/* $Source: /cvs/libtom/libtommath/bn_mp_prime_fermat.c,v $ */
5588
-/* $Revision: 1.3 $ */
5589
-/* $Date: 2006/03/31 14:18:44 $ */
7186
+/* End: fp_mul_comba_48.c */
5590 7187
 
5591
-/* End: bn_mp_prime_fermat.c */
7188
+/* Start: fp_mul_comba_6.c */
7189
+#define TFM_DEFINES
7190
+#include "fp_mul_comba.c"
5592 7191
 
5593
-/* Start: bn_mp_prime_is_divisible.c */
5594
-#include <bignum.h>
5595
-#ifdef BN_MP_PRIME_IS_DIVISIBLE_C
5596
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
5597
- *
5598
- * LibTomMath is a library that provides multiple-precision
5599
- * integer arithmetic as well as number theoretic functionality.
5600
- *
5601
- * The library was designed directly after the MPI library by
5602
- * Michael Fromberger but has been written from scratch with
5603
- * additional optimizations in place.
5604
- *
5605
- * The library is free for all purposes without any express
5606
- * guarantee it works.
5607
- *
5608
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
5609
- */
7192
+#ifdef TFM_MUL6
7193
+void fp_mul_comba6(fp_int *A, fp_int *B, fp_int *C)
7194
+{  /* Fully unrolled 6-digit by 6-digit multiply: reads 6 digits from each of A and B and writes 12 digits into C->dp. */
7195
+   fp_digit c0, c1, c2, at[12];  /* c0..c2 are not referenced directly below; presumably the accumulator words used by the COMBA_ and MULADD macros (defined outside this view) */
7196
+   /* Snapshot the operands: at[0..5] = A->dp[0..5], at[6..11] = B->dp[0..5]. Only at[] is read from here on. */
7197
+   memcpy(at, A->dp, 6 * sizeof(fp_digit));
7198
+   memcpy(at+6, B->dp, 6 * sizeof(fp_digit));
7199
+   COMBA_START;
7200
+   /* Each numbered section below handles one output column k: it MULADDs every pair at[i], at[6+j] with i + j == k, then stores into C->dp[k]. */
7201
+   COMBA_CLEAR;
7202
+   /* column 0: the single pair i = 0, j = 0 */
7203
+   MULADD(at[0], at[6]); 
7204
+   COMBA_STORE(C->dp[0]);
7205
+   /* column 1: pairs with i + j == 1 */
7206
+   COMBA_FORWARD;
7207
+   MULADD(at[0], at[7]);    MULADD(at[1], at[6]); 
7208
+   COMBA_STORE(C->dp[1]);
7209
+   /* column 2: pairs with i + j == 2 */
7210
+   COMBA_FORWARD;
7211
+   MULADD(at[0], at[8]);    MULADD(at[1], at[7]);    MULADD(at[2], at[6]); 
7212
+   COMBA_STORE(C->dp[2]);
7213
+   /* column 3: pairs with i + j == 3 */
7214
+   COMBA_FORWARD;
7215
+   MULADD(at[0], at[9]);    MULADD(at[1], at[8]);    MULADD(at[2], at[7]);    MULADD(at[3], at[6]); 
7216
+   COMBA_STORE(C->dp[3]);
7217
+   /* column 4: pairs with i + j == 4 */
7218
+   COMBA_FORWARD;
7219
+   MULADD(at[0], at[10]);    MULADD(at[1], at[9]);    MULADD(at[2], at[8]);    MULADD(at[3], at[7]);    MULADD(at[4], at[6]); 
7220
+   COMBA_STORE(C->dp[4]);
7221
+   /* column 5: pairs with i + j == 5; the widest column, all six digits of each operand take part */
7222
+   COMBA_FORWARD;
7223
+   MULADD(at[0], at[11]);    MULADD(at[1], at[10]);    MULADD(at[2], at[9]);    MULADD(at[3], at[8]);    MULADD(at[4], at[7]);    MULADD(at[5], at[6]); 
7224
+   COMBA_STORE(C->dp[5]);
7225
+   /* column 6: pairs with i + j == 6; from here the columns shrink, i starts at 1 */
7226
+   COMBA_FORWARD;
7227
+   MULADD(at[1], at[11]);    MULADD(at[2], at[10]);    MULADD(at[3], at[9]);    MULADD(at[4], at[8]);    MULADD(at[5], at[7]); 
7228
+   COMBA_STORE(C->dp[6]);
7229
+   /* column 7: pairs with i + j == 7 */
7230
+   COMBA_FORWARD;
7231
+   MULADD(at[2], at[11]);    MULADD(at[3], at[10]);    MULADD(at[4], at[9]);    MULADD(at[5], at[8]); 
7232
+   COMBA_STORE(C->dp[7]);
7233
+   /* column 8: pairs with i + j == 8 */
7234
+   COMBA_FORWARD;
7235
+   MULADD(at[3], at[11]);    MULADD(at[4], at[10]);    MULADD(at[5], at[9]); 
7236
+   COMBA_STORE(C->dp[8]);
7237
+   /* column 9: pairs with i + j == 9 */
7238
+   COMBA_FORWARD;
7239
+   MULADD(at[4], at[11]);    MULADD(at[5], at[10]); 
7240
+   COMBA_STORE(C->dp[9]);
7241
+   /* column 10: the single pair i = 5, j = 5 */
7242
+   COMBA_FORWARD;
7243
+   MULADD(at[5], at[11]); 
7244
+   COMBA_STORE(C->dp[10]);
7245
+   COMBA_STORE2(C->dp[11]);  /* top digit: no COMBA_FORWARD or MULADD precedes it; presumably stores the carry left over from column 10 - TODO confirm against the macro definition */
7246
+   C->used = 12;  /* declare the full 12-digit width before fp_clamp runs */
7247
+   C->sign = A->sign ^ B->sign;  /* result sign is the XOR of the operand sign fields */
7248
+   fp_clamp(C);  /* NOTE(review): fp_clamp is defined elsewhere; presumably lowers C->used past leading zero digits - confirm */
7249
+   COMBA_FINI;
7250
+}
7251
+#endif
5610 7252
 
5611
-/* determines if an integers is divisible by one 
5612
- * of the first PRIME_SIZE primes or not
5613
- *
5614
- * sets result to 0 if not, 1 if yes
5615
- */
5616
-int mp_prime_is_divisible (mp_int * a, int *result)
7253
+/* End: fp_mul_comba_6.c */
7254
+
7255
+/* Start: fp_mul_comba_64.c */
7256
+#define TFM_DEFINES
7257
+#include "fp_mul_comba.c"
7258
+
7259
+#ifdef TFM_MUL64
7260
+void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C)
5617 7261
 {
5618
-  int     err, ix;
5619
-  mp_digit res;
7262
+   fp_digit c0, c1, c2, at[128];
7263
+
7264
+   memcpy(at, A->dp, 64 * sizeof(fp_digit));
7265
+   memcpy(at+64, B->dp, 64 * sizeof(fp_digit));
7266
+   COMBA_START;
7267
+
7268
+   COMBA_CLEAR;
7269
+   /* 0 */
7270
+   MULADD(at[0], at[64]); 
7271
+   COMBA_STORE(C->dp[0]);
7272
+   /* 1 */
7273
+   COMBA_FORWARD;
7274
+   MULADD(at[0], at[65]);    MULADD(at[1], at[64]); 
7275
+   COMBA_STORE(C->dp[1]);
7276
+   /* 2 */
7277
+   COMBA_FORWARD;
7278
+   MULADD(at[0], at[66]);    MULADD(at[1], at[65]);    MULADD(at[2], at[64]); 
7279
+   COMBA_STORE(C->dp[2]);
7280
+   /* 3 */
7281
+   COMBA_FORWARD;
7282
+   MULADD(at[0], at[67]);    MULADD(at[1], at[66]);    MULADD(at[2], at[65]);    MULADD(at[3], at[64]); 
7283
+   COMBA_STORE(C->dp[3]);
7284
+   /* 4 */
7285
+   COMBA_FORWARD;
7286
+   MULADD(at[0], at[68]);    MULADD(at[1], at[67]);    MULADD(at[2], at[66]);    MULADD(at[3], at[65]);    MULADD(at[4], at[64]); 
7287
+   COMBA_STORE(C->dp[4]);
7288
+   /* 5 */
7289
+   COMBA_FORWARD;
7290
+   MULADD(at[0], at[69]);    MULADD(at[1], at[68]);    MULADD(at[2], at[67]);    MULADD(at[3], at[66]);    MULADD(at[4], at[65]);    MULADD(at[5], at[64]); 
7291
+   COMBA_STORE(C->dp[5]);
7292
+   /* 6 */
7293
+   COMBA_FORWARD;
7294
+   MULADD(at[0], at[70]);    MULADD(at[1], at[69]);    MULADD(at[2], at[68]);    MULADD(at[3], at[67]);    MULADD(at[4], at[66]);    MULADD(at[5], at[65]);    MULADD(at[6], at[64]); 
7295
+   COMBA_STORE(C->dp[6]);
7296
+   /* 7 */
7297
+   COMBA_FORWARD;
7298
+   MULADD(at[0], at[71]);    MULADD(at[1], at[70]);    MULADD(at[2], at[69]);    MULADD(at[3], at[68]);    MULADD(at[4], at[67]);    MULADD(at[5], at[66]);    MULADD(at[6], at[65]);    MULADD(at[7], at[64]); 
7299
+   COMBA_STORE(C->dp[7]);
7300
+   /* 8 */
7301
+   COMBA_FORWARD;
7302
+   MULADD(at[0], at[72]);    MULADD(at[1], at[71]);    MULADD(at[2], at[70]);    MULADD(at[3], at[69]);    MULADD(at[4], at[68]);    MULADD(at[5], at[67]);    MULADD(at[6], at[66]);    MULADD(at[7], at[65]);    MULADD(at[8], at[64]); 
7303
+   COMBA_STORE(C->dp[8]);
7304
+   /* 9 */
7305
+   COMBA_FORWARD;
7306
+   MULADD(at[0], at[73]);    MULADD(at[1], at[72]);    MULADD(at[2], at[71]);    MULADD(at[3], at[70]);    MULADD(at[4], at[69]);    MULADD(at[5], at[68]);    MULADD(at[6], at[67]);    MULADD(at[7], at[66]);    MULADD(at[8], at[65]);    MULADD(at[9], at[64]); 
7307
+   COMBA_STORE(C->dp[9]);
7308
+   /* 10 */
7309
+   COMBA_FORWARD;
7310
+   MULADD(at[0], at[74]);    MULADD(at[1], at[73]);    MULADD(at[2], at[72]);    MULADD(at[3], at[71]);    MULADD(at[4], at[70]);    MULADD(at[5], at[69]);    MULADD(at[6], at[68]);    MULADD(at[7], at[67]);    MULADD(at[8], at[66]);    MULADD(at[9], at[65]);    MULADD(at[10], at[64]); 
7311
+   COMBA_STORE(C->dp[10]);
7312
+   /* 11 */
7313
+   COMBA_FORWARD;
7314
+   MULADD(at[0], at[75]);    MULADD(at[1], at[74]);    MULADD(at[2], at[73]);    MULADD(at[3], at[72]);    MULADD(at[4], at[71]);    MULADD(at[5], at[70]);    MULADD(at[6], at[69]);    MULADD(at[7], at[68]);    MULADD(at[8], at[67]);    MULADD(at[9], at[66]);    MULADD(at[10], at[65]);    MULADD(at[11], at[64]); 
7315
+   COMBA_STORE(C->dp[11]);
7316
+   /* 12 */
7317
+   COMBA_FORWARD;
7318
+   MULADD(at[0], at[76]);    MULADD(at[1], at[75]);    MULADD(at[2], at[74]);    MULADD(at[3], at[73]);    MULADD(at[4], at[72]);    MULADD(at[5], at[71]);    MULADD(at[6], at[70]);    MULADD(at[7], at[69]);    MULADD(at[8], at[68]);    MULADD(at[9], at[67]);    MULADD(at[10], at[66]);    MULADD(at[11], at[65]);    MULADD(at[12], at[64]); 
7319
+   COMBA_STORE(C->dp[12]);
7320
+   /* 13 */
7321
+   COMBA_FORWARD;
7322
+   MULADD(at[0], at[77]);    MULADD(at[1], at[76]);    MULADD(at[2], at[75]);    MULADD(at[3], at[74]);    MULADD(at[4], at[73]);    MULADD(at[5], at[72]);    MULADD(at[6], at[71]);    MULADD(at[7], at[70]);    MULADD(at[8], at[69]);    MULADD(at[9], at[68]);    MULADD(at[10], at[67]);    MULADD(at[11], at[66]);    MULADD(at[12], at[65]);    MULADD(at[13], at[64]); 
7323
+   COMBA_STORE(C->dp[13]);
7324
+   /* 14 */
7325
+   COMBA_FORWARD;
7326
+   MULADD(at[0], at[78]);    MULADD(at[1], at[77]);    MULADD(at[2], at[76]);    MULADD(at[3], at[75]);    MULADD(at[4], at[74]);    MULADD(at[5], at[73]);    MULADD(at[6], at[72]);    MULADD(at[7], at[71]);    MULADD(at[8], at[70]);    MULADD(at[9], at[69]);    MULADD(at[10], at[68]);    MULADD(at[11], at[67]);    MULADD(at[12], at[66]);    MULADD(at[13], at[65]);    MULADD(at[14], at[64]); 
7327
+   COMBA_STORE(C->dp[14]);
7328
+   /* 15 */
7329
+   COMBA_FORWARD;
7330
+   MULADD(at[0], at[79]);    MULADD(at[1], at[78]);    MULADD(at[2], at[77]);    MULADD(at[3], at[76]);    MULADD(at[4], at[75]);    MULADD(at[5], at[74]);    MULADD(at[6], at[73]);    MULADD(at[7], at[72]);    MULADD(at[8], at[71]);    MULADD(at[9], at[70]);    MULADD(at[10], at[69]);    MULADD(at[11], at[68]);    MULADD(at[12], at[67]);    MULADD(at[13], at[66]);    MULADD(at[14], at[65]);    MULADD(at[15], at[64]); 
7331
+   COMBA_STORE(C->dp[15]);
7332
+   /* 16 */
7333
+   COMBA_FORWARD;
7334
+   MULADD(at[0], at[80]);    MULADD(at[1], at[79]);    MULADD(at[2], at[78]);    MULADD(at[3], at[77]);    MULADD(at[4], at[76]);    MULADD(at[5], at[75]);    MULADD(at[6], at[74]);    MULADD(at[7], at[73]);    MULADD(at[8], at[72]);    MULADD(at[9], at[71]);    MULADD(at[10], at[70]);    MULADD(at[11], at[69]);    MULADD(at[12], at[68]);    MULADD(at[13], at[67]);    MULADD(at[14], at[66]);    MULADD(at[15], at[65]);    MULADD(at[16], at[64]); 
7335
+   COMBA_STORE(C->dp[16]);
7336
+   /* 17 */
7337
+   COMBA_FORWARD;
7338
+   MULADD(at[0], at[81]);    MULADD(at[1], at[80]);    MULADD(at[2], at[79]);    MULADD(at[3], at[78]);    MULADD(at[4], at[77]);    MULADD(at[5], at[76]);    MULADD(at[6], at[75]);    MULADD(at[7], at[74]);    MULADD(at[8], at[73]);    MULADD(at[9], at[72]);    MULADD(at[10], at[71]);    MULADD(at[11], at[70]);    MULADD(at[12], at[69]);    MULADD(at[13], at[68]);    MULADD(at[14], at[67]);    MULADD(at[15], at[66]);    MULADD(at[16], at[65]);    MULADD(at[17], at[64]); 
7339
+   COMBA_STORE(C->dp[17]);
7340
+   /* 18 */
7341
+   COMBA_FORWARD;
7342
+   MULADD(at[0], at[82]);    MULADD(at[1], at[81]);    MULADD(at[2], at[80]);    MULADD(at[3], at[79]);    MULADD(at[4], at[78]);    MULADD(at[5], at[77]);    MULADD(at[6], at[76]);    MULADD(at[7], at[75]);    MULADD(at[8], at[74]);    MULADD(at[9], at[73]);    MULADD(at[10], at[72]);    MULADD(at[11], at[71]);    MULADD(at[12], at[70]);    MULADD(at[13], at[69]);    MULADD(at[14], at[68]);    MULADD(at[15], at[67]);    MULADD(at[16], at[66]);    MULADD(at[17], at[65]);    MULADD(at[18], at[64]); 
7343
+   COMBA_STORE(C->dp[18]);
7344
+   /* 19 */
7345
+   COMBA_FORWARD;
7346
+   MULADD(at[0], at[83]);    MULADD(at[1], at[82]);    MULADD(at[2], at[81]);    MULADD(at[3], at[80]);    MULADD(at[4], at[79]);    MULADD(at[5], at[78]);    MULADD(at[6], at[77]);    MULADD(at[7], at[76]);    MULADD(at[8], at[75]);    MULADD(at[9], at[74]);    MULADD(at[10], at[73]);    MULADD(at[11], at[72]);    MULADD(at[12], at[71]);    MULADD(at[13], at[70]);    MULADD(at[14], at[69]);    MULADD(at[15], at[68]);    MULADD(at[16], at[67]);    MULADD(at[17], at[66]);    MULADD(at[18], at[65]);    MULADD(at[19], at[64]); 
7347
+   COMBA_STORE(C->dp[19]);
7348
+   /* 20 */
7349
+   COMBA_FORWARD;
7350
+   MULADD(at[0], at[84]);    MULADD(at[1], at[83]);    MULADD(at[2], at[82]);    MULADD(at[3], at[81]);    MULADD(at[4], at[80]);    MULADD(at[5], at[79]);    MULADD(at[6], at[78]);    MULADD(at[7], at[77]);    MULADD(at[8], at[76]);    MULADD(at[9], at[75]);    MULADD(at[10], at[74]);    MULADD(at[11], at[73]);    MULADD(at[12], at[72]);    MULADD(at[13], at[71]);    MULADD(at[14], at[70]);    MULADD(at[15], at[69]);    MULADD(at[16], at[68]);    MULADD(at[17], at[67]);    MULADD(at[18], at[66]);    MULADD(at[19], at[65]);    MULADD(at[20], at[64]); 
7351
+   COMBA_STORE(C->dp[20]);
7352
+   /* 21 */
7353
+   COMBA_FORWARD;
7354
+   MULADD(at[0], at[85]);    MULADD(at[1], at[84]);    MULADD(at[2], at[83]);    MULADD(at[3], at[82]);    MULADD(at[4], at[81]);    MULADD(at[5], at[80]);    MULADD(at[6], at[79]);    MULADD(at[7], at[78]);    MULADD(at[8], at[77]);    MULADD(at[9], at[76]);    MULADD(at[10], at[75]);    MULADD(at[11], at[74]);    MULADD(at[12], at[73]);    MULADD(at[13], at[72]);    MULADD(at[14], at[71]);    MULADD(at[15], at[70]);    MULADD(at[16], at[69]);    MULADD(at[17], at[68]);    MULADD(at[18], at[67]);    MULADD(at[19], at[66]);    MULADD(at[20], at[65]);    MULADD(at[21], at[64]); 
7355
+   COMBA_STORE(C->dp[21]);
7356
+   /* 22 */
7357
+   COMBA_FORWARD;
7358
+   MULADD(at[0], at[86]);    MULADD(at[1], at[85]);    MULADD(at[2], at[84]);    MULADD(at[3], at[83]);    MULADD(at[4], at[82]);    MULADD(at[5], at[81]);    MULADD(at[6], at[80]);    MULADD(at[7], at[79]);    MULADD(at[8], at[78]);    MULADD(at[9], at[77]);    MULADD(at[10], at[76]);    MULADD(at[11], at[75]);    MULADD(at[12], at[74]);    MULADD(at[13], at[73]);    MULADD(at[14], at[72]);    MULADD(at[15], at[71]);    MULADD(at[16], at[70]);    MULADD(at[17], at[69]);    MULADD(at[18], at[68]);    MULADD(at[19], at[67]);    MULADD(at[20], at[66]);    MULADD(at[21], at[65]);    MULADD(at[22], at[64]); 
7359
+   COMBA_STORE(C->dp[22]);
7360
+   /* 23 */
7361
+   COMBA_FORWARD;
7362
+   MULADD(at[0], at[87]);    MULADD(at[1], at[86]);    MULADD(at[2], at[85]);    MULADD(at[3], at[84]);    MULADD(at[4], at[83]);    MULADD(at[5], at[82]);    MULADD(at[6], at[81]);    MULADD(at[7], at[80]);    MULADD(at[8], at[79]);    MULADD(at[9], at[78]);    MULADD(at[10], at[77]);    MULADD(at[11], at[76]);    MULADD(at[12], at[75]);    MULADD(at[13], at[74]);    MULADD(at[14], at[73]);    MULADD(at[15], at[72]);    MULADD(at[16], at[71]);    MULADD(at[17], at[70]);    MULADD(at[18], at[69]);    MULADD(at[19], at[68]);    MULADD(at[20], at[67]);    MULADD(at[21], at[66]);    MULADD(at[22], at[65]);    MULADD(at[23], at[64]); 
7363
+   COMBA_STORE(C->dp[23]);
7364
+   /* 24 */
7365
+   COMBA_FORWARD;
7366
+   MULADD(at[0], at[88]);    MULADD(at[1], at[87]);    MULADD(at[2], at[86]);    MULADD(at[3], at[85]);    MULADD(at[4], at[84]);    MULADD(at[5], at[83]);    MULADD(at[6], at[82]);    MULADD(at[7], at[81]);    MULADD(at[8], at[80]);    MULADD(at[9], at[79]);    MULADD(at[10], at[78]);    MULADD(at[11], at[77]);    MULADD(at[12], at[76]);    MULADD(at[13], at[75]);    MULADD(at[14], at[74]);    MULADD(at[15], at[73]);    MULADD(at[16], at[72]);    MULADD(at[17], at[71]);    MULADD(at[18], at[70]);    MULADD(at[19], at[69]);    MULADD(at[20], at[68]);    MULADD(at[21], at[67]);    MULADD(at[22], at[66]);    MULADD(at[23], at[65]);    MULADD(at[24], at[64]); 
7367
+   COMBA_STORE(C->dp[24]);
7368
+   /* 25 */
7369
+   COMBA_FORWARD;
7370
+   MULADD(at[0], at[89]);    MULADD(at[1], at[88]);    MULADD(at[2], at[87]);    MULADD(at[3], at[86]);    MULADD(at[4], at[85]);    MULADD(at[5], at[84]);    MULADD(at[6], at[83]);    MULADD(at[7], at[82]);    MULADD(at[8], at[81]);    MULADD(at[9], at[80]);    MULADD(at[10], at[79]);    MULADD(at[11], at[78]);    MULADD(at[12], at[77]);    MULADD(at[13], at[76]);    MULADD(at[14], at[75]);    MULADD(at[15], at[74]);    MULADD(at[16], at[73]);    MULADD(at[17], at[72]);    MULADD(at[18], at[71]);    MULADD(at[19], at[70]);    MULADD(at[20], at[69]);    MULADD(at[21], at[68]);    MULADD(at[22], at[67]);    MULADD(at[23], at[66]);    MULADD(at[24], at[65]);    MULADD(at[25], at[64]); 
7371
+   COMBA_STORE(C->dp[25]);
7372
+   /* 26 */
7373
+   COMBA_FORWARD;
7374
+   MULADD(at[0], at[90]);    MULADD(at[1], at[89]);    MULADD(at[2], at[88]);    MULADD(at[3], at[87]);    MULADD(at[4], at[86]);    MULADD(at[5], at[85]);    MULADD(at[6], at[84]);    MULADD(at[7], at[83]);    MULADD(at[8], at[82]);    MULADD(at[9], at[81]);    MULADD(at[10], at[80]);    MULADD(at[11], at[79]);    MULADD(at[12], at[78]);    MULADD(at[13], at[77]);    MULADD(at[14], at[76]);    MULADD(at[15], at[75]);    MULADD(at[16], at[74]);    MULADD(at[17], at[73]);    MULADD(at[18], at[72]);    MULADD(at[19], at[71]);    MULADD(at[20], at[70]);    MULADD(at[21], at[69]);    MULADD(at[22], at[68]);    MULADD(at[23], at[67]);    MULADD(at[24], at[66]);    MULADD(at[25], at[65]);    MULADD(at[26], at[64]); 
7375
+   COMBA_STORE(C->dp[26]);
7376
+   /* 27 */
7377
+   COMBA_FORWARD;
7378
+   MULADD(at[0], at[91]);    MULADD(at[1], at[90]);    MULADD(at[2], at[89]);    MULADD(at[3], at[88]);    MULADD(at[4], at[87]);    MULADD(at[5], at[86]);    MULADD(at[6], at[85]);    MULADD(at[7], at[84]);    MULADD(at[8], at[83]);    MULADD(at[9], at[82]);    MULADD(at[10], at[81]);    MULADD(at[11], at[80]);    MULADD(at[12], at[79]);    MULADD(at[13], at[78]);    MULADD(at[14], at[77]);    MULADD(at[15], at[76]);    MULADD(at[16], at[75]);    MULADD(at[17], at[74]);    MULADD(at[18], at[73]);    MULADD(at[19], at[72]);    MULADD(at[20], at[71]);    MULADD(at[21], at[70]);    MULADD(at[22], at[69]);    MULADD(at[23], at[68]);    MULADD(at[24], at[67]);    MULADD(at[25], at[66]);    MULADD(at[26], at[65]);    MULADD(at[27], at[64]); 
7379
+   COMBA_STORE(C->dp[27]);
7380
+   /* 28 */
7381
+   COMBA_FORWARD;
7382
+   MULADD(at[0], at[92]);    MULADD(at[1], at[91]);    MULADD(at[2], at[90]);    MULADD(at[3], at[89]);    MULADD(at[4], at[88]);    MULADD(at[5], at[87]);    MULADD(at[6], at[86]);    MULADD(at[7], at[85]);    MULADD(at[8], at[84]);    MULADD(at[9], at[83]);    MULADD(at[10], at[82]);    MULADD(at[11], at[81]);    MULADD(at[12], at[80]);    MULADD(at[13], at[79]);    MULADD(at[14], at[78]);    MULADD(at[15], at[77]);    MULADD(at[16], at[76]);    MULADD(at[17], at[75]);    MULADD(at[18], at[74]);    MULADD(at[19], at[73]);    MULADD(at[20], at[72]);    MULADD(at[21], at[71]);    MULADD(at[22], at[70]);    MULADD(at[23], at[69]);    MULADD(at[24], at[68]);    MULADD(at[25], at[67]);    MULADD(at[26], at[66]);    MULADD(at[27], at[65]);    MULADD(at[28], at[64]); 
7383
+   COMBA_STORE(C->dp[28]);
7384
+   /* 29 */
7385
+   COMBA_FORWARD;
7386
+   MULADD(at[0], at[93]);    MULADD(at[1], at[92]);    MULADD(at[2], at[91]);    MULADD(at[3], at[90]);    MULADD(at[4], at[89]);    MULADD(at[5], at[88]);    MULADD(at[6], at[87]);    MULADD(at[7], at[86]);    MULADD(at[8], at[85]);    MULADD(at[9], at[84]);    MULADD(at[10], at[83]);    MULADD(at[11], at[82]);    MULADD(at[12], at[81]);    MULADD(at[13], at[80]);    MULADD(at[14], at[79]);    MULADD(at[15], at[78]);    MULADD(at[16], at[77]);    MULADD(at[17], at[76]);    MULADD(at[18], at[75]);    MULADD(at[19], at[74]);    MULADD(at[20], at[73]);    MULADD(at[21], at[72]);    MULADD(at[22], at[71]);    MULADD(at[23], at[70]);    MULADD(at[24], at[69]);    MULADD(at[25], at[68]);    MULADD(at[26], at[67]);    MULADD(at[27], at[66]);    MULADD(at[28], at[65]);    MULADD(at[29], at[64]); 
7387
+   COMBA_STORE(C->dp[29]);
7388
+   /* 30 */
7389
+   COMBA_FORWARD;
7390
+   MULADD(at[0], at[94]);    MULADD(at[1], at[93]);    MULADD(at[2], at[92]);    MULADD(at[3], at[91]);    MULADD(at[4], at[90]);    MULADD(at[5], at[89]);    MULADD(at[6], at[88]);    MULADD(at[7], at[87]);    MULADD(at[8], at[86]);    MULADD(at[9], at[85]);    MULADD(at[10], at[84]);    MULADD(at[11], at[83]);    MULADD(at[12], at[82]);    MULADD(at[13], at[81]);    MULADD(at[14], at[80]);    MULADD(at[15], at[79]);    MULADD(at[16], at[78]);    MULADD(at[17], at[77]);    MULADD(at[18], at[76]);    MULADD(at[19], at[75]);    MULADD(at[20], at[74]);    MULADD(at[21], at[73]);    MULADD(at[22], at[72]);    MULADD(at[23], at[71]);    MULADD(at[24], at[70]);    MULADD(at[25], at[69]);    MULADD(at[26], at[68]);    MULADD(at[27], at[67]);    MULADD(at[28], at[66]);    MULADD(at[29], at[65]);    MULADD(at[30], at[64]); 
7391
+   COMBA_STORE(C->dp[30]);
7392
+   /* 31 */
7393
+   COMBA_FORWARD;
7394
+   MULADD(at[0], at[95]);    MULADD(at[1], at[94]);    MULADD(at[2], at[93]);    MULADD(at[3], at[92]);    MULADD(at[4], at[91]);    MULADD(at[5], at[90]);    MULADD(at[6], at[89]);    MULADD(at[7], at[88]);    MULADD(at[8], at[87]);    MULADD(at[9], at[86]);    MULADD(at[10], at[85]);    MULADD(at[11], at[84]);    MULADD(at[12], at[83]);    MULADD(at[13], at[82]);    MULADD(at[14], at[81]);    MULADD(at[15], at[80]);    MULADD(at[16], at[79]);    MULADD(at[17], at[78]);    MULADD(at[18], at[77]);    MULADD(at[19], at[76]);    MULADD(at[20], at[75]);    MULADD(at[21], at[74]);    MULADD(at[22], at[73]);    MULADD(at[23], at[72]);    MULADD(at[24], at[71]);    MULADD(at[25], at[70]);    MULADD(at[26], at[69]);    MULADD(at[27], at[68]);    MULADD(at[28], at[67]);    MULADD(at[29], at[66]);    MULADD(at[30], at[65]);    MULADD(at[31], at[64]); 
7395
+   COMBA_STORE(C->dp[31]);
7396
+   /* 32 */
7397
+   COMBA_FORWARD;
7398
+   MULADD(at[0], at[96]);    MULADD(at[1], at[95]);    MULADD(at[2], at[94]);    MULADD(at[3], at[93]);    MULADD(at[4], at[92]);    MULADD(at[5], at[91]);    MULADD(at[6], at[90]);    MULADD(at[7], at[89]);    MULADD(at[8], at[88]);    MULADD(at[9], at[87]);    MULADD(at[10], at[86]);    MULADD(at[11], at[85]);    MULADD(at[12], at[84]);    MULADD(at[13], at[83]);    MULADD(at[14], at[82]);    MULADD(at[15], at[81]);    MULADD(at[16], at[80]);    MULADD(at[17], at[79]);    MULADD(at[18], at[78]);    MULADD(at[19], at[77]);    MULADD(at[20], at[76]);    MULADD(at[21], at[75]);    MULADD(at[22], at[74]);    MULADD(at[23], at[73]);    MULADD(at[24], at[72]);    MULADD(at[25], at[71]);    MULADD(at[26], at[70]);    MULADD(at[27], at[69]);    MULADD(at[28], at[68]);    MULADD(at[29], at[67]);    MULADD(at[30], at[66]);    MULADD(at[31], at[65]);    MULADD(at[32], at[64]); 
7399
+   COMBA_STORE(C->dp[32]);
7400
+   /* 33 */
7401
+   COMBA_FORWARD;
7402
+   MULADD(at[0], at[97]);    MULADD(at[1], at[96]);    MULADD(at[2], at[95]);    MULADD(at[3], at[94]);    MULADD(at[4], at[93]);    MULADD(at[5], at[92]);    MULADD(at[6], at[91]);    MULADD(at[7], at[90]);    MULADD(at[8], at[89]);    MULADD(at[9], at[88]);    MULADD(at[10], at[87]);    MULADD(at[11], at[86]);    MULADD(at[12], at[85]);    MULADD(at[13], at[84]);    MULADD(at[14], at[83]);    MULADD(at[15], at[82]);    MULADD(at[16], at[81]);    MULADD(at[17], at[80]);    MULADD(at[18], at[79]);    MULADD(at[19], at[78]);    MULADD(at[20], at[77]);    MULADD(at[21], at[76]);    MULADD(at[22], at[75]);    MULADD(at[23], at[74]);    MULADD(at[24], at[73]);    MULADD(at[25], at[72]);    MULADD(at[26], at[71]);    MULADD(at[27], at[70]);    MULADD(at[28], at[69]);    MULADD(at[29], at[68]);    MULADD(at[30], at[67]);    MULADD(at[31], at[66]);    MULADD(at[32], at[65]);    MULADD(at[33], at[64]); 
7403
+   COMBA_STORE(C->dp[33]);
7404
+   /* 34 */
7405
+   COMBA_FORWARD;
7406
+   MULADD(at[0], at[98]);    MULADD(at[1], at[97]);    MULADD(at[2], at[96]);    MULADD(at[3], at[95]);    MULADD(at[4], at[94]);    MULADD(at[5], at[93]);    MULADD(at[6], at[92]);    MULADD(at[7], at[91]);    MULADD(at[8], at[90]);    MULADD(at[9], at[89]);    MULADD(at[10], at[88]);    MULADD(at[11], at[87]);    MULADD(at[12], at[86]);    MULADD(at[13], at[85]);    MULADD(at[14], at[84]);    MULADD(at[15], at[83]);    MULADD(at[16], at[82]);    MULADD(at[17], at[81]);    MULADD(at[18], at[80]);    MULADD(at[19], at[79]);    MULADD(at[20], at[78]);    MULADD(at[21], at[77]);    MULADD(at[22], at[76]);    MULADD(at[23], at[75]);    MULADD(at[24], at[74]);    MULADD(at[25], at[73]);    MULADD(at[26], at[72]);    MULADD(at[27], at[71]);    MULADD(at[28], at[70]);    MULADD(at[29], at[69]);    MULADD(at[30], at[68]);    MULADD(at[31], at[67]);    MULADD(at[32], at[66]);    MULADD(at[33], at[65]);    MULADD(at[34], at[64]); 
7407
+   COMBA_STORE(C->dp[34]);
7408
+   /* 35 */
7409
+   COMBA_FORWARD;
7410
+   MULADD(at[0], at[99]);    MULADD(at[1], at[98]);    MULADD(at[2], at[97]);    MULADD(at[3], at[96]);    MULADD(at[4], at[95]);    MULADD(at[5], at[94]);    MULADD(at[6], at[93]);    MULADD(at[7], at[92]);    MULADD(at[8], at[91]);    MULADD(at[9], at[90]);    MULADD(at[10], at[89]);    MULADD(at[11], at[88]);    MULADD(at[12], at[87]);    MULADD(at[13], at[86]);    MULADD(at[14], at[85]);    MULADD(at[15], at[84]);    MULADD(at[16], at[83]);    MULADD(at[17], at[82]);    MULADD(at[18], at[81]);    MULADD(at[19], at[80]);    MULADD(at[20], at[79]);    MULADD(at[21], at[78]);    MULADD(at[22], at[77]);    MULADD(at[23], at[76]);    MULADD(at[24], at[75]);    MULADD(at[25], at[74]);    MULADD(at[26], at[73]);    MULADD(at[27], at[72]);    MULADD(at[28], at[71]);    MULADD(at[29], at[70]);    MULADD(at[30], at[69]);    MULADD(at[31], at[68]);    MULADD(at[32], at[67]);    MULADD(at[33], at[66]);    MULADD(at[34], at[65]);    MULADD(at[35], at[64]); 
7411
+   COMBA_STORE(C->dp[35]);
7412
+   /* 36 */
7413
+   COMBA_FORWARD;
7414
+   MULADD(at[0], at[100]);    MULADD(at[1], at[99]);    MULADD(at[2], at[98]);    MULADD(at[3], at[97]);    MULADD(at[4], at[96]);    MULADD(at[5], at[95]);    MULADD(at[6], at[94]);    MULADD(at[7], at[93]);    MULADD(at[8], at[92]);    MULADD(at[9], at[91]);    MULADD(at[10], at[90]);    MULADD(at[11], at[89]);    MULADD(at[12], at[88]);    MULADD(at[13], at[87]);    MULADD(at[14], at[86]);    MULADD(at[15], at[85]);    MULADD(at[16], at[84]);    MULADD(at[17], at[83]);    MULADD(at[18], at[82]);    MULADD(at[19], at[81]);    MULADD(at[20], at[80]);    MULADD(at[21], at[79]);    MULADD(at[22], at[78]);    MULADD(at[23], at[77]);    MULADD(at[24], at[76]);    MULADD(at[25], at[75]);    MULADD(at[26], at[74]);    MULADD(at[27], at[73]);    MULADD(at[28], at[72]);    MULADD(at[29], at[71]);    MULADD(at[30], at[70]);    MULADD(at[31], at[69]);    MULADD(at[32], at[68]);    MULADD(at[33], at[67]);    MULADD(at[34], at[66]);    MULADD(at[35], at[65]);    MULADD(at[36], at[64]); 
7415
+   COMBA_STORE(C->dp[36]);
7416
+   /* 37 */
7417
+   COMBA_FORWARD;
7418
+   MULADD(at[0], at[101]);    MULADD(at[1], at[100]);    MULADD(at[2], at[99]);    MULADD(at[3], at[98]);    MULADD(at[4], at[97]);    MULADD(at[5], at[96]);    MULADD(at[6], at[95]);    MULADD(at[7], at[94]);    MULADD(at[8], at[93]);    MULADD(at[9], at[92]);    MULADD(at[10], at[91]);    MULADD(at[11], at[90]);    MULADD(at[12], at[89]);    MULADD(at[13], at[88]);    MULADD(at[14], at[87]);    MULADD(at[15], at[86]);    MULADD(at[16], at[85]);    MULADD(at[17], at[84]);    MULADD(at[18], at[83]);    MULADD(at[19], at[82]);    MULADD(at[20], at[81]);    MULADD(at[21], at[80]);    MULADD(at[22], at[79]);    MULADD(at[23], at[78]);    MULADD(at[24], at[77]);    MULADD(at[25], at[76]);    MULADD(at[26], at[75]);    MULADD(at[27], at[74]);    MULADD(at[28], at[73]);    MULADD(at[29], at[72]);    MULADD(at[30], at[71]);    MULADD(at[31], at[70]);    MULADD(at[32], at[69]);    MULADD(at[33], at[68]);    MULADD(at[34], at[67]);    MULADD(at[35], at[66]);    MULADD(at[36], at[65]);    MULADD(at[37], at[64]); 
7419
+   COMBA_STORE(C->dp[37]);
7420
+   /* 38 */
7421
+   COMBA_FORWARD;
7422
+   MULADD(at[0], at[102]);    MULADD(at[1], at[101]);    MULADD(at[2], at[100]);    MULADD(at[3], at[99]);    MULADD(at[4], at[98]);    MULADD(at[5], at[97]);    MULADD(at[6], at[96]);    MULADD(at[7], at[95]);    MULADD(at[8], at[94]);    MULADD(at[9], at[93]);    MULADD(at[10], at[92]);    MULADD(at[11], at[91]);    MULADD(at[12], at[90]);    MULADD(at[13], at[89]);    MULADD(at[14], at[88]);    MULADD(at[15], at[87]);    MULADD(at[16], at[86]);    MULADD(at[17], at[85]);    MULADD(at[18], at[84]);    MULADD(at[19], at[83]);    MULADD(at[20], at[82]);    MULADD(at[21], at[81]);    MULADD(at[22], at[80]);    MULADD(at[23], at[79]);    MULADD(at[24], at[78]);    MULADD(at[25], at[77]);    MULADD(at[26], at[76]);    MULADD(at[27], at[75]);    MULADD(at[28], at[74]);    MULADD(at[29], at[73]);    MULADD(at[30], at[72]);    MULADD(at[31], at[71]);    MULADD(at[32], at[70]);    MULADD(at[33], at[69]);    MULADD(at[34], at[68]);    MULADD(at[35], at[67]);    MULADD(at[36], at[66]);    MULADD(at[37], at[65]);    MULADD(at[38], at[64]); 
7423
+   COMBA_STORE(C->dp[38]);
7424
+   /* 39 */
7425
+   COMBA_FORWARD;
7426
+   MULADD(at[0], at[103]);    MULADD(at[1], at[102]);    MULADD(at[2], at[101]);    MULADD(at[3], at[100]);    MULADD(at[4], at[99]);    MULADD(at[5], at[98]);    MULADD(at[6], at[97]);    MULADD(at[7], at[96]);    MULADD(at[8], at[95]);    MULADD(at[9], at[94]);    MULADD(at[10], at[93]);    MULADD(at[11], at[92]);    MULADD(at[12], at[91]);    MULADD(at[13], at[90]);    MULADD(at[14], at[89]);    MULADD(at[15], at[88]);    MULADD(at[16], at[87]);    MULADD(at[17], at[86]);    MULADD(at[18], at[85]);    MULADD(at[19], at[84]);    MULADD(at[20], at[83]);    MULADD(at[21], at[82]);    MULADD(at[22], at[81]);    MULADD(at[23], at[80]);    MULADD(at[24], at[79]);    MULADD(at[25], at[78]);    MULADD(at[26], at[77]);    MULADD(at[27], at[76]);    MULADD(at[28], at[75]);    MULADD(at[29], at[74]);    MULADD(at[30], at[73]);    MULADD(at[31], at[72]);    MULADD(at[32], at[71]);    MULADD(at[33], at[70]);    MULADD(at[34], at[69]);    MULADD(at[35], at[68]);    MULADD(at[36], at[67]);    MULADD(at[37], at[66]);    MULADD(at[38], at[65]);    MULADD(at[39], at[64]); 
7427
+   COMBA_STORE(C->dp[39]);
7428
+   /* 40 */
7429
+   COMBA_FORWARD;
7430
+   MULADD(at[0], at[104]);    MULADD(at[1], at[103]);    MULADD(at[2], at[102]);    MULADD(at[3], at[101]);    MULADD(at[4], at[100]);    MULADD(at[5], at[99]);    MULADD(at[6], at[98]);    MULADD(at[7], at[97]);    MULADD(at[8], at[96]);    MULADD(at[9], at[95]);    MULADD(at[10], at[94]);    MULADD(at[11], at[93]);    MULADD(at[12], at[92]);    MULADD(at[13], at[91]);    MULADD(at[14], at[90]);    MULADD(at[15], at[89]);    MULADD(at[16], at[88]);    MULADD(at[17], at[87]);    MULADD(at[18], at[86]);    MULADD(at[19], at[85]);    MULADD(at[20], at[84]);    MULADD(at[21], at[83]);    MULADD(at[22], at[82]);    MULADD(at[23], at[81]);    MULADD(at[24], at[80]);    MULADD(at[25], at[79]);    MULADD(at[26], at[78]);    MULADD(at[27], at[77]);    MULADD(at[28], at[76]);    MULADD(at[29], at[75]);    MULADD(at[30], at[74]);    MULADD(at[31], at[73]);    MULADD(at[32], at[72]);    MULADD(at[33], at[71]);    MULADD(at[34], at[70]);    MULADD(at[35], at[69]);    MULADD(at[36], at[68]);    MULADD(at[37], at[67]);    MULADD(at[38], at[66]);    MULADD(at[39], at[65]);    MULADD(at[40], at[64]); 
7431
+   COMBA_STORE(C->dp[40]);
7432
+   /* 41 */
7433
+   COMBA_FORWARD;
7434
+   MULADD(at[0], at[105]);    MULADD(at[1], at[104]);    MULADD(at[2], at[103]);    MULADD(at[3], at[102]);    MULADD(at[4], at[101]);    MULADD(at[5], at[100]);    MULADD(at[6], at[99]);    MULADD(at[7], at[98]);    MULADD(at[8], at[97]);    MULADD(at[9], at[96]);    MULADD(at[10], at[95]);    MULADD(at[11], at[94]);    MULADD(at[12], at[93]);    MULADD(at[13], at[92]);    MULADD(at[14], at[91]);    MULADD(at[15], at[90]);    MULADD(at[16], at[89]);    MULADD(at[17], at[88]);    MULADD(at[18], at[87]);    MULADD(at[19], at[86]);    MULADD(at[20], at[85]);    MULADD(at[21], at[84]);    MULADD(at[22], at[83]);    MULADD(at[23], at[82]);    MULADD(at[24], at[81]);    MULADD(at[25], at[80]);    MULADD(at[26], at[79]);    MULADD(at[27], at[78]);    MULADD(at[28], at[77]);    MULADD(at[29], at[76]);    MULADD(at[30], at[75]);    MULADD(at[31], at[74]);    MULADD(at[32], at[73]);    MULADD(at[33], at[72]);    MULADD(at[34], at[71]);    MULADD(at[35], at[70]);    MULADD(at[36], at[69]);    MULADD(at[37], at[68]);    MULADD(at[38], at[67]);    MULADD(at[39], at[66]);    MULADD(at[40], at[65]);    MULADD(at[41], at[64]); 
7435
+   COMBA_STORE(C->dp[41]);
7436
+   /* 42 */
7437
+   COMBA_FORWARD;
7438
+   MULADD(at[0], at[106]);    MULADD(at[1], at[105]);    MULADD(at[2], at[104]);    MULADD(at[3], at[103]);    MULADD(at[4], at[102]);    MULADD(at[5], at[101]);    MULADD(at[6], at[100]);    MULADD(at[7], at[99]);    MULADD(at[8], at[98]);    MULADD(at[9], at[97]);    MULADD(at[10], at[96]);    MULADD(at[11], at[95]);    MULADD(at[12], at[94]);    MULADD(at[13], at[93]);    MULADD(at[14], at[92]);    MULADD(at[15], at[91]);    MULADD(at[16], at[90]);    MULADD(at[17], at[89]);    MULADD(at[18], at[88]);    MULADD(at[19], at[87]);    MULADD(at[20], at[86]);    MULADD(at[21], at[85]);    MULADD(at[22], at[84]);    MULADD(at[23], at[83]);    MULADD(at[24], at[82]);    MULADD(at[25], at[81]);    MULADD(at[26], at[80]);    MULADD(at[27], at[79]);    MULADD(at[28], at[78]);    MULADD(at[29], at[77]);    MULADD(at[30], at[76]);    MULADD(at[31], at[75]);    MULADD(at[32], at[74]);    MULADD(at[33], at[73]);    MULADD(at[34], at[72]);    MULADD(at[35], at[71]);    MULADD(at[36], at[70]);    MULADD(at[37], at[69]);    MULADD(at[38], at[68]);    MULADD(at[39], at[67]);    MULADD(at[40], at[66]);    MULADD(at[41], at[65]);    MULADD(at[42], at[64]); 
7439
+   COMBA_STORE(C->dp[42]);
7440
+   /* 43 */
7441
+   COMBA_FORWARD;
7442
+   MULADD(at[0], at[107]);    MULADD(at[1], at[106]);    MULADD(at[2], at[105]);    MULADD(at[3], at[104]);    MULADD(at[4], at[103]);    MULADD(at[5], at[102]);    MULADD(at[6], at[101]);    MULADD(at[7], at[100]);    MULADD(at[8], at[99]);    MULADD(at[9], at[98]);    MULADD(at[10], at[97]);    MULADD(at[11], at[96]);    MULADD(at[12], at[95]);    MULADD(at[13], at[94]);    MULADD(at[14], at[93]);    MULADD(at[15], at[92]);    MULADD(at[16], at[91]);    MULADD(at[17], at[90]);    MULADD(at[18], at[89]);    MULADD(at[19], at[88]);    MULADD(at[20], at[87]);    MULADD(at[21], at[86]);    MULADD(at[22], at[85]);    MULADD(at[23], at[84]);    MULADD(at[24], at[83]);    MULADD(at[25], at[82]);    MULADD(at[26], at[81]);    MULADD(at[27], at[80]);    MULADD(at[28], at[79]);    MULADD(at[29], at[78]);    MULADD(at[30], at[77]);    MULADD(at[31], at[76]);    MULADD(at[32], at[75]);    MULADD(at[33], at[74]);    MULADD(at[34], at[73]);    MULADD(at[35], at[72]);    MULADD(at[36], at[71]);    MULADD(at[37], at[70]);    MULADD(at[38], at[69]);    MULADD(at[39], at[68]);    MULADD(at[40], at[67]);    MULADD(at[41], at[66]);    MULADD(at[42], at[65]);    MULADD(at[43], at[64]); 
7443
+   COMBA_STORE(C->dp[43]);
7444
+   /* 44 */
7445
+   COMBA_FORWARD;
7446
+   MULADD(at[0], at[108]);    MULADD(at[1], at[107]);    MULADD(at[2], at[106]);    MULADD(at[3], at[105]);    MULADD(at[4], at[104]);    MULADD(at[5], at[103]);    MULADD(at[6], at[102]);    MULADD(at[7], at[101]);    MULADD(at[8], at[100]);    MULADD(at[9], at[99]);    MULADD(at[10], at[98]);    MULADD(at[11], at[97]);    MULADD(at[12], at[96]);    MULADD(at[13], at[95]);    MULADD(at[14], at[94]);    MULADD(at[15], at[93]);    MULADD(at[16], at[92]);    MULADD(at[17], at[91]);    MULADD(at[18], at[90]);    MULADD(at[19], at[89]);    MULADD(at[20], at[88]);    MULADD(at[21], at[87]);    MULADD(at[22], at[86]);    MULADD(at[23], at[85]);    MULADD(at[24], at[84]);    MULADD(at[25], at[83]);    MULADD(at[26], at[82]);    MULADD(at[27], at[81]);    MULADD(at[28], at[80]);    MULADD(at[29], at[79]);    MULADD(at[30], at[78]);    MULADD(at[31], at[77]);    MULADD(at[32], at[76]);    MULADD(at[33], at[75]);    MULADD(at[34], at[74]);    MULADD(at[35], at[73]);    MULADD(at[36], at[72]);    MULADD(at[37], at[71]);    MULADD(at[38], at[70]);    MULADD(at[39], at[69]);    MULADD(at[40], at[68]);    MULADD(at[41], at[67]);    MULADD(at[42], at[66]);    MULADD(at[43], at[65]);    MULADD(at[44], at[64]); 
7447
+   COMBA_STORE(C->dp[44]);
7448
+   /* 45 */
7449
+   COMBA_FORWARD;
7450
+   MULADD(at[0], at[109]);    MULADD(at[1], at[108]);    MULADD(at[2], at[107]);    MULADD(at[3], at[106]);    MULADD(at[4], at[105]);    MULADD(at[5], at[104]);    MULADD(at[6], at[103]);    MULADD(at[7], at[102]);    MULADD(at[8], at[101]);    MULADD(at[9], at[100]);    MULADD(at[10], at[99]);    MULADD(at[11], at[98]);    MULADD(at[12], at[97]);    MULADD(at[13], at[96]);    MULADD(at[14], at[95]);    MULADD(at[15], at[94]);    MULADD(at[16], at[93]);    MULADD(at[17], at[92]);    MULADD(at[18], at[91]);    MULADD(at[19], at[90]);    MULADD(at[20], at[89]);    MULADD(at[21], at[88]);    MULADD(at[22], at[87]);    MULADD(at[23], at[86]);    MULADD(at[24], at[85]);    MULADD(at[25], at[84]);    MULADD(at[26], at[83]);    MULADD(at[27], at[82]);    MULADD(at[28], at[81]);    MULADD(at[29], at[80]);    MULADD(at[30], at[79]);    MULADD(at[31], at[78]);    MULADD(at[32], at[77]);    MULADD(at[33], at[76]);    MULADD(at[34], at[75]);    MULADD(at[35], at[74]);    MULADD(at[36], at[73]);    MULADD(at[37], at[72]);    MULADD(at[38], at[71]);    MULADD(at[39], at[70]);    MULADD(at[40], at[69]);    MULADD(at[41], at[68]);    MULADD(at[42], at[67]);    MULADD(at[43], at[66]);    MULADD(at[44], at[65]);    MULADD(at[45], at[64]); 
7451
+   COMBA_STORE(C->dp[45]);
7452
+   /* 46 */
7453
+   COMBA_FORWARD;
7454
+   MULADD(at[0], at[110]);    MULADD(at[1], at[109]);    MULADD(at[2], at[108]);    MULADD(at[3], at[107]);    MULADD(at[4], at[106]);    MULADD(at[5], at[105]);    MULADD(at[6], at[104]);    MULADD(at[7], at[103]);    MULADD(at[8], at[102]);    MULADD(at[9], at[101]);    MULADD(at[10], at[100]);    MULADD(at[11], at[99]);    MULADD(at[12], at[98]);    MULADD(at[13], at[97]);    MULADD(at[14], at[96]);    MULADD(at[15], at[95]);    MULADD(at[16], at[94]);    MULADD(at[17], at[93]);    MULADD(at[18], at[92]);    MULADD(at[19], at[91]);    MULADD(at[20], at[90]);    MULADD(at[21], at[89]);    MULADD(at[22], at[88]);    MULADD(at[23], at[87]);    MULADD(at[24], at[86]);    MULADD(at[25], at[85]);    MULADD(at[26], at[84]);    MULADD(at[27], at[83]);    MULADD(at[28], at[82]);    MULADD(at[29], at[81]);    MULADD(at[30], at[80]);    MULADD(at[31], at[79]);    MULADD(at[32], at[78]);    MULADD(at[33], at[77]);    MULADD(at[34], at[76]);    MULADD(at[35], at[75]);    MULADD(at[36], at[74]);    MULADD(at[37], at[73]);    MULADD(at[38], at[72]);    MULADD(at[39], at[71]);    MULADD(at[40], at[70]);    MULADD(at[41], at[69]);    MULADD(at[42], at[68]);    MULADD(at[43], at[67]);    MULADD(at[44], at[66]);    MULADD(at[45], at[65]);    MULADD(at[46], at[64]); 
7455
+   COMBA_STORE(C->dp[46]);
7456
+   /* 47 */
7457
+   COMBA_FORWARD;
7458
+   MULADD(at[0], at[111]);    MULADD(at[1], at[110]);    MULADD(at[2], at[109]);    MULADD(at[3], at[108]);    MULADD(at[4], at[107]);    MULADD(at[5], at[106]);    MULADD(at[6], at[105]);    MULADD(at[7], at[104]);    MULADD(at[8], at[103]);    MULADD(at[9], at[102]);    MULADD(at[10], at[101]);    MULADD(at[11], at[100]);    MULADD(at[12], at[99]);    MULADD(at[13], at[98]);    MULADD(at[14], at[97]);    MULADD(at[15], at[96]);    MULADD(at[16], at[95]);    MULADD(at[17], at[94]);    MULADD(at[18], at[93]);    MULADD(at[19], at[92]);    MULADD(at[20], at[91]);    MULADD(at[21], at[90]);    MULADD(at[22], at[89]);    MULADD(at[23], at[88]);    MULADD(at[24], at[87]);    MULADD(at[25], at[86]);    MULADD(at[26], at[85]);    MULADD(at[27], at[84]);    MULADD(at[28], at[83]);    MULADD(at[29], at[82]);    MULADD(at[30], at[81]);    MULADD(at[31], at[80]);    MULADD(at[32], at[79]);    MULADD(at[33], at[78]);    MULADD(at[34], at[77]);    MULADD(at[35], at[76]);    MULADD(at[36], at[75]);    MULADD(at[37], at[74]);    MULADD(at[38], at[73]);    MULADD(at[39], at[72]);    MULADD(at[40], at[71]);    MULADD(at[41], at[70]);    MULADD(at[42], at[69]);    MULADD(at[43], at[68]);    MULADD(at[44], at[67]);    MULADD(at[45], at[66]);    MULADD(at[46], at[65]);    MULADD(at[47], at[64]); 
7459
+   COMBA_STORE(C->dp[47]);
7460
+   /* 48 */
7461
+   COMBA_FORWARD;
7462
+   MULADD(at[0], at[112]);    MULADD(at[1], at[111]);    MULADD(at[2], at[110]);    MULADD(at[3], at[109]);    MULADD(at[4], at[108]);    MULADD(at[5], at[107]);    MULADD(at[6], at[106]);    MULADD(at[7], at[105]);    MULADD(at[8], at[104]);    MULADD(at[9], at[103]);    MULADD(at[10], at[102]);    MULADD(at[11], at[101]);    MULADD(at[12], at[100]);    MULADD(at[13], at[99]);    MULADD(at[14], at[98]);    MULADD(at[15], at[97]);    MULADD(at[16], at[96]);    MULADD(at[17], at[95]);    MULADD(at[18], at[94]);    MULADD(at[19], at[93]);    MULADD(at[20], at[92]);    MULADD(at[21], at[91]);    MULADD(at[22], at[90]);    MULADD(at[23], at[89]);    MULADD(at[24], at[88]);    MULADD(at[25], at[87]);    MULADD(at[26], at[86]);    MULADD(at[27], at[85]);    MULADD(at[28], at[84]);    MULADD(at[29], at[83]);    MULADD(at[30], at[82]);    MULADD(at[31], at[81]);    MULADD(at[32], at[80]);    MULADD(at[33], at[79]);    MULADD(at[34], at[78]);    MULADD(at[35], at[77]);    MULADD(at[36], at[76]);    MULADD(at[37], at[75]);    MULADD(at[38], at[74]);    MULADD(at[39], at[73]);    MULADD(at[40], at[72]);    MULADD(at[41], at[71]);    MULADD(at[42], at[70]);    MULADD(at[43], at[69]);    MULADD(at[44], at[68]);    MULADD(at[45], at[67]);    MULADD(at[46], at[66]);    MULADD(at[47], at[65]);    MULADD(at[48], at[64]); 
7463
+   COMBA_STORE(C->dp[48]);
7464
+   /* 49 */
7465
+   COMBA_FORWARD;
7466
+   MULADD(at[0], at[113]);    MULADD(at[1], at[112]);    MULADD(at[2], at[111]);    MULADD(at[3], at[110]);    MULADD(at[4], at[109]);    MULADD(at[5], at[108]);    MULADD(at[6], at[107]);    MULADD(at[7], at[106]);    MULADD(at[8], at[105]);    MULADD(at[9], at[104]);    MULADD(at[10], at[103]);    MULADD(at[11], at[102]);    MULADD(at[12], at[101]);    MULADD(at[13], at[100]);    MULADD(at[14], at[99]);    MULADD(at[15], at[98]);    MULADD(at[16], at[97]);    MULADD(at[17], at[96]);    MULADD(at[18], at[95]);    MULADD(at[19], at[94]);    MULADD(at[20], at[93]);    MULADD(at[21], at[92]);    MULADD(at[22], at[91]);    MULADD(at[23], at[90]);    MULADD(at[24], at[89]);    MULADD(at[25], at[88]);    MULADD(at[26], at[87]);    MULADD(at[27], at[86]);    MULADD(at[28], at[85]);    MULADD(at[29], at[84]);    MULADD(at[30], at[83]);    MULADD(at[31], at[82]);    MULADD(at[32], at[81]);    MULADD(at[33], at[80]);    MULADD(at[34], at[79]);    MULADD(at[35], at[78]);    MULADD(at[36], at[77]);    MULADD(at[37], at[76]);    MULADD(at[38], at[75]);    MULADD(at[39], at[74]);    MULADD(at[40], at[73]);    MULADD(at[41], at[72]);    MULADD(at[42], at[71]);    MULADD(at[43], at[70]);    MULADD(at[44], at[69]);    MULADD(at[45], at[68]);    MULADD(at[46], at[67]);    MULADD(at[47], at[66]);    MULADD(at[48], at[65]);    MULADD(at[49], at[64]); 
7467
+   COMBA_STORE(C->dp[49]);
7468
+   /* 50 */
7469
+   COMBA_FORWARD;
7470
+   MULADD(at[0], at[114]);    MULADD(at[1], at[113]);    MULADD(at[2], at[112]);    MULADD(at[3], at[111]);    MULADD(at[4], at[110]);    MULADD(at[5], at[109]);    MULADD(at[6], at[108]);    MULADD(at[7], at[107]);    MULADD(at[8], at[106]);    MULADD(at[9], at[105]);    MULADD(at[10], at[104]);    MULADD(at[11], at[103]);    MULADD(at[12], at[102]);    MULADD(at[13], at[101]);    MULADD(at[14], at[100]);    MULADD(at[15], at[99]);    MULADD(at[16], at[98]);    MULADD(at[17], at[97]);    MULADD(at[18], at[96]);    MULADD(at[19], at[95]);    MULADD(at[20], at[94]);    MULADD(at[21], at[93]);    MULADD(at[22], at[92]);    MULADD(at[23], at[91]);    MULADD(at[24], at[90]);    MULADD(at[25], at[89]);    MULADD(at[26], at[88]);    MULADD(at[27], at[87]);    MULADD(at[28], at[86]);    MULADD(at[29], at[85]);    MULADD(at[30], at[84]);    MULADD(at[31], at[83]);    MULADD(at[32], at[82]);    MULADD(at[33], at[81]);    MULADD(at[34], at[80]);    MULADD(at[35], at[79]);    MULADD(at[36], at[78]);    MULADD(at[37], at[77]);    MULADD(at[38], at[76]);    MULADD(at[39], at[75]);    MULADD(at[40], at[74]);    MULADD(at[41], at[73]);    MULADD(at[42], at[72]);    MULADD(at[43], at[71]);    MULADD(at[44], at[70]);    MULADD(at[45], at[69]);    MULADD(at[46], at[68]);    MULADD(at[47], at[67]);    MULADD(at[48], at[66]);    MULADD(at[49], at[65]);    MULADD(at[50], at[64]); 
7471
+   COMBA_STORE(C->dp[50]);
7472
+   /* 51 */
7473
+   COMBA_FORWARD;
7474
+   MULADD(at[0], at[115]);    MULADD(at[1], at[114]);    MULADD(at[2], at[113]);    MULADD(at[3], at[112]);    MULADD(at[4], at[111]);    MULADD(at[5], at[110]);    MULADD(at[6], at[109]);    MULADD(at[7], at[108]);    MULADD(at[8], at[107]);    MULADD(at[9], at[106]);    MULADD(at[10], at[105]);    MULADD(at[11], at[104]);    MULADD(at[12], at[103]);    MULADD(at[13], at[102]);    MULADD(at[14], at[101]);    MULADD(at[15], at[100]);    MULADD(at[16], at[99]);    MULADD(at[17], at[98]);    MULADD(at[18], at[97]);    MULADD(at[19], at[96]);    MULADD(at[20], at[95]);    MULADD(at[21], at[94]);    MULADD(at[22], at[93]);    MULADD(at[23], at[92]);    MULADD(at[24], at[91]);    MULADD(at[25], at[90]);    MULADD(at[26], at[89]);    MULADD(at[27], at[88]);    MULADD(at[28], at[87]);    MULADD(at[29], at[86]);    MULADD(at[30], at[85]);    MULADD(at[31], at[84]);    MULADD(at[32], at[83]);    MULADD(at[33], at[82]);    MULADD(at[34], at[81]);    MULADD(at[35], at[80]);    MULADD(at[36], at[79]);    MULADD(at[37], at[78]);    MULADD(at[38], at[77]);    MULADD(at[39], at[76]);    MULADD(at[40], at[75]);    MULADD(at[41], at[74]);    MULADD(at[42], at[73]);    MULADD(at[43], at[72]);    MULADD(at[44], at[71]);    MULADD(at[45], at[70]);    MULADD(at[46], at[69]);    MULADD(at[47], at[68]);    MULADD(at[48], at[67]);    MULADD(at[49], at[66]);    MULADD(at[50], at[65]);    MULADD(at[51], at[64]); 
7475
+   COMBA_STORE(C->dp[51]);
7476
+   /* 52 */
7477
+   COMBA_FORWARD;
7478
+   MULADD(at[0], at[116]);    MULADD(at[1], at[115]);    MULADD(at[2], at[114]);    MULADD(at[3], at[113]);    MULADD(at[4], at[112]);    MULADD(at[5], at[111]);    MULADD(at[6], at[110]);    MULADD(at[7], at[109]);    MULADD(at[8], at[108]);    MULADD(at[9], at[107]);    MULADD(at[10], at[106]);    MULADD(at[11], at[105]);    MULADD(at[12], at[104]);    MULADD(at[13], at[103]);    MULADD(at[14], at[102]);    MULADD(at[15], at[101]);    MULADD(at[16], at[100]);    MULADD(at[17], at[99]);    MULADD(at[18], at[98]);    MULADD(at[19], at[97]);    MULADD(at[20], at[96]);    MULADD(at[21], at[95]);    MULADD(at[22], at[94]);    MULADD(at[23], at[93]);    MULADD(at[24], at[92]);    MULADD(at[25], at[91]);    MULADD(at[26], at[90]);    MULADD(at[27], at[89]);    MULADD(at[28], at[88]);    MULADD(at[29], at[87]);    MULADD(at[30], at[86]);    MULADD(at[31], at[85]);    MULADD(at[32], at[84]);    MULADD(at[33], at[83]);    MULADD(at[34], at[82]);    MULADD(at[35], at[81]);    MULADD(at[36], at[80]);    MULADD(at[37], at[79]);    MULADD(at[38], at[78]);    MULADD(at[39], at[77]);    MULADD(at[40], at[76]);    MULADD(at[41], at[75]);    MULADD(at[42], at[74]);    MULADD(at[43], at[73]);    MULADD(at[44], at[72]);    MULADD(at[45], at[71]);    MULADD(at[46], at[70]);    MULADD(at[47], at[69]);    MULADD(at[48], at[68]);    MULADD(at[49], at[67]);    MULADD(at[50], at[66]);    MULADD(at[51], at[65]);    MULADD(at[52], at[64]); 
7479
+   COMBA_STORE(C->dp[52]);
7480
+   /* 53 */
7481
+   COMBA_FORWARD;
7482
+   MULADD(at[0], at[117]);    MULADD(at[1], at[116]);    MULADD(at[2], at[115]);    MULADD(at[3], at[114]);    MULADD(at[4], at[113]);    MULADD(at[5], at[112]);    MULADD(at[6], at[111]);    MULADD(at[7], at[110]);    MULADD(at[8], at[109]);    MULADD(at[9], at[108]);    MULADD(at[10], at[107]);    MULADD(at[11], at[106]);    MULADD(at[12], at[105]);    MULADD(at[13], at[104]);    MULADD(at[14], at[103]);    MULADD(at[15], at[102]);    MULADD(at[16], at[101]);    MULADD(at[17], at[100]);    MULADD(at[18], at[99]);    MULADD(at[19], at[98]);    MULADD(at[20], at[97]);    MULADD(at[21], at[96]);    MULADD(at[22], at[95]);    MULADD(at[23], at[94]);    MULADD(at[24], at[93]);    MULADD(at[25], at[92]);    MULADD(at[26], at[91]);    MULADD(at[27], at[90]);    MULADD(at[28], at[89]);    MULADD(at[29], at[88]);    MULADD(at[30], at[87]);    MULADD(at[31], at[86]);    MULADD(at[32], at[85]);    MULADD(at[33], at[84]);    MULADD(at[34], at[83]);    MULADD(at[35], at[82]);    MULADD(at[36], at[81]);    MULADD(at[37], at[80]);    MULADD(at[38], at[79]);    MULADD(at[39], at[78]);    MULADD(at[40], at[77]);    MULADD(at[41], at[76]);    MULADD(at[42], at[75]);    MULADD(at[43], at[74]);    MULADD(at[44], at[73]);    MULADD(at[45], at[72]);    MULADD(at[46], at[71]);    MULADD(at[47], at[70]);    MULADD(at[48], at[69]);    MULADD(at[49], at[68]);    MULADD(at[50], at[67]);    MULADD(at[51], at[66]);    MULADD(at[52], at[65]);    MULADD(at[53], at[64]); 
7483
+   COMBA_STORE(C->dp[53]);
7484
+   /* 54 */
7485
+   COMBA_FORWARD;
7486
+   MULADD(at[0], at[118]);    MULADD(at[1], at[117]);    MULADD(at[2], at[116]);    MULADD(at[3], at[115]);    MULADD(at[4], at[114]);    MULADD(at[5], at[113]);    MULADD(at[6], at[112]);    MULADD(at[7], at[111]);    MULADD(at[8], at[110]);    MULADD(at[9], at[109]);    MULADD(at[10], at[108]);    MULADD(at[11], at[107]);    MULADD(at[12], at[106]);    MULADD(at[13], at[105]);    MULADD(at[14], at[104]);    MULADD(at[15], at[103]);    MULADD(at[16], at[102]);    MULADD(at[17], at[101]);    MULADD(at[18], at[100]);    MULADD(at[19], at[99]);    MULADD(at[20], at[98]);    MULADD(at[21], at[97]);    MULADD(at[22], at[96]);    MULADD(at[23], at[95]);    MULADD(at[24], at[94]);    MULADD(at[25], at[93]);    MULADD(at[26], at[92]);    MULADD(at[27], at[91]);    MULADD(at[28], at[90]);    MULADD(at[29], at[89]);    MULADD(at[30], at[88]);    MULADD(at[31], at[87]);    MULADD(at[32], at[86]);    MULADD(at[33], at[85]);    MULADD(at[34], at[84]);    MULADD(at[35], at[83]);    MULADD(at[36], at[82]);    MULADD(at[37], at[81]);    MULADD(at[38], at[80]);    MULADD(at[39], at[79]);    MULADD(at[40], at[78]);    MULADD(at[41], at[77]);    MULADD(at[42], at[76]);    MULADD(at[43], at[75]);    MULADD(at[44], at[74]);    MULADD(at[45], at[73]);    MULADD(at[46], at[72]);    MULADD(at[47], at[71]);    MULADD(at[48], at[70]);    MULADD(at[49], at[69]);    MULADD(at[50], at[68]);    MULADD(at[51], at[67]);    MULADD(at[52], at[66]);    MULADD(at[53], at[65]);    MULADD(at[54], at[64]); 
7487
+   COMBA_STORE(C->dp[54]);
7488
+   /* 55 */
7489
+   COMBA_FORWARD;
7490
+   MULADD(at[0], at[119]);    MULADD(at[1], at[118]);    MULADD(at[2], at[117]);    MULADD(at[3], at[116]);    MULADD(at[4], at[115]);    MULADD(at[5], at[114]);    MULADD(at[6], at[113]);    MULADD(at[7], at[112]);    MULADD(at[8], at[111]);    MULADD(at[9], at[110]);    MULADD(at[10], at[109]);    MULADD(at[11], at[108]);    MULADD(at[12], at[107]);    MULADD(at[13], at[106]);    MULADD(at[14], at[105]);    MULADD(at[15], at[104]);    MULADD(at[16], at[103]);    MULADD(at[17], at[102]);    MULADD(at[18], at[101]);    MULADD(at[19], at[100]);    MULADD(at[20], at[99]);    MULADD(at[21], at[98]);    MULADD(at[22], at[97]);    MULADD(at[23], at[96]);    MULADD(at[24], at[95]);    MULADD(at[25], at[94]);    MULADD(at[26], at[93]);    MULADD(at[27], at[92]);    MULADD(at[28], at[91]);    MULADD(at[29], at[90]);    MULADD(at[30], at[89]);    MULADD(at[31], at[88]);    MULADD(at[32], at[87]);    MULADD(at[33], at[86]);    MULADD(at[34], at[85]);    MULADD(at[35], at[84]);    MULADD(at[36], at[83]);    MULADD(at[37], at[82]);    MULADD(at[38], at[81]);    MULADD(at[39], at[80]);    MULADD(at[40], at[79]);    MULADD(at[41], at[78]);    MULADD(at[42], at[77]);    MULADD(at[43], at[76]);    MULADD(at[44], at[75]);    MULADD(at[45], at[74]);    MULADD(at[46], at[73]);    MULADD(at[47], at[72]);    MULADD(at[48], at[71]);    MULADD(at[49], at[70]);    MULADD(at[50], at[69]);    MULADD(at[51], at[68]);    MULADD(at[52], at[67]);    MULADD(at[53], at[66]);    MULADD(at[54], at[65]);    MULADD(at[55], at[64]); 
7491
+   COMBA_STORE(C->dp[55]);
7492
+   /* 56 */
7493
+   COMBA_FORWARD;
7494
+   MULADD(at[0], at[120]);    MULADD(at[1], at[119]);    MULADD(at[2], at[118]);    MULADD(at[3], at[117]);    MULADD(at[4], at[116]);    MULADD(at[5], at[115]);    MULADD(at[6], at[114]);    MULADD(at[7], at[113]);    MULADD(at[8], at[112]);    MULADD(at[9], at[111]);    MULADD(at[10], at[110]);    MULADD(at[11], at[109]);    MULADD(at[12], at[108]);    MULADD(at[13], at[107]);    MULADD(at[14], at[106]);    MULADD(at[15], at[105]);    MULADD(at[16], at[104]);    MULADD(at[17], at[103]);    MULADD(at[18], at[102]);    MULADD(at[19], at[101]);    MULADD(at[20], at[100]);    MULADD(at[21], at[99]);    MULADD(at[22], at[98]);    MULADD(at[23], at[97]);    MULADD(at[24], at[96]);    MULADD(at[25], at[95]);    MULADD(at[26], at[94]);    MULADD(at[27], at[93]);    MULADD(at[28], at[92]);    MULADD(at[29], at[91]);    MULADD(at[30], at[90]);    MULADD(at[31], at[89]);    MULADD(at[32], at[88]);    MULADD(at[33], at[87]);    MULADD(at[34], at[86]);    MULADD(at[35], at[85]);    MULADD(at[36], at[84]);    MULADD(at[37], at[83]);    MULADD(at[38], at[82]);    MULADD(at[39], at[81]);    MULADD(at[40], at[80]);    MULADD(at[41], at[79]);    MULADD(at[42], at[78]);    MULADD(at[43], at[77]);    MULADD(at[44], at[76]);    MULADD(at[45], at[75]);    MULADD(at[46], at[74]);    MULADD(at[47], at[73]);    MULADD(at[48], at[72]);    MULADD(at[49], at[71]);    MULADD(at[50], at[70]);    MULADD(at[51], at[69]);    MULADD(at[52], at[68]);    MULADD(at[53], at[67]);    MULADD(at[54], at[66]);    MULADD(at[55], at[65]);    MULADD(at[56], at[64]); 
7495
+   COMBA_STORE(C->dp[56]);
7496
+   /* 57 */
7497
+   COMBA_FORWARD;
7498
+   MULADD(at[0], at[121]);    MULADD(at[1], at[120]);    MULADD(at[2], at[119]);    MULADD(at[3], at[118]);    MULADD(at[4], at[117]);    MULADD(at[5], at[116]);    MULADD(at[6], at[115]);    MULADD(at[7], at[114]);    MULADD(at[8], at[113]);    MULADD(at[9], at[112]);    MULADD(at[10], at[111]);    MULADD(at[11], at[110]);    MULADD(at[12], at[109]);    MULADD(at[13], at[108]);    MULADD(at[14], at[107]);    MULADD(at[15], at[106]);    MULADD(at[16], at[105]);    MULADD(at[17], at[104]);    MULADD(at[18], at[103]);    MULADD(at[19], at[102]);    MULADD(at[20], at[101]);    MULADD(at[21], at[100]);    MULADD(at[22], at[99]);    MULADD(at[23], at[98]);    MULADD(at[24], at[97]);    MULADD(at[25], at[96]);    MULADD(at[26], at[95]);    MULADD(at[27], at[94]);    MULADD(at[28], at[93]);    MULADD(at[29], at[92]);    MULADD(at[30], at[91]);    MULADD(at[31], at[90]);    MULADD(at[32], at[89]);    MULADD(at[33], at[88]);    MULADD(at[34], at[87]);    MULADD(at[35], at[86]);    MULADD(at[36], at[85]);    MULADD(at[37], at[84]);    MULADD(at[38], at[83]);    MULADD(at[39], at[82]);    MULADD(at[40], at[81]);    MULADD(at[41], at[80]);    MULADD(at[42], at[79]);    MULADD(at[43], at[78]);    MULADD(at[44], at[77]);    MULADD(at[45], at[76]);    MULADD(at[46], at[75]);    MULADD(at[47], at[74]);    MULADD(at[48], at[73]);    MULADD(at[49], at[72]);    MULADD(at[50], at[71]);    MULADD(at[51], at[70]);    MULADD(at[52], at[69]);    MULADD(at[53], at[68]);    MULADD(at[54], at[67]);    MULADD(at[55], at[66]);    MULADD(at[56], at[65]);    MULADD(at[57], at[64]); 
7499
+   COMBA_STORE(C->dp[57]);
7500
+   /* 58 */
7501
+   COMBA_FORWARD;
7502
+   MULADD(at[0], at[122]);    MULADD(at[1], at[121]);    MULADD(at[2], at[120]);    MULADD(at[3], at[119]);    MULADD(at[4], at[118]);    MULADD(at[5], at[117]);    MULADD(at[6], at[116]);    MULADD(at[7], at[115]);    MULADD(at[8], at[114]);    MULADD(at[9], at[113]);    MULADD(at[10], at[112]);    MULADD(at[11], at[111]);    MULADD(at[12], at[110]);    MULADD(at[13], at[109]);    MULADD(at[14], at[108]);    MULADD(at[15], at[107]);    MULADD(at[16], at[106]);    MULADD(at[17], at[105]);    MULADD(at[18], at[104]);    MULADD(at[19], at[103]);    MULADD(at[20], at[102]);    MULADD(at[21], at[101]);    MULADD(at[22], at[100]);    MULADD(at[23], at[99]);    MULADD(at[24], at[98]);    MULADD(at[25], at[97]);    MULADD(at[26], at[96]);    MULADD(at[27], at[95]);    MULADD(at[28], at[94]);    MULADD(at[29], at[93]);    MULADD(at[30], at[92]);    MULADD(at[31], at[91]);    MULADD(at[32], at[90]);    MULADD(at[33], at[89]);    MULADD(at[34], at[88]);    MULADD(at[35], at[87]);    MULADD(at[36], at[86]);    MULADD(at[37], at[85]);    MULADD(at[38], at[84]);    MULADD(at[39], at[83]);    MULADD(at[40], at[82]);    MULADD(at[41], at[81]);    MULADD(at[42], at[80]);    MULADD(at[43], at[79]);    MULADD(at[44], at[78]);    MULADD(at[45], at[77]);    MULADD(at[46], at[76]);    MULADD(at[47], at[75]);    MULADD(at[48], at[74]);    MULADD(at[49], at[73]);    MULADD(at[50], at[72]);    MULADD(at[51], at[71]);    MULADD(at[52], at[70]);    MULADD(at[53], at[69]);    MULADD(at[54], at[68]);    MULADD(at[55], at[67]);    MULADD(at[56], at[66]);    MULADD(at[57], at[65]);    MULADD(at[58], at[64]); 
7503
+   COMBA_STORE(C->dp[58]);
7504
+   /* 59 */
7505
+   COMBA_FORWARD;
7506
+   MULADD(at[0], at[123]);    MULADD(at[1], at[122]);    MULADD(at[2], at[121]);    MULADD(at[3], at[120]);    MULADD(at[4], at[119]);    MULADD(at[5], at[118]);    MULADD(at[6], at[117]);    MULADD(at[7], at[116]);    MULADD(at[8], at[115]);    MULADD(at[9], at[114]);    MULADD(at[10], at[113]);    MULADD(at[11], at[112]);    MULADD(at[12], at[111]);    MULADD(at[13], at[110]);    MULADD(at[14], at[109]);    MULADD(at[15], at[108]);    MULADD(at[16], at[107]);    MULADD(at[17], at[106]);    MULADD(at[18], at[105]);    MULADD(at[19], at[104]);    MULADD(at[20], at[103]);    MULADD(at[21], at[102]);    MULADD(at[22], at[101]);    MULADD(at[23], at[100]);    MULADD(at[24], at[99]);    MULADD(at[25], at[98]);    MULADD(at[26], at[97]);    MULADD(at[27], at[96]);    MULADD(at[28], at[95]);    MULADD(at[29], at[94]);    MULADD(at[30], at[93]);    MULADD(at[31], at[92]);    MULADD(at[32], at[91]);    MULADD(at[33], at[90]);    MULADD(at[34], at[89]);    MULADD(at[35], at[88]);    MULADD(at[36], at[87]);    MULADD(at[37], at[86]);    MULADD(at[38], at[85]);    MULADD(at[39], at[84]);    MULADD(at[40], at[83]);    MULADD(at[41], at[82]);    MULADD(at[42], at[81]);    MULADD(at[43], at[80]);    MULADD(at[44], at[79]);    MULADD(at[45], at[78]);    MULADD(at[46], at[77]);    MULADD(at[47], at[76]);    MULADD(at[48], at[75]);    MULADD(at[49], at[74]);    MULADD(at[50], at[73]);    MULADD(at[51], at[72]);    MULADD(at[52], at[71]);    MULADD(at[53], at[70]);    MULADD(at[54], at[69]);    MULADD(at[55], at[68]);    MULADD(at[56], at[67]);    MULADD(at[57], at[66]);    MULADD(at[58], at[65]);    MULADD(at[59], at[64]); 
7507
+   COMBA_STORE(C->dp[59]);
7508
+   /* 60 */
7509
+   COMBA_FORWARD;
7510
+   MULADD(at[0], at[124]);    MULADD(at[1], at[123]);    MULADD(at[2], at[122]);    MULADD(at[3], at[121]);    MULADD(at[4], at[120]);    MULADD(at[5], at[119]);    MULADD(at[6], at[118]);    MULADD(at[7], at[117]);    MULADD(at[8], at[116]);    MULADD(at[9], at[115]);    MULADD(at[10], at[114]);    MULADD(at[11], at[113]);    MULADD(at[12], at[112]);    MULADD(at[13], at[111]);    MULADD(at[14], at[110]);    MULADD(at[15], at[109]);    MULADD(at[16], at[108]);    MULADD(at[17], at[107]);    MULADD(at[18], at[106]);    MULADD(at[19], at[105]);    MULADD(at[20], at[104]);    MULADD(at[21], at[103]);    MULADD(at[22], at[102]);    MULADD(at[23], at[101]);    MULADD(at[24], at[100]);    MULADD(at[25], at[99]);    MULADD(at[26], at[98]);    MULADD(at[27], at[97]);    MULADD(at[28], at[96]);    MULADD(at[29], at[95]);    MULADD(at[30], at[94]);    MULADD(at[31], at[93]);    MULADD(at[32], at[92]);    MULADD(at[33], at[91]);    MULADD(at[34], at[90]);    MULADD(at[35], at[89]);    MULADD(at[36], at[88]);    MULADD(at[37], at[87]);    MULADD(at[38], at[86]);    MULADD(at[39], at[85]);    MULADD(at[40], at[84]);    MULADD(at[41], at[83]);    MULADD(at[42], at[82]);    MULADD(at[43], at[81]);    MULADD(at[44], at[80]);    MULADD(at[45], at[79]);    MULADD(at[46], at[78]);    MULADD(at[47], at[77]);    MULADD(at[48], at[76]);    MULADD(at[49], at[75]);    MULADD(at[50], at[74]);    MULADD(at[51], at[73]);    MULADD(at[52], at[72]);    MULADD(at[53], at[71]);    MULADD(at[54], at[70]);    MULADD(at[55], at[69]);    MULADD(at[56], at[68]);    MULADD(at[57], at[67]);    MULADD(at[58], at[66]);    MULADD(at[59], at[65]);    MULADD(at[60], at[64]); 
7511
+   COMBA_STORE(C->dp[60]);
7512
+   /* 61 */
7513
+   COMBA_FORWARD;
7514
+   MULADD(at[0], at[125]);    MULADD(at[1], at[124]);    MULADD(at[2], at[123]);    MULADD(at[3], at[122]);    MULADD(at[4], at[121]);    MULADD(at[5], at[120]);    MULADD(at[6], at[119]);    MULADD(at[7], at[118]);    MULADD(at[8], at[117]);    MULADD(at[9], at[116]);    MULADD(at[10], at[115]);    MULADD(at[11], at[114]);    MULADD(at[12], at[113]);    MULADD(at[13], at[112]);    MULADD(at[14], at[111]);    MULADD(at[15], at[110]);    MULADD(at[16], at[109]);    MULADD(at[17], at[108]);    MULADD(at[18], at[107]);    MULADD(at[19], at[106]);    MULADD(at[20], at[105]);    MULADD(at[21], at[104]);    MULADD(at[22], at[103]);    MULADD(at[23], at[102]);    MULADD(at[24], at[101]);    MULADD(at[25], at[100]);    MULADD(at[26], at[99]);    MULADD(at[27], at[98]);    MULADD(at[28], at[97]);    MULADD(at[29], at[96]);    MULADD(at[30], at[95]);    MULADD(at[31], at[94]);    MULADD(at[32], at[93]);    MULADD(at[33], at[92]);    MULADD(at[34], at[91]);    MULADD(at[35], at[90]);    MULADD(at[36], at[89]);    MULADD(at[37], at[88]);    MULADD(at[38], at[87]);    MULADD(at[39], at[86]);    MULADD(at[40], at[85]);    MULADD(at[41], at[84]);    MULADD(at[42], at[83]);    MULADD(at[43], at[82]);    MULADD(at[44], at[81]);    MULADD(at[45], at[80]);    MULADD(at[46], at[79]);    MULADD(at[47], at[78]);    MULADD(at[48], at[77]);    MULADD(at[49], at[76]);    MULADD(at[50], at[75]);    MULADD(at[51], at[74]);    MULADD(at[52], at[73]);    MULADD(at[53], at[72]);    MULADD(at[54], at[71]);    MULADD(at[55], at[70]);    MULADD(at[56], at[69]);    MULADD(at[57], at[68]);    MULADD(at[58], at[67]);    MULADD(at[59], at[66]);    MULADD(at[60], at[65]);    MULADD(at[61], at[64]); 
7515
+   COMBA_STORE(C->dp[61]);
7516
+   /* 62 */
7517
+   COMBA_FORWARD;
7518
+   MULADD(at[0], at[126]);    MULADD(at[1], at[125]);    MULADD(at[2], at[124]);    MULADD(at[3], at[123]);    MULADD(at[4], at[122]);    MULADD(at[5], at[121]);    MULADD(at[6], at[120]);    MULADD(at[7], at[119]);    MULADD(at[8], at[118]);    MULADD(at[9], at[117]);    MULADD(at[10], at[116]);    MULADD(at[11], at[115]);    MULADD(at[12], at[114]);    MULADD(at[13], at[113]);    MULADD(at[14], at[112]);    MULADD(at[15], at[111]);    MULADD(at[16], at[110]);    MULADD(at[17], at[109]);    MULADD(at[18], at[108]);    MULADD(at[19], at[107]);    MULADD(at[20], at[106]);    MULADD(at[21], at[105]);    MULADD(at[22], at[104]);    MULADD(at[23], at[103]);    MULADD(at[24], at[102]);    MULADD(at[25], at[101]);    MULADD(at[26], at[100]);    MULADD(at[27], at[99]);    MULADD(at[28], at[98]);    MULADD(at[29], at[97]);    MULADD(at[30], at[96]);    MULADD(at[31], at[95]);    MULADD(at[32], at[94]);    MULADD(at[33], at[93]);    MULADD(at[34], at[92]);    MULADD(at[35], at[91]);    MULADD(at[36], at[90]);    MULADD(at[37], at[89]);    MULADD(at[38], at[88]);    MULADD(at[39], at[87]);    MULADD(at[40], at[86]);    MULADD(at[41], at[85]);    MULADD(at[42], at[84]);    MULADD(at[43], at[83]);    MULADD(at[44], at[82]);    MULADD(at[45], at[81]);    MULADD(at[46], at[80]);    MULADD(at[47], at[79]);    MULADD(at[48], at[78]);    MULADD(at[49], at[77]);    MULADD(at[50], at[76]);    MULADD(at[51], at[75]);    MULADD(at[52], at[74]);    MULADD(at[53], at[73]);    MULADD(at[54], at[72]);    MULADD(at[55], at[71]);    MULADD(at[56], at[70]);    MULADD(at[57], at[69]);    MULADD(at[58], at[68]);    MULADD(at[59], at[67]);    MULADD(at[60], at[66]);    MULADD(at[61], at[65]);    MULADD(at[62], at[64]); 
7519
+   COMBA_STORE(C->dp[62]);
7520
+   /* 63 */
7521
+   COMBA_FORWARD;
7522
+   MULADD(at[0], at[127]);    MULADD(at[1], at[126]);    MULADD(at[2], at[125]);    MULADD(at[3], at[124]);    MULADD(at[4], at[123]);    MULADD(at[5], at[122]);    MULADD(at[6], at[121]);    MULADD(at[7], at[120]);    MULADD(at[8], at[119]);    MULADD(at[9], at[118]);    MULADD(at[10], at[117]);    MULADD(at[11], at[116]);    MULADD(at[12], at[115]);    MULADD(at[13], at[114]);    MULADD(at[14], at[113]);    MULADD(at[15], at[112]);    MULADD(at[16], at[111]);    MULADD(at[17], at[110]);    MULADD(at[18], at[109]);    MULADD(at[19], at[108]);    MULADD(at[20], at[107]);    MULADD(at[21], at[106]);    MULADD(at[22], at[105]);    MULADD(at[23], at[104]);    MULADD(at[24], at[103]);    MULADD(at[25], at[102]);    MULADD(at[26], at[101]);    MULADD(at[27], at[100]);    MULADD(at[28], at[99]);    MULADD(at[29], at[98]);    MULADD(at[30], at[97]);    MULADD(at[31], at[96]);    MULADD(at[32], at[95]);    MULADD(at[33], at[94]);    MULADD(at[34], at[93]);    MULADD(at[35], at[92]);    MULADD(at[36], at[91]);    MULADD(at[37], at[90]);    MULADD(at[38], at[89]);    MULADD(at[39], at[88]);    MULADD(at[40], at[87]);    MULADD(at[41], at[86]);    MULADD(at[42], at[85]);    MULADD(at[43], at[84]);    MULADD(at[44], at[83]);    MULADD(at[45], at[82]);    MULADD(at[46], at[81]);    MULADD(at[47], at[80]);    MULADD(at[48], at[79]);    MULADD(at[49], at[78]);    MULADD(at[50], at[77]);    MULADD(at[51], at[76]);    MULADD(at[52], at[75]);    MULADD(at[53], at[74]);    MULADD(at[54], at[73]);    MULADD(at[55], at[72]);    MULADD(at[56], at[71]);    MULADD(at[57], at[70]);    MULADD(at[58], at[69]);    MULADD(at[59], at[68]);    MULADD(at[60], at[67]);    MULADD(at[61], at[66]);    MULADD(at[62], at[65]);    MULADD(at[63], at[64]); 
7523
+   COMBA_STORE(C->dp[63]);
7524
+   /* 64 */
7525
+   COMBA_FORWARD;
7526
+   MULADD(at[1], at[127]);    MULADD(at[2], at[126]);    MULADD(at[3], at[125]);    MULADD(at[4], at[124]);    MULADD(at[5], at[123]);    MULADD(at[6], at[122]);    MULADD(at[7], at[121]);    MULADD(at[8], at[120]);    MULADD(at[9], at[119]);    MULADD(at[10], at[118]);    MULADD(at[11], at[117]);    MULADD(at[12], at[116]);    MULADD(at[13], at[115]);    MULADD(at[14], at[114]);    MULADD(at[15], at[113]);    MULADD(at[16], at[112]);    MULADD(at[17], at[111]);    MULADD(at[18], at[110]);    MULADD(at[19], at[109]);    MULADD(at[20], at[108]);    MULADD(at[21], at[107]);    MULADD(at[22], at[106]);    MULADD(at[23], at[105]);    MULADD(at[24], at[104]);    MULADD(at[25], at[103]);    MULADD(at[26], at[102]);    MULADD(at[27], at[101]);    MULADD(at[28], at[100]);    MULADD(at[29], at[99]);    MULADD(at[30], at[98]);    MULADD(at[31], at[97]);    MULADD(at[32], at[96]);    MULADD(at[33], at[95]);    MULADD(at[34], at[94]);    MULADD(at[35], at[93]);    MULADD(at[36], at[92]);    MULADD(at[37], at[91]);    MULADD(at[38], at[90]);    MULADD(at[39], at[89]);    MULADD(at[40], at[88]);    MULADD(at[41], at[87]);    MULADD(at[42], at[86]);    MULADD(at[43], at[85]);    MULADD(at[44], at[84]);    MULADD(at[45], at[83]);    MULADD(at[46], at[82]);    MULADD(at[47], at[81]);    MULADD(at[48], at[80]);    MULADD(at[49], at[79]);    MULADD(at[50], at[78]);    MULADD(at[51], at[77]);    MULADD(at[52], at[76]);    MULADD(at[53], at[75]);    MULADD(at[54], at[74]);    MULADD(at[55], at[73]);    MULADD(at[56], at[72]);    MULADD(at[57], at[71]);    MULADD(at[58], at[70]);    MULADD(at[59], at[69]);    MULADD(at[60], at[68]);    MULADD(at[61], at[67]);    MULADD(at[62], at[66]);    MULADD(at[63], at[65]); 
7527
+   COMBA_STORE(C->dp[64]);
7528
+   /* 65 */
7529
+   COMBA_FORWARD;
7530
+   MULADD(at[2], at[127]);    MULADD(at[3], at[126]);    MULADD(at[4], at[125]);    MULADD(at[5], at[124]);    MULADD(at[6], at[123]);    MULADD(at[7], at[122]);    MULADD(at[8], at[121]);    MULADD(at[9], at[120]);    MULADD(at[10], at[119]);    MULADD(at[11], at[118]);    MULADD(at[12], at[117]);    MULADD(at[13], at[116]);    MULADD(at[14], at[115]);    MULADD(at[15], at[114]);    MULADD(at[16], at[113]);    MULADD(at[17], at[112]);    MULADD(at[18], at[111]);    MULADD(at[19], at[110]);    MULADD(at[20], at[109]);    MULADD(at[21], at[108]);    MULADD(at[22], at[107]);    MULADD(at[23], at[106]);    MULADD(at[24], at[105]);    MULADD(at[25], at[104]);    MULADD(at[26], at[103]);    MULADD(at[27], at[102]);    MULADD(at[28], at[101]);    MULADD(at[29], at[100]);    MULADD(at[30], at[99]);    MULADD(at[31], at[98]);    MULADD(at[32], at[97]);    MULADD(at[33], at[96]);    MULADD(at[34], at[95]);    MULADD(at[35], at[94]);    MULADD(at[36], at[93]);    MULADD(at[37], at[92]);    MULADD(at[38], at[91]);    MULADD(at[39], at[90]);    MULADD(at[40], at[89]);    MULADD(at[41], at[88]);    MULADD(at[42], at[87]);    MULADD(at[43], at[86]);    MULADD(at[44], at[85]);    MULADD(at[45], at[84]);    MULADD(at[46], at[83]);    MULADD(at[47], at[82]);    MULADD(at[48], at[81]);    MULADD(at[49], at[80]);    MULADD(at[50], at[79]);    MULADD(at[51], at[78]);    MULADD(at[52], at[77]);    MULADD(at[53], at[76]);    MULADD(at[54], at[75]);    MULADD(at[55], at[74]);    MULADD(at[56], at[73]);    MULADD(at[57], at[72]);    MULADD(at[58], at[71]);    MULADD(at[59], at[70]);    MULADD(at[60], at[69]);    MULADD(at[61], at[68]);    MULADD(at[62], at[67]);    MULADD(at[63], at[66]); 
7531
+   COMBA_STORE(C->dp[65]);
7532
+   /* 66 */
7533
+   COMBA_FORWARD;
7534
+   MULADD(at[3], at[127]);    MULADD(at[4], at[126]);    MULADD(at[5], at[125]);    MULADD(at[6], at[124]);    MULADD(at[7], at[123]);    MULADD(at[8], at[122]);    MULADD(at[9], at[121]);    MULADD(at[10], at[120]);    MULADD(at[11], at[119]);    MULADD(at[12], at[118]);    MULADD(at[13], at[117]);    MULADD(at[14], at[116]);    MULADD(at[15], at[115]);    MULADD(at[16], at[114]);    MULADD(at[17], at[113]);    MULADD(at[18], at[112]);    MULADD(at[19], at[111]);    MULADD(at[20], at[110]);    MULADD(at[21], at[109]);    MULADD(at[22], at[108]);    MULADD(at[23], at[107]);    MULADD(at[24], at[106]);    MULADD(at[25], at[105]);    MULADD(at[26], at[104]);    MULADD(at[27], at[103]);    MULADD(at[28], at[102]);    MULADD(at[29], at[101]);    MULADD(at[30], at[100]);    MULADD(at[31], at[99]);    MULADD(at[32], at[98]);    MULADD(at[33], at[97]);    MULADD(at[34], at[96]);    MULADD(at[35], at[95]);    MULADD(at[36], at[94]);    MULADD(at[37], at[93]);    MULADD(at[38], at[92]);    MULADD(at[39], at[91]);    MULADD(at[40], at[90]);    MULADD(at[41], at[89]);    MULADD(at[42], at[88]);    MULADD(at[43], at[87]);    MULADD(at[44], at[86]);    MULADD(at[45], at[85]);    MULADD(at[46], at[84]);    MULADD(at[47], at[83]);    MULADD(at[48], at[82]);    MULADD(at[49], at[81]);    MULADD(at[50], at[80]);    MULADD(at[51], at[79]);    MULADD(at[52], at[78]);    MULADD(at[53], at[77]);    MULADD(at[54], at[76]);    MULADD(at[55], at[75]);    MULADD(at[56], at[74]);    MULADD(at[57], at[73]);    MULADD(at[58], at[72]);    MULADD(at[59], at[71]);    MULADD(at[60], at[70]);    MULADD(at[61], at[69]);    MULADD(at[62], at[68]);    MULADD(at[63], at[67]); 
7535
+   COMBA_STORE(C->dp[66]);
7536
+   /* 67 */
7537
+   COMBA_FORWARD;
7538
+   MULADD(at[4], at[127]);    MULADD(at[5], at[126]);    MULADD(at[6], at[125]);    MULADD(at[7], at[124]);    MULADD(at[8], at[123]);    MULADD(at[9], at[122]);    MULADD(at[10], at[121]);    MULADD(at[11], at[120]);    MULADD(at[12], at[119]);    MULADD(at[13], at[118]);    MULADD(at[14], at[117]);    MULADD(at[15], at[116]);    MULADD(at[16], at[115]);    MULADD(at[17], at[114]);    MULADD(at[18], at[113]);    MULADD(at[19], at[112]);    MULADD(at[20], at[111]);    MULADD(at[21], at[110]);    MULADD(at[22], at[109]);    MULADD(at[23], at[108]);    MULADD(at[24], at[107]);    MULADD(at[25], at[106]);    MULADD(at[26], at[105]);    MULADD(at[27], at[104]);    MULADD(at[28], at[103]);    MULADD(at[29], at[102]);    MULADD(at[30], at[101]);    MULADD(at[31], at[100]);    MULADD(at[32], at[99]);    MULADD(at[33], at[98]);    MULADD(at[34], at[97]);    MULADD(at[35], at[96]);    MULADD(at[36], at[95]);    MULADD(at[37], at[94]);    MULADD(at[38], at[93]);    MULADD(at[39], at[92]);    MULADD(at[40], at[91]);    MULADD(at[41], at[90]);    MULADD(at[42], at[89]);    MULADD(at[43], at[88]);    MULADD(at[44], at[87]);    MULADD(at[45], at[86]);    MULADD(at[46], at[85]);    MULADD(at[47], at[84]);    MULADD(at[48], at[83]);    MULADD(at[49], at[82]);    MULADD(at[50], at[81]);    MULADD(at[51], at[80]);    MULADD(at[52], at[79]);    MULADD(at[53], at[78]);    MULADD(at[54], at[77]);    MULADD(at[55], at[76]);    MULADD(at[56], at[75]);    MULADD(at[57], at[74]);    MULADD(at[58], at[73]);    MULADD(at[59], at[72]);    MULADD(at[60], at[71]);    MULADD(at[61], at[70]);    MULADD(at[62], at[69]);    MULADD(at[63], at[68]); 
7539
+   COMBA_STORE(C->dp[67]);
7540
+   /* 68 */
7541
+   COMBA_FORWARD;
7542
+   MULADD(at[5], at[127]);    MULADD(at[6], at[126]);    MULADD(at[7], at[125]);    MULADD(at[8], at[124]);    MULADD(at[9], at[123]);    MULADD(at[10], at[122]);    MULADD(at[11], at[121]);    MULADD(at[12], at[120]);    MULADD(at[13], at[119]);    MULADD(at[14], at[118]);    MULADD(at[15], at[117]);    MULADD(at[16], at[116]);    MULADD(at[17], at[115]);    MULADD(at[18], at[114]);    MULADD(at[19], at[113]);    MULADD(at[20], at[112]);    MULADD(at[21], at[111]);    MULADD(at[22], at[110]);    MULADD(at[23], at[109]);    MULADD(at[24], at[108]);    MULADD(at[25], at[107]);    MULADD(at[26], at[106]);    MULADD(at[27], at[105]);    MULADD(at[28], at[104]);    MULADD(at[29], at[103]);    MULADD(at[30], at[102]);    MULADD(at[31], at[101]);    MULADD(at[32], at[100]);    MULADD(at[33], at[99]);    MULADD(at[34], at[98]);    MULADD(at[35], at[97]);    MULADD(at[36], at[96]);    MULADD(at[37], at[95]);    MULADD(at[38], at[94]);    MULADD(at[39], at[93]);    MULADD(at[40], at[92]);    MULADD(at[41], at[91]);    MULADD(at[42], at[90]);    MULADD(at[43], at[89]);    MULADD(at[44], at[88]);    MULADD(at[45], at[87]);    MULADD(at[46], at[86]);    MULADD(at[47], at[85]);    MULADD(at[48], at[84]);    MULADD(at[49], at[83]);    MULADD(at[50], at[82]);    MULADD(at[51], at[81]);    MULADD(at[52], at[80]);    MULADD(at[53], at[79]);    MULADD(at[54], at[78]);    MULADD(at[55], at[77]);    MULADD(at[56], at[76]);    MULADD(at[57], at[75]);    MULADD(at[58], at[74]);    MULADD(at[59], at[73]);    MULADD(at[60], at[72]);    MULADD(at[61], at[71]);    MULADD(at[62], at[70]);    MULADD(at[63], at[69]); 
7543
+   COMBA_STORE(C->dp[68]);
7544
+   /* 69 */
7545
+   COMBA_FORWARD;
7546
+   MULADD(at[6], at[127]);    MULADD(at[7], at[126]);    MULADD(at[8], at[125]);    MULADD(at[9], at[124]);    MULADD(at[10], at[123]);    MULADD(at[11], at[122]);    MULADD(at[12], at[121]);    MULADD(at[13], at[120]);    MULADD(at[14], at[119]);    MULADD(at[15], at[118]);    MULADD(at[16], at[117]);    MULADD(at[17], at[116]);    MULADD(at[18], at[115]);    MULADD(at[19], at[114]);    MULADD(at[20], at[113]);    MULADD(at[21], at[112]);    MULADD(at[22], at[111]);    MULADD(at[23], at[110]);    MULADD(at[24], at[109]);    MULADD(at[25], at[108]);    MULADD(at[26], at[107]);    MULADD(at[27], at[106]);    MULADD(at[28], at[105]);    MULADD(at[29], at[104]);    MULADD(at[30], at[103]);    MULADD(at[31], at[102]);    MULADD(at[32], at[101]);    MULADD(at[33], at[100]);    MULADD(at[34], at[99]);    MULADD(at[35], at[98]);    MULADD(at[36], at[97]);    MULADD(at[37], at[96]);    MULADD(at[38], at[95]);    MULADD(at[39], at[94]);    MULADD(at[40], at[93]);    MULADD(at[41], at[92]);    MULADD(at[42], at[91]);    MULADD(at[43], at[90]);    MULADD(at[44], at[89]);    MULADD(at[45], at[88]);    MULADD(at[46], at[87]);    MULADD(at[47], at[86]);    MULADD(at[48], at[85]);    MULADD(at[49], at[84]);    MULADD(at[50], at[83]);    MULADD(at[51], at[82]);    MULADD(at[52], at[81]);    MULADD(at[53], at[80]);    MULADD(at[54], at[79]);    MULADD(at[55], at[78]);    MULADD(at[56], at[77]);    MULADD(at[57], at[76]);    MULADD(at[58], at[75]);    MULADD(at[59], at[74]);    MULADD(at[60], at[73]);    MULADD(at[61], at[72]);    MULADD(at[62], at[71]);    MULADD(at[63], at[70]); 
7547
+   COMBA_STORE(C->dp[69]);
7548
+   /* 70 */
7549
+   COMBA_FORWARD;
7550
+   MULADD(at[7], at[127]);    MULADD(at[8], at[126]);    MULADD(at[9], at[125]);    MULADD(at[10], at[124]);    MULADD(at[11], at[123]);    MULADD(at[12], at[122]);    MULADD(at[13], at[121]);    MULADD(at[14], at[120]);    MULADD(at[15], at[119]);    MULADD(at[16], at[118]);    MULADD(at[17], at[117]);    MULADD(at[18], at[116]);    MULADD(at[19], at[115]);    MULADD(at[20], at[114]);    MULADD(at[21], at[113]);    MULADD(at[22], at[112]);    MULADD(at[23], at[111]);    MULADD(at[24], at[110]);    MULADD(at[25], at[109]);    MULADD(at[26], at[108]);    MULADD(at[27], at[107]);    MULADD(at[28], at[106]);    MULADD(at[29], at[105]);    MULADD(at[30], at[104]);    MULADD(at[31], at[103]);    MULADD(at[32], at[102]);    MULADD(at[33], at[101]);    MULADD(at[34], at[100]);    MULADD(at[35], at[99]);    MULADD(at[36], at[98]);    MULADD(at[37], at[97]);    MULADD(at[38], at[96]);    MULADD(at[39], at[95]);    MULADD(at[40], at[94]);    MULADD(at[41], at[93]);    MULADD(at[42], at[92]);    MULADD(at[43], at[91]);    MULADD(at[44], at[90]);    MULADD(at[45], at[89]);    MULADD(at[46], at[88]);    MULADD(at[47], at[87]);    MULADD(at[48], at[86]);    MULADD(at[49], at[85]);    MULADD(at[50], at[84]);    MULADD(at[51], at[83]);    MULADD(at[52], at[82]);    MULADD(at[53], at[81]);    MULADD(at[54], at[80]);    MULADD(at[55], at[79]);    MULADD(at[56], at[78]);    MULADD(at[57], at[77]);    MULADD(at[58], at[76]);    MULADD(at[59], at[75]);    MULADD(at[60], at[74]);    MULADD(at[61], at[73]);    MULADD(at[62], at[72]);    MULADD(at[63], at[71]); 
7551
+   COMBA_STORE(C->dp[70]);
7552
+   /* 71 */
7553
+   COMBA_FORWARD;
7554
+   MULADD(at[8], at[127]);    MULADD(at[9], at[126]);    MULADD(at[10], at[125]);    MULADD(at[11], at[124]);    MULADD(at[12], at[123]);    MULADD(at[13], at[122]);    MULADD(at[14], at[121]);    MULADD(at[15], at[120]);    MULADD(at[16], at[119]);    MULADD(at[17], at[118]);    MULADD(at[18], at[117]);    MULADD(at[19], at[116]);    MULADD(at[20], at[115]);    MULADD(at[21], at[114]);    MULADD(at[22], at[113]);    MULADD(at[23], at[112]);    MULADD(at[24], at[111]);    MULADD(at[25], at[110]);    MULADD(at[26], at[109]);    MULADD(at[27], at[108]);    MULADD(at[28], at[107]);    MULADD(at[29], at[106]);    MULADD(at[30], at[105]);    MULADD(at[31], at[104]);    MULADD(at[32], at[103]);    MULADD(at[33], at[102]);    MULADD(at[34], at[101]);    MULADD(at[35], at[100]);    MULADD(at[36], at[99]);    MULADD(at[37], at[98]);    MULADD(at[38], at[97]);    MULADD(at[39], at[96]);    MULADD(at[40], at[95]);    MULADD(at[41], at[94]);    MULADD(at[42], at[93]);    MULADD(at[43], at[92]);    MULADD(at[44], at[91]);    MULADD(at[45], at[90]);    MULADD(at[46], at[89]);    MULADD(at[47], at[88]);    MULADD(at[48], at[87]);    MULADD(at[49], at[86]);    MULADD(at[50], at[85]);    MULADD(at[51], at[84]);    MULADD(at[52], at[83]);    MULADD(at[53], at[82]);    MULADD(at[54], at[81]);    MULADD(at[55], at[80]);    MULADD(at[56], at[79]);    MULADD(at[57], at[78]);    MULADD(at[58], at[77]);    MULADD(at[59], at[76]);    MULADD(at[60], at[75]);    MULADD(at[61], at[74]);    MULADD(at[62], at[73]);    MULADD(at[63], at[72]); 
7555
+   COMBA_STORE(C->dp[71]);
7556
+   /* 72 */
7557
+   COMBA_FORWARD;
7558
+   MULADD(at[9], at[127]);    MULADD(at[10], at[126]);    MULADD(at[11], at[125]);    MULADD(at[12], at[124]);    MULADD(at[13], at[123]);    MULADD(at[14], at[122]);    MULADD(at[15], at[121]);    MULADD(at[16], at[120]);    MULADD(at[17], at[119]);    MULADD(at[18], at[118]);    MULADD(at[19], at[117]);    MULADD(at[20], at[116]);    MULADD(at[21], at[115]);    MULADD(at[22], at[114]);    MULADD(at[23], at[113]);    MULADD(at[24], at[112]);    MULADD(at[25], at[111]);    MULADD(at[26], at[110]);    MULADD(at[27], at[109]);    MULADD(at[28], at[108]);    MULADD(at[29], at[107]);    MULADD(at[30], at[106]);    MULADD(at[31], at[105]);    MULADD(at[32], at[104]);    MULADD(at[33], at[103]);    MULADD(at[34], at[102]);    MULADD(at[35], at[101]);    MULADD(at[36], at[100]);    MULADD(at[37], at[99]);    MULADD(at[38], at[98]);    MULADD(at[39], at[97]);    MULADD(at[40], at[96]);    MULADD(at[41], at[95]);    MULADD(at[42], at[94]);    MULADD(at[43], at[93]);    MULADD(at[44], at[92]);    MULADD(at[45], at[91]);    MULADD(at[46], at[90]);    MULADD(at[47], at[89]);    MULADD(at[48], at[88]);    MULADD(at[49], at[87]);    MULADD(at[50], at[86]);    MULADD(at[51], at[85]);    MULADD(at[52], at[84]);    MULADD(at[53], at[83]);    MULADD(at[54], at[82]);    MULADD(at[55], at[81]);    MULADD(at[56], at[80]);    MULADD(at[57], at[79]);    MULADD(at[58], at[78]);    MULADD(at[59], at[77]);    MULADD(at[60], at[76]);    MULADD(at[61], at[75]);    MULADD(at[62], at[74]);    MULADD(at[63], at[73]); 
7559
+   COMBA_STORE(C->dp[72]);
7560
+   /* 73 */
7561
+   COMBA_FORWARD;
7562
+   MULADD(at[10], at[127]);    MULADD(at[11], at[126]);    MULADD(at[12], at[125]);    MULADD(at[13], at[124]);    MULADD(at[14], at[123]);    MULADD(at[15], at[122]);    MULADD(at[16], at[121]);    MULADD(at[17], at[120]);    MULADD(at[18], at[119]);    MULADD(at[19], at[118]);    MULADD(at[20], at[117]);    MULADD(at[21], at[116]);    MULADD(at[22], at[115]);    MULADD(at[23], at[114]);    MULADD(at[24], at[113]);    MULADD(at[25], at[112]);    MULADD(at[26], at[111]);    MULADD(at[27], at[110]);    MULADD(at[28], at[109]);    MULADD(at[29], at[108]);    MULADD(at[30], at[107]);    MULADD(at[31], at[106]);    MULADD(at[32], at[105]);    MULADD(at[33], at[104]);    MULADD(at[34], at[103]);    MULADD(at[35], at[102]);    MULADD(at[36], at[101]);    MULADD(at[37], at[100]);    MULADD(at[38], at[99]);    MULADD(at[39], at[98]);    MULADD(at[40], at[97]);    MULADD(at[41], at[96]);    MULADD(at[42], at[95]);    MULADD(at[43], at[94]);    MULADD(at[44], at[93]);    MULADD(at[45], at[92]);    MULADD(at[46], at[91]);    MULADD(at[47], at[90]);    MULADD(at[48], at[89]);    MULADD(at[49], at[88]);    MULADD(at[50], at[87]);    MULADD(at[51], at[86]);    MULADD(at[52], at[85]);    MULADD(at[53], at[84]);    MULADD(at[54], at[83]);    MULADD(at[55], at[82]);    MULADD(at[56], at[81]);    MULADD(at[57], at[80]);    MULADD(at[58], at[79]);    MULADD(at[59], at[78]);    MULADD(at[60], at[77]);    MULADD(at[61], at[76]);    MULADD(at[62], at[75]);    MULADD(at[63], at[74]); 
7563
+   COMBA_STORE(C->dp[73]);
7564
+   /* 74 */
7565
+   COMBA_FORWARD;
7566
+   MULADD(at[11], at[127]);    MULADD(at[12], at[126]);    MULADD(at[13], at[125]);    MULADD(at[14], at[124]);    MULADD(at[15], at[123]);    MULADD(at[16], at[122]);    MULADD(at[17], at[121]);    MULADD(at[18], at[120]);    MULADD(at[19], at[119]);    MULADD(at[20], at[118]);    MULADD(at[21], at[117]);    MULADD(at[22], at[116]);    MULADD(at[23], at[115]);    MULADD(at[24], at[114]);    MULADD(at[25], at[113]);    MULADD(at[26], at[112]);    MULADD(at[27], at[111]);    MULADD(at[28], at[110]);    MULADD(at[29], at[109]);    MULADD(at[30], at[108]);    MULADD(at[31], at[107]);    MULADD(at[32], at[106]);    MULADD(at[33], at[105]);    MULADD(at[34], at[104]);    MULADD(at[35], at[103]);    MULADD(at[36], at[102]);    MULADD(at[37], at[101]);    MULADD(at[38], at[100]);    MULADD(at[39], at[99]);    MULADD(at[40], at[98]);    MULADD(at[41], at[97]);    MULADD(at[42], at[96]);    MULADD(at[43], at[95]);    MULADD(at[44], at[94]);    MULADD(at[45], at[93]);    MULADD(at[46], at[92]);    MULADD(at[47], at[91]);    MULADD(at[48], at[90]);    MULADD(at[49], at[89]);    MULADD(at[50], at[88]);    MULADD(at[51], at[87]);    MULADD(at[52], at[86]);    MULADD(at[53], at[85]);    MULADD(at[54], at[84]);    MULADD(at[55], at[83]);    MULADD(at[56], at[82]);    MULADD(at[57], at[81]);    MULADD(at[58], at[80]);    MULADD(at[59], at[79]);    MULADD(at[60], at[78]);    MULADD(at[61], at[77]);    MULADD(at[62], at[76]);    MULADD(at[63], at[75]); 
7567
+   COMBA_STORE(C->dp[74]);
7568
+   /* 75 */
7569
+   COMBA_FORWARD;
7570
+   MULADD(at[12], at[127]);    MULADD(at[13], at[126]);    MULADD(at[14], at[125]);    MULADD(at[15], at[124]);    MULADD(at[16], at[123]);    MULADD(at[17], at[122]);    MULADD(at[18], at[121]);    MULADD(at[19], at[120]);    MULADD(at[20], at[119]);    MULADD(at[21], at[118]);    MULADD(at[22], at[117]);    MULADD(at[23], at[116]);    MULADD(at[24], at[115]);    MULADD(at[25], at[114]);    MULADD(at[26], at[113]);    MULADD(at[27], at[112]);    MULADD(at[28], at[111]);    MULADD(at[29], at[110]);    MULADD(at[30], at[109]);    MULADD(at[31], at[108]);    MULADD(at[32], at[107]);    MULADD(at[33], at[106]);    MULADD(at[34], at[105]);    MULADD(at[35], at[104]);    MULADD(at[36], at[103]);    MULADD(at[37], at[102]);    MULADD(at[38], at[101]);    MULADD(at[39], at[100]);    MULADD(at[40], at[99]);    MULADD(at[41], at[98]);    MULADD(at[42], at[97]);    MULADD(at[43], at[96]);    MULADD(at[44], at[95]);    MULADD(at[45], at[94]);    MULADD(at[46], at[93]);    MULADD(at[47], at[92]);    MULADD(at[48], at[91]);    MULADD(at[49], at[90]);    MULADD(at[50], at[89]);    MULADD(at[51], at[88]);    MULADD(at[52], at[87]);    MULADD(at[53], at[86]);    MULADD(at[54], at[85]);    MULADD(at[55], at[84]);    MULADD(at[56], at[83]);    MULADD(at[57], at[82]);    MULADD(at[58], at[81]);    MULADD(at[59], at[80]);    MULADD(at[60], at[79]);    MULADD(at[61], at[78]);    MULADD(at[62], at[77]);    MULADD(at[63], at[76]); 
7571
+   COMBA_STORE(C->dp[75]);
7572
+   /* 76 */
7573
+   COMBA_FORWARD;
7574
+   MULADD(at[13], at[127]);    MULADD(at[14], at[126]);    MULADD(at[15], at[125]);    MULADD(at[16], at[124]);    MULADD(at[17], at[123]);    MULADD(at[18], at[122]);    MULADD(at[19], at[121]);    MULADD(at[20], at[120]);    MULADD(at[21], at[119]);    MULADD(at[22], at[118]);    MULADD(at[23], at[117]);    MULADD(at[24], at[116]);    MULADD(at[25], at[115]);    MULADD(at[26], at[114]);    MULADD(at[27], at[113]);    MULADD(at[28], at[112]);    MULADD(at[29], at[111]);    MULADD(at[30], at[110]);    MULADD(at[31], at[109]);    MULADD(at[32], at[108]);    MULADD(at[33], at[107]);    MULADD(at[34], at[106]);    MULADD(at[35], at[105]);    MULADD(at[36], at[104]);    MULADD(at[37], at[103]);    MULADD(at[38], at[102]);    MULADD(at[39], at[101]);    MULADD(at[40], at[100]);    MULADD(at[41], at[99]);    MULADD(at[42], at[98]);    MULADD(at[43], at[97]);    MULADD(at[44], at[96]);    MULADD(at[45], at[95]);    MULADD(at[46], at[94]);    MULADD(at[47], at[93]);    MULADD(at[48], at[92]);    MULADD(at[49], at[91]);    MULADD(at[50], at[90]);    MULADD(at[51], at[89]);    MULADD(at[52], at[88]);    MULADD(at[53], at[87]);    MULADD(at[54], at[86]);    MULADD(at[55], at[85]);    MULADD(at[56], at[84]);    MULADD(at[57], at[83]);    MULADD(at[58], at[82]);    MULADD(at[59], at[81]);    MULADD(at[60], at[80]);    MULADD(at[61], at[79]);    MULADD(at[62], at[78]);    MULADD(at[63], at[77]); 
7575
+   COMBA_STORE(C->dp[76]);
7576
+   /* 77 */
7577
+   COMBA_FORWARD;
7578
+   MULADD(at[14], at[127]);    MULADD(at[15], at[126]);    MULADD(at[16], at[125]);    MULADD(at[17], at[124]);    MULADD(at[18], at[123]);    MULADD(at[19], at[122]);    MULADD(at[20], at[121]);    MULADD(at[21], at[120]);    MULADD(at[22], at[119]);    MULADD(at[23], at[118]);    MULADD(at[24], at[117]);    MULADD(at[25], at[116]);    MULADD(at[26], at[115]);    MULADD(at[27], at[114]);    MULADD(at[28], at[113]);    MULADD(at[29], at[112]);    MULADD(at[30], at[111]);    MULADD(at[31], at[110]);    MULADD(at[32], at[109]);    MULADD(at[33], at[108]);    MULADD(at[34], at[107]);    MULADD(at[35], at[106]);    MULADD(at[36], at[105]);    MULADD(at[37], at[104]);    MULADD(at[38], at[103]);    MULADD(at[39], at[102]);    MULADD(at[40], at[101]);    MULADD(at[41], at[100]);    MULADD(at[42], at[99]);    MULADD(at[43], at[98]);    MULADD(at[44], at[97]);    MULADD(at[45], at[96]);    MULADD(at[46], at[95]);    MULADD(at[47], at[94]);    MULADD(at[48], at[93]);    MULADD(at[49], at[92]);    MULADD(at[50], at[91]);    MULADD(at[51], at[90]);    MULADD(at[52], at[89]);    MULADD(at[53], at[88]);    MULADD(at[54], at[87]);    MULADD(at[55], at[86]);    MULADD(at[56], at[85]);    MULADD(at[57], at[84]);    MULADD(at[58], at[83]);    MULADD(at[59], at[82]);    MULADD(at[60], at[81]);    MULADD(at[61], at[80]);    MULADD(at[62], at[79]);    MULADD(at[63], at[78]); 
7579
+   COMBA_STORE(C->dp[77]);
7580
+   /* 78 */
7581
+   COMBA_FORWARD;
7582
+   MULADD(at[15], at[127]);    MULADD(at[16], at[126]);    MULADD(at[17], at[125]);    MULADD(at[18], at[124]);    MULADD(at[19], at[123]);    MULADD(at[20], at[122]);    MULADD(at[21], at[121]);    MULADD(at[22], at[120]);    MULADD(at[23], at[119]);    MULADD(at[24], at[118]);    MULADD(at[25], at[117]);    MULADD(at[26], at[116]);    MULADD(at[27], at[115]);    MULADD(at[28], at[114]);    MULADD(at[29], at[113]);    MULADD(at[30], at[112]);    MULADD(at[31], at[111]);    MULADD(at[32], at[110]);    MULADD(at[33], at[109]);    MULADD(at[34], at[108]);    MULADD(at[35], at[107]);    MULADD(at[36], at[106]);    MULADD(at[37], at[105]);    MULADD(at[38], at[104]);    MULADD(at[39], at[103]);    MULADD(at[40], at[102]);    MULADD(at[41], at[101]);    MULADD(at[42], at[100]);    MULADD(at[43], at[99]);    MULADD(at[44], at[98]);    MULADD(at[45], at[97]);    MULADD(at[46], at[96]);    MULADD(at[47], at[95]);    MULADD(at[48], at[94]);    MULADD(at[49], at[93]);    MULADD(at[50], at[92]);    MULADD(at[51], at[91]);    MULADD(at[52], at[90]);    MULADD(at[53], at[89]);    MULADD(at[54], at[88]);    MULADD(at[55], at[87]);    MULADD(at[56], at[86]);    MULADD(at[57], at[85]);    MULADD(at[58], at[84]);    MULADD(at[59], at[83]);    MULADD(at[60], at[82]);    MULADD(at[61], at[81]);    MULADD(at[62], at[80]);    MULADD(at[63], at[79]); 
7583
+   COMBA_STORE(C->dp[78]);
7584
+   /* 79 */
7585
+   COMBA_FORWARD;
7586
+   MULADD(at[16], at[127]);    MULADD(at[17], at[126]);    MULADD(at[18], at[125]);    MULADD(at[19], at[124]);    MULADD(at[20], at[123]);    MULADD(at[21], at[122]);    MULADD(at[22], at[121]);    MULADD(at[23], at[120]);    MULADD(at[24], at[119]);    MULADD(at[25], at[118]);    MULADD(at[26], at[117]);    MULADD(at[27], at[116]);    MULADD(at[28], at[115]);    MULADD(at[29], at[114]);    MULADD(at[30], at[113]);    MULADD(at[31], at[112]);    MULADD(at[32], at[111]);    MULADD(at[33], at[110]);    MULADD(at[34], at[109]);    MULADD(at[35], at[108]);    MULADD(at[36], at[107]);    MULADD(at[37], at[106]);    MULADD(at[38], at[105]);    MULADD(at[39], at[104]);    MULADD(at[40], at[103]);    MULADD(at[41], at[102]);    MULADD(at[42], at[101]);    MULADD(at[43], at[100]);    MULADD(at[44], at[99]);    MULADD(at[45], at[98]);    MULADD(at[46], at[97]);    MULADD(at[47], at[96]);    MULADD(at[48], at[95]);    MULADD(at[49], at[94]);    MULADD(at[50], at[93]);    MULADD(at[51], at[92]);    MULADD(at[52], at[91]);    MULADD(at[53], at[90]);    MULADD(at[54], at[89]);    MULADD(at[55], at[88]);    MULADD(at[56], at[87]);    MULADD(at[57], at[86]);    MULADD(at[58], at[85]);    MULADD(at[59], at[84]);    MULADD(at[60], at[83]);    MULADD(at[61], at[82]);    MULADD(at[62], at[81]);    MULADD(at[63], at[80]); 
7587
+   COMBA_STORE(C->dp[79]);
7588
+   /* 80 */
7589
+   COMBA_FORWARD;
7590
+   MULADD(at[17], at[127]);    MULADD(at[18], at[126]);    MULADD(at[19], at[125]);    MULADD(at[20], at[124]);    MULADD(at[21], at[123]);    MULADD(at[22], at[122]);    MULADD(at[23], at[121]);    MULADD(at[24], at[120]);    MULADD(at[25], at[119]);    MULADD(at[26], at[118]);    MULADD(at[27], at[117]);    MULADD(at[28], at[116]);    MULADD(at[29], at[115]);    MULADD(at[30], at[114]);    MULADD(at[31], at[113]);    MULADD(at[32], at[112]);    MULADD(at[33], at[111]);    MULADD(at[34], at[110]);    MULADD(at[35], at[109]);    MULADD(at[36], at[108]);    MULADD(at[37], at[107]);    MULADD(at[38], at[106]);    MULADD(at[39], at[105]);    MULADD(at[40], at[104]);    MULADD(at[41], at[103]);    MULADD(at[42], at[102]);    MULADD(at[43], at[101]);    MULADD(at[44], at[100]);    MULADD(at[45], at[99]);    MULADD(at[46], at[98]);    MULADD(at[47], at[97]);    MULADD(at[48], at[96]);    MULADD(at[49], at[95]);    MULADD(at[50], at[94]);    MULADD(at[51], at[93]);    MULADD(at[52], at[92]);    MULADD(at[53], at[91]);    MULADD(at[54], at[90]);    MULADD(at[55], at[89]);    MULADD(at[56], at[88]);    MULADD(at[57], at[87]);    MULADD(at[58], at[86]);    MULADD(at[59], at[85]);    MULADD(at[60], at[84]);    MULADD(at[61], at[83]);    MULADD(at[62], at[82]);    MULADD(at[63], at[81]); 
7591
+   COMBA_STORE(C->dp[80]);
7592
+   /* 81 */
7593
+   COMBA_FORWARD;
7594
+   MULADD(at[18], at[127]);    MULADD(at[19], at[126]);    MULADD(at[20], at[125]);    MULADD(at[21], at[124]);    MULADD(at[22], at[123]);    MULADD(at[23], at[122]);    MULADD(at[24], at[121]);    MULADD(at[25], at[120]);    MULADD(at[26], at[119]);    MULADD(at[27], at[118]);    MULADD(at[28], at[117]);    MULADD(at[29], at[116]);    MULADD(at[30], at[115]);    MULADD(at[31], at[114]);    MULADD(at[32], at[113]);    MULADD(at[33], at[112]);    MULADD(at[34], at[111]);    MULADD(at[35], at[110]);    MULADD(at[36], at[109]);    MULADD(at[37], at[108]);    MULADD(at[38], at[107]);    MULADD(at[39], at[106]);    MULADD(at[40], at[105]);    MULADD(at[41], at[104]);    MULADD(at[42], at[103]);    MULADD(at[43], at[102]);    MULADD(at[44], at[101]);    MULADD(at[45], at[100]);    MULADD(at[46], at[99]);    MULADD(at[47], at[98]);    MULADD(at[48], at[97]);    MULADD(at[49], at[96]);    MULADD(at[50], at[95]);    MULADD(at[51], at[94]);    MULADD(at[52], at[93]);    MULADD(at[53], at[92]);    MULADD(at[54], at[91]);    MULADD(at[55], at[90]);    MULADD(at[56], at[89]);    MULADD(at[57], at[88]);    MULADD(at[58], at[87]);    MULADD(at[59], at[86]);    MULADD(at[60], at[85]);    MULADD(at[61], at[84]);    MULADD(at[62], at[83]);    MULADD(at[63], at[82]); 
7595
+   COMBA_STORE(C->dp[81]);
7596
+   /* 82 */
7597
+   COMBA_FORWARD;
7598
+   MULADD(at[19], at[127]);    MULADD(at[20], at[126]);    MULADD(at[21], at[125]);    MULADD(at[22], at[124]);    MULADD(at[23], at[123]);    MULADD(at[24], at[122]);    MULADD(at[25], at[121]);    MULADD(at[26], at[120]);    MULADD(at[27], at[119]);    MULADD(at[28], at[118]);    MULADD(at[29], at[117]);    MULADD(at[30], at[116]);    MULADD(at[31], at[115]);    MULADD(at[32], at[114]);    MULADD(at[33], at[113]);    MULADD(at[34], at[112]);    MULADD(at[35], at[111]);    MULADD(at[36], at[110]);    MULADD(at[37], at[109]);    MULADD(at[38], at[108]);    MULADD(at[39], at[107]);    MULADD(at[40], at[106]);    MULADD(at[41], at[105]);    MULADD(at[42], at[104]);    MULADD(at[43], at[103]);    MULADD(at[44], at[102]);    MULADD(at[45], at[101]);    MULADD(at[46], at[100]);    MULADD(at[47], at[99]);    MULADD(at[48], at[98]);    MULADD(at[49], at[97]);    MULADD(at[50], at[96]);    MULADD(at[51], at[95]);    MULADD(at[52], at[94]);    MULADD(at[53], at[93]);    MULADD(at[54], at[92]);    MULADD(at[55], at[91]);    MULADD(at[56], at[90]);    MULADD(at[57], at[89]);    MULADD(at[58], at[88]);    MULADD(at[59], at[87]);    MULADD(at[60], at[86]);    MULADD(at[61], at[85]);    MULADD(at[62], at[84]);    MULADD(at[63], at[83]); 
7599
+   COMBA_STORE(C->dp[82]);
7600
+   /* 83 */
7601
+   COMBA_FORWARD;
7602
+   MULADD(at[20], at[127]);    MULADD(at[21], at[126]);    MULADD(at[22], at[125]);    MULADD(at[23], at[124]);    MULADD(at[24], at[123]);    MULADD(at[25], at[122]);    MULADD(at[26], at[121]);    MULADD(at[27], at[120]);    MULADD(at[28], at[119]);    MULADD(at[29], at[118]);    MULADD(at[30], at[117]);    MULADD(at[31], at[116]);    MULADD(at[32], at[115]);    MULADD(at[33], at[114]);    MULADD(at[34], at[113]);    MULADD(at[35], at[112]);    MULADD(at[36], at[111]);    MULADD(at[37], at[110]);    MULADD(at[38], at[109]);    MULADD(at[39], at[108]);    MULADD(at[40], at[107]);    MULADD(at[41], at[106]);    MULADD(at[42], at[105]);    MULADD(at[43], at[104]);    MULADD(at[44], at[103]);    MULADD(at[45], at[102]);    MULADD(at[46], at[101]);    MULADD(at[47], at[100]);    MULADD(at[48], at[99]);    MULADD(at[49], at[98]);    MULADD(at[50], at[97]);    MULADD(at[51], at[96]);    MULADD(at[52], at[95]);    MULADD(at[53], at[94]);    MULADD(at[54], at[93]);    MULADD(at[55], at[92]);    MULADD(at[56], at[91]);    MULADD(at[57], at[90]);    MULADD(at[58], at[89]);    MULADD(at[59], at[88]);    MULADD(at[60], at[87]);    MULADD(at[61], at[86]);    MULADD(at[62], at[85]);    MULADD(at[63], at[84]); 
7603
+   COMBA_STORE(C->dp[83]);
7604
+   /* 84 */
7605
+   COMBA_FORWARD;
7606
+   MULADD(at[21], at[127]);    MULADD(at[22], at[126]);    MULADD(at[23], at[125]);    MULADD(at[24], at[124]);    MULADD(at[25], at[123]);    MULADD(at[26], at[122]);    MULADD(at[27], at[121]);    MULADD(at[28], at[120]);    MULADD(at[29], at[119]);    MULADD(at[30], at[118]);    MULADD(at[31], at[117]);    MULADD(at[32], at[116]);    MULADD(at[33], at[115]);    MULADD(at[34], at[114]);    MULADD(at[35], at[113]);    MULADD(at[36], at[112]);    MULADD(at[37], at[111]);    MULADD(at[38], at[110]);    MULADD(at[39], at[109]);    MULADD(at[40], at[108]);    MULADD(at[41], at[107]);    MULADD(at[42], at[106]);    MULADD(at[43], at[105]);    MULADD(at[44], at[104]);    MULADD(at[45], at[103]);    MULADD(at[46], at[102]);    MULADD(at[47], at[101]);    MULADD(at[48], at[100]);    MULADD(at[49], at[99]);    MULADD(at[50], at[98]);    MULADD(at[51], at[97]);    MULADD(at[52], at[96]);    MULADD(at[53], at[95]);    MULADD(at[54], at[94]);    MULADD(at[55], at[93]);    MULADD(at[56], at[92]);    MULADD(at[57], at[91]);    MULADD(at[58], at[90]);    MULADD(at[59], at[89]);    MULADD(at[60], at[88]);    MULADD(at[61], at[87]);    MULADD(at[62], at[86]);    MULADD(at[63], at[85]); 
7607
+   COMBA_STORE(C->dp[84]);
7608
+   /* 85 */
7609
+   COMBA_FORWARD;
7610
+   MULADD(at[22], at[127]);    MULADD(at[23], at[126]);    MULADD(at[24], at[125]);    MULADD(at[25], at[124]);    MULADD(at[26], at[123]);    MULADD(at[27], at[122]);    MULADD(at[28], at[121]);    MULADD(at[29], at[120]);    MULADD(at[30], at[119]);    MULADD(at[31], at[118]);    MULADD(at[32], at[117]);    MULADD(at[33], at[116]);    MULADD(at[34], at[115]);    MULADD(at[35], at[114]);    MULADD(at[36], at[113]);    MULADD(at[37], at[112]);    MULADD(at[38], at[111]);    MULADD(at[39], at[110]);    MULADD(at[40], at[109]);    MULADD(at[41], at[108]);    MULADD(at[42], at[107]);    MULADD(at[43], at[106]);    MULADD(at[44], at[105]);    MULADD(at[45], at[104]);    MULADD(at[46], at[103]);    MULADD(at[47], at[102]);    MULADD(at[48], at[101]);    MULADD(at[49], at[100]);    MULADD(at[50], at[99]);    MULADD(at[51], at[98]);    MULADD(at[52], at[97]);    MULADD(at[53], at[96]);    MULADD(at[54], at[95]);    MULADD(at[55], at[94]);    MULADD(at[56], at[93]);    MULADD(at[57], at[92]);    MULADD(at[58], at[91]);    MULADD(at[59], at[90]);    MULADD(at[60], at[89]);    MULADD(at[61], at[88]);    MULADD(at[62], at[87]);    MULADD(at[63], at[86]); 
7611
+   COMBA_STORE(C->dp[85]);
7612
+   /* 86 */
7613
+   COMBA_FORWARD;
7614
+   MULADD(at[23], at[127]);    MULADD(at[24], at[126]);    MULADD(at[25], at[125]);    MULADD(at[26], at[124]);    MULADD(at[27], at[123]);    MULADD(at[28], at[122]);    MULADD(at[29], at[121]);    MULADD(at[30], at[120]);    MULADD(at[31], at[119]);    MULADD(at[32], at[118]);    MULADD(at[33], at[117]);    MULADD(at[34], at[116]);    MULADD(at[35], at[115]);    MULADD(at[36], at[114]);    MULADD(at[37], at[113]);    MULADD(at[38], at[112]);    MULADD(at[39], at[111]);    MULADD(at[40], at[110]);    MULADD(at[41], at[109]);    MULADD(at[42], at[108]);    MULADD(at[43], at[107]);    MULADD(at[44], at[106]);    MULADD(at[45], at[105]);    MULADD(at[46], at[104]);    MULADD(at[47], at[103]);    MULADD(at[48], at[102]);    MULADD(at[49], at[101]);    MULADD(at[50], at[100]);    MULADD(at[51], at[99]);    MULADD(at[52], at[98]);    MULADD(at[53], at[97]);    MULADD(at[54], at[96]);    MULADD(at[55], at[95]);    MULADD(at[56], at[94]);    MULADD(at[57], at[93]);    MULADD(at[58], at[92]);    MULADD(at[59], at[91]);    MULADD(at[60], at[90]);    MULADD(at[61], at[89]);    MULADD(at[62], at[88]);    MULADD(at[63], at[87]); 
7615
+   COMBA_STORE(C->dp[86]);
7616
+   /* 87 */
7617
+   COMBA_FORWARD;
7618
+   MULADD(at[24], at[127]);    MULADD(at[25], at[126]);    MULADD(at[26], at[125]);    MULADD(at[27], at[124]);    MULADD(at[28], at[123]);    MULADD(at[29], at[122]);    MULADD(at[30], at[121]);    MULADD(at[31], at[120]);    MULADD(at[32], at[119]);    MULADD(at[33], at[118]);    MULADD(at[34], at[117]);    MULADD(at[35], at[116]);    MULADD(at[36], at[115]);    MULADD(at[37], at[114]);    MULADD(at[38], at[113]);    MULADD(at[39], at[112]);    MULADD(at[40], at[111]);    MULADD(at[41], at[110]);    MULADD(at[42], at[109]);    MULADD(at[43], at[108]);    MULADD(at[44], at[107]);    MULADD(at[45], at[106]);    MULADD(at[46], at[105]);    MULADD(at[47], at[104]);    MULADD(at[48], at[103]);    MULADD(at[49], at[102]);    MULADD(at[50], at[101]);    MULADD(at[51], at[100]);    MULADD(at[52], at[99]);    MULADD(at[53], at[98]);    MULADD(at[54], at[97]);    MULADD(at[55], at[96]);    MULADD(at[56], at[95]);    MULADD(at[57], at[94]);    MULADD(at[58], at[93]);    MULADD(at[59], at[92]);    MULADD(at[60], at[91]);    MULADD(at[61], at[90]);    MULADD(at[62], at[89]);    MULADD(at[63], at[88]); 
7619
+   COMBA_STORE(C->dp[87]);
7620
+   /* 88 */
7621
+   COMBA_FORWARD;
7622
+   MULADD(at[25], at[127]);    MULADD(at[26], at[126]);    MULADD(at[27], at[125]);    MULADD(at[28], at[124]);    MULADD(at[29], at[123]);    MULADD(at[30], at[122]);    MULADD(at[31], at[121]);    MULADD(at[32], at[120]);    MULADD(at[33], at[119]);    MULADD(at[34], at[118]);    MULADD(at[35], at[117]);    MULADD(at[36], at[116]);    MULADD(at[37], at[115]);    MULADD(at[38], at[114]);    MULADD(at[39], at[113]);    MULADD(at[40], at[112]);    MULADD(at[41], at[111]);    MULADD(at[42], at[110]);    MULADD(at[43], at[109]);    MULADD(at[44], at[108]);    MULADD(at[45], at[107]);    MULADD(at[46], at[106]);    MULADD(at[47], at[105]);    MULADD(at[48], at[104]);    MULADD(at[49], at[103]);    MULADD(at[50], at[102]);    MULADD(at[51], at[101]);    MULADD(at[52], at[100]);    MULADD(at[53], at[99]);    MULADD(at[54], at[98]);    MULADD(at[55], at[97]);    MULADD(at[56], at[96]);    MULADD(at[57], at[95]);    MULADD(at[58], at[94]);    MULADD(at[59], at[93]);    MULADD(at[60], at[92]);    MULADD(at[61], at[91]);    MULADD(at[62], at[90]);    MULADD(at[63], at[89]); 
7623
+   COMBA_STORE(C->dp[88]);
7624
+   /* 89 */
7625
+   COMBA_FORWARD;
7626
+   MULADD(at[26], at[127]);    MULADD(at[27], at[126]);    MULADD(at[28], at[125]);    MULADD(at[29], at[124]);    MULADD(at[30], at[123]);    MULADD(at[31], at[122]);    MULADD(at[32], at[121]);    MULADD(at[33], at[120]);    MULADD(at[34], at[119]);    MULADD(at[35], at[118]);    MULADD(at[36], at[117]);    MULADD(at[37], at[116]);    MULADD(at[38], at[115]);    MULADD(at[39], at[114]);    MULADD(at[40], at[113]);    MULADD(at[41], at[112]);    MULADD(at[42], at[111]);    MULADD(at[43], at[110]);    MULADD(at[44], at[109]);    MULADD(at[45], at[108]);    MULADD(at[46], at[107]);    MULADD(at[47], at[106]);    MULADD(at[48], at[105]);    MULADD(at[49], at[104]);    MULADD(at[50], at[103]);    MULADD(at[51], at[102]);    MULADD(at[52], at[101]);    MULADD(at[53], at[100]);    MULADD(at[54], at[99]);    MULADD(at[55], at[98]);    MULADD(at[56], at[97]);    MULADD(at[57], at[96]);    MULADD(at[58], at[95]);    MULADD(at[59], at[94]);    MULADD(at[60], at[93]);    MULADD(at[61], at[92]);    MULADD(at[62], at[91]);    MULADD(at[63], at[90]); 
7627
+   COMBA_STORE(C->dp[89]);
7628
+   /* 90 */
7629
+   COMBA_FORWARD;
7630
+   MULADD(at[27], at[127]);    MULADD(at[28], at[126]);    MULADD(at[29], at[125]);    MULADD(at[30], at[124]);    MULADD(at[31], at[123]);    MULADD(at[32], at[122]);    MULADD(at[33], at[121]);    MULADD(at[34], at[120]);    MULADD(at[35], at[119]);    MULADD(at[36], at[118]);    MULADD(at[37], at[117]);    MULADD(at[38], at[116]);    MULADD(at[39], at[115]);    MULADD(at[40], at[114]);    MULADD(at[41], at[113]);    MULADD(at[42], at[112]);    MULADD(at[43], at[111]);    MULADD(at[44], at[110]);    MULADD(at[45], at[109]);    MULADD(at[46], at[108]);    MULADD(at[47], at[107]);    MULADD(at[48], at[106]);    MULADD(at[49], at[105]);    MULADD(at[50], at[104]);    MULADD(at[51], at[103]);    MULADD(at[52], at[102]);    MULADD(at[53], at[101]);    MULADD(at[54], at[100]);    MULADD(at[55], at[99]);    MULADD(at[56], at[98]);    MULADD(at[57], at[97]);    MULADD(at[58], at[96]);    MULADD(at[59], at[95]);    MULADD(at[60], at[94]);    MULADD(at[61], at[93]);    MULADD(at[62], at[92]);    MULADD(at[63], at[91]); 
7631
+   COMBA_STORE(C->dp[90]);
7632
+   /* 91 */
7633
+   COMBA_FORWARD;
7634
+   MULADD(at[28], at[127]);    MULADD(at[29], at[126]);    MULADD(at[30], at[125]);    MULADD(at[31], at[124]);    MULADD(at[32], at[123]);    MULADD(at[33], at[122]);    MULADD(at[34], at[121]);    MULADD(at[35], at[120]);    MULADD(at[36], at[119]);    MULADD(at[37], at[118]);    MULADD(at[38], at[117]);    MULADD(at[39], at[116]);    MULADD(at[40], at[115]);    MULADD(at[41], at[114]);    MULADD(at[42], at[113]);    MULADD(at[43], at[112]);    MULADD(at[44], at[111]);    MULADD(at[45], at[110]);    MULADD(at[46], at[109]);    MULADD(at[47], at[108]);    MULADD(at[48], at[107]);    MULADD(at[49], at[106]);    MULADD(at[50], at[105]);    MULADD(at[51], at[104]);    MULADD(at[52], at[103]);    MULADD(at[53], at[102]);    MULADD(at[54], at[101]);    MULADD(at[55], at[100]);    MULADD(at[56], at[99]);    MULADD(at[57], at[98]);    MULADD(at[58], at[97]);    MULADD(at[59], at[96]);    MULADD(at[60], at[95]);    MULADD(at[61], at[94]);    MULADD(at[62], at[93]);    MULADD(at[63], at[92]); 
7635
+   COMBA_STORE(C->dp[91]);
7636
+   /* 92 */
7637
+   COMBA_FORWARD;
7638
+   MULADD(at[29], at[127]);    MULADD(at[30], at[126]);    MULADD(at[31], at[125]);    MULADD(at[32], at[124]);    MULADD(at[33], at[123]);    MULADD(at[34], at[122]);    MULADD(at[35], at[121]);    MULADD(at[36], at[120]);    MULADD(at[37], at[119]);    MULADD(at[38], at[118]);    MULADD(at[39], at[117]);    MULADD(at[40], at[116]);    MULADD(at[41], at[115]);    MULADD(at[42], at[114]);    MULADD(at[43], at[113]);    MULADD(at[44], at[112]);    MULADD(at[45], at[111]);    MULADD(at[46], at[110]);    MULADD(at[47], at[109]);    MULADD(at[48], at[108]);    MULADD(at[49], at[107]);    MULADD(at[50], at[106]);    MULADD(at[51], at[105]);    MULADD(at[52], at[104]);    MULADD(at[53], at[103]);    MULADD(at[54], at[102]);    MULADD(at[55], at[101]);    MULADD(at[56], at[100]);    MULADD(at[57], at[99]);    MULADD(at[58], at[98]);    MULADD(at[59], at[97]);    MULADD(at[60], at[96]);    MULADD(at[61], at[95]);    MULADD(at[62], at[94]);    MULADD(at[63], at[93]); 
7639
+   COMBA_STORE(C->dp[92]);
7640
+   /* 93 */
7641
+   COMBA_FORWARD;
7642
+   MULADD(at[30], at[127]);    MULADD(at[31], at[126]);    MULADD(at[32], at[125]);    MULADD(at[33], at[124]);    MULADD(at[34], at[123]);    MULADD(at[35], at[122]);    MULADD(at[36], at[121]);    MULADD(at[37], at[120]);    MULADD(at[38], at[119]);    MULADD(at[39], at[118]);    MULADD(at[40], at[117]);    MULADD(at[41], at[116]);    MULADD(at[42], at[115]);    MULADD(at[43], at[114]);    MULADD(at[44], at[113]);    MULADD(at[45], at[112]);    MULADD(at[46], at[111]);    MULADD(at[47], at[110]);    MULADD(at[48], at[109]);    MULADD(at[49], at[108]);    MULADD(at[50], at[107]);    MULADD(at[51], at[106]);    MULADD(at[52], at[105]);    MULADD(at[53], at[104]);    MULADD(at[54], at[103]);    MULADD(at[55], at[102]);    MULADD(at[56], at[101]);    MULADD(at[57], at[100]);    MULADD(at[58], at[99]);    MULADD(at[59], at[98]);    MULADD(at[60], at[97]);    MULADD(at[61], at[96]);    MULADD(at[62], at[95]);    MULADD(at[63], at[94]); 
7643
+   COMBA_STORE(C->dp[93]);
7644
+   /* 94 */
7645
+   COMBA_FORWARD;
7646
+   MULADD(at[31], at[127]);    MULADD(at[32], at[126]);    MULADD(at[33], at[125]);    MULADD(at[34], at[124]);    MULADD(at[35], at[123]);    MULADD(at[36], at[122]);    MULADD(at[37], at[121]);    MULADD(at[38], at[120]);    MULADD(at[39], at[119]);    MULADD(at[40], at[118]);    MULADD(at[41], at[117]);    MULADD(at[42], at[116]);    MULADD(at[43], at[115]);    MULADD(at[44], at[114]);    MULADD(at[45], at[113]);    MULADD(at[46], at[112]);    MULADD(at[47], at[111]);    MULADD(at[48], at[110]);    MULADD(at[49], at[109]);    MULADD(at[50], at[108]);    MULADD(at[51], at[107]);    MULADD(at[52], at[106]);    MULADD(at[53], at[105]);    MULADD(at[54], at[104]);    MULADD(at[55], at[103]);    MULADD(at[56], at[102]);    MULADD(at[57], at[101]);    MULADD(at[58], at[100]);    MULADD(at[59], at[99]);    MULADD(at[60], at[98]);    MULADD(at[61], at[97]);    MULADD(at[62], at[96]);    MULADD(at[63], at[95]); 
7647
+   COMBA_STORE(C->dp[94]);
7648
+   /* 95 */
7649
+   COMBA_FORWARD;
7650
+   MULADD(at[32], at[127]);    MULADD(at[33], at[126]);    MULADD(at[34], at[125]);    MULADD(at[35], at[124]);    MULADD(at[36], at[123]);    MULADD(at[37], at[122]);    MULADD(at[38], at[121]);    MULADD(at[39], at[120]);    MULADD(at[40], at[119]);    MULADD(at[41], at[118]);    MULADD(at[42], at[117]);    MULADD(at[43], at[116]);    MULADD(at[44], at[115]);    MULADD(at[45], at[114]);    MULADD(at[46], at[113]);    MULADD(at[47], at[112]);    MULADD(at[48], at[111]);    MULADD(at[49], at[110]);    MULADD(at[50], at[109]);    MULADD(at[51], at[108]);    MULADD(at[52], at[107]);    MULADD(at[53], at[106]);    MULADD(at[54], at[105]);    MULADD(at[55], at[104]);    MULADD(at[56], at[103]);    MULADD(at[57], at[102]);    MULADD(at[58], at[101]);    MULADD(at[59], at[100]);    MULADD(at[60], at[99]);    MULADD(at[61], at[98]);    MULADD(at[62], at[97]);    MULADD(at[63], at[96]); 
7651
+   COMBA_STORE(C->dp[95]);
7652
+   /* 96 */
7653
+   COMBA_FORWARD;
7654
+   MULADD(at[33], at[127]);    MULADD(at[34], at[126]);    MULADD(at[35], at[125]);    MULADD(at[36], at[124]);    MULADD(at[37], at[123]);    MULADD(at[38], at[122]);    MULADD(at[39], at[121]);    MULADD(at[40], at[120]);    MULADD(at[41], at[119]);    MULADD(at[42], at[118]);    MULADD(at[43], at[117]);    MULADD(at[44], at[116]);    MULADD(at[45], at[115]);    MULADD(at[46], at[114]);    MULADD(at[47], at[113]);    MULADD(at[48], at[112]);    MULADD(at[49], at[111]);    MULADD(at[50], at[110]);    MULADD(at[51], at[109]);    MULADD(at[52], at[108]);    MULADD(at[53], at[107]);    MULADD(at[54], at[106]);    MULADD(at[55], at[105]);    MULADD(at[56], at[104]);    MULADD(at[57], at[103]);    MULADD(at[58], at[102]);    MULADD(at[59], at[101]);    MULADD(at[60], at[100]);    MULADD(at[61], at[99]);    MULADD(at[62], at[98]);    MULADD(at[63], at[97]); 
7655
+   COMBA_STORE(C->dp[96]);
7656
+   /* 97 */
7657
+   COMBA_FORWARD;
7658
+   MULADD(at[34], at[127]);    MULADD(at[35], at[126]);    MULADD(at[36], at[125]);    MULADD(at[37], at[124]);    MULADD(at[38], at[123]);    MULADD(at[39], at[122]);    MULADD(at[40], at[121]);    MULADD(at[41], at[120]);    MULADD(at[42], at[119]);    MULADD(at[43], at[118]);    MULADD(at[44], at[117]);    MULADD(at[45], at[116]);    MULADD(at[46], at[115]);    MULADD(at[47], at[114]);    MULADD(at[48], at[113]);    MULADD(at[49], at[112]);    MULADD(at[50], at[111]);    MULADD(at[51], at[110]);    MULADD(at[52], at[109]);    MULADD(at[53], at[108]);    MULADD(at[54], at[107]);    MULADD(at[55], at[106]);    MULADD(at[56], at[105]);    MULADD(at[57], at[104]);    MULADD(at[58], at[103]);    MULADD(at[59], at[102]);    MULADD(at[60], at[101]);    MULADD(at[61], at[100]);    MULADD(at[62], at[99]);    MULADD(at[63], at[98]); 
7659
+   COMBA_STORE(C->dp[97]);
7660
+   /* 98 */
7661
+   COMBA_FORWARD;
7662
+   MULADD(at[35], at[127]);    MULADD(at[36], at[126]);    MULADD(at[37], at[125]);    MULADD(at[38], at[124]);    MULADD(at[39], at[123]);    MULADD(at[40], at[122]);    MULADD(at[41], at[121]);    MULADD(at[42], at[120]);    MULADD(at[43], at[119]);    MULADD(at[44], at[118]);    MULADD(at[45], at[117]);    MULADD(at[46], at[116]);    MULADD(at[47], at[115]);    MULADD(at[48], at[114]);    MULADD(at[49], at[113]);    MULADD(at[50], at[112]);    MULADD(at[51], at[111]);    MULADD(at[52], at[110]);    MULADD(at[53], at[109]);    MULADD(at[54], at[108]);    MULADD(at[55], at[107]);    MULADD(at[56], at[106]);    MULADD(at[57], at[105]);    MULADD(at[58], at[104]);    MULADD(at[59], at[103]);    MULADD(at[60], at[102]);    MULADD(at[61], at[101]);    MULADD(at[62], at[100]);    MULADD(at[63], at[99]); 
7663
+   COMBA_STORE(C->dp[98]);
7664
+   /* 99 */
7665
+   COMBA_FORWARD;
7666
+   MULADD(at[36], at[127]);    MULADD(at[37], at[126]);    MULADD(at[38], at[125]);    MULADD(at[39], at[124]);    MULADD(at[40], at[123]);    MULADD(at[41], at[122]);    MULADD(at[42], at[121]);    MULADD(at[43], at[120]);    MULADD(at[44], at[119]);    MULADD(at[45], at[118]);    MULADD(at[46], at[117]);    MULADD(at[47], at[116]);    MULADD(at[48], at[115]);    MULADD(at[49], at[114]);    MULADD(at[50], at[113]);    MULADD(at[51], at[112]);    MULADD(at[52], at[111]);    MULADD(at[53], at[110]);    MULADD(at[54], at[109]);    MULADD(at[55], at[108]);    MULADD(at[56], at[107]);    MULADD(at[57], at[106]);    MULADD(at[58], at[105]);    MULADD(at[59], at[104]);    MULADD(at[60], at[103]);    MULADD(at[61], at[102]);    MULADD(at[62], at[101]);    MULADD(at[63], at[100]); 
7667
+   COMBA_STORE(C->dp[99]);
7668
+   /* 100 */
7669
+   COMBA_FORWARD;
7670
+   MULADD(at[37], at[127]);    MULADD(at[38], at[126]);    MULADD(at[39], at[125]);    MULADD(at[40], at[124]);    MULADD(at[41], at[123]);    MULADD(at[42], at[122]);    MULADD(at[43], at[121]);    MULADD(at[44], at[120]);    MULADD(at[45], at[119]);    MULADD(at[46], at[118]);    MULADD(at[47], at[117]);    MULADD(at[48], at[116]);    MULADD(at[49], at[115]);    MULADD(at[50], at[114]);    MULADD(at[51], at[113]);    MULADD(at[52], at[112]);    MULADD(at[53], at[111]);    MULADD(at[54], at[110]);    MULADD(at[55], at[109]);    MULADD(at[56], at[108]);    MULADD(at[57], at[107]);    MULADD(at[58], at[106]);    MULADD(at[59], at[105]);    MULADD(at[60], at[104]);    MULADD(at[61], at[103]);    MULADD(at[62], at[102]);    MULADD(at[63], at[101]); 
7671
+   COMBA_STORE(C->dp[100]);
7672
+   /* 101 */
7673
+   COMBA_FORWARD;
7674
+   MULADD(at[38], at[127]);    MULADD(at[39], at[126]);    MULADD(at[40], at[125]);    MULADD(at[41], at[124]);    MULADD(at[42], at[123]);    MULADD(at[43], at[122]);    MULADD(at[44], at[121]);    MULADD(at[45], at[120]);    MULADD(at[46], at[119]);    MULADD(at[47], at[118]);    MULADD(at[48], at[117]);    MULADD(at[49], at[116]);    MULADD(at[50], at[115]);    MULADD(at[51], at[114]);    MULADD(at[52], at[113]);    MULADD(at[53], at[112]);    MULADD(at[54], at[111]);    MULADD(at[55], at[110]);    MULADD(at[56], at[109]);    MULADD(at[57], at[108]);    MULADD(at[58], at[107]);    MULADD(at[59], at[106]);    MULADD(at[60], at[105]);    MULADD(at[61], at[104]);    MULADD(at[62], at[103]);    MULADD(at[63], at[102]); 
7675
+   COMBA_STORE(C->dp[101]);
7676
+   /* 102 */
7677
+   COMBA_FORWARD;
7678
+   MULADD(at[39], at[127]);    MULADD(at[40], at[126]);    MULADD(at[41], at[125]);    MULADD(at[42], at[124]);    MULADD(at[43], at[123]);    MULADD(at[44], at[122]);    MULADD(at[45], at[121]);    MULADD(at[46], at[120]);    MULADD(at[47], at[119]);    MULADD(at[48], at[118]);    MULADD(at[49], at[117]);    MULADD(at[50], at[116]);    MULADD(at[51], at[115]);    MULADD(at[52], at[114]);    MULADD(at[53], at[113]);    MULADD(at[54], at[112]);    MULADD(at[55], at[111]);    MULADD(at[56], at[110]);    MULADD(at[57], at[109]);    MULADD(at[58], at[108]);    MULADD(at[59], at[107]);    MULADD(at[60], at[106]);    MULADD(at[61], at[105]);    MULADD(at[62], at[104]);    MULADD(at[63], at[103]); 
7679
+   COMBA_STORE(C->dp[102]);
7680
+   /* 103 */
7681
+   COMBA_FORWARD;
7682
+   MULADD(at[40], at[127]);    MULADD(at[41], at[126]);    MULADD(at[42], at[125]);    MULADD(at[43], at[124]);    MULADD(at[44], at[123]);    MULADD(at[45], at[122]);    MULADD(at[46], at[121]);    MULADD(at[47], at[120]);    MULADD(at[48], at[119]);    MULADD(at[49], at[118]);    MULADD(at[50], at[117]);    MULADD(at[51], at[116]);    MULADD(at[52], at[115]);    MULADD(at[53], at[114]);    MULADD(at[54], at[113]);    MULADD(at[55], at[112]);    MULADD(at[56], at[111]);    MULADD(at[57], at[110]);    MULADD(at[58], at[109]);    MULADD(at[59], at[108]);    MULADD(at[60], at[107]);    MULADD(at[61], at[106]);    MULADD(at[62], at[105]);    MULADD(at[63], at[104]); 
7683
+   COMBA_STORE(C->dp[103]);
7684
+   /* 104 */
7685
+   COMBA_FORWARD;
7686
+   MULADD(at[41], at[127]);    MULADD(at[42], at[126]);    MULADD(at[43], at[125]);    MULADD(at[44], at[124]);    MULADD(at[45], at[123]);    MULADD(at[46], at[122]);    MULADD(at[47], at[121]);    MULADD(at[48], at[120]);    MULADD(at[49], at[119]);    MULADD(at[50], at[118]);    MULADD(at[51], at[117]);    MULADD(at[52], at[116]);    MULADD(at[53], at[115]);    MULADD(at[54], at[114]);    MULADD(at[55], at[113]);    MULADD(at[56], at[112]);    MULADD(at[57], at[111]);    MULADD(at[58], at[110]);    MULADD(at[59], at[109]);    MULADD(at[60], at[108]);    MULADD(at[61], at[107]);    MULADD(at[62], at[106]);    MULADD(at[63], at[105]); 
7687
+   COMBA_STORE(C->dp[104]);
7688
+   /* 105 */
7689
+   COMBA_FORWARD;
7690
+   MULADD(at[42], at[127]);    MULADD(at[43], at[126]);    MULADD(at[44], at[125]);    MULADD(at[45], at[124]);    MULADD(at[46], at[123]);    MULADD(at[47], at[122]);    MULADD(at[48], at[121]);    MULADD(at[49], at[120]);    MULADD(at[50], at[119]);    MULADD(at[51], at[118]);    MULADD(at[52], at[117]);    MULADD(at[53], at[116]);    MULADD(at[54], at[115]);    MULADD(at[55], at[114]);    MULADD(at[56], at[113]);    MULADD(at[57], at[112]);    MULADD(at[58], at[111]);    MULADD(at[59], at[110]);    MULADD(at[60], at[109]);    MULADD(at[61], at[108]);    MULADD(at[62], at[107]);    MULADD(at[63], at[106]); 
7691
+   COMBA_STORE(C->dp[105]);
7692
+   /* 106 */
7693
+   COMBA_FORWARD;
7694
+   MULADD(at[43], at[127]);    MULADD(at[44], at[126]);    MULADD(at[45], at[125]);    MULADD(at[46], at[124]);    MULADD(at[47], at[123]);    MULADD(at[48], at[122]);    MULADD(at[49], at[121]);    MULADD(at[50], at[120]);    MULADD(at[51], at[119]);    MULADD(at[52], at[118]);    MULADD(at[53], at[117]);    MULADD(at[54], at[116]);    MULADD(at[55], at[115]);    MULADD(at[56], at[114]);    MULADD(at[57], at[113]);    MULADD(at[58], at[112]);    MULADD(at[59], at[111]);    MULADD(at[60], at[110]);    MULADD(at[61], at[109]);    MULADD(at[62], at[108]);    MULADD(at[63], at[107]); 
7695
+   COMBA_STORE(C->dp[106]);
7696
+   /* 107 */
7697
+   COMBA_FORWARD;
7698
+   MULADD(at[44], at[127]);    MULADD(at[45], at[126]);    MULADD(at[46], at[125]);    MULADD(at[47], at[124]);    MULADD(at[48], at[123]);    MULADD(at[49], at[122]);    MULADD(at[50], at[121]);    MULADD(at[51], at[120]);    MULADD(at[52], at[119]);    MULADD(at[53], at[118]);    MULADD(at[54], at[117]);    MULADD(at[55], at[116]);    MULADD(at[56], at[115]);    MULADD(at[57], at[114]);    MULADD(at[58], at[113]);    MULADD(at[59], at[112]);    MULADD(at[60], at[111]);    MULADD(at[61], at[110]);    MULADD(at[62], at[109]);    MULADD(at[63], at[108]); 
7699
+   COMBA_STORE(C->dp[107]);
7700
+   /* 108 */
7701
+   COMBA_FORWARD;
7702
+   MULADD(at[45], at[127]);    MULADD(at[46], at[126]);    MULADD(at[47], at[125]);    MULADD(at[48], at[124]);    MULADD(at[49], at[123]);    MULADD(at[50], at[122]);    MULADD(at[51], at[121]);    MULADD(at[52], at[120]);    MULADD(at[53], at[119]);    MULADD(at[54], at[118]);    MULADD(at[55], at[117]);    MULADD(at[56], at[116]);    MULADD(at[57], at[115]);    MULADD(at[58], at[114]);    MULADD(at[59], at[113]);    MULADD(at[60], at[112]);    MULADD(at[61], at[111]);    MULADD(at[62], at[110]);    MULADD(at[63], at[109]); 
7703
+   COMBA_STORE(C->dp[108]);
7704
+   /* 109 */
7705
+   COMBA_FORWARD;
7706
+   MULADD(at[46], at[127]);    MULADD(at[47], at[126]);    MULADD(at[48], at[125]);    MULADD(at[49], at[124]);    MULADD(at[50], at[123]);    MULADD(at[51], at[122]);    MULADD(at[52], at[121]);    MULADD(at[53], at[120]);    MULADD(at[54], at[119]);    MULADD(at[55], at[118]);    MULADD(at[56], at[117]);    MULADD(at[57], at[116]);    MULADD(at[58], at[115]);    MULADD(at[59], at[114]);    MULADD(at[60], at[113]);    MULADD(at[61], at[112]);    MULADD(at[62], at[111]);    MULADD(at[63], at[110]); 
7707
+   COMBA_STORE(C->dp[109]);
7708
+   /* 110 */
7709
+   COMBA_FORWARD;
7710
+   MULADD(at[47], at[127]);    MULADD(at[48], at[126]);    MULADD(at[49], at[125]);    MULADD(at[50], at[124]);    MULADD(at[51], at[123]);    MULADD(at[52], at[122]);    MULADD(at[53], at[121]);    MULADD(at[54], at[120]);    MULADD(at[55], at[119]);    MULADD(at[56], at[118]);    MULADD(at[57], at[117]);    MULADD(at[58], at[116]);    MULADD(at[59], at[115]);    MULADD(at[60], at[114]);    MULADD(at[61], at[113]);    MULADD(at[62], at[112]);    MULADD(at[63], at[111]); 
7711
+   COMBA_STORE(C->dp[110]);
7712
+   /* 111 */
7713
+   COMBA_FORWARD;
7714
+   MULADD(at[48], at[127]);    MULADD(at[49], at[126]);    MULADD(at[50], at[125]);    MULADD(at[51], at[124]);    MULADD(at[52], at[123]);    MULADD(at[53], at[122]);    MULADD(at[54], at[121]);    MULADD(at[55], at[120]);    MULADD(at[56], at[119]);    MULADD(at[57], at[118]);    MULADD(at[58], at[117]);    MULADD(at[59], at[116]);    MULADD(at[60], at[115]);    MULADD(at[61], at[114]);    MULADD(at[62], at[113]);    MULADD(at[63], at[112]); 
7715
+   COMBA_STORE(C->dp[111]);
7716
+   /* 112 */
7717
+   COMBA_FORWARD;
7718
+   MULADD(at[49], at[127]);    MULADD(at[50], at[126]);    MULADD(at[51], at[125]);    MULADD(at[52], at[124]);    MULADD(at[53], at[123]);    MULADD(at[54], at[122]);    MULADD(at[55], at[121]);    MULADD(at[56], at[120]);    MULADD(at[57], at[119]);    MULADD(at[58], at[118]);    MULADD(at[59], at[117]);    MULADD(at[60], at[116]);    MULADD(at[61], at[115]);    MULADD(at[62], at[114]);    MULADD(at[63], at[113]); 
7719
+   COMBA_STORE(C->dp[112]);
7720
+   /* 113 */
7721
+   COMBA_FORWARD;
7722
+   MULADD(at[50], at[127]);    MULADD(at[51], at[126]);    MULADD(at[52], at[125]);    MULADD(at[53], at[124]);    MULADD(at[54], at[123]);    MULADD(at[55], at[122]);    MULADD(at[56], at[121]);    MULADD(at[57], at[120]);    MULADD(at[58], at[119]);    MULADD(at[59], at[118]);    MULADD(at[60], at[117]);    MULADD(at[61], at[116]);    MULADD(at[62], at[115]);    MULADD(at[63], at[114]); 
7723
+   COMBA_STORE(C->dp[113]);
7724
+   /* 114 */
7725
+   COMBA_FORWARD;
7726
+   MULADD(at[51], at[127]);    MULADD(at[52], at[126]);    MULADD(at[53], at[125]);    MULADD(at[54], at[124]);    MULADD(at[55], at[123]);    MULADD(at[56], at[122]);    MULADD(at[57], at[121]);    MULADD(at[58], at[120]);    MULADD(at[59], at[119]);    MULADD(at[60], at[118]);    MULADD(at[61], at[117]);    MULADD(at[62], at[116]);    MULADD(at[63], at[115]); 
7727
+   COMBA_STORE(C->dp[114]);
7728
+   /* 115 */
7729
+   COMBA_FORWARD;
7730
+   MULADD(at[52], at[127]);    MULADD(at[53], at[126]);    MULADD(at[54], at[125]);    MULADD(at[55], at[124]);    MULADD(at[56], at[123]);    MULADD(at[57], at[122]);    MULADD(at[58], at[121]);    MULADD(at[59], at[120]);    MULADD(at[60], at[119]);    MULADD(at[61], at[118]);    MULADD(at[62], at[117]);    MULADD(at[63], at[116]); 
7731
+   COMBA_STORE(C->dp[115]);
7732
+   /* 116 */
7733
+   COMBA_FORWARD;
7734
+   MULADD(at[53], at[127]);    MULADD(at[54], at[126]);    MULADD(at[55], at[125]);    MULADD(at[56], at[124]);    MULADD(at[57], at[123]);    MULADD(at[58], at[122]);    MULADD(at[59], at[121]);    MULADD(at[60], at[120]);    MULADD(at[61], at[119]);    MULADD(at[62], at[118]);    MULADD(at[63], at[117]); 
7735
+   COMBA_STORE(C->dp[116]);
7736
+   /* 117 */
7737
+   COMBA_FORWARD;
7738
+   MULADD(at[54], at[127]);    MULADD(at[55], at[126]);    MULADD(at[56], at[125]);    MULADD(at[57], at[124]);    MULADD(at[58], at[123]);    MULADD(at[59], at[122]);    MULADD(at[60], at[121]);    MULADD(at[61], at[120]);    MULADD(at[62], at[119]);    MULADD(at[63], at[118]); 
7739
+   COMBA_STORE(C->dp[117]);
7740
+   /* 118 */
7741
+   COMBA_FORWARD;
7742
+   MULADD(at[55], at[127]);    MULADD(at[56], at[126]);    MULADD(at[57], at[125]);    MULADD(at[58], at[124]);    MULADD(at[59], at[123]);    MULADD(at[60], at[122]);    MULADD(at[61], at[121]);    MULADD(at[62], at[120]);    MULADD(at[63], at[119]); 
7743
+   COMBA_STORE(C->dp[118]);
7744
+   /* 119 */
7745
+   COMBA_FORWARD;
7746
+   MULADD(at[56], at[127]);    MULADD(at[57], at[126]);    MULADD(at[58], at[125]);    MULADD(at[59], at[124]);    MULADD(at[60], at[123]);    MULADD(at[61], at[122]);    MULADD(at[62], at[121]);    MULADD(at[63], at[120]); 
7747
+   COMBA_STORE(C->dp[119]);
7748
+   /* 120 */
7749
+   COMBA_FORWARD;
7750
+   MULADD(at[57], at[127]);    MULADD(at[58], at[126]);    MULADD(at[59], at[125]);    MULADD(at[60], at[124]);    MULADD(at[61], at[123]);    MULADD(at[62], at[122]);    MULADD(at[63], at[121]); 
7751
+   COMBA_STORE(C->dp[120]);
7752
+   /* 121 */
7753
+   COMBA_FORWARD;
7754
+   MULADD(at[58], at[127]);    MULADD(at[59], at[126]);    MULADD(at[60], at[125]);    MULADD(at[61], at[124]);    MULADD(at[62], at[123]);    MULADD(at[63], at[122]); 
7755
+   COMBA_STORE(C->dp[121]);
7756
+   /* 122 */
7757
+   COMBA_FORWARD;
7758
+   MULADD(at[59], at[127]);    MULADD(at[60], at[126]);    MULADD(at[61], at[125]);    MULADD(at[62], at[124]);    MULADD(at[63], at[123]); 
7759
+   COMBA_STORE(C->dp[122]);
7760
+   /* 123 */
7761
+   COMBA_FORWARD;
7762
+   MULADD(at[60], at[127]);    MULADD(at[61], at[126]);    MULADD(at[62], at[125]);    MULADD(at[63], at[124]); 
7763
+   COMBA_STORE(C->dp[123]);
7764
+   /* 124 */
7765
+   COMBA_FORWARD;
7766
+   MULADD(at[61], at[127]);    MULADD(at[62], at[126]);    MULADD(at[63], at[125]); 
7767
+   COMBA_STORE(C->dp[124]);
7768
+   /* 125 */
7769
+   COMBA_FORWARD;
7770
+   MULADD(at[62], at[127]);    MULADD(at[63], at[126]); 
7771
+   COMBA_STORE(C->dp[125]);
7772
+   /* 126 */
7773
+   COMBA_FORWARD;
7774
+   MULADD(at[63], at[127]); 
7775
+   COMBA_STORE(C->dp[126]);
7776
+   COMBA_STORE2(C->dp[127]);
7777
+   C->used = 128;
7778
+   C->sign = A->sign ^ B->sign;
7779
+   fp_clamp(C);
7780
+   COMBA_FINI;
7781
+}
7782
+#endif
5620 7783
 
5621
-  /* default to not */
5622
-  *result = MP_NO;
7784
+/* End: fp_mul_comba_64.c */
5623 7785
 
5624
-  for (ix = 0; ix < PRIME_SIZE; ix++) {
5625
-    /* what is a mod LBL_prime_tab[ix] */
5626
-    if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) {
5627
-      return err;
5628
-    }
7786
+/* Start: fp_mul_comba_7.c */
7787
+#define TFM_DEFINES
7788
+#include "fp_mul_comba.c"
5629 7789
 
5630
-    /* is the residue zero? */
5631
-    if (res == 0) {
5632
-      *result = MP_YES;
5633
-      return MP_OKAY;
5634
-    }
5635
-  }
7790
+#ifdef TFM_MUL7
7791
+void fp_mul_comba7(fp_int *A, fp_int *B, fp_int *C) /* Fully unrolled 7x7-digit multiply: fills C->dp[0..13] column by column from the digits of A and B. MULADD and the COMBA_* names are macros defined outside this function. */
7792
+{
7793
+   fp_digit c0, c1, c2, at[14]; /* at[0..6] holds A's digits, at[7..13] holds B's digits; c0..c2 are never named below -- presumably the accumulator used inside the MULADD/COMBA_* macros (TODO confirm) */
7794
+
7795
+   memcpy(at, A->dp, 7 * sizeof(fp_digit)); /* at[0..6] = A->dp[0..6]; always 7 digits, independent of A->used */
7796
+   memcpy(at+7, B->dp, 7 * sizeof(fp_digit)); /* at[7..13] = B->dp[0..6]; all products below read these local copies, never A->dp/B->dp, while results are stored into C->dp */
7797
+   COMBA_START;
7798
+
7799
+   COMBA_CLEAR;
7800
+   /* column 0: terms at[i] * at[7+j] with i + j == 0 */
7801
+   MULADD(at[0], at[7]); 
7802
+   COMBA_STORE(C->dp[0]);
7803
+   /* column 1: terms at[i] * at[7+j] with i + j == 1 */
7804
+   COMBA_FORWARD;
7805
+   MULADD(at[0], at[8]);    MULADD(at[1], at[7]); 
7806
+   COMBA_STORE(C->dp[1]);
7807
+   /* column 2: terms at[i] * at[7+j] with i + j == 2 */
7808
+   COMBA_FORWARD;
7809
+   MULADD(at[0], at[9]);    MULADD(at[1], at[8]);    MULADD(at[2], at[7]); 
7810
+   COMBA_STORE(C->dp[2]);
7811
+   /* column 3: terms at[i] * at[7+j] with i + j == 3 */
7812
+   COMBA_FORWARD;
7813
+   MULADD(at[0], at[10]);    MULADD(at[1], at[9]);    MULADD(at[2], at[8]);    MULADD(at[3], at[7]); 
7814
+   COMBA_STORE(C->dp[3]);
7815
+   /* column 4: terms at[i] * at[7+j] with i + j == 4 */
7816
+   COMBA_FORWARD;
7817
+   MULADD(at[0], at[11]);    MULADD(at[1], at[10]);    MULADD(at[2], at[9]);    MULADD(at[3], at[8]);    MULADD(at[4], at[7]); 
7818
+   COMBA_STORE(C->dp[4]);
7819
+   /* column 5: terms at[i] * at[7+j] with i + j == 5 */
7820
+   COMBA_FORWARD;
7821
+   MULADD(at[0], at[12]);    MULADD(at[1], at[11]);    MULADD(at[2], at[10]);    MULADD(at[3], at[9]);    MULADD(at[4], at[8]);    MULADD(at[5], at[7]); 
7822
+   COMBA_STORE(C->dp[5]);
7823
+   /* column 6: terms at[i] * at[7+j] with i + j == 6 (widest column, 7 terms) */
7824
+   COMBA_FORWARD;
7825
+   MULADD(at[0], at[13]);    MULADD(at[1], at[12]);    MULADD(at[2], at[11]);    MULADD(at[3], at[10]);    MULADD(at[4], at[9]);    MULADD(at[5], at[8]);    MULADD(at[6], at[7]); 
7826
+   COMBA_STORE(C->dp[6]);
7827
+   /* column 7: terms at[i] * at[7+j] with i + j == 7 */
7828
+   COMBA_FORWARD;
7829
+   MULADD(at[1], at[13]);    MULADD(at[2], at[12]);    MULADD(at[3], at[11]);    MULADD(at[4], at[10]);    MULADD(at[5], at[9]);    MULADD(at[6], at[8]); 
7830
+   COMBA_STORE(C->dp[7]);
7831
+   /* column 8: terms at[i] * at[7+j] with i + j == 8 */
7832
+   COMBA_FORWARD;
7833
+   MULADD(at[2], at[13]);    MULADD(at[3], at[12]);    MULADD(at[4], at[11]);    MULADD(at[5], at[10]);    MULADD(at[6], at[9]); 
7834
+   COMBA_STORE(C->dp[8]);
7835
+   /* column 9: terms at[i] * at[7+j] with i + j == 9 */
7836
+   COMBA_FORWARD;
7837
+   MULADD(at[3], at[13]);    MULADD(at[4], at[12]);    MULADD(at[5], at[11]);    MULADD(at[6], at[10]); 
7838
+   COMBA_STORE(C->dp[9]);
7839
+   /* column 10: terms at[i] * at[7+j] with i + j == 10 */
7840
+   COMBA_FORWARD;
7841
+   MULADD(at[4], at[13]);    MULADD(at[5], at[12]);    MULADD(at[6], at[11]); 
7842
+   COMBA_STORE(C->dp[10]);
7843
+   /* column 11: terms at[i] * at[7+j] with i + j == 11 */
7844
+   COMBA_FORWARD;
7845
+   MULADD(at[5], at[13]);    MULADD(at[6], at[12]); 
7846
+   COMBA_STORE(C->dp[11]);
7847
+   /* column 12: single term at[6] * at[13] */
7848
+   COMBA_FORWARD;
7849
+   MULADD(at[6], at[13]); 
7850
+   COMBA_STORE(C->dp[12]);
7851
+   COMBA_STORE2(C->dp[13]); /* top digit C->dp[13]; presumably the carry left over from column 12 (macro defined elsewhere -- TODO confirm) */
7852
+   C->used = 14; /* 7 + 7 digits were written above; fp_clamp() is then called on C -- presumably to drop leading zero digits (TODO confirm) */
7853
+   C->sign = A->sign ^ B->sign; /* result sign is the XOR of the operand signs */
7854
+   fp_clamp(C);
7855
+   COMBA_FINI;
7856
+}
7857
+#endif
7858
+
7859
+/* End: fp_mul_comba_7.c */
7860
+
7861
+/* Start: fp_mul_comba_8.c */
7862
+#define TFM_DEFINES
7863
+#include "fp_mul_comba.c"
5636 7864
 
5637
-  return MP_OKAY;
7865
+#ifdef TFM_MUL8
7866
+void fp_mul_comba8(fp_int *A, fp_int *B, fp_int *C) /* Fully unrolled 8x8-digit multiply: fills C->dp[0..15] column by column from the digits of A and B. MULADD and the COMBA_* names are macros defined outside this function. */
7867
+{
7868
+   fp_digit c0, c1, c2, at[16]; /* at[0..7] holds A's digits, at[8..15] holds B's digits; c0..c2 are never named below -- presumably the accumulator used inside the MULADD/COMBA_* macros (TODO confirm) */
7869
+
7870
+   memcpy(at, A->dp, 8 * sizeof(fp_digit)); /* at[0..7] = A->dp[0..7]; always 8 digits, independent of A->used */
7871
+   memcpy(at+8, B->dp, 8 * sizeof(fp_digit)); /* at[8..15] = B->dp[0..7]; all products below read these local copies, never A->dp/B->dp, while results are stored into C->dp */
7872
+   COMBA_START;
7873
+
7874
+   COMBA_CLEAR;
7875
+   /* column 0: terms at[i] * at[8+j] with i + j == 0 */
7876
+   MULADD(at[0], at[8]); 
7877
+   COMBA_STORE(C->dp[0]);
7878
+   /* column 1: terms at[i] * at[8+j] with i + j == 1 */
7879
+   COMBA_FORWARD;
7880
+   MULADD(at[0], at[9]);    MULADD(at[1], at[8]); 
7881
+   COMBA_STORE(C->dp[1]);
7882
+   /* column 2: terms at[i] * at[8+j] with i + j == 2 */
7883
+   COMBA_FORWARD;
7884
+   MULADD(at[0], at[10]);    MULADD(at[1], at[9]);    MULADD(at[2], at[8]); 
7885
+   COMBA_STORE(C->dp[2]);
7886
+   /* column 3: terms at[i] * at[8+j] with i + j == 3 */
7887
+   COMBA_FORWARD;
7888
+   MULADD(at[0], at[11]);    MULADD(at[1], at[10]);    MULADD(at[2], at[9]);    MULADD(at[3], at[8]); 
7889
+   COMBA_STORE(C->dp[3]);
7890
+   /* column 4: terms at[i] * at[8+j] with i + j == 4 */
7891
+   COMBA_FORWARD;
7892
+   MULADD(at[0], at[12]);    MULADD(at[1], at[11]);    MULADD(at[2], at[10]);    MULADD(at[3], at[9]);    MULADD(at[4], at[8]); 
7893
+   COMBA_STORE(C->dp[4]);
7894
+   /* column 5: terms at[i] * at[8+j] with i + j == 5 */
7895
+   COMBA_FORWARD;
7896
+   MULADD(at[0], at[13]);    MULADD(at[1], at[12]);    MULADD(at[2], at[11]);    MULADD(at[3], at[10]);    MULADD(at[4], at[9]);    MULADD(at[5], at[8]); 
7897
+   COMBA_STORE(C->dp[5]);
7898
+   /* column 6: terms at[i] * at[8+j] with i + j == 6 */
7899
+   COMBA_FORWARD;
7900
+   MULADD(at[0], at[14]);    MULADD(at[1], at[13]);    MULADD(at[2], at[12]);    MULADD(at[3], at[11]);    MULADD(at[4], at[10]);    MULADD(at[5], at[9]);    MULADD(at[6], at[8]); 
7901
+   COMBA_STORE(C->dp[6]);
7902
+   /* column 7: terms at[i] * at[8+j] with i + j == 7 (widest column, 8 terms) */
7903
+   COMBA_FORWARD;
7904
+   MULADD(at[0], at[15]);    MULADD(at[1], at[14]);    MULADD(at[2], at[13]);    MULADD(at[3], at[12]);    MULADD(at[4], at[11]);    MULADD(at[5], at[10]);    MULADD(at[6], at[9]);    MULADD(at[7], at[8]); 
7905
+   COMBA_STORE(C->dp[7]);
7906
+   /* column 8: terms at[i] * at[8+j] with i + j == 8 */
7907
+   COMBA_FORWARD;
7908
+   MULADD(at[1], at[15]);    MULADD(at[2], at[14]);    MULADD(at[3], at[13]);    MULADD(at[4], at[12]);    MULADD(at[5], at[11]);    MULADD(at[6], at[10]);    MULADD(at[7], at[9]); 
7909
+   COMBA_STORE(C->dp[8]);
7910
+   /* column 9: terms at[i] * at[8+j] with i + j == 9 */
7911
+   COMBA_FORWARD;
7912
+   MULADD(at[2], at[15]);    MULADD(at[3], at[14]);    MULADD(at[4], at[13]);    MULADD(at[5], at[12]);    MULADD(at[6], at[11]);    MULADD(at[7], at[10]); 
7913
+   COMBA_STORE(C->dp[9]);
7914
+   /* column 10: terms at[i] * at[8+j] with i + j == 10 */
7915
+   COMBA_FORWARD;
7916
+   MULADD(at[3], at[15]);    MULADD(at[4], at[14]);    MULADD(at[5], at[13]);    MULADD(at[6], at[12]);    MULADD(at[7], at[11]); 
7917
+   COMBA_STORE(C->dp[10]);
7918
+   /* column 11: terms at[i] * at[8+j] with i + j == 11 */
7919
+   COMBA_FORWARD;
7920
+   MULADD(at[4], at[15]);    MULADD(at[5], at[14]);    MULADD(at[6], at[13]);    MULADD(at[7], at[12]); 
7921
+   COMBA_STORE(C->dp[11]);
7922
+   /* column 12: terms at[i] * at[8+j] with i + j == 12 */
7923
+   COMBA_FORWARD;
7924
+   MULADD(at[5], at[15]);    MULADD(at[6], at[14]);    MULADD(at[7], at[13]); 
7925
+   COMBA_STORE(C->dp[12]);
7926
+   /* column 13: terms at[i] * at[8+j] with i + j == 13 */
7927
+   COMBA_FORWARD;
7928
+   MULADD(at[6], at[15]);    MULADD(at[7], at[14]); 
7929
+   COMBA_STORE(C->dp[13]);
7930
+   /* column 14: single term at[7] * at[15] */
7931
+   COMBA_FORWARD;
7932
+   MULADD(at[7], at[15]); 
7933
+   COMBA_STORE(C->dp[14]);
7934
+   COMBA_STORE2(C->dp[15]); /* top digit C->dp[15]; presumably the carry left over from column 14 (macro defined elsewhere -- TODO confirm) */
7935
+   C->used = 16; /* 8 + 8 digits were written above; fp_clamp() is then called on C -- presumably to drop leading zero digits (TODO confirm) */
7936
+   C->sign = A->sign ^ B->sign; /* result sign is the XOR of the operand signs */
7937
+   fp_clamp(C);
7938
+   COMBA_FINI;
5638 7939
 }
5639 7940
 #endif
5640 7941
 
5641
-/* $Source: /cvs/libtom/libtommath/bn_mp_prime_is_divisible.c,v $ */
5642
-/* $Revision: 1.3 $ */
5643
-/* $Date: 2006/03/31 14:18:44 $ */
7942
+/* End: fp_mul_comba_8.c */
5644 7943
 
5645
-/* End: bn_mp_prime_is_divisible.c */
7944
+/* Start: fp_mul_comba_9.c */
7945
+#define TFM_DEFINES
7946
+#include "fp_mul_comba.c"
5646 7947
 
5647
-/* Start: bn_mp_prime_is_prime.c */
5648
-#include <bignum.h>
5649
-#ifdef BN_MP_PRIME_IS_PRIME_C
5650
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
5651
- *
5652
- * LibTomMath is a library that provides multiple-precision
5653
- * integer arithmetic as well as number theoretic functionality.
5654
- *
5655
- * The library was designed directly after the MPI library by
5656
- * Michael Fromberger but has been written from scratch with
5657
- * additional optimizations in place.
5658
- *
5659
- * The library is free for all purposes without any express
5660
- * guarantee it works.
5661
- *
5662
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
5663
- */
7948
+#ifdef TFM_MUL9
7949
+void fp_mul_comba9(fp_int *A, fp_int *B, fp_int *C) /* Fully unrolled 9x9-digit multiply: fills C->dp[0..17] column by column from the digits of A and B. MULADD and the COMBA_* names are macros defined outside this function. */
7950
+{
7951
+   fp_digit c0, c1, c2, at[18]; /* at[0..8] holds A's digits, at[9..17] holds B's digits; c0..c2 are never named below -- presumably the accumulator used inside the MULADD/COMBA_* macros (TODO confirm) */
7952
+
7953
+   memcpy(at, A->dp, 9 * sizeof(fp_digit)); /* at[0..8] = A->dp[0..8]; always 9 digits, independent of A->used */
7954
+   memcpy(at+9, B->dp, 9 * sizeof(fp_digit)); /* at[9..17] = B->dp[0..8]; all products below read these local copies, never A->dp/B->dp, while results are stored into C->dp */
7955
+   COMBA_START;
7956
+
7957
+   COMBA_CLEAR;
7958
+   /* column 0: terms at[i] * at[9+j] with i + j == 0 */
7959
+   MULADD(at[0], at[9]); 
7960
+   COMBA_STORE(C->dp[0]);
7961
+   /* column 1: terms at[i] * at[9+j] with i + j == 1 */
7962
+   COMBA_FORWARD;
7963
+   MULADD(at[0], at[10]);    MULADD(at[1], at[9]); 
7964
+   COMBA_STORE(C->dp[1]);
7965
+   /* column 2: terms at[i] * at[9+j] with i + j == 2 */
7966
+   COMBA_FORWARD;
7967
+   MULADD(at[0], at[11]);    MULADD(at[1], at[10]);    MULADD(at[2], at[9]); 
7968
+   COMBA_STORE(C->dp[2]);
7969
+   /* column 3: terms at[i] * at[9+j] with i + j == 3 */
7970
+   COMBA_FORWARD;
7971
+   MULADD(at[0], at[12]);    MULADD(at[1], at[11]);    MULADD(at[2], at[10]);    MULADD(at[3], at[9]); 
7972
+   COMBA_STORE(C->dp[3]);
7973
+   /* column 4: terms at[i] * at[9+j] with i + j == 4 */
7974
+   COMBA_FORWARD;
7975
+   MULADD(at[0], at[13]);    MULADD(at[1], at[12]);    MULADD(at[2], at[11]);    MULADD(at[3], at[10]);    MULADD(at[4], at[9]); 
7976
+   COMBA_STORE(C->dp[4]);
7977
+   /* column 5: terms at[i] * at[9+j] with i + j == 5 */
7978
+   COMBA_FORWARD;
7979
+   MULADD(at[0], at[14]);    MULADD(at[1], at[13]);    MULADD(at[2], at[12]);    MULADD(at[3], at[11]);    MULADD(at[4], at[10]);    MULADD(at[5], at[9]); 
7980
+   COMBA_STORE(C->dp[5]);
7981
+   /* column 6: terms at[i] * at[9+j] with i + j == 6 */
7982
+   COMBA_FORWARD;
7983
+   MULADD(at[0], at[15]);    MULADD(at[1], at[14]);    MULADD(at[2], at[13]);    MULADD(at[3], at[12]);    MULADD(at[4], at[11]);    MULADD(at[5], at[10]);    MULADD(at[6], at[9]); 
7984
+   COMBA_STORE(C->dp[6]);
7985
+   /* column 7: terms at[i] * at[9+j] with i + j == 7 */
7986
+   COMBA_FORWARD;
7987
+   MULADD(at[0], at[16]);    MULADD(at[1], at[15]);    MULADD(at[2], at[14]);    MULADD(at[3], at[13]);    MULADD(at[4], at[12]);    MULADD(at[5], at[11]);    MULADD(at[6], at[10]);    MULADD(at[7], at[9]); 
7988
+   COMBA_STORE(C->dp[7]);
7989
+   /* column 8: terms at[i] * at[9+j] with i + j == 8 (widest column, 9 terms) */
7990
+   COMBA_FORWARD;
7991
+   MULADD(at[0], at[17]);    MULADD(at[1], at[16]);    MULADD(at[2], at[15]);    MULADD(at[3], at[14]);    MULADD(at[4], at[13]);    MULADD(at[5], at[12]);    MULADD(at[6], at[11]);    MULADD(at[7], at[10]);    MULADD(at[8], at[9]); 
7992
+   COMBA_STORE(C->dp[8]);
7993
+   /* column 9: terms at[i] * at[9+j] with i + j == 9 */
7994
+   COMBA_FORWARD;
7995
+   MULADD(at[1], at[17]);    MULADD(at[2], at[16]);    MULADD(at[3], at[15]);    MULADD(at[4], at[14]);    MULADD(at[5], at[13]);    MULADD(at[6], at[12]);    MULADD(at[7], at[11]);    MULADD(at[8], at[10]); 
7996
+   COMBA_STORE(C->dp[9]);
7997
+   /* column 10: terms at[i] * at[9+j] with i + j == 10 */
7998
+   COMBA_FORWARD;
7999
+   MULADD(at[2], at[17]);    MULADD(at[3], at[16]);    MULADD(at[4], at[15]);    MULADD(at[5], at[14]);    MULADD(at[6], at[13]);    MULADD(at[7], at[12]);    MULADD(at[8], at[11]); 
8000
+   COMBA_STORE(C->dp[10]);
8001
+   /* column 11: terms at[i] * at[9+j] with i + j == 11 */
8002
+   COMBA_FORWARD;
8003
+   MULADD(at[3], at[17]);    MULADD(at[4], at[16]);    MULADD(at[5], at[15]);    MULADD(at[6], at[14]);    MULADD(at[7], at[13]);    MULADD(at[8], at[12]); 
8004
+   COMBA_STORE(C->dp[11]);
8005
+   /* column 12: terms at[i] * at[9+j] with i + j == 12 */
8006
+   COMBA_FORWARD;
8007
+   MULADD(at[4], at[17]);    MULADD(at[5], at[16]);    MULADD(at[6], at[15]);    MULADD(at[7], at[14]);    MULADD(at[8], at[13]); 
8008
+   COMBA_STORE(C->dp[12]);
8009
+   /* column 13: terms at[i] * at[9+j] with i + j == 13 */
8010
+   COMBA_FORWARD;
8011
+   MULADD(at[5], at[17]);    MULADD(at[6], at[16]);    MULADD(at[7], at[15]);    MULADD(at[8], at[14]); 
8012
+   COMBA_STORE(C->dp[13]);
8013
+   /* column 14: terms at[i] * at[9+j] with i + j == 14 */
8014
+   COMBA_FORWARD;
8015
+   MULADD(at[6], at[17]);    MULADD(at[7], at[16]);    MULADD(at[8], at[15]); 
8016
+   COMBA_STORE(C->dp[14]);
8017
+   /* column 15: terms at[i] * at[9+j] with i + j == 15 */
8018
+   COMBA_FORWARD;
8019
+   MULADD(at[7], at[17]);    MULADD(at[8], at[16]); 
8020
+   COMBA_STORE(C->dp[15]);
8021
+   /* column 16: single term at[8] * at[17] */
8022
+   COMBA_FORWARD;
8023
+   MULADD(at[8], at[17]); 
8024
+   COMBA_STORE(C->dp[16]);
8025
+   COMBA_STORE2(C->dp[17]); /* top digit C->dp[17]; presumably the carry left over from column 16 (macro defined elsewhere -- TODO confirm) */
8026
+   C->used = 18; /* 9 + 9 digits were written above; fp_clamp() is then called on C -- presumably to drop leading zero digits (TODO confirm) */
8027
+   C->sign = A->sign ^ B->sign; /* result sign is the XOR of the operand signs */
8028
+   fp_clamp(C);
8029
+   COMBA_FINI;
8030
+}
8031
+#endif
5664 8032
 
5665
-/* performs a variable number of rounds of Miller-Rabin
5666
- *
5667
- * Probability of error after t rounds is no more than
8033
+/* End: fp_mul_comba_9.c */
5668 8034
 
5669
- *
5670
- * Sets result to 1 if probably prime, 0 otherwise
5671
- */
5672
-int mp_prime_is_prime (mp_int * a, int t, int *result)
8035
+/* Start: fp_mul_comba_small_set.c */
8036
+#define TFM_DEFINES
8037
+#include "fp_mul_comba.c"
8038
+
8039
+#if defined(TFM_SMALL_SET)
8040
+void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C)
5673 8041
 {
5674
-  mp_int  b;
5675
-  int     ix, err, res;
8042
+   fp_digit c0, c1, c2, at[32];
8043
+   switch (MAX(A->used, B->used)) { 
8044
+
8045
+   case 1:
8046
+      memcpy(at, A->dp, 1 * sizeof(fp_digit));
8047
+      memcpy(at+1, B->dp, 1 * sizeof(fp_digit));
8048
+      COMBA_START;
8049
+
8050
+      COMBA_CLEAR;
8051
+      /* 0 */
8052
+      MULADD(at[0], at[1]); 
8053
+      COMBA_STORE(C->dp[0]);
8054
+      COMBA_STORE2(C->dp[1]);
8055
+      C->used = 2;
8056
+      C->sign = A->sign ^ B->sign;
8057
+      fp_clamp(C);
8058
+      COMBA_FINI;
8059
+      break;
5676 8060
 
5677
-  /* default to no */
5678
-  *result = MP_NO;
8061
+   case 2:
8062
+      memcpy(at, A->dp, 2 * sizeof(fp_digit));
8063
+      memcpy(at+2, B->dp, 2 * sizeof(fp_digit));
8064
+      COMBA_START;
8065
+
8066
+      COMBA_CLEAR;
8067
+      /* 0 */
8068
+      MULADD(at[0], at[2]); 
8069
+      COMBA_STORE(C->dp[0]);
8070
+      /* 1 */
8071
+      COMBA_FORWARD;
8072
+      MULADD(at[0], at[3]);       MULADD(at[1], at[2]); 
8073
+      COMBA_STORE(C->dp[1]);
8074
+      /* 2 */
8075
+      COMBA_FORWARD;
8076
+      MULADD(at[1], at[3]); 
8077
+      COMBA_STORE(C->dp[2]);
8078
+      COMBA_STORE2(C->dp[3]);
8079
+      C->used = 4;
8080
+      C->sign = A->sign ^ B->sign;
8081
+      fp_clamp(C);
8082
+      COMBA_FINI;
8083
+      break;
5679 8084
 
5680
-  /* valid value of t? */
5681
-  if (t <= 0 || t > PRIME_SIZE) {
5682
-    return MP_VAL;
5683
-  }
8085
+   case 3:
8086
+      memcpy(at, A->dp, 3 * sizeof(fp_digit));
8087
+      memcpy(at+3, B->dp, 3 * sizeof(fp_digit));
8088
+      COMBA_START;
8089
+
8090
+      COMBA_CLEAR;
8091
+      /* 0 */
8092
+      MULADD(at[0], at[3]); 
8093
+      COMBA_STORE(C->dp[0]);
8094
+      /* 1 */
8095
+      COMBA_FORWARD;
8096
+      MULADD(at[0], at[4]);       MULADD(at[1], at[3]); 
8097
+      COMBA_STORE(C->dp[1]);
8098
+      /* 2 */
8099
+      COMBA_FORWARD;
8100
+      MULADD(at[0], at[5]);       MULADD(at[1], at[4]);       MULADD(at[2], at[3]); 
8101
+      COMBA_STORE(C->dp[2]);
8102
+      /* 3 */
8103
+      COMBA_FORWARD;
8104
+      MULADD(at[1], at[5]);       MULADD(at[2], at[4]); 
8105
+      COMBA_STORE(C->dp[3]);
8106
+      /* 4 */
8107
+      COMBA_FORWARD;
8108
+      MULADD(at[2], at[5]); 
8109
+      COMBA_STORE(C->dp[4]);
8110
+      COMBA_STORE2(C->dp[5]);
8111
+      C->used = 6;
8112
+      C->sign = A->sign ^ B->sign;
8113
+      fp_clamp(C);
8114
+      COMBA_FINI;
8115
+      break;
5684 8116
 
5685
-  /* is the input equal to one of the primes in the table? */
5686
-  for (ix = 0; ix < PRIME_SIZE; ix++) {
5687
-      if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
5688
-         *result = 1;
5689
-         return MP_OKAY;
5690
-      }
5691
-  }
8117
+   case 4:
8118
+      memcpy(at, A->dp, 4 * sizeof(fp_digit));
8119
+      memcpy(at+4, B->dp, 4 * sizeof(fp_digit));
8120
+      COMBA_START;
8121
+
8122
+      COMBA_CLEAR;
8123
+      /* 0 */
8124
+      MULADD(at[0], at[4]); 
8125
+      COMBA_STORE(C->dp[0]);
8126
+      /* 1 */
8127
+      COMBA_FORWARD;
8128
+      MULADD(at[0], at[5]);       MULADD(at[1], at[4]); 
8129
+      COMBA_STORE(C->dp[1]);
8130
+      /* 2 */
8131
+      COMBA_FORWARD;
8132
+      MULADD(at[0], at[6]);       MULADD(at[1], at[5]);       MULADD(at[2], at[4]); 
8133
+      COMBA_STORE(C->dp[2]);
8134
+      /* 3 */
8135
+      COMBA_FORWARD;
8136
+      MULADD(at[0], at[7]);       MULADD(at[1], at[6]);       MULADD(at[2], at[5]);       MULADD(at[3], at[4]); 
8137
+      COMBA_STORE(C->dp[3]);
8138
+      /* 4 */
8139
+      COMBA_FORWARD;
8140
+      MULADD(at[1], at[7]);       MULADD(at[2], at[6]);       MULADD(at[3], at[5]); 
8141
+      COMBA_STORE(C->dp[4]);
8142
+      /* 5 */
8143
+      COMBA_FORWARD;
8144
+      MULADD(at[2], at[7]);       MULADD(at[3], at[6]); 
8145
+      COMBA_STORE(C->dp[5]);
8146
+      /* 6 */
8147
+      COMBA_FORWARD;
8148
+      MULADD(at[3], at[7]); 
8149
+      COMBA_STORE(C->dp[6]);
8150
+      COMBA_STORE2(C->dp[7]);
8151
+      C->used = 8;
8152
+      C->sign = A->sign ^ B->sign;
8153
+      fp_clamp(C);
8154
+      COMBA_FINI;
8155
+      break;
5692 8156
 
5693
-  /* first perform trial division */
5694
-  if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
5695
-    return err;
5696
-  }
8157
+   case 5:
8158
+      memcpy(at, A->dp, 5 * sizeof(fp_digit));
8159
+      memcpy(at+5, B->dp, 5 * sizeof(fp_digit));
8160
+      COMBA_START;
8161
+
8162
+      COMBA_CLEAR;
8163
+      /* 0 */
8164
+      MULADD(at[0], at[5]); 
8165
+      COMBA_STORE(C->dp[0]);
8166
+      /* 1 */
8167
+      COMBA_FORWARD;
8168
+      MULADD(at[0], at[6]);       MULADD(at[1], at[5]); 
8169
+      COMBA_STORE(C->dp[1]);
8170
+      /* 2 */
8171
+      COMBA_FORWARD;
8172
+      MULADD(at[0], at[7]);       MULADD(at[1], at[6]);       MULADD(at[2], at[5]); 
8173
+      COMBA_STORE(C->dp[2]);
8174
+      /* 3 */
8175
+      COMBA_FORWARD;
8176
+      MULADD(at[0], at[8]);       MULADD(at[1], at[7]);       MULADD(at[2], at[6]);       MULADD(at[3], at[5]); 
8177
+      COMBA_STORE(C->dp[3]);
8178
+      /* 4 */
8179
+      COMBA_FORWARD;
8180
+      MULADD(at[0], at[9]);       MULADD(at[1], at[8]);       MULADD(at[2], at[7]);       MULADD(at[3], at[6]);       MULADD(at[4], at[5]); 
8181
+      COMBA_STORE(C->dp[4]);
8182
+      /* 5 */
8183
+      COMBA_FORWARD;
8184
+      MULADD(at[1], at[9]);       MULADD(at[2], at[8]);       MULADD(at[3], at[7]);       MULADD(at[4], at[6]); 
8185
+      COMBA_STORE(C->dp[5]);
8186
+      /* 6 */
8187
+      COMBA_FORWARD;
8188
+      MULADD(at[2], at[9]);       MULADD(at[3], at[8]);       MULADD(at[4], at[7]); 
8189
+      COMBA_STORE(C->dp[6]);
8190
+      /* 7 */
8191
+      COMBA_FORWARD;
8192
+      MULADD(at[3], at[9]);       MULADD(at[4], at[8]); 
8193
+      COMBA_STORE(C->dp[7]);
8194
+      /* 8 */
8195
+      COMBA_FORWARD;
8196
+      MULADD(at[4], at[9]); 
8197
+      COMBA_STORE(C->dp[8]);
8198
+      COMBA_STORE2(C->dp[9]);
8199
+      C->used = 10;
8200
+      C->sign = A->sign ^ B->sign;
8201
+      fp_clamp(C);
8202
+      COMBA_FINI;
8203
+      break;
5697 8204
 
5698
-  /* return if it was trivially divisible */
5699
-  if (res == MP_YES) {
5700
-    return MP_OKAY;
5701
-  }
8205
+   case 6:
8206
+      memcpy(at, A->dp, 6 * sizeof(fp_digit));
8207
+      memcpy(at+6, B->dp, 6 * sizeof(fp_digit));
8208
+      COMBA_START;
8209
+
8210
+      COMBA_CLEAR;
8211
+      /* 0 */
8212
+      MULADD(at[0], at[6]); 
8213
+      COMBA_STORE(C->dp[0]);
8214
+      /* 1 */
8215
+      COMBA_FORWARD;
8216
+      MULADD(at[0], at[7]);       MULADD(at[1], at[6]); 
8217
+      COMBA_STORE(C->dp[1]);
8218
+      /* 2 */
8219
+      COMBA_FORWARD;
8220
+      MULADD(at[0], at[8]);       MULADD(at[1], at[7]);       MULADD(at[2], at[6]); 
8221
+      COMBA_STORE(C->dp[2]);
8222
+      /* 3 */
8223
+      COMBA_FORWARD;
8224
+      MULADD(at[0], at[9]);       MULADD(at[1], at[8]);       MULADD(at[2], at[7]);       MULADD(at[3], at[6]); 
8225
+      COMBA_STORE(C->dp[3]);
8226
+      /* 4 */
8227
+      COMBA_FORWARD;
8228
+      MULADD(at[0], at[10]);       MULADD(at[1], at[9]);       MULADD(at[2], at[8]);       MULADD(at[3], at[7]);       MULADD(at[4], at[6]); 
8229
+      COMBA_STORE(C->dp[4]);
8230
+      /* 5 */
8231
+      COMBA_FORWARD;
8232
+      MULADD(at[0], at[11]);       MULADD(at[1], at[10]);       MULADD(at[2], at[9]);       MULADD(at[3], at[8]);       MULADD(at[4], at[7]);       MULADD(at[5], at[6]); 
8233
+      COMBA_STORE(C->dp[5]);
8234
+      /* 6 */
8235
+      COMBA_FORWARD;
8236
+      MULADD(at[1], at[11]);       MULADD(at[2], at[10]);       MULADD(at[3], at[9]);       MULADD(at[4], at[8]);       MULADD(at[5], at[7]); 
8237
+      COMBA_STORE(C->dp[6]);
8238
+      /* 7 */
8239
+      COMBA_FORWARD;
8240
+      MULADD(at[2], at[11]);       MULADD(at[3], at[10]);       MULADD(at[4], at[9]);       MULADD(at[5], at[8]); 
8241
+      COMBA_STORE(C->dp[7]);
8242
+      /* 8 */
8243
+      COMBA_FORWARD;
8244
+      MULADD(at[3], at[11]);       MULADD(at[4], at[10]);       MULADD(at[5], at[9]); 
8245
+      COMBA_STORE(C->dp[8]);
8246
+      /* 9 */
8247
+      COMBA_FORWARD;
8248
+      MULADD(at[4], at[11]);       MULADD(at[5], at[10]); 
8249
+      COMBA_STORE(C->dp[9]);
8250
+      /* 10 */
8251
+      COMBA_FORWARD;
8252
+      MULADD(at[5], at[11]); 
8253
+      COMBA_STORE(C->dp[10]);
8254
+      COMBA_STORE2(C->dp[11]);
8255
+      C->used = 12;
8256
+      C->sign = A->sign ^ B->sign;
8257
+      fp_clamp(C);
8258
+      COMBA_FINI;
8259
+      break;
5702 8260
 
5703
-  /* now perform the miller-rabin rounds */
5704
-  if ((err = mp_init (&b)) != MP_OKAY) {
5705
-    return err;
5706
-  }
8261
+   case 7:
8262
+      memcpy(at, A->dp, 7 * sizeof(fp_digit));
8263
+      memcpy(at+7, B->dp, 7 * sizeof(fp_digit));
8264
+      COMBA_START;
8265
+
8266
+      COMBA_CLEAR;
8267
+      /* 0 */
8268
+      MULADD(at[0], at[7]); 
8269
+      COMBA_STORE(C->dp[0]);
8270
+      /* 1 */
8271
+      COMBA_FORWARD;
8272
+      MULADD(at[0], at[8]);       MULADD(at[1], at[7]); 
8273
+      COMBA_STORE(C->dp[1]);
8274
+      /* 2 */
8275
+      COMBA_FORWARD;
8276
+      MULADD(at[0], at[9]);       MULADD(at[1], at[8]);       MULADD(at[2], at[7]); 
8277
+      COMBA_STORE(C->dp[2]);
8278
+      /* 3 */
8279
+      COMBA_FORWARD;
8280
+      MULADD(at[0], at[10]);       MULADD(at[1], at[9]);       MULADD(at[2], at[8]);       MULADD(at[3], at[7]); 
8281
+      COMBA_STORE(C->dp[3]);
8282
+      /* 4 */
8283
+      COMBA_FORWARD;
8284
+      MULADD(at[0], at[11]);       MULADD(at[1], at[10]);       MULADD(at[2], at[9]);       MULADD(at[3], at[8]);       MULADD(at[4], at[7]); 
8285
+      COMBA_STORE(C->dp[4]);
8286
+      /* 5 */
8287
+      COMBA_FORWARD;
8288
+      MULADD(at[0], at[12]);       MULADD(at[1], at[11]);       MULADD(at[2], at[10]);       MULADD(at[3], at[9]);       MULADD(at[4], at[8]);       MULADD(at[5], at[7]); 
8289
+      COMBA_STORE(C->dp[5]);
8290
+      /* 6 */
8291
+      COMBA_FORWARD;
8292
+      MULADD(at[0], at[13]);       MULADD(at[1], at[12]);       MULADD(at[2], at[11]);       MULADD(at[3], at[10]);       MULADD(at[4], at[9]);       MULADD(at[5], at[8]);       MULADD(at[6], at[7]); 
8293
+      COMBA_STORE(C->dp[6]);
8294
+      /* 7 */
8295
+      COMBA_FORWARD;
8296
+      MULADD(at[1], at[13]);       MULADD(at[2], at[12]);       MULADD(at[3], at[11]);       MULADD(at[4], at[10]);       MULADD(at[5], at[9]);       MULADD(at[6], at[8]); 
8297
+      COMBA_STORE(C->dp[7]);
8298
+      /* 8 */
8299
+      COMBA_FORWARD;
8300
+      MULADD(at[2], at[13]);       MULADD(at[3], at[12]);       MULADD(at[4], at[11]);       MULADD(at[5], at[10]);       MULADD(at[6], at[9]); 
8301
+      COMBA_STORE(C->dp[8]);
8302
+      /* 9 */
8303
+      COMBA_FORWARD;
8304
+      MULADD(at[3], at[13]);       MULADD(at[4], at[12]);       MULADD(at[5], at[11]);       MULADD(at[6], at[10]); 
8305
+      COMBA_STORE(C->dp[9]);
8306
+      /* 10 */
8307
+      COMBA_FORWARD;
8308
+      MULADD(at[4], at[13]);       MULADD(at[5], at[12]);       MULADD(at[6], at[11]); 
8309
+      COMBA_STORE(C->dp[10]);
8310
+      /* 11 */
8311
+      COMBA_FORWARD;
8312
+      MULADD(at[5], at[13]);       MULADD(at[6], at[12]); 
8313
+      COMBA_STORE(C->dp[11]);
8314
+      /* 12 */
8315
+      COMBA_FORWARD;
8316
+      MULADD(at[6], at[13]); 
8317
+      COMBA_STORE(C->dp[12]);
8318
+      COMBA_STORE2(C->dp[13]);
8319
+      C->used = 14;
8320
+      C->sign = A->sign ^ B->sign;
8321
+      fp_clamp(C);
8322
+      COMBA_FINI;
8323
+      break;
5707 8324
 
5708
-  for (ix = 0; ix < t; ix++) {
5709
-    /* set the prime */
5710
-    mp_set (&b, ltm_prime_tab[ix]);
8325
+   case 8:
8326
+      memcpy(at, A->dp, 8 * sizeof(fp_digit));
8327
+      memcpy(at+8, B->dp, 8 * sizeof(fp_digit));
8328
+      COMBA_START;
8329
+
8330
+      COMBA_CLEAR;
8331
+      /* 0 */
8332
+      MULADD(at[0], at[8]); 
8333
+      COMBA_STORE(C->dp[0]);
8334
+      /* 1 */
8335
+      COMBA_FORWARD;
8336
+      MULADD(at[0], at[9]);       MULADD(at[1], at[8]); 
8337
+      COMBA_STORE(C->dp[1]);
8338
+      /* 2 */
8339
+      COMBA_FORWARD;
8340
+      MULADD(at[0], at[10]);       MULADD(at[1], at[9]);       MULADD(at[2], at[8]); 
8341
+      COMBA_STORE(C->dp[2]);
8342
+      /* 3 */
8343
+      COMBA_FORWARD;
8344
+      MULADD(at[0], at[11]);       MULADD(at[1], at[10]);       MULADD(at[2], at[9]);       MULADD(at[3], at[8]); 
8345
+      COMBA_STORE(C->dp[3]);
8346
+      /* 4 */
8347
+      COMBA_FORWARD;
8348
+      MULADD(at[0], at[12]);       MULADD(at[1], at[11]);       MULADD(at[2], at[10]);       MULADD(at[3], at[9]);       MULADD(at[4], at[8]); 
8349
+      COMBA_STORE(C->dp[4]);
8350
+      /* 5 */
8351
+      COMBA_FORWARD;
8352
+      MULADD(at[0], at[13]);       MULADD(at[1], at[12]);       MULADD(at[2], at[11]);       MULADD(at[3], at[10]);       MULADD(at[4], at[9]);       MULADD(at[5], at[8]); 
8353
+      COMBA_STORE(C->dp[5]);
8354
+      /* 6 */
8355
+      COMBA_FORWARD;
8356
+      MULADD(at[0], at[14]);       MULADD(at[1], at[13]);       MULADD(at[2], at[12]);       MULADD(at[3], at[11]);       MULADD(at[4], at[10]);       MULADD(at[5], at[9]);       MULADD(at[6], at[8]); 
8357
+      COMBA_STORE(C->dp[6]);
8358
+      /* 7 */
8359
+      COMBA_FORWARD;
8360
+      MULADD(at[0], at[15]);       MULADD(at[1], at[14]);       MULADD(at[2], at[13]);       MULADD(at[3], at[12]);       MULADD(at[4], at[11]);       MULADD(at[5], at[10]);       MULADD(at[6], at[9]);       MULADD(at[7], at[8]); 
8361
+      COMBA_STORE(C->dp[7]);
8362
+      /* 8 */
8363
+      COMBA_FORWARD;
8364
+      MULADD(at[1], at[15]);       MULADD(at[2], at[14]);       MULADD(at[3], at[13]);       MULADD(at[4], at[12]);       MULADD(at[5], at[11]);       MULADD(at[6], at[10]);       MULADD(at[7], at[9]); 
8365
+      COMBA_STORE(C->dp[8]);
8366
+      /* 9 */
8367
+      COMBA_FORWARD;
8368
+      MULADD(at[2], at[15]);       MULADD(at[3], at[14]);       MULADD(at[4], at[13]);       MULADD(at[5], at[12]);       MULADD(at[6], at[11]);       MULADD(at[7], at[10]); 
8369
+      COMBA_STORE(C->dp[9]);
8370
+      /* 10 */
8371
+      COMBA_FORWARD;
8372
+      MULADD(at[3], at[15]);       MULADD(at[4], at[14]);       MULADD(at[5], at[13]);       MULADD(at[6], at[12]);       MULADD(at[7], at[11]); 
8373
+      COMBA_STORE(C->dp[10]);
8374
+      /* 11 */
8375
+      COMBA_FORWARD;
8376
+      MULADD(at[4], at[15]);       MULADD(at[5], at[14]);       MULADD(at[6], at[13]);       MULADD(at[7], at[12]); 
8377
+      COMBA_STORE(C->dp[11]);
8378
+      /* 12 */
8379
+      COMBA_FORWARD;
8380
+      MULADD(at[5], at[15]);       MULADD(at[6], at[14]);       MULADD(at[7], at[13]); 
8381
+      COMBA_STORE(C->dp[12]);
8382
+      /* 13 */
8383
+      COMBA_FORWARD;
8384
+      MULADD(at[6], at[15]);       MULADD(at[7], at[14]); 
8385
+      COMBA_STORE(C->dp[13]);
8386
+      /* 14 */
8387
+      COMBA_FORWARD;
8388
+      MULADD(at[7], at[15]); 
8389
+      COMBA_STORE(C->dp[14]);
8390
+      COMBA_STORE2(C->dp[15]);
8391
+      C->used = 16;
8392
+      C->sign = A->sign ^ B->sign;
8393
+      fp_clamp(C);
8394
+      COMBA_FINI;
8395
+      break;
5711 8396
 
5712
-    if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
5713
-      goto LBL_B;
5714
-    }
8397
+   case 9:
8398
+      memcpy(at, A->dp, 9 * sizeof(fp_digit));
8399
+      memcpy(at+9, B->dp, 9 * sizeof(fp_digit));
8400
+      COMBA_START;
8401
+
8402
+      COMBA_CLEAR;
8403
+      /* 0 */
8404
+      MULADD(at[0], at[9]); 
8405
+      COMBA_STORE(C->dp[0]);
8406
+      /* 1 */
8407
+      COMBA_FORWARD;
8408
+      MULADD(at[0], at[10]);       MULADD(at[1], at[9]); 
8409
+      COMBA_STORE(C->dp[1]);
8410
+      /* 2 */
8411
+      COMBA_FORWARD;
8412
+      MULADD(at[0], at[11]);       MULADD(at[1], at[10]);       MULADD(at[2], at[9]); 
8413
+      COMBA_STORE(C->dp[2]);
8414
+      /* 3 */
8415
+      COMBA_FORWARD;
8416
+      MULADD(at[0], at[12]);       MULADD(at[1], at[11]);       MULADD(at[2], at[10]);       MULADD(at[3], at[9]); 
8417
+      COMBA_STORE(C->dp[3]);
8418
+      /* 4 */
8419
+      COMBA_FORWARD;
8420
+      MULADD(at[0], at[13]);       MULADD(at[1], at[12]);       MULADD(at[2], at[11]);       MULADD(at[3], at[10]);       MULADD(at[4], at[9]); 
8421
+      COMBA_STORE(C->dp[4]);
8422
+      /* 5 */
8423
+      COMBA_FORWARD;
8424
+      MULADD(at[0], at[14]);       MULADD(at[1], at[13]);       MULADD(at[2], at[12]);       MULADD(at[3], at[11]);       MULADD(at[4], at[10]);       MULADD(at[5], at[9]); 
8425
+      COMBA_STORE(C->dp[5]);
8426
+      /* 6 */
8427
+      COMBA_FORWARD;
8428
+      MULADD(at[0], at[15]);       MULADD(at[1], at[14]);       MULADD(at[2], at[13]);       MULADD(at[3], at[12]);       MULADD(at[4], at[11]);       MULADD(at[5], at[10]);       MULADD(at[6], at[9]); 
8429
+      COMBA_STORE(C->dp[6]);
8430
+      /* 7 */
8431
+      COMBA_FORWARD;
8432
+      MULADD(at[0], at[16]);       MULADD(at[1], at[15]);       MULADD(at[2], at[14]);       MULADD(at[3], at[13]);       MULADD(at[4], at[12]);       MULADD(at[5], at[11]);       MULADD(at[6], at[10]);       MULADD(at[7], at[9]); 
8433
+      COMBA_STORE(C->dp[7]);
8434
+      /* 8 */
8435
+      COMBA_FORWARD;
8436
+      MULADD(at[0], at[17]);       MULADD(at[1], at[16]);       MULADD(at[2], at[15]);       MULADD(at[3], at[14]);       MULADD(at[4], at[13]);       MULADD(at[5], at[12]);       MULADD(at[6], at[11]);       MULADD(at[7], at[10]);       MULADD(at[8], at[9]); 
8437
+      COMBA_STORE(C->dp[8]);
8438
+      /* 9 */
8439
+      COMBA_FORWARD;
8440
+      MULADD(at[1], at[17]);       MULADD(at[2], at[16]);       MULADD(at[3], at[15]);       MULADD(at[4], at[14]);       MULADD(at[5], at[13]);       MULADD(at[6], at[12]);       MULADD(at[7], at[11]);       MULADD(at[8], at[10]); 
8441
+      COMBA_STORE(C->dp[9]);
8442
+      /* 10 */
8443
+      COMBA_FORWARD;
8444
+      MULADD(at[2], at[17]);       MULADD(at[3], at[16]);       MULADD(at[4], at[15]);       MULADD(at[5], at[14]);       MULADD(at[6], at[13]);       MULADD(at[7], at[12]);       MULADD(at[8], at[11]); 
8445
+      COMBA_STORE(C->dp[10]);
8446
+      /* 11 */
8447
+      COMBA_FORWARD;
8448
+      MULADD(at[3], at[17]);       MULADD(at[4], at[16]);       MULADD(at[5], at[15]);       MULADD(at[6], at[14]);       MULADD(at[7], at[13]);       MULADD(at[8], at[12]); 
8449
+      COMBA_STORE(C->dp[11]);
8450
+      /* 12 */
8451
+      COMBA_FORWARD;
8452
+      MULADD(at[4], at[17]);       MULADD(at[5], at[16]);       MULADD(at[6], at[15]);       MULADD(at[7], at[14]);       MULADD(at[8], at[13]); 
8453
+      COMBA_STORE(C->dp[12]);
8454
+      /* 13 */
8455
+      COMBA_FORWARD;
8456
+      MULADD(at[5], at[17]);       MULADD(at[6], at[16]);       MULADD(at[7], at[15]);       MULADD(at[8], at[14]); 
8457
+      COMBA_STORE(C->dp[13]);
8458
+      /* 14 */
8459
+      COMBA_FORWARD;
8460
+      MULADD(at[6], at[17]);       MULADD(at[7], at[16]);       MULADD(at[8], at[15]); 
8461
+      COMBA_STORE(C->dp[14]);
8462
+      /* 15 */
8463
+      COMBA_FORWARD;
8464
+      MULADD(at[7], at[17]);       MULADD(at[8], at[16]); 
8465
+      COMBA_STORE(C->dp[15]);
8466
+      /* 16 */
8467
+      COMBA_FORWARD;
8468
+      MULADD(at[8], at[17]); 
8469
+      COMBA_STORE(C->dp[16]);
8470
+      COMBA_STORE2(C->dp[17]);
8471
+      C->used = 18;
8472
+      C->sign = A->sign ^ B->sign;
8473
+      fp_clamp(C);
8474
+      COMBA_FINI;
8475
+      break;
5715 8476
 
5716
-    if (res == MP_NO) {
5717
-      goto LBL_B;
5718
-    }
5719
-  }
8477
+   case 10:
8478
+      memcpy(at, A->dp, 10 * sizeof(fp_digit));
8479
+      memcpy(at+10, B->dp, 10 * sizeof(fp_digit));
8480
+      COMBA_START;
8481
+
8482
+      COMBA_CLEAR;
8483
+      /* 0 */
8484
+      MULADD(at[0], at[10]); 
8485
+      COMBA_STORE(C->dp[0]);
8486
+      /* 1 */
8487
+      COMBA_FORWARD;
8488
+      MULADD(at[0], at[11]);       MULADD(at[1], at[10]); 
8489
+      COMBA_STORE(C->dp[1]);
8490
+      /* 2 */
8491
+      COMBA_FORWARD;
8492
+      MULADD(at[0], at[12]);       MULADD(at[1], at[11]);       MULADD(at[2], at[10]); 
8493
+      COMBA_STORE(C->dp[2]);
8494
+      /* 3 */
8495
+      COMBA_FORWARD;
8496
+      MULADD(at[0], at[13]);       MULADD(at[1], at[12]);       MULADD(at[2], at[11]);       MULADD(at[3], at[10]); 
8497
+      COMBA_STORE(C->dp[3]);
8498
+      /* 4 */
8499
+      COMBA_FORWARD;
8500
+      MULADD(at[0], at[14]);       MULADD(at[1], at[13]);       MULADD(at[2], at[12]);       MULADD(at[3], at[11]);       MULADD(at[4], at[10]); 
8501
+      COMBA_STORE(C->dp[4]);
8502
+      /* 5 */
8503
+      COMBA_FORWARD;
8504
+      MULADD(at[0], at[15]);       MULADD(at[1], at[14]);       MULADD(at[2], at[13]);       MULADD(at[3], at[12]);       MULADD(at[4], at[11]);       MULADD(at[5], at[10]); 
8505
+      COMBA_STORE(C->dp[5]);
8506
+      /* 6 */
8507
+      COMBA_FORWARD;
8508
+      MULADD(at[0], at[16]);       MULADD(at[1], at[15]);       MULADD(at[2], at[14]);       MULADD(at[3], at[13]);       MULADD(at[4], at[12]);       MULADD(at[5], at[11]);       MULADD(at[6], at[10]); 
8509
+      COMBA_STORE(C->dp[6]);
8510
+      /* 7 */
8511
+      COMBA_FORWARD;
8512
+      MULADD(at[0], at[17]);       MULADD(at[1], at[16]);       MULADD(at[2], at[15]);       MULADD(at[3], at[14]);       MULADD(at[4], at[13]);       MULADD(at[5], at[12]);       MULADD(at[6], at[11]);       MULADD(at[7], at[10]); 
8513
+      COMBA_STORE(C->dp[7]);
8514
+      /* 8 */
8515
+      COMBA_FORWARD;
8516
+      MULADD(at[0], at[18]);       MULADD(at[1], at[17]);       MULADD(at[2], at[16]);       MULADD(at[3], at[15]);       MULADD(at[4], at[14]);       MULADD(at[5], at[13]);       MULADD(at[6], at[12]);       MULADD(at[7], at[11]);       MULADD(at[8], at[10]); 
8517
+      COMBA_STORE(C->dp[8]);
8518
+      /* 9 */
8519
+      COMBA_FORWARD;
8520
+      MULADD(at[0], at[19]);       MULADD(at[1], at[18]);       MULADD(at[2], at[17]);       MULADD(at[3], at[16]);       MULADD(at[4], at[15]);       MULADD(at[5], at[14]);       MULADD(at[6], at[13]);       MULADD(at[7], at[12]);       MULADD(at[8], at[11]);       MULADD(at[9], at[10]); 
8521
+      COMBA_STORE(C->dp[9]);
8522
+      /* 10 */
8523
+      COMBA_FORWARD;
8524
+      MULADD(at[1], at[19]);       MULADD(at[2], at[18]);       MULADD(at[3], at[17]);       MULADD(at[4], at[16]);       MULADD(at[5], at[15]);       MULADD(at[6], at[14]);       MULADD(at[7], at[13]);       MULADD(at[8], at[12]);       MULADD(at[9], at[11]); 
8525
+      COMBA_STORE(C->dp[10]);
8526
+      /* 11 */
8527
+      COMBA_FORWARD;
8528
+      MULADD(at[2], at[19]);       MULADD(at[3], at[18]);       MULADD(at[4], at[17]);       MULADD(at[5], at[16]);       MULADD(at[6], at[15]);       MULADD(at[7], at[14]);       MULADD(at[8], at[13]);       MULADD(at[9], at[12]); 
8529
+      COMBA_STORE(C->dp[11]);
8530
+      /* 12 */
8531
+      COMBA_FORWARD;
8532
+      MULADD(at[3], at[19]);       MULADD(at[4], at[18]);       MULADD(at[5], at[17]);       MULADD(at[6], at[16]);       MULADD(at[7], at[15]);       MULADD(at[8], at[14]);       MULADD(at[9], at[13]); 
8533
+      COMBA_STORE(C->dp[12]);
8534
+      /* 13 */
8535
+      COMBA_FORWARD;
8536
+      MULADD(at[4], at[19]);       MULADD(at[5], at[18]);       MULADD(at[6], at[17]);       MULADD(at[7], at[16]);       MULADD(at[8], at[15]);       MULADD(at[9], at[14]); 
8537
+      COMBA_STORE(C->dp[13]);
8538
+      /* 14 */
8539
+      COMBA_FORWARD;
8540
+      MULADD(at[5], at[19]);       MULADD(at[6], at[18]);       MULADD(at[7], at[17]);       MULADD(at[8], at[16]);       MULADD(at[9], at[15]); 
8541
+      COMBA_STORE(C->dp[14]);
8542
+      /* 15 */
8543
+      COMBA_FORWARD;
8544
+      MULADD(at[6], at[19]);       MULADD(at[7], at[18]);       MULADD(at[8], at[17]);       MULADD(at[9], at[16]); 
8545
+      COMBA_STORE(C->dp[15]);
8546
+      /* 16 */
8547
+      COMBA_FORWARD;
8548
+      MULADD(at[7], at[19]);       MULADD(at[8], at[18]);       MULADD(at[9], at[17]); 
8549
+      COMBA_STORE(C->dp[16]);
8550
+      /* 17 */
8551
+      COMBA_FORWARD;
8552
+      MULADD(at[8], at[19]);       MULADD(at[9], at[18]); 
8553
+      COMBA_STORE(C->dp[17]);
8554
+      /* 18 */
8555
+      COMBA_FORWARD;
8556
+      MULADD(at[9], at[19]); 
8557
+      COMBA_STORE(C->dp[18]);
8558
+      COMBA_STORE2(C->dp[19]);
8559
+      C->used = 20;
8560
+      C->sign = A->sign ^ B->sign;
8561
+      fp_clamp(C);
8562
+      COMBA_FINI;
8563
+      break;
8564
+
8565
+   case 11:
8566
+      memcpy(at, A->dp, 11 * sizeof(fp_digit));
8567
+      memcpy(at+11, B->dp, 11 * sizeof(fp_digit));
8568
+      COMBA_START;
8569
+
8570
+      COMBA_CLEAR;
8571
+      /* 0 */
8572
+      MULADD(at[0], at[11]); 
8573
+      COMBA_STORE(C->dp[0]);
8574
+      /* 1 */
8575
+      COMBA_FORWARD;
8576
+      MULADD(at[0], at[12]);       MULADD(at[1], at[11]); 
8577
+      COMBA_STORE(C->dp[1]);
8578
+      /* 2 */
8579
+      COMBA_FORWARD;
8580
+      MULADD(at[0], at[13]);       MULADD(at[1], at[12]);       MULADD(at[2], at[11]); 
8581
+      COMBA_STORE(C->dp[2]);
8582
+      /* 3 */
8583
+      COMBA_FORWARD;
8584
+      MULADD(at[0], at[14]);       MULADD(at[1], at[13]);       MULADD(at[2], at[12]);       MULADD(at[3], at[11]); 
8585
+      COMBA_STORE(C->dp[3]);
8586
+      /* 4 */
8587
+      COMBA_FORWARD;
8588
+      MULADD(at[0], at[15]);       MULADD(at[1], at[14]);       MULADD(at[2], at[13]);       MULADD(at[3], at[12]);       MULADD(at[4], at[11]); 
8589
+      COMBA_STORE(C->dp[4]);
8590
+      /* 5 */
8591
+      COMBA_FORWARD;
8592
+      MULADD(at[0], at[16]);       MULADD(at[1], at[15]);       MULADD(at[2], at[14]);       MULADD(at[3], at[13]);       MULADD(at[4], at[12]);       MULADD(at[5], at[11]); 
8593
+      COMBA_STORE(C->dp[5]);
8594
+      /* 6 */
8595
+      COMBA_FORWARD;
8596
+      MULADD(at[0], at[17]);       MULADD(at[1], at[16]);       MULADD(at[2], at[15]);       MULADD(at[3], at[14]);       MULADD(at[4], at[13]);       MULADD(at[5], at[12]);       MULADD(at[6], at[11]); 
8597
+      COMBA_STORE(C->dp[6]);
8598
+      /* 7 */
8599
+      COMBA_FORWARD;
8600
+      MULADD(at[0], at[18]);       MULADD(at[1], at[17]);       MULADD(at[2], at[16]);       MULADD(at[3], at[15]);       MULADD(at[4], at[14]);       MULADD(at[5], at[13]);       MULADD(at[6], at[12]);       MULADD(at[7], at[11]); 
8601
+      COMBA_STORE(C->dp[7]);
8602
+      /* 8 */
8603
+      COMBA_FORWARD;
8604
+      MULADD(at[0], at[19]);       MULADD(at[1], at[18]);       MULADD(at[2], at[17]);       MULADD(at[3], at[16]);       MULADD(at[4], at[15]);       MULADD(at[5], at[14]);       MULADD(at[6], at[13]);       MULADD(at[7], at[12]);       MULADD(at[8], at[11]); 
8605
+      COMBA_STORE(C->dp[8]);
8606
+      /* 9 */
8607
+      COMBA_FORWARD;
8608
+      MULADD(at[0], at[20]);       MULADD(at[1], at[19]);       MULADD(at[2], at[18]);       MULADD(at[3], at[17]);       MULADD(at[4], at[16]);       MULADD(at[5], at[15]);       MULADD(at[6], at[14]);       MULADD(at[7], at[13]);       MULADD(at[8], at[12]);       MULADD(at[9], at[11]); 
8609
+      COMBA_STORE(C->dp[9]);
8610
+      /* 10 */
8611
+      COMBA_FORWARD;
8612
+      MULADD(at[0], at[21]);       MULADD(at[1], at[20]);       MULADD(at[2], at[19]);       MULADD(at[3], at[18]);       MULADD(at[4], at[17]);       MULADD(at[5], at[16]);       MULADD(at[6], at[15]);       MULADD(at[7], at[14]);       MULADD(at[8], at[13]);       MULADD(at[9], at[12]);       MULADD(at[10], at[11]); 
8613
+      COMBA_STORE(C->dp[10]);
8614
+      /* 11 */
8615
+      COMBA_FORWARD;
8616
+      MULADD(at[1], at[21]);       MULADD(at[2], at[20]);       MULADD(at[3], at[19]);       MULADD(at[4], at[18]);       MULADD(at[5], at[17]);       MULADD(at[6], at[16]);       MULADD(at[7], at[15]);       MULADD(at[8], at[14]);       MULADD(at[9], at[13]);       MULADD(at[10], at[12]); 
8617
+      COMBA_STORE(C->dp[11]);
8618
+      /* 12 */
8619
+      COMBA_FORWARD;
8620
+      MULADD(at[2], at[21]);       MULADD(at[3], at[20]);       MULADD(at[4], at[19]);       MULADD(at[5], at[18]);       MULADD(at[6], at[17]);       MULADD(at[7], at[16]);       MULADD(at[8], at[15]);       MULADD(at[9], at[14]);       MULADD(at[10], at[13]); 
8621
+      COMBA_STORE(C->dp[12]);
8622
+      /* 13 */
8623
+      COMBA_FORWARD;
8624
+      MULADD(at[3], at[21]);       MULADD(at[4], at[20]);       MULADD(at[5], at[19]);       MULADD(at[6], at[18]);       MULADD(at[7], at[17]);       MULADD(at[8], at[16]);       MULADD(at[9], at[15]);       MULADD(at[10], at[14]); 
8625
+      COMBA_STORE(C->dp[13]);
8626
+      /* 14 */
8627
+      COMBA_FORWARD;
8628
+      MULADD(at[4], at[21]);       MULADD(at[5], at[20]);       MULADD(at[6], at[19]);       MULADD(at[7], at[18]);       MULADD(at[8], at[17]);       MULADD(at[9], at[16]);       MULADD(at[10], at[15]); 
8629
+      COMBA_STORE(C->dp[14]);
8630
+      /* 15 */
8631
+      COMBA_FORWARD;
8632
+      MULADD(at[5], at[21]);       MULADD(at[6], at[20]);       MULADD(at[7], at[19]);       MULADD(at[8], at[18]);       MULADD(at[9], at[17]);       MULADD(at[10], at[16]); 
8633
+      COMBA_STORE(C->dp[15]);
8634
+      /* 16 */
8635
+      COMBA_FORWARD;
8636
+      MULADD(at[6], at[21]);       MULADD(at[7], at[20]);       MULADD(at[8], at[19]);       MULADD(at[9], at[18]);       MULADD(at[10], at[17]); 
8637
+      COMBA_STORE(C->dp[16]);
8638
+      /* 17 */
8639
+      COMBA_FORWARD;
8640
+      MULADD(at[7], at[21]);       MULADD(at[8], at[20]);       MULADD(at[9], at[19]);       MULADD(at[10], at[18]); 
8641
+      COMBA_STORE(C->dp[17]);
8642
+      /* 18 */
8643
+      COMBA_FORWARD;
8644
+      MULADD(at[8], at[21]);       MULADD(at[9], at[20]);       MULADD(at[10], at[19]); 
8645
+      COMBA_STORE(C->dp[18]);
8646
+      /* 19 */
8647
+      COMBA_FORWARD;
8648
+      MULADD(at[9], at[21]);       MULADD(at[10], at[20]); 
8649
+      COMBA_STORE(C->dp[19]);
8650
+      /* 20 */
8651
+      COMBA_FORWARD;
8652
+      MULADD(at[10], at[21]); 
8653
+      COMBA_STORE(C->dp[20]);
8654
+      COMBA_STORE2(C->dp[21]);
8655
+      C->used = 22;
8656
+      C->sign = A->sign ^ B->sign;
8657
+      fp_clamp(C);
8658
+      COMBA_FINI;
8659
+      break;
5720 8660
 
5721
-  /* passed the test */
5722
-  *result = MP_YES;
5723
-LBL_B:mp_clear (&b);
5724
-  return err;
8661
+   case 12:
8662
+      memcpy(at, A->dp, 12 * sizeof(fp_digit));
8663
+      memcpy(at+12, B->dp, 12 * sizeof(fp_digit));
8664
+      COMBA_START;
8665
+
8666
+      COMBA_CLEAR;
8667
+      /* 0 */
8668
+      MULADD(at[0], at[12]); 
8669
+      COMBA_STORE(C->dp[0]);
8670
+      /* 1 */
8671
+      COMBA_FORWARD;
8672
+      MULADD(at[0], at[13]);       MULADD(at[1], at[12]); 
8673
+      COMBA_STORE(C->dp[1]);
8674
+      /* 2 */
8675
+      COMBA_FORWARD;
8676
+      MULADD(at[0], at[14]);       MULADD(at[1], at[13]);       MULADD(at[2], at[12]); 
8677
+      COMBA_STORE(C->dp[2]);
8678
+      /* 3 */
8679
+      COMBA_FORWARD;
8680
+      MULADD(at[0], at[15]);       MULADD(at[1], at[14]);       MULADD(at[2], at[13]);       MULADD(at[3], at[12]); 
8681
+      COMBA_STORE(C->dp[3]);
8682
+      /* 4 */
8683
+      COMBA_FORWARD;
8684
+      MULADD(at[0], at[16]);       MULADD(at[1], at[15]);       MULADD(at[2], at[14]);       MULADD(at[3], at[13]);       MULADD(at[4], at[12]); 
8685
+      COMBA_STORE(C->dp[4]);
8686
+      /* 5 */
8687
+      COMBA_FORWARD;
8688
+      MULADD(at[0], at[17]);       MULADD(at[1], at[16]);       MULADD(at[2], at[15]);       MULADD(at[3], at[14]);       MULADD(at[4], at[13]);       MULADD(at[5], at[12]); 
8689
+      COMBA_STORE(C->dp[5]);
8690
+      /* 6 */
8691
+      COMBA_FORWARD;
8692
+      MULADD(at[0], at[18]);       MULADD(at[1], at[17]);       MULADD(at[2], at[16]);       MULADD(at[3], at[15]);       MULADD(at[4], at[14]);       MULADD(at[5], at[13]);       MULADD(at[6], at[12]); 
8693
+      COMBA_STORE(C->dp[6]);
8694
+      /* 7 */
8695
+      COMBA_FORWARD;
8696
+      MULADD(at[0], at[19]);       MULADD(at[1], at[18]);       MULADD(at[2], at[17]);       MULADD(at[3], at[16]);       MULADD(at[4], at[15]);       MULADD(at[5], at[14]);       MULADD(at[6], at[13]);       MULADD(at[7], at[12]); 
8697
+      COMBA_STORE(C->dp[7]);
8698
+      /* 8 */
8699
+      COMBA_FORWARD;
8700
+      MULADD(at[0], at[20]);       MULADD(at[1], at[19]);       MULADD(at[2], at[18]);       MULADD(at[3], at[17]);       MULADD(at[4], at[16]);       MULADD(at[5], at[15]);       MULADD(at[6], at[14]);       MULADD(at[7], at[13]);       MULADD(at[8], at[12]); 
8701
+      COMBA_STORE(C->dp[8]);
8702
+      /* 9 */
8703
+      COMBA_FORWARD;
8704
+      MULADD(at[0], at[21]);       MULADD(at[1], at[20]);       MULADD(at[2], at[19]);       MULADD(at[3], at[18]);       MULADD(at[4], at[17]);       MULADD(at[5], at[16]);       MULADD(at[6], at[15]);       MULADD(at[7], at[14]);       MULADD(at[8], at[13]);       MULADD(at[9], at[12]); 
8705
+      COMBA_STORE(C->dp[9]);
8706
+      /* 10 */
8707
+      COMBA_FORWARD;
8708
+      MULADD(at[0], at[22]);       MULADD(at[1], at[21]);       MULADD(at[2], at[20]);       MULADD(at[3], at[19]);       MULADD(at[4], at[18]);       MULADD(at[5], at[17]);       MULADD(at[6], at[16]);       MULADD(at[7], at[15]);       MULADD(at[8], at[14]);       MULADD(at[9], at[13]);       MULADD(at[10], at[12]); 
8709
+      COMBA_STORE(C->dp[10]);
8710
+      /* 11 */
8711
+      COMBA_FORWARD;
8712
+      MULADD(at[0], at[23]);       MULADD(at[1], at[22]);       MULADD(at[2], at[21]);       MULADD(at[3], at[20]);       MULADD(at[4], at[19]);       MULADD(at[5], at[18]);       MULADD(at[6], at[17]);       MULADD(at[7], at[16]);       MULADD(at[8], at[15]);       MULADD(at[9], at[14]);       MULADD(at[10], at[13]);       MULADD(at[11], at[12]); 
8713
+      COMBA_STORE(C->dp[11]);
8714
+      /* 12 */
8715
+      COMBA_FORWARD;
8716
+      MULADD(at[1], at[23]);       MULADD(at[2], at[22]);       MULADD(at[3], at[21]);       MULADD(at[4], at[20]);       MULADD(at[5], at[19]);       MULADD(at[6], at[18]);       MULADD(at[7], at[17]);       MULADD(at[8], at[16]);       MULADD(at[9], at[15]);       MULADD(at[10], at[14]);       MULADD(at[11], at[13]); 
8717
+      COMBA_STORE(C->dp[12]);
8718
+      /* 13 */
8719
+      COMBA_FORWARD;
8720
+      MULADD(at[2], at[23]);       MULADD(at[3], at[22]);       MULADD(at[4], at[21]);       MULADD(at[5], at[20]);       MULADD(at[6], at[19]);       MULADD(at[7], at[18]);       MULADD(at[8], at[17]);       MULADD(at[9], at[16]);       MULADD(at[10], at[15]);       MULADD(at[11], at[14]); 
8721
+      COMBA_STORE(C->dp[13]);
8722
+      /* 14 */
8723
+      COMBA_FORWARD;
8724
+      MULADD(at[3], at[23]);       MULADD(at[4], at[22]);       MULADD(at[5], at[21]);       MULADD(at[6], at[20]);       MULADD(at[7], at[19]);       MULADD(at[8], at[18]);       MULADD(at[9], at[17]);       MULADD(at[10], at[16]);       MULADD(at[11], at[15]); 
8725
+      COMBA_STORE(C->dp[14]);
8726
+      /* 15 */
8727
+      COMBA_FORWARD;
8728
+      MULADD(at[4], at[23]);       MULADD(at[5], at[22]);       MULADD(at[6], at[21]);       MULADD(at[7], at[20]);       MULADD(at[8], at[19]);       MULADD(at[9], at[18]);       MULADD(at[10], at[17]);       MULADD(at[11], at[16]); 
8729
+      COMBA_STORE(C->dp[15]);
8730
+      /* 16 */
8731
+      COMBA_FORWARD;
8732
+      MULADD(at[5], at[23]);       MULADD(at[6], at[22]);       MULADD(at[7], at[21]);       MULADD(at[8], at[20]);       MULADD(at[9], at[19]);       MULADD(at[10], at[18]);       MULADD(at[11], at[17]); 
8733
+      COMBA_STORE(C->dp[16]);
8734
+      /* 17 */
8735
+      COMBA_FORWARD;
8736
+      MULADD(at[6], at[23]);       MULADD(at[7], at[22]);       MULADD(at[8], at[21]);       MULADD(at[9], at[20]);       MULADD(at[10], at[19]);       MULADD(at[11], at[18]); 
8737
+      COMBA_STORE(C->dp[17]);
8738
+      /* 18 */
8739
+      COMBA_FORWARD;
8740
+      MULADD(at[7], at[23]);       MULADD(at[8], at[22]);       MULADD(at[9], at[21]);       MULADD(at[10], at[20]);       MULADD(at[11], at[19]); 
8741
+      COMBA_STORE(C->dp[18]);
8742
+      /* 19 */
8743
+      COMBA_FORWARD;
8744
+      MULADD(at[8], at[23]);       MULADD(at[9], at[22]);       MULADD(at[10], at[21]);       MULADD(at[11], at[20]); 
8745
+      COMBA_STORE(C->dp[19]);
8746
+      /* 20 */
8747
+      COMBA_FORWARD;
8748
+      MULADD(at[9], at[23]);       MULADD(at[10], at[22]);       MULADD(at[11], at[21]); 
8749
+      COMBA_STORE(C->dp[20]);
8750
+      /* 21 */
8751
+      COMBA_FORWARD;
8752
+      MULADD(at[10], at[23]);       MULADD(at[11], at[22]); 
8753
+      COMBA_STORE(C->dp[21]);
8754
+      /* 22 */
8755
+      COMBA_FORWARD;
8756
+      MULADD(at[11], at[23]); 
8757
+      COMBA_STORE(C->dp[22]);
8758
+      COMBA_STORE2(C->dp[23]);
8759
+      C->used = 24;
8760
+      C->sign = A->sign ^ B->sign;
8761
+      fp_clamp(C);
8762
+      COMBA_FINI;
8763
+      break;
8764
+
8765
+   case 13:
8766
+      memcpy(at, A->dp, 13 * sizeof(fp_digit));
8767
+      memcpy(at+13, B->dp, 13 * sizeof(fp_digit));
8768
+      COMBA_START;
8769
+
8770
+      COMBA_CLEAR;
8771
+      /* 0 */
8772
+      MULADD(at[0], at[13]); 
8773
+      COMBA_STORE(C->dp[0]);
8774
+      /* 1 */
8775
+      COMBA_FORWARD;
8776
+      MULADD(at[0], at[14]);       MULADD(at[1], at[13]); 
8777
+      COMBA_STORE(C->dp[1]);
8778
+      /* 2 */
8779
+      COMBA_FORWARD;
8780
+      MULADD(at[0], at[15]);       MULADD(at[1], at[14]);       MULADD(at[2], at[13]); 
8781
+      COMBA_STORE(C->dp[2]);
8782
+      /* 3 */
8783
+      COMBA_FORWARD;
8784
+      MULADD(at[0], at[16]);       MULADD(at[1], at[15]);       MULADD(at[2], at[14]);       MULADD(at[3], at[13]); 
8785
+      COMBA_STORE(C->dp[3]);
8786
+      /* 4 */
8787
+      COMBA_FORWARD;
8788
+      MULADD(at[0], at[17]);       MULADD(at[1], at[16]);       MULADD(at[2], at[15]);       MULADD(at[3], at[14]);       MULADD(at[4], at[13]); 
8789
+      COMBA_STORE(C->dp[4]);
8790
+      /* 5 */
8791
+      COMBA_FORWARD;
8792
+      MULADD(at[0], at[18]);       MULADD(at[1], at[17]);       MULADD(at[2], at[16]);       MULADD(at[3], at[15]);       MULADD(at[4], at[14]);       MULADD(at[5], at[13]); 
8793
+      COMBA_STORE(C->dp[5]);
8794
+      /* 6 */
8795
+      COMBA_FORWARD;
8796
+      MULADD(at[0], at[19]);       MULADD(at[1], at[18]);       MULADD(at[2], at[17]);       MULADD(at[3], at[16]);       MULADD(at[4], at[15]);       MULADD(at[5], at[14]);       MULADD(at[6], at[13]); 
8797
+      COMBA_STORE(C->dp[6]);
8798
+      /* 7 */
8799
+      COMBA_FORWARD;
8800
+      MULADD(at[0], at[20]);       MULADD(at[1], at[19]);       MULADD(at[2], at[18]);       MULADD(at[3], at[17]);       MULADD(at[4], at[16]);       MULADD(at[5], at[15]);       MULADD(at[6], at[14]);       MULADD(at[7], at[13]); 
8801
+      COMBA_STORE(C->dp[7]);
8802
+      /* 8 */
8803
+      COMBA_FORWARD;
8804
+      MULADD(at[0], at[21]);       MULADD(at[1], at[20]);       MULADD(at[2], at[19]);       MULADD(at[3], at[18]);       MULADD(at[4], at[17]);       MULADD(at[5], at[16]);       MULADD(at[6], at[15]);       MULADD(at[7], at[14]);       MULADD(at[8], at[13]); 
8805
+      COMBA_STORE(C->dp[8]);
8806
+      /* 9 */
8807
+      COMBA_FORWARD;
8808
+      MULADD(at[0], at[22]);       MULADD(at[1], at[21]);       MULADD(at[2], at[20]);       MULADD(at[3], at[19]);       MULADD(at[4], at[18]);       MULADD(at[5], at[17]);       MULADD(at[6], at[16]);       MULADD(at[7], at[15]);       MULADD(at[8], at[14]);       MULADD(at[9], at[13]); 
8809
+      COMBA_STORE(C->dp[9]);
8810
+      /* 10 */
8811
+      COMBA_FORWARD;
8812
+      MULADD(at[0], at[23]);       MULADD(at[1], at[22]);       MULADD(at[2], at[21]);       MULADD(at[3], at[20]);       MULADD(at[4], at[19]);       MULADD(at[5], at[18]);       MULADD(at[6], at[17]);       MULADD(at[7], at[16]);       MULADD(at[8], at[15]);       MULADD(at[9], at[14]);       MULADD(at[10], at[13]); 
8813
+      COMBA_STORE(C->dp[10]);
8814
+      /* 11 */
8815
+      COMBA_FORWARD;
8816
+      MULADD(at[0], at[24]);       MULADD(at[1], at[23]);       MULADD(at[2], at[22]);       MULADD(at[3], at[21]);       MULADD(at[4], at[20]);       MULADD(at[5], at[19]);       MULADD(at[6], at[18]);       MULADD(at[7], at[17]);       MULADD(at[8], at[16]);       MULADD(at[9], at[15]);       MULADD(at[10], at[14]);       MULADD(at[11], at[13]); 
8817
+      COMBA_STORE(C->dp[11]);
8818
+      /* 12 */
8819
+      COMBA_FORWARD;
8820
+      MULADD(at[0], at[25]);       MULADD(at[1], at[24]);       MULADD(at[2], at[23]);       MULADD(at[3], at[22]);       MULADD(at[4], at[21]);       MULADD(at[5], at[20]);       MULADD(at[6], at[19]);       MULADD(at[7], at[18]);       MULADD(at[8], at[17]);       MULADD(at[9], at[16]);       MULADD(at[10], at[15]);       MULADD(at[11], at[14]);       MULADD(at[12], at[13]); 
8821
+      COMBA_STORE(C->dp[12]);
8822
+      /* 13 */
8823
+      COMBA_FORWARD;
8824
+      MULADD(at[1], at[25]);       MULADD(at[2], at[24]);       MULADD(at[3], at[23]);       MULADD(at[4], at[22]);       MULADD(at[5], at[21]);       MULADD(at[6], at[20]);       MULADD(at[7], at[19]);       MULADD(at[8], at[18]);       MULADD(at[9], at[17]);       MULADD(at[10], at[16]);       MULADD(at[11], at[15]);       MULADD(at[12], at[14]); 
8825
+      COMBA_STORE(C->dp[13]);
8826
+      /* 14 */
8827
+      COMBA_FORWARD;
8828
+      MULADD(at[2], at[25]);       MULADD(at[3], at[24]);       MULADD(at[4], at[23]);       MULADD(at[5], at[22]);       MULADD(at[6], at[21]);       MULADD(at[7], at[20]);       MULADD(at[8], at[19]);       MULADD(at[9], at[18]);       MULADD(at[10], at[17]);       MULADD(at[11], at[16]);       MULADD(at[12], at[15]); 
8829
+      COMBA_STORE(C->dp[14]);
8830
+      /* 15 */
8831
+      COMBA_FORWARD;
8832
+      MULADD(at[3], at[25]);       MULADD(at[4], at[24]);       MULADD(at[5], at[23]);       MULADD(at[6], at[22]);       MULADD(at[7], at[21]);       MULADD(at[8], at[20]);       MULADD(at[9], at[19]);       MULADD(at[10], at[18]);       MULADD(at[11], at[17]);       MULADD(at[12], at[16]); 
8833
+      COMBA_STORE(C->dp[15]);
8834
+      /* 16 */
8835
+      COMBA_FORWARD;
8836
+      MULADD(at[4], at[25]);       MULADD(at[5], at[24]);       MULADD(at[6], at[23]);       MULADD(at[7], at[22]);       MULADD(at[8], at[21]);       MULADD(at[9], at[20]);       MULADD(at[10], at[19]);       MULADD(at[11], at[18]);       MULADD(at[12], at[17]); 
8837
+      COMBA_STORE(C->dp[16]);
8838
+      /* 17 */
8839
+      COMBA_FORWARD;
8840
+      MULADD(at[5], at[25]);       MULADD(at[6], at[24]);       MULADD(at[7], at[23]);       MULADD(at[8], at[22]);       MULADD(at[9], at[21]);       MULADD(at[10], at[20]);       MULADD(at[11], at[19]);       MULADD(at[12], at[18]); 
8841
+      COMBA_STORE(C->dp[17]);
8842
+      /* 18 */
8843
+      COMBA_FORWARD;
8844
+      MULADD(at[6], at[25]);       MULADD(at[7], at[24]);       MULADD(at[8], at[23]);       MULADD(at[9], at[22]);       MULADD(at[10], at[21]);       MULADD(at[11], at[20]);       MULADD(at[12], at[19]); 
8845
+      COMBA_STORE(C->dp[18]);
8846
+      /* 19 */
8847
+      COMBA_FORWARD;
8848
+      MULADD(at[7], at[25]);       MULADD(at[8], at[24]);       MULADD(at[9], at[23]);       MULADD(at[10], at[22]);       MULADD(at[11], at[21]);       MULADD(at[12], at[20]); 
8849
+      COMBA_STORE(C->dp[19]);
8850
+      /* 20 */
8851
+      COMBA_FORWARD;
8852
+      MULADD(at[8], at[25]);       MULADD(at[9], at[24]);       MULADD(at[10], at[23]);       MULADD(at[11], at[22]);       MULADD(at[12], at[21]); 
8853
+      COMBA_STORE(C->dp[20]);
8854
+      /* 21 */
8855
+      COMBA_FORWARD;
8856
+      MULADD(at[9], at[25]);       MULADD(at[10], at[24]);       MULADD(at[11], at[23]);       MULADD(at[12], at[22]); 
8857
+      COMBA_STORE(C->dp[21]);
8858
+      /* 22 */
8859
+      COMBA_FORWARD;
8860
+      MULADD(at[10], at[25]);       MULADD(at[11], at[24]);       MULADD(at[12], at[23]); 
8861
+      COMBA_STORE(C->dp[22]);
8862
+      /* 23 */
8863
+      COMBA_FORWARD;
8864
+      MULADD(at[11], at[25]);       MULADD(at[12], at[24]); 
8865
+      COMBA_STORE(C->dp[23]);
8866
+      /* 24 */
8867
+      COMBA_FORWARD;
8868
+      MULADD(at[12], at[25]); 
8869
+      COMBA_STORE(C->dp[24]);
8870
+      COMBA_STORE2(C->dp[25]);
8871
+      C->used = 26;
8872
+      C->sign = A->sign ^ B->sign;
8873
+      fp_clamp(C);
8874
+      COMBA_FINI;
8875
+      break;
8876
+
8877
+   case 14:
8878
+      memcpy(at, A->dp, 14 * sizeof(fp_digit));
8879
+      memcpy(at+14, B->dp, 14 * sizeof(fp_digit));
8880
+      COMBA_START;
8881
+
8882
+      COMBA_CLEAR;
8883
+      /* 0 */
8884
+      MULADD(at[0], at[14]); 
8885
+      COMBA_STORE(C->dp[0]);
8886
+      /* 1 */
8887
+      COMBA_FORWARD;
8888
+      MULADD(at[0], at[15]);       MULADD(at[1], at[14]); 
8889
+      COMBA_STORE(C->dp[1]);
8890
+      /* 2 */
8891
+      COMBA_FORWARD;
8892
+      MULADD(at[0], at[16]);       MULADD(at[1], at[15]);       MULADD(at[2], at[14]); 
8893
+      COMBA_STORE(C->dp[2]);
8894
+      /* 3 */
8895
+      COMBA_FORWARD;
8896
+      MULADD(at[0], at[17]);       MULADD(at[1], at[16]);       MULADD(at[2], at[15]);       MULADD(at[3], at[14]); 
8897
+      COMBA_STORE(C->dp[3]);
8898
+      /* 4 */
8899
+      COMBA_FORWARD;
8900
+      MULADD(at[0], at[18]);       MULADD(at[1], at[17]);       MULADD(at[2], at[16]);       MULADD(at[3], at[15]);       MULADD(at[4], at[14]); 
8901
+      COMBA_STORE(C->dp[4]);
8902
+      /* 5 */
8903
+      COMBA_FORWARD;
8904
+      MULADD(at[0], at[19]);       MULADD(at[1], at[18]);       MULADD(at[2], at[17]);       MULADD(at[3], at[16]);       MULADD(at[4], at[15]);       MULADD(at[5], at[14]); 
8905
+      COMBA_STORE(C->dp[5]);
8906
+      /* 6 */
8907
+      COMBA_FORWARD;
8908
+      MULADD(at[0], at[20]);       MULADD(at[1], at[19]);       MULADD(at[2], at[18]);       MULADD(at[3], at[17]);       MULADD(at[4], at[16]);       MULADD(at[5], at[15]);       MULADD(at[6], at[14]); 
8909
+      COMBA_STORE(C->dp[6]);
8910
+      /* 7 */
8911
+      COMBA_FORWARD;
8912
+      MULADD(at[0], at[21]);       MULADD(at[1], at[20]);       MULADD(at[2], at[19]);       MULADD(at[3], at[18]);       MULADD(at[4], at[17]);       MULADD(at[5], at[16]);       MULADD(at[6], at[15]);       MULADD(at[7], at[14]); 
8913
+      COMBA_STORE(C->dp[7]);
8914
+      /* 8 */
8915
+      COMBA_FORWARD;
8916
+      MULADD(at[0], at[22]);       MULADD(at[1], at[21]);       MULADD(at[2], at[20]);       MULADD(at[3], at[19]);       MULADD(at[4], at[18]);       MULADD(at[5], at[17]);       MULADD(at[6], at[16]);       MULADD(at[7], at[15]);       MULADD(at[8], at[14]); 
8917
+      COMBA_STORE(C->dp[8]);
8918
+      /* 9 */
8919
+      COMBA_FORWARD;
8920
+      MULADD(at[0], at[23]);       MULADD(at[1], at[22]);       MULADD(at[2], at[21]);       MULADD(at[3], at[20]);       MULADD(at[4], at[19]);       MULADD(at[5], at[18]);       MULADD(at[6], at[17]);       MULADD(at[7], at[16]);       MULADD(at[8], at[15]);       MULADD(at[9], at[14]); 
8921
+      COMBA_STORE(C->dp[9]);
8922
+      /* 10 */
8923
+      COMBA_FORWARD;
8924
+      MULADD(at[0], at[24]);       MULADD(at[1], at[23]);       MULADD(at[2], at[22]);       MULADD(at[3], at[21]);       MULADD(at[4], at[20]);       MULADD(at[5], at[19]);       MULADD(at[6], at[18]);       MULADD(at[7], at[17]);       MULADD(at[8], at[16]);       MULADD(at[9], at[15]);       MULADD(at[10], at[14]); 
8925
+      COMBA_STORE(C->dp[10]);
8926
+      /* 11 */
8927
+      COMBA_FORWARD;
8928
+      MULADD(at[0], at[25]);       MULADD(at[1], at[24]);       MULADD(at[2], at[23]);       MULADD(at[3], at[22]);       MULADD(at[4], at[21]);       MULADD(at[5], at[20]);       MULADD(at[6], at[19]);       MULADD(at[7], at[18]);       MULADD(at[8], at[17]);       MULADD(at[9], at[16]);       MULADD(at[10], at[15]);       MULADD(at[11], at[14]); 
8929
+      COMBA_STORE(C->dp[11]);
8930
+      /* 12 */
8931
+      COMBA_FORWARD;
8932
+      MULADD(at[0], at[26]);       MULADD(at[1], at[25]);       MULADD(at[2], at[24]);       MULADD(at[3], at[23]);       MULADD(at[4], at[22]);       MULADD(at[5], at[21]);       MULADD(at[6], at[20]);       MULADD(at[7], at[19]);       MULADD(at[8], at[18]);       MULADD(at[9], at[17]);       MULADD(at[10], at[16]);       MULADD(at[11], at[15]);       MULADD(at[12], at[14]); 
8933
+      COMBA_STORE(C->dp[12]);
8934
+      /* 13 */
8935
+      COMBA_FORWARD;
8936
+      MULADD(at[0], at[27]);       MULADD(at[1], at[26]);       MULADD(at[2], at[25]);       MULADD(at[3], at[24]);       MULADD(at[4], at[23]);       MULADD(at[5], at[22]);       MULADD(at[6], at[21]);       MULADD(at[7], at[20]);       MULADD(at[8], at[19]);       MULADD(at[9], at[18]);       MULADD(at[10], at[17]);       MULADD(at[11], at[16]);       MULADD(at[12], at[15]);       MULADD(at[13], at[14]); 
8937
+      COMBA_STORE(C->dp[13]);
8938
+      /* 14 */
8939
+      COMBA_FORWARD;
8940
+      MULADD(at[1], at[27]);       MULADD(at[2], at[26]);       MULADD(at[3], at[25]);       MULADD(at[4], at[24]);       MULADD(at[5], at[23]);       MULADD(at[6], at[22]);       MULADD(at[7], at[21]);       MULADD(at[8], at[20]);       MULADD(at[9], at[19]);       MULADD(at[10], at[18]);       MULADD(at[11], at[17]);       MULADD(at[12], at[16]);       MULADD(at[13], at[15]); 
8941
+      COMBA_STORE(C->dp[14]);
8942
+      /* 15 */
8943
+      COMBA_FORWARD;
8944
+      MULADD(at[2], at[27]);       MULADD(at[3], at[26]);       MULADD(at[4], at[25]);       MULADD(at[5], at[24]);       MULADD(at[6], at[23]);       MULADD(at[7], at[22]);       MULADD(at[8], at[21]);       MULADD(at[9], at[20]);       MULADD(at[10], at[19]);       MULADD(at[11], at[18]);       MULADD(at[12], at[17]);       MULADD(at[13], at[16]); 
8945
+      COMBA_STORE(C->dp[15]);
8946
+      /* 16 */
8947
+      COMBA_FORWARD;
8948
+      MULADD(at[3], at[27]);       MULADD(at[4], at[26]);       MULADD(at[5], at[25]);       MULADD(at[6], at[24]);       MULADD(at[7], at[23]);       MULADD(at[8], at[22]);       MULADD(at[9], at[21]);       MULADD(at[10], at[20]);       MULADD(at[11], at[19]);       MULADD(at[12], at[18]);       MULADD(at[13], at[17]); 
8949
+      COMBA_STORE(C->dp[16]);
8950
+      /* 17 */
8951
+      COMBA_FORWARD;
8952
+      MULADD(at[4], at[27]);       MULADD(at[5], at[26]);       MULADD(at[6], at[25]);       MULADD(at[7], at[24]);       MULADD(at[8], at[23]);       MULADD(at[9], at[22]);       MULADD(at[10], at[21]);       MULADD(at[11], at[20]);       MULADD(at[12], at[19]);       MULADD(at[13], at[18]); 
8953
+      COMBA_STORE(C->dp[17]);
8954
+      /* 18 */
8955
+      COMBA_FORWARD;
8956
+      MULADD(at[5], at[27]);       MULADD(at[6], at[26]);       MULADD(at[7], at[25]);       MULADD(at[8], at[24]);       MULADD(at[9], at[23]);       MULADD(at[10], at[22]);       MULADD(at[11], at[21]);       MULADD(at[12], at[20]);       MULADD(at[13], at[19]); 
8957
+      COMBA_STORE(C->dp[18]);
8958
+      /* 19 */
8959
+      COMBA_FORWARD;
8960
+      MULADD(at[6], at[27]);       MULADD(at[7], at[26]);       MULADD(at[8], at[25]);       MULADD(at[9], at[24]);       MULADD(at[10], at[23]);       MULADD(at[11], at[22]);       MULADD(at[12], at[21]);       MULADD(at[13], at[20]); 
8961
+      COMBA_STORE(C->dp[19]);
8962
+      /* 20 */
8963
+      COMBA_FORWARD;
8964
+      MULADD(at[7], at[27]);       MULADD(at[8], at[26]);       MULADD(at[9], at[25]);       MULADD(at[10], at[24]);       MULADD(at[11], at[23]);       MULADD(at[12], at[22]);       MULADD(at[13], at[21]); 
8965
+      COMBA_STORE(C->dp[20]);
8966
+      /* 21 */
8967
+      COMBA_FORWARD;
8968
+      MULADD(at[8], at[27]);       MULADD(at[9], at[26]);       MULADD(at[10], at[25]);       MULADD(at[11], at[24]);       MULADD(at[12], at[23]);       MULADD(at[13], at[22]); 
8969
+      COMBA_STORE(C->dp[21]);
8970
+      /* 22 */
8971
+      COMBA_FORWARD;
8972
+      MULADD(at[9], at[27]);       MULADD(at[10], at[26]);       MULADD(at[11], at[25]);       MULADD(at[12], at[24]);       MULADD(at[13], at[23]); 
8973
+      COMBA_STORE(C->dp[22]);
8974
+      /* 23 */
8975
+      COMBA_FORWARD;
8976
+      MULADD(at[10], at[27]);       MULADD(at[11], at[26]);       MULADD(at[12], at[25]);       MULADD(at[13], at[24]); 
8977
+      COMBA_STORE(C->dp[23]);
8978
+      /* 24 */
8979
+      COMBA_FORWARD;
8980
+      MULADD(at[11], at[27]);       MULADD(at[12], at[26]);       MULADD(at[13], at[25]); 
8981
+      COMBA_STORE(C->dp[24]);
8982
+      /* 25 */
8983
+      COMBA_FORWARD;
8984
+      MULADD(at[12], at[27]);       MULADD(at[13], at[26]); 
8985
+      COMBA_STORE(C->dp[25]);
8986
+      /* 26 */
8987
+      COMBA_FORWARD;
8988
+      MULADD(at[13], at[27]); 
8989
+      COMBA_STORE(C->dp[26]);
8990
+      COMBA_STORE2(C->dp[27]);
8991
+      C->used = 28;
8992
+      C->sign = A->sign ^ B->sign;
8993
+      fp_clamp(C);
8994
+      COMBA_FINI;
8995
+      break;
8996
+
8997
+   case 15:
8998
+      memcpy(at, A->dp, 15 * sizeof(fp_digit));
8999
+      memcpy(at+15, B->dp, 15 * sizeof(fp_digit));
9000
+      COMBA_START;
9001
+
9002
+      COMBA_CLEAR;
9003
+      /* 0 */
9004
+      MULADD(at[0], at[15]); 
9005
+      COMBA_STORE(C->dp[0]);
9006
+      /* 1 */
9007
+      COMBA_FORWARD;
9008
+      MULADD(at[0], at[16]);       MULADD(at[1], at[15]); 
9009
+      COMBA_STORE(C->dp[1]);
9010
+      /* 2 */
9011
+      COMBA_FORWARD;
9012
+      MULADD(at[0], at[17]);       MULADD(at[1], at[16]);       MULADD(at[2], at[15]); 
9013
+      COMBA_STORE(C->dp[2]);
9014
+      /* 3 */
9015
+      COMBA_FORWARD;
9016
+      MULADD(at[0], at[18]);       MULADD(at[1], at[17]);       MULADD(at[2], at[16]);       MULADD(at[3], at[15]); 
9017
+      COMBA_STORE(C->dp[3]);
9018
+      /* 4 */
9019
+      COMBA_FORWARD;
9020
+      MULADD(at[0], at[19]);       MULADD(at[1], at[18]);       MULADD(at[2], at[17]);       MULADD(at[3], at[16]);       MULADD(at[4], at[15]); 
9021
+      COMBA_STORE(C->dp[4]);
9022
+      /* 5 */
9023
+      COMBA_FORWARD;
9024
+      MULADD(at[0], at[20]);       MULADD(at[1], at[19]);       MULADD(at[2], at[18]);       MULADD(at[3], at[17]);       MULADD(at[4], at[16]);       MULADD(at[5], at[15]); 
9025
+      COMBA_STORE(C->dp[5]);
9026
+      /* 6 */
9027
+      COMBA_FORWARD;
9028
+      MULADD(at[0], at[21]);       MULADD(at[1], at[20]);       MULADD(at[2], at[19]);       MULADD(at[3], at[18]);       MULADD(at[4], at[17]);       MULADD(at[5], at[16]);       MULADD(at[6], at[15]); 
9029
+      COMBA_STORE(C->dp[6]);
9030
+      /* 7 */
9031
+      COMBA_FORWARD;
9032
+      MULADD(at[0], at[22]);       MULADD(at[1], at[21]);       MULADD(at[2], at[20]);       MULADD(at[3], at[19]);       MULADD(at[4], at[18]);       MULADD(at[5], at[17]);       MULADD(at[6], at[16]);       MULADD(at[7], at[15]); 
9033
+      COMBA_STORE(C->dp[7]);
9034
+      /* 8 */
9035
+      COMBA_FORWARD;
9036
+      MULADD(at[0], at[23]);       MULADD(at[1], at[22]);       MULADD(at[2], at[21]);       MULADD(at[3], at[20]);       MULADD(at[4], at[19]);       MULADD(at[5], at[18]);       MULADD(at[6], at[17]);       MULADD(at[7], at[16]);       MULADD(at[8], at[15]); 
9037
+      COMBA_STORE(C->dp[8]);
9038
+      /* 9 */
9039
+      COMBA_FORWARD;
9040
+      MULADD(at[0], at[24]);       MULADD(at[1], at[23]);       MULADD(at[2], at[22]);       MULADD(at[3], at[21]);       MULADD(at[4], at[20]);       MULADD(at[5], at[19]);       MULADD(at[6], at[18]);       MULADD(at[7], at[17]);       MULADD(at[8], at[16]);       MULADD(at[9], at[15]); 
9041
+      COMBA_STORE(C->dp[9]);
9042
+      /* 10 */
9043
+      COMBA_FORWARD;
9044
+      MULADD(at[0], at[25]);       MULADD(at[1], at[24]);       MULADD(at[2], at[23]);       MULADD(at[3], at[22]);       MULADD(at[4], at[21]);       MULADD(at[5], at[20]);       MULADD(at[6], at[19]);       MULADD(at[7], at[18]);       MULADD(at[8], at[17]);       MULADD(at[9], at[16]);       MULADD(at[10], at[15]); 
9045
+      COMBA_STORE(C->dp[10]);
9046
+      /* 11 */
9047
+      COMBA_FORWARD;
9048
+      MULADD(at[0], at[26]);       MULADD(at[1], at[25]);       MULADD(at[2], at[24]);       MULADD(at[3], at[23]);       MULADD(at[4], at[22]);       MULADD(at[5], at[21]);       MULADD(at[6], at[20]);       MULADD(at[7], at[19]);       MULADD(at[8], at[18]);       MULADD(at[9], at[17]);       MULADD(at[10], at[16]);       MULADD(at[11], at[15]); 
9049
+      COMBA_STORE(C->dp[11]);
9050
+      /* 12 */
9051
+      COMBA_FORWARD;
9052
+      MULADD(at[0], at[27]);       MULADD(at[1], at[26]);       MULADD(at[2], at[25]);       MULADD(at[3], at[24]);       MULADD(at[4], at[23]);       MULADD(at[5], at[22]);       MULADD(at[6], at[21]);       MULADD(at[7], at[20]);       MULADD(at[8], at[19]);       MULADD(at[9], at[18]);       MULADD(at[10], at[17]);       MULADD(at[11], at[16]);       MULADD(at[12], at[15]); 
9053
+      COMBA_STORE(C->dp[12]);
9054
+      /* 13 */
9055
+      COMBA_FORWARD;
9056
+      MULADD(at[0], at[28]);       MULADD(at[1], at[27]);       MULADD(at[2], at[26]);       MULADD(at[3], at[25]);       MULADD(at[4], at[24]);       MULADD(at[5], at[23]);       MULADD(at[6], at[22]);       MULADD(at[7], at[21]);       MULADD(at[8], at[20]);       MULADD(at[9], at[19]);       MULADD(at[10], at[18]);       MULADD(at[11], at[17]);       MULADD(at[12], at[16]);       MULADD(at[13], at[15]); 
9057
+      COMBA_STORE(C->dp[13]);
9058
+      /* 14 */
9059
+      COMBA_FORWARD;
9060
+      MULADD(at[0], at[29]);       MULADD(at[1], at[28]);       MULADD(at[2], at[27]);       MULADD(at[3], at[26]);       MULADD(at[4], at[25]);       MULADD(at[5], at[24]);       MULADD(at[6], at[23]);       MULADD(at[7], at[22]);       MULADD(at[8], at[21]);       MULADD(at[9], at[20]);       MULADD(at[10], at[19]);       MULADD(at[11], at[18]);       MULADD(at[12], at[17]);       MULADD(at[13], at[16]);       MULADD(at[14], at[15]); 
9061
+      COMBA_STORE(C->dp[14]);
9062
+      /* 15 */
9063
+      COMBA_FORWARD;
9064
+      MULADD(at[1], at[29]);       MULADD(at[2], at[28]);       MULADD(at[3], at[27]);       MULADD(at[4], at[26]);       MULADD(at[5], at[25]);       MULADD(at[6], at[24]);       MULADD(at[7], at[23]);       MULADD(at[8], at[22]);       MULADD(at[9], at[21]);       MULADD(at[10], at[20]);       MULADD(at[11], at[19]);       MULADD(at[12], at[18]);       MULADD(at[13], at[17]);       MULADD(at[14], at[16]); 
9065
+      COMBA_STORE(C->dp[15]);
9066
+      /* 16 */
9067
+      COMBA_FORWARD;
9068
+      MULADD(at[2], at[29]);       MULADD(at[3], at[28]);       MULADD(at[4], at[27]);       MULADD(at[5], at[26]);       MULADD(at[6], at[25]);       MULADD(at[7], at[24]);       MULADD(at[8], at[23]);       MULADD(at[9], at[22]);       MULADD(at[10], at[21]);       MULADD(at[11], at[20]);       MULADD(at[12], at[19]);       MULADD(at[13], at[18]);       MULADD(at[14], at[17]); 
9069
+      COMBA_STORE(C->dp[16]);
9070
+      /* 17 */
9071
+      COMBA_FORWARD;
9072
+      MULADD(at[3], at[29]);       MULADD(at[4], at[28]);       MULADD(at[5], at[27]);       MULADD(at[6], at[26]);       MULADD(at[7], at[25]);       MULADD(at[8], at[24]);       MULADD(at[9], at[23]);       MULADD(at[10], at[22]);       MULADD(at[11], at[21]);       MULADD(at[12], at[20]);       MULADD(at[13], at[19]);       MULADD(at[14], at[18]); 
9073
+      COMBA_STORE(C->dp[17]);
9074
+      /* 18 */
9075
+      COMBA_FORWARD;
9076
+      MULADD(at[4], at[29]);       MULADD(at[5], at[28]);       MULADD(at[6], at[27]);       MULADD(at[7], at[26]);       MULADD(at[8], at[25]);       MULADD(at[9], at[24]);       MULADD(at[10], at[23]);       MULADD(at[11], at[22]);       MULADD(at[12], at[21]);       MULADD(at[13], at[20]);       MULADD(at[14], at[19]); 
9077
+      COMBA_STORE(C->dp[18]);
9078
+      /* 19 */
9079
+      COMBA_FORWARD;
9080
+      MULADD(at[5], at[29]);       MULADD(at[6], at[28]);       MULADD(at[7], at[27]);       MULADD(at[8], at[26]);       MULADD(at[9], at[25]);       MULADD(at[10], at[24]);       MULADD(at[11], at[23]);       MULADD(at[12], at[22]);       MULADD(at[13], at[21]);       MULADD(at[14], at[20]); 
9081
+      COMBA_STORE(C->dp[19]);
9082
+      /* 20 */
9083
+      COMBA_FORWARD;
9084
+      MULADD(at[6], at[29]);       MULADD(at[7], at[28]);       MULADD(at[8], at[27]);       MULADD(at[9], at[26]);       MULADD(at[10], at[25]);       MULADD(at[11], at[24]);       MULADD(at[12], at[23]);       MULADD(at[13], at[22]);       MULADD(at[14], at[21]); 
9085
+      COMBA_STORE(C->dp[20]);
9086
+      /* 21 */
9087
+      COMBA_FORWARD;
9088
+      MULADD(at[7], at[29]);       MULADD(at[8], at[28]);       MULADD(at[9], at[27]);       MULADD(at[10], at[26]);       MULADD(at[11], at[25]);       MULADD(at[12], at[24]);       MULADD(at[13], at[23]);       MULADD(at[14], at[22]); 
9089
+      COMBA_STORE(C->dp[21]);
9090
+      /* 22 */
9091
+      COMBA_FORWARD;
9092
+      MULADD(at[8], at[29]);       MULADD(at[9], at[28]);       MULADD(at[10], at[27]);       MULADD(at[11], at[26]);       MULADD(at[12], at[25]);       MULADD(at[13], at[24]);       MULADD(at[14], at[23]); 
9093
+      COMBA_STORE(C->dp[22]);
9094
+      /* 23 */
9095
+      COMBA_FORWARD;
9096
+      MULADD(at[9], at[29]);       MULADD(at[10], at[28]);       MULADD(at[11], at[27]);       MULADD(at[12], at[26]);       MULADD(at[13], at[25]);       MULADD(at[14], at[24]); 
9097
+      COMBA_STORE(C->dp[23]);
9098
+      /* 24 */
9099
+      COMBA_FORWARD;
9100
+      MULADD(at[10], at[29]);       MULADD(at[11], at[28]);       MULADD(at[12], at[27]);       MULADD(at[13], at[26]);       MULADD(at[14], at[25]); 
9101
+      COMBA_STORE(C->dp[24]);
9102
+      /* 25 */
9103
+      COMBA_FORWARD;
9104
+      MULADD(at[11], at[29]);       MULADD(at[12], at[28]);       MULADD(at[13], at[27]);       MULADD(at[14], at[26]); 
9105
+      COMBA_STORE(C->dp[25]);
9106
+      /* 26 */
9107
+      COMBA_FORWARD;
9108
+      MULADD(at[12], at[29]);       MULADD(at[13], at[28]);       MULADD(at[14], at[27]); 
9109
+      COMBA_STORE(C->dp[26]);
9110
+      /* 27 */
9111
+      COMBA_FORWARD;
9112
+      MULADD(at[13], at[29]);       MULADD(at[14], at[28]); 
9113
+      COMBA_STORE(C->dp[27]);
9114
+      /* 28 */
9115
+      COMBA_FORWARD;
9116
+      MULADD(at[14], at[29]); 
9117
+      COMBA_STORE(C->dp[28]);
9118
+      COMBA_STORE2(C->dp[29]);
9119
+      C->used = 30;
9120
+      C->sign = A->sign ^ B->sign;
9121
+      fp_clamp(C);
9122
+      COMBA_FINI;
9123
+      break;
9124
+
9125
+   case 16:
9126
+      memcpy(at, A->dp, 16 * sizeof(fp_digit));
9127
+      memcpy(at+16, B->dp, 16 * sizeof(fp_digit));
9128
+      COMBA_START;
9129
+
9130
+      COMBA_CLEAR;
9131
+      /* 0 */
9132
+      MULADD(at[0], at[16]); 
9133
+      COMBA_STORE(C->dp[0]);
9134
+      /* 1 */
9135
+      COMBA_FORWARD;
9136
+      MULADD(at[0], at[17]);       MULADD(at[1], at[16]); 
9137
+      COMBA_STORE(C->dp[1]);
9138
+      /* 2 */
9139
+      COMBA_FORWARD;
9140
+      MULADD(at[0], at[18]);       MULADD(at[1], at[17]);       MULADD(at[2], at[16]); 
9141
+      COMBA_STORE(C->dp[2]);
9142
+      /* 3 */
9143
+      COMBA_FORWARD;
9144
+      MULADD(at[0], at[19]);       MULADD(at[1], at[18]);       MULADD(at[2], at[17]);       MULADD(at[3], at[16]); 
9145
+      COMBA_STORE(C->dp[3]);
9146
+      /* 4 */
9147
+      COMBA_FORWARD;
9148
+      MULADD(at[0], at[20]);       MULADD(at[1], at[19]);       MULADD(at[2], at[18]);       MULADD(at[3], at[17]);       MULADD(at[4], at[16]); 
9149
+      COMBA_STORE(C->dp[4]);
9150
+      /* 5 */
9151
+      COMBA_FORWARD;
9152
+      MULADD(at[0], at[21]);       MULADD(at[1], at[20]);       MULADD(at[2], at[19]);       MULADD(at[3], at[18]);       MULADD(at[4], at[17]);       MULADD(at[5], at[16]); 
9153
+      COMBA_STORE(C->dp[5]);
9154
+      /* 6 */
9155
+      COMBA_FORWARD;
9156
+      MULADD(at[0], at[22]);       MULADD(at[1], at[21]);       MULADD(at[2], at[20]);       MULADD(at[3], at[19]);       MULADD(at[4], at[18]);       MULADD(at[5], at[17]);       MULADD(at[6], at[16]); 
9157
+      COMBA_STORE(C->dp[6]);
9158
+      /* 7 */
9159
+      COMBA_FORWARD;
9160
+      MULADD(at[0], at[23]);       MULADD(at[1], at[22]);       MULADD(at[2], at[21]);       MULADD(at[3], at[20]);       MULADD(at[4], at[19]);       MULADD(at[5], at[18]);       MULADD(at[6], at[17]);       MULADD(at[7], at[16]); 
9161
+      COMBA_STORE(C->dp[7]);
9162
+      /* 8 */
9163
+      COMBA_FORWARD;
9164
+      MULADD(at[0], at[24]);       MULADD(at[1], at[23]);       MULADD(at[2], at[22]);       MULADD(at[3], at[21]);       MULADD(at[4], at[20]);       MULADD(at[5], at[19]);       MULADD(at[6], at[18]);       MULADD(at[7], at[17]);       MULADD(at[8], at[16]); 
9165
+      COMBA_STORE(C->dp[8]);
9166
+      /* 9 */
9167
+      COMBA_FORWARD;
9168
+      MULADD(at[0], at[25]);       MULADD(at[1], at[24]);       MULADD(at[2], at[23]);       MULADD(at[3], at[22]);       MULADD(at[4], at[21]);       MULADD(at[5], at[20]);       MULADD(at[6], at[19]);       MULADD(at[7], at[18]);       MULADD(at[8], at[17]);       MULADD(at[9], at[16]); 
9169
+      COMBA_STORE(C->dp[9]);
9170
+      /* 10 */
9171
+      COMBA_FORWARD;
9172
+      MULADD(at[0], at[26]);       MULADD(at[1], at[25]);       MULADD(at[2], at[24]);       MULADD(at[3], at[23]);       MULADD(at[4], at[22]);       MULADD(at[5], at[21]);       MULADD(at[6], at[20]);       MULADD(at[7], at[19]);       MULADD(at[8], at[18]);       MULADD(at[9], at[17]);       MULADD(at[10], at[16]); 
9173
+      COMBA_STORE(C->dp[10]);
9174
+      /* 11 */
9175
+      COMBA_FORWARD;
9176
+      MULADD(at[0], at[27]);       MULADD(at[1], at[26]);       MULADD(at[2], at[25]);       MULADD(at[3], at[24]);       MULADD(at[4], at[23]);       MULADD(at[5], at[22]);       MULADD(at[6], at[21]);       MULADD(at[7], at[20]);       MULADD(at[8], at[19]);       MULADD(at[9], at[18]);       MULADD(at[10], at[17]);       MULADD(at[11], at[16]); 
9177
+      COMBA_STORE(C->dp[11]);
9178
+      /* 12 */
9179
+      COMBA_FORWARD;
9180
+      MULADD(at[0], at[28]);       MULADD(at[1], at[27]);       MULADD(at[2], at[26]);       MULADD(at[3], at[25]);       MULADD(at[4], at[24]);       MULADD(at[5], at[23]);       MULADD(at[6], at[22]);       MULADD(at[7], at[21]);       MULADD(at[8], at[20]);       MULADD(at[9], at[19]);       MULADD(at[10], at[18]);       MULADD(at[11], at[17]);       MULADD(at[12], at[16]); 
9181
+      COMBA_STORE(C->dp[12]);
9182
+      /* 13 */
9183
+      COMBA_FORWARD;
9184
+      MULADD(at[0], at[29]);       MULADD(at[1], at[28]);       MULADD(at[2], at[27]);       MULADD(at[3], at[26]);       MULADD(at[4], at[25]);       MULADD(at[5], at[24]);       MULADD(at[6], at[23]);       MULADD(at[7], at[22]);       MULADD(at[8], at[21]);       MULADD(at[9], at[20]);       MULADD(at[10], at[19]);       MULADD(at[11], at[18]);       MULADD(at[12], at[17]);       MULADD(at[13], at[16]); 
9185
+      COMBA_STORE(C->dp[13]);
9186
+      /* 14 */
9187
+      COMBA_FORWARD;
9188
+      MULADD(at[0], at[30]);       MULADD(at[1], at[29]);       MULADD(at[2], at[28]);       MULADD(at[3], at[27]);       MULADD(at[4], at[26]);       MULADD(at[5], at[25]);       MULADD(at[6], at[24]);       MULADD(at[7], at[23]);       MULADD(at[8], at[22]);       MULADD(at[9], at[21]);       MULADD(at[10], at[20]);       MULADD(at[11], at[19]);       MULADD(at[12], at[18]);       MULADD(at[13], at[17]);       MULADD(at[14], at[16]); 
9189
+      COMBA_STORE(C->dp[14]);
9190
+      /* 15 */
9191
+      COMBA_FORWARD;
9192
+      MULADD(at[0], at[31]);       MULADD(at[1], at[30]);       MULADD(at[2], at[29]);       MULADD(at[3], at[28]);       MULADD(at[4], at[27]);       MULADD(at[5], at[26]);       MULADD(at[6], at[25]);       MULADD(at[7], at[24]);       MULADD(at[8], at[23]);       MULADD(at[9], at[22]);       MULADD(at[10], at[21]);       MULADD(at[11], at[20]);       MULADD(at[12], at[19]);       MULADD(at[13], at[18]);       MULADD(at[14], at[17]);       MULADD(at[15], at[16]); 
9193
+      COMBA_STORE(C->dp[15]);
9194
+      /* 16 */
9195
+      COMBA_FORWARD;
9196
+      MULADD(at[1], at[31]);       MULADD(at[2], at[30]);       MULADD(at[3], at[29]);       MULADD(at[4], at[28]);       MULADD(at[5], at[27]);       MULADD(at[6], at[26]);       MULADD(at[7], at[25]);       MULADD(at[8], at[24]);       MULADD(at[9], at[23]);       MULADD(at[10], at[22]);       MULADD(at[11], at[21]);       MULADD(at[12], at[20]);       MULADD(at[13], at[19]);       MULADD(at[14], at[18]);       MULADD(at[15], at[17]); 
9197
+      COMBA_STORE(C->dp[16]);
9198
+      /* 17 */
9199
+      COMBA_FORWARD;
9200
+      MULADD(at[2], at[31]);       MULADD(at[3], at[30]);       MULADD(at[4], at[29]);       MULADD(at[5], at[28]);       MULADD(at[6], at[27]);       MULADD(at[7], at[26]);       MULADD(at[8], at[25]);       MULADD(at[9], at[24]);       MULADD(at[10], at[23]);       MULADD(at[11], at[22]);       MULADD(at[12], at[21]);       MULADD(at[13], at[20]);       MULADD(at[14], at[19]);       MULADD(at[15], at[18]); 
9201
+      COMBA_STORE(C->dp[17]);
9202
+      /* 18 */
9203
+      COMBA_FORWARD;
9204
+      MULADD(at[3], at[31]);       MULADD(at[4], at[30]);       MULADD(at[5], at[29]);       MULADD(at[6], at[28]);       MULADD(at[7], at[27]);       MULADD(at[8], at[26]);       MULADD(at[9], at[25]);       MULADD(at[10], at[24]);       MULADD(at[11], at[23]);       MULADD(at[12], at[22]);       MULADD(at[13], at[21]);       MULADD(at[14], at[20]);       MULADD(at[15], at[19]); 
9205
+      COMBA_STORE(C->dp[18]);
9206
+      /* 19 */
9207
+      COMBA_FORWARD;
9208
+      MULADD(at[4], at[31]);       MULADD(at[5], at[30]);       MULADD(at[6], at[29]);       MULADD(at[7], at[28]);       MULADD(at[8], at[27]);       MULADD(at[9], at[26]);       MULADD(at[10], at[25]);       MULADD(at[11], at[24]);       MULADD(at[12], at[23]);       MULADD(at[13], at[22]);       MULADD(at[14], at[21]);       MULADD(at[15], at[20]); 
9209
+      COMBA_STORE(C->dp[19]);
9210
+      /* 20 */
9211
+      COMBA_FORWARD;
9212
+      MULADD(at[5], at[31]);       MULADD(at[6], at[30]);       MULADD(at[7], at[29]);       MULADD(at[8], at[28]);       MULADD(at[9], at[27]);       MULADD(at[10], at[26]);       MULADD(at[11], at[25]);       MULADD(at[12], at[24]);       MULADD(at[13], at[23]);       MULADD(at[14], at[22]);       MULADD(at[15], at[21]); 
9213
+      COMBA_STORE(C->dp[20]);
9214
+      /* 21 */
9215
+      COMBA_FORWARD;
9216
+      MULADD(at[6], at[31]);       MULADD(at[7], at[30]);       MULADD(at[8], at[29]);       MULADD(at[9], at[28]);       MULADD(at[10], at[27]);       MULADD(at[11], at[26]);       MULADD(at[12], at[25]);       MULADD(at[13], at[24]);       MULADD(at[14], at[23]);       MULADD(at[15], at[22]); 
9217
+      COMBA_STORE(C->dp[21]);
9218
+      /* 22 */
9219
+      COMBA_FORWARD;
9220
+      MULADD(at[7], at[31]);       MULADD(at[8], at[30]);       MULADD(at[9], at[29]);       MULADD(at[10], at[28]);       MULADD(at[11], at[27]);       MULADD(at[12], at[26]);       MULADD(at[13], at[25]);       MULADD(at[14], at[24]);       MULADD(at[15], at[23]); 
9221
+      COMBA_STORE(C->dp[22]);
9222
+      /* 23 */
9223
+      COMBA_FORWARD;
9224
+      MULADD(at[8], at[31]);       MULADD(at[9], at[30]);       MULADD(at[10], at[29]);       MULADD(at[11], at[28]);       MULADD(at[12], at[27]);       MULADD(at[13], at[26]);       MULADD(at[14], at[25]);       MULADD(at[15], at[24]); 
9225
+      COMBA_STORE(C->dp[23]);
9226
+      /* 24 */
9227
+      COMBA_FORWARD;
9228
+      MULADD(at[9], at[31]);       MULADD(at[10], at[30]);       MULADD(at[11], at[29]);       MULADD(at[12], at[28]);       MULADD(at[13], at[27]);       MULADD(at[14], at[26]);       MULADD(at[15], at[25]); 
9229
+      COMBA_STORE(C->dp[24]);
9230
+      /* 25 */
9231
+      COMBA_FORWARD;
9232
+      MULADD(at[10], at[31]);       MULADD(at[11], at[30]);       MULADD(at[12], at[29]);       MULADD(at[13], at[28]);       MULADD(at[14], at[27]);       MULADD(at[15], at[26]); 
9233
+      COMBA_STORE(C->dp[25]);
9234
+      /* 26 */
9235
+      COMBA_FORWARD;
9236
+      MULADD(at[11], at[31]);       MULADD(at[12], at[30]);       MULADD(at[13], at[29]);       MULADD(at[14], at[28]);       MULADD(at[15], at[27]); 
9237
+      COMBA_STORE(C->dp[26]);
9238
+      /* 27 */
9239
+      COMBA_FORWARD;
9240
+      MULADD(at[12], at[31]);       MULADD(at[13], at[30]);       MULADD(at[14], at[29]);       MULADD(at[15], at[28]); 
9241
+      COMBA_STORE(C->dp[27]);
9242
+      /* 28 */
9243
+      COMBA_FORWARD;
9244
+      MULADD(at[13], at[31]);       MULADD(at[14], at[30]);       MULADD(at[15], at[29]); 
9245
+      COMBA_STORE(C->dp[28]);
9246
+      /* 29 */
9247
+      COMBA_FORWARD;
9248
+      MULADD(at[14], at[31]);       MULADD(at[15], at[30]); 
9249
+      COMBA_STORE(C->dp[29]);
9250
+      /* 30 */
9251
+      COMBA_FORWARD;
9252
+      MULADD(at[15], at[31]); 
9253
+      COMBA_STORE(C->dp[30]);
9254
+      COMBA_STORE2(C->dp[31]);
9255
+      C->used = 32;
9256
+      C->sign = A->sign ^ B->sign;
9257
+      fp_clamp(C);
9258
+      COMBA_FINI;
9259
+      break;
9260
+   }
5725 9261
 }
5726
-#endif
5727 9262
 
5728
-/* $Source: /cvs/libtom/libtommath/bn_mp_prime_is_prime.c,v $ */
5729
-/* $Revision: 1.3 $ */
5730
-/* $Date: 2006/03/31 14:18:44 $ */
9263
+#endif
5731 9264
 
5732
-/* End: bn_mp_prime_is_prime.c */
9265
+/* End: fp_mul_comba_small_set.c */
5733 9266
 
5734
-/* Start: bn_mp_prime_miller_rabin.c */
5735
-#include <bignum.h>
5736
-#ifdef BN_MP_PRIME_MILLER_RABIN_C
5737
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
5738
- *
5739
- * LibTomMath is a library that provides multiple-precision
5740
- * integer arithmetic as well as number theoretic functionality.
9267
+/* Start: fp_mul_d.c */
9268
+/* TomsFastMath, a fast ISO C bignum library.
9269
+ * 
9270
+ * This project is meant to fill in where LibTomMath
9271
+ * falls short.  That is speed ;-)
5741 9272
  *
5742
- * The library was designed directly after the MPI library by
5743
- * Michael Fromberger but has been written from scratch with
5744
- * additional optimizations in place.
9273
+ * This project is public domain and free for all purposes.
9274
+ * 
9275
+ * Tom St Denis, tomstdenis@gmail.com
9276
+ */
9277
+#include "bignum_fast.h"
9278
+
9279
+/* c = a * b */
9280
+void fp_mul_d(fp_int *a, fp_digit b, fp_int *c)
9281
+{
9282
+   fp_word  w;
9283
+   int      x, oldused;
9284
+
9285
+   oldused = c->used;
9286
+   c->used = a->used;
9287
+   c->sign = a->sign;
9288
+   w       = 0;
9289
+   for (x = 0; x < a->used; x++) {
9290
+       w         = ((fp_word)a->dp[x]) * ((fp_word)b) + w;
9291
+       c->dp[x]  = (fp_digit)w;
9292
+       w         = w >> DIGIT_BIT;
9293
+   }
9294
+   if (w != 0 && (a->used != FP_SIZE)) {
9295
+      c->dp[c->used++] = w;
9296
+      ++x;
9297
+   }
9298
+   for (; x < oldused; x++) {
9299
+      c->dp[x] = 0;
9300
+   }
9301
+   fp_clamp(c);
9302
+}
9303
+
9304
+
9305
+/* $Source: /cvs/libtom/tomsfastmath/src/mul/fp_mul_d.c,v $ */
9306
+/* $Revision: 1.1 $ */
9307
+/* $Date: 2006/12/31 21:25:53 $ */
9308
+
9309
+/* End: fp_mul_d.c */
9310
+
9311
+/* Start: fp_mulmod.c */
9312
+/* TomsFastMath, a fast ISO C bignum library.
9313
+ * 
9314
+ * This project is meant to fill in where LibTomMath
9315
+ * falls short.  That is speed ;-)
5745 9316
  *
5746
- * The library is free for all purposes without any express
5747
- * guarantee it works.
9317
+ * This project is public domain and free for all purposes.
9318
+ * 
9319
+ * Tom St Denis, tomstdenis@gmail.com
9320
+ */
9321
+#include "bignum_fast.h"
9322
+/* d = a * b (mod c) */
9323
+int fp_mulmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
9324
+{
9325
+  fp_int tmp;
9326
+  fp_zero(&tmp);
9327
+  fp_mul(a, b, &tmp);
9328
+  return fp_mod(&tmp, c, d);
9329
+}
9330
+
9331
+/* $Source: /cvs/libtom/tomsfastmath/src/mul/fp_mulmod.c,v $ */
9332
+/* $Revision: 1.1 $ */
9333
+/* $Date: 2006/12/31 21:25:53 $ */
9334
+
9335
+/* End: fp_mulmod.c */
9336
+
9337
+/* Start: fp_prime_miller_rabin.c */
9338
+/* TomsFastMath, a fast ISO C bignum library.
9339
+ * 
9340
+ * This project is meant to fill in where LibTomMath
9341
+ * falls short.  That is speed ;-)
5748 9342
  *
5749
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
9343
+ * This project is public domain and free for all purposes.
9344
+ * 
9345
+ * Tom St Denis, tomstdenis@gmail.com
5750 9346
  */
9347
+#include "bignum_fast.h"
5751 9348
 
5752 9349
 /* Miller-Rabin test of "a" to the base of "b" as described in 
5753 9350
  * HAC pp. 139 Algorithm 4.24
... ...
@@ -5756,396 +6808,127 @@ LBL_B:mp_clear (&b);
5756 5756
  * Randomly the chance of error is no more than 1/4 and often 
5757 5757
  * very much lower.
5758 5758
  */
5759
-int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
5759
+void fp_prime_miller_rabin (fp_int * a, fp_int * b, int *result)
5760 5760
 {
5761
-  mp_int  n1, y, r;
5762
-  int     s, j, err;
5761
+  fp_int  n1, y, r;
5762
+  int     s, j;
5763 5763
 
5764 5764
   /* default */
5765
-  *result = MP_NO;
5765
+  *result = FP_NO;
5766 5766
 
5767 5767
   /* ensure b > 1 */
5768
-  if (mp_cmp_d(b, 1) != MP_GT) {
5769
-     return MP_VAL;
5768
+  if (fp_cmp_d(b, 1) != FP_GT) {
5769
+     return;
5770 5770
   }     
5771 5771
 
5772 5772
   /* get n1 = a - 1 */
5773
-  if ((err = mp_init_copy (&n1, a)) != MP_OKAY) {
5774
-    return err;
5775
-  }
5776
-  if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
5777
-    goto LBL_N1;
5778
-  }
5773
+  fp_init_copy(&n1, a);
5774
+  fp_sub_d(&n1, 1, &n1);
5779 5775
 
5780 5776
   /* set 2**s * r = n1 */
5781
-  if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
5782
-    goto LBL_N1;
5783
-  }
5777
+  fp_init_copy(&r, &n1);
5784 5778
 
5785 5779
   /* count the number of least significant bits
5786 5780
    * which are zero
5787 5781
    */
5788
-  s = mp_cnt_lsb(&r);
5782
+  s = fp_cnt_lsb(&r);
5789 5783
 
5790 5784
   /* now divide n - 1 by 2**s */
5791
-  if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
5792
-    goto LBL_R;
5793
-  }
5785
+  fp_div_2d (&r, s, &r, NULL);
5794 5786
 
5795 5787
   /* compute y = b**r mod a */
5796
-  if ((err = mp_init (&y)) != MP_OKAY) {
5797
-    goto LBL_R;
5798
-  }
5799
-  if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
5800
-    goto LBL_Y;
5801
-  }
5788
+  fp_init(&y);
5789
+  fp_exptmod(b, &r, a, &y);
5802 5790
 
5803 5791
   /* if y != 1 and y != n1 do */
5804
-  if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
5792
+  if (fp_cmp_d (&y, 1) != FP_EQ && fp_cmp (&y, &n1) != FP_EQ) {
5805 5793
     j = 1;
5806 5794
     /* while j <= s-1 and y != n1 */
5807
-    while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
5808
-      if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
5809
-         goto LBL_Y;
5810
-      }
5795
+    while ((j <= (s - 1)) && fp_cmp (&y, &n1) != FP_EQ) {
5796
+      fp_sqrmod (&y, a, &y);
5811 5797
 
5812 5798
       /* if y == 1 then composite */
5813
-      if (mp_cmp_d (&y, 1) == MP_EQ) {
5814
-         goto LBL_Y;
5799
+      if (fp_cmp_d (&y, 1) == FP_EQ) {
5800
+         return;
5815 5801
       }
5816
-
5817 5802
       ++j;
5818 5803
     }
5819 5804
 
5820 5805
     /* if y != n1 then composite */
5821
-    if (mp_cmp (&y, &n1) != MP_EQ) {
5822
-      goto LBL_Y;
5806
+    if (fp_cmp (&y, &n1) != FP_EQ) {
5807
+       return;
5823 5808
     }
5824 5809
   }
5825 5810
 
5826 5811
   /* probably prime now */
5827
-  *result = MP_YES;
5828
-LBL_Y:mp_clear (&y);
5829
-LBL_R:mp_clear (&r);
5830
-LBL_N1:mp_clear (&n1);
5831
-  return err;
5832
-}
5833
-#endif
5834
-
5835
-/* $Source: /cvs/libtom/libtommath/bn_mp_prime_miller_rabin.c,v $ */
5836
-/* $Revision: 1.3 $ */
5837
-/* $Date: 2006/03/31 14:18:44 $ */
5838
-
5839
-/* End: bn_mp_prime_miller_rabin.c */
5840
-
5841
-/* Start: bn_mp_prime_next_prime.c */
5842
-#include <bignum.h>
5843
-#ifdef BN_MP_PRIME_NEXT_PRIME_C
5844
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
5845
- *
5846
- * LibTomMath is a library that provides multiple-precision
5847
- * integer arithmetic as well as number theoretic functionality.
5848
- *
5849
- * The library was designed directly after the MPI library by
5850
- * Michael Fromberger but has been written from scratch with
5851
- * additional optimizations in place.
5852
- *
5853
- * The library is free for all purposes without any express
5854
- * guarantee it works.
5855
- *
5856
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
5857
- */
5858
-
5859
-/* finds the next prime after the number "a" using "t" trials
5860
- * of Miller-Rabin.
5861
- *
5862
- * bbs_style = 1 means the prime must be congruent to 3 mod 4
5863
- */
5864
-int mp_prime_next_prime(mp_int *a, int t, int bbs_style)
5865
-{
5866
-   int      err, res, x, y;
5867
-   mp_digit res_tab[PRIME_SIZE], step, kstep;
5868
-   mp_int   b;
5869
-
5870
-   /* ensure t is valid */
5871
-   if (t <= 0 || t > PRIME_SIZE) {
5872
-      return MP_VAL;
5873
-   }
5874
-
5875
-   /* force positive */
5876
-   a->sign = MP_ZPOS;
5877
-
5878
-   /* simple algo if a is less than the largest prime in the table */
5879
-   if (mp_cmp_d(a, ltm_prime_tab[PRIME_SIZE-1]) == MP_LT) {
5880
-      /* find which prime it is bigger than */
5881
-      for (x = PRIME_SIZE - 2; x >= 0; x--) {
5882
-          if (mp_cmp_d(a, ltm_prime_tab[x]) != MP_LT) {
5883
-             if (bbs_style == 1) {
5884
-                /* ok we found a prime smaller or
5885
-                 * equal [so the next is larger]
5886
-                 *
5887
-                 * however, the prime must be
5888
-                 * congruent to 3 mod 4
5889
-                 */
5890
-                if ((ltm_prime_tab[x + 1] & 3) != 3) {
5891
-                   /* scan upwards for a prime congruent to 3 mod 4 */
5892
-                   for (y = x + 1; y < PRIME_SIZE; y++) {
5893
-                       if ((ltm_prime_tab[y] & 3) == 3) {
5894
-                          mp_set(a, ltm_prime_tab[y]);
5895
-                          return MP_OKAY;
5896
-                       }
5897
-                   }
5898
-                }
5899
-             } else {
5900
-                mp_set(a, ltm_prime_tab[x + 1]);
5901
-                return MP_OKAY;
5902
-             }
5903
-          }
5904
-      }
5905
-      /* at this point a maybe 1 */
5906
-      if (mp_cmp_d(a, 1) == MP_EQ) {
5907
-         mp_set(a, 2);
5908
-         return MP_OKAY;
5909
-      }
5910
-      /* fall through to the sieve */
5911
-   }
5912
-
5913
-   /* generate a prime congruent to 3 mod 4 or 1/3 mod 4? */
5914
-   if (bbs_style == 1) {
5915
-      kstep   = 4;
5916
-   } else {
5917
-      kstep   = 2;
5918
-   }
5919
-
5920
-   /* at this point we will use a combination of a sieve and Miller-Rabin */
5921
-
5922
-   if (bbs_style == 1) {
5923
-      /* if a mod 4 != 3 subtract the correct value to make it so */
5924
-      if ((a->dp[0] & 3) != 3) {
5925
-         if ((err = mp_sub_d(a, (a->dp[0] & 3) + 1, a)) != MP_OKAY) { return err; };
5926
-      }
5927
-   } else {
5928
-      if (mp_iseven(a) == 1) {
5929
-         /* force odd */
5930
-         if ((err = mp_sub_d(a, 1, a)) != MP_OKAY) {
5931
-            return err;
5932
-         }
5933
-      }
5934
-   }
5935
-
5936
-   /* generate the restable */
5937
-   for (x = 1; x < PRIME_SIZE; x++) {
5938
-      if ((err = mp_mod_d(a, ltm_prime_tab[x], res_tab + x)) != MP_OKAY) {
5939
-         return err;
5940
-      }
5941
-   }
5942
-
5943
-   /* init temp used for Miller-Rabin Testing */
5944
-   if ((err = mp_init(&b)) != MP_OKAY) {
5945
-      return err;
5946
-   }
5947
-
5948
-   for (;;) {
5949
-      /* skip to the next non-trivially divisible candidate */
5950
-      step = 0;
5951
-      do {
5952
-         /* y == 1 if any residue was zero [e.g. cannot be prime] */
5953
-         y     =  0;
5954
-
5955
-         /* increase step to next candidate */
5956
-         step += kstep;
5957
-
5958
-         /* compute the new residue without using division */
5959
-         for (x = 1; x < PRIME_SIZE; x++) {
5960
-             /* add the step to each residue */
5961
-             res_tab[x] += kstep;
5962
-
5963
-             /* subtract the modulus [instead of using division] */
5964
-             if (res_tab[x] >= ltm_prime_tab[x]) {
5965
-                res_tab[x]  -= ltm_prime_tab[x];
5966
-             }
5967
-
5968
-             /* set flag if zero */
5969
-             if (res_tab[x] == 0) {
5970
-                y = 1;
5971
-             }
5972
-         }
5973
-      } while (y == 1 && step < ((((mp_digit)1)<<DIGIT_BIT) - kstep));
5974
-
5975
-      /* add the step */
5976
-      if ((err = mp_add_d(a, step, a)) != MP_OKAY) {
5977
-         goto LBL_ERR;
5978
-      }
5979
-
5980
-      /* if didn't pass sieve and step == MAX then skip test */
5981
-      if (y == 1 && step >= ((((mp_digit)1)<<DIGIT_BIT) - kstep)) {
5982
-         continue;
5983
-      }
5984
-
5985
-      /* is this prime? */
5986
-      for (x = 0; x < t; x++) {
5987
-          mp_set(&b, ltm_prime_tab[t]);
5988
-          if ((err = mp_prime_miller_rabin(a, &b, &res)) != MP_OKAY) {
5989
-             goto LBL_ERR;
5990
-          }
5991
-          if (res == MP_NO) {
5992
-             break;
5993
-          }
5994
-      }
5995
-
5996
-      if (res == MP_YES) {
5997
-         break;
5998
-      }
5999
-   }
6000
-
6001
-   err = MP_OKAY;
6002
-LBL_ERR:
6003
-   mp_clear(&b);
6004
-   return err;
5812
+  *result = FP_YES;
6005 5813
 }
6006 5814
 
6007
-#endif
6008
-
6009
-/* $Source: /cvs/libtom/libtommath/bn_mp_prime_next_prime.c,v $ */
6010
-/* $Revision: 1.3 $ */
6011
-/* $Date: 2006/03/31 14:18:44 $ */
5815
+/* $Source: /cvs/libtom/tomsfastmath/src/numtheory/fp_prime_miller_rabin.c,v $ */
5816
+/* $Revision: 1.1 $ */
5817
+/* $Date: 2007/01/24 21:25:19 $ */
6012 5818
 
6013
-/* End: bn_mp_prime_next_prime.c */
6014
-
6015
-/* Start: bn_mp_prime_rabin_miller_trials.c */
6016
-#include <bignum.h>
6017
-#ifdef BN_MP_PRIME_RABIN_MILLER_TRIALS_C
6018
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
6019
- *
6020
- * LibTomMath is a library that provides multiple-precision
6021
- * integer arithmetic as well as number theoretic functionality.
6022
- *
6023
- * The library was designed directly after the MPI library by
6024
- * Michael Fromberger but has been written from scratch with
6025
- * additional optimizations in place.
6026
- *
6027
- * The library is free for all purposes without any express
6028
- * guarantee it works.
6029
- *
6030
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
6031
- */
6032
-
6033
-
6034
-static const struct {
6035
-   int k, t;
6036
-} sizes[] = {
6037
-{   128,    28 },
6038
-{   256,    16 },
6039
-{   384,    10 },
6040
-{   512,     7 },
6041
-{   640,     6 },
6042
-{   768,     5 },
6043
-{   896,     4 },
6044
-{  1024,     4 }
6045
-};
5819
+/* End: fp_prime_miller_rabin.c */
6046 5820
 
6047
-/* returns # of RM trials required for a given bit size */
6048
-int mp_prime_rabin_miller_trials(int size)
6049
-{
6050
-   int x;
6051
-
6052
-   for (x = 0; x < (int)(sizeof(sizes)/(sizeof(sizes[0]))); x++) {
6053
-       if (sizes[x].k == size) {
6054
-          return sizes[x].t;
6055
-       } else if (sizes[x].k > size) {
6056
-          return (x == 0) ? sizes[0].t : sizes[x - 1].t;
6057
-       }
6058
-   }
6059
-   return sizes[x-1].t + 1;
6060
-}
6061
-
6062
-
6063
-#endif
6064
-
6065
-/* $Source: /cvs/libtom/libtommath/bn_mp_prime_rabin_miller_trials.c,v $ */
6066
-/* $Revision: 1.3 $ */
6067
-/* $Date: 2006/03/31 14:18:44 $ */
6068
-
6069
-/* End: bn_mp_prime_rabin_miller_trials.c */
6070
-
6071
-/* Start: bn_mp_prime_random_ex.c */
6072
-#include <bignum.h>
6073
-#ifdef BN_MP_PRIME_RANDOM_EX_C
6074
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
6075
- *
6076
- * LibTomMath is a library that provides multiple-precision
6077
- * integer arithmetic as well as number theoretic functionality.
6078
- *
6079
- * The library was designed directly after the MPI library by
6080
- * Michael Fromberger but has been written from scratch with
6081
- * additional optimizations in place.
6082
- *
6083
- * The library is free for all purposes without any express
6084
- * guarantee it works.
6085
- *
6086
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
6087
- */
6088
-
6089
-/* makes a truly random prime of a given size (bits),
6090
- *
6091
- * Flags are as follows:
5821
+/* Start: fp_prime_random_ex.c */
5822
+/* TomsFastMath, a fast ISO C bignum library.
6092 5823
  * 
6093
- *   LTM_PRIME_BBS      - make prime congruent to 3 mod 4
6094
- *   LTM_PRIME_SAFE     - make sure (p-1)/2 is prime as well (implies LTM_PRIME_BBS)
6095
- *   LTM_PRIME_2MSB_OFF - make the 2nd highest bit zero
6096
- *   LTM_PRIME_2MSB_ON  - make the 2nd highest bit one
6097
- *
6098
- * You have to supply a callback which fills in a buffer with random bytes.  "dat" is a parameter you can
6099
- * have passed to the callback (e.g. a state or something).  This function doesn't use "dat" itself
6100
- * so it can be NULL
5824
+ * This project is meant to fill in where LibTomMath
5825
+ * falls short.  That is speed ;-)
6101 5826
  *
5827
+ * This project is public domain and free for all purposes.
5828
+ * 
5829
+ * Tom St Denis, tomstdenis@gmail.com
6102 5830
  */
5831
+#include "bignum_fast.h"
6103 5832
 
6104 5833
 /* This is possibly the mother of all prime generation functions, muahahahahaha! */
6105
-int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback cb, void *dat)
5834
+int fp_prime_random_ex(fp_int *a, int t, int size, int flags, tfm_prime_callback cb, void *dat)
6106 5835
 {
6107 5836
    unsigned char *tmp, maskAND, maskOR_msb, maskOR_lsb;
6108 5837
    int res, err, bsize, maskOR_msb_offset;
6109 5838
 
6110 5839
    /* sanity check the input */
6111 5840
    if (size <= 1 || t <= 0) {
6112
-      return MP_VAL;
5841
+      return FP_VAL;
6113 5842
    }
6114 5843
 
6115
-   /* LTM_PRIME_SAFE implies LTM_PRIME_BBS */
6116
-   if (flags & LTM_PRIME_SAFE) {
6117
-      flags |= LTM_PRIME_BBS;
5844
+   /* TFM_PRIME_SAFE implies TFM_PRIME_BBS */
5845
+   if (flags & TFM_PRIME_SAFE) {
5846
+      flags |= TFM_PRIME_BBS;
6118 5847
    }
6119 5848
 
6120 5849
    /* calc the byte size */
6121
-   bsize = (size>>3) + ((size&7)?1:0);
5850
+   bsize = (size>>3)+(size&7?1:0);
6122 5851
 
6123 5852
    /* we need a buffer of bsize bytes */
6124
-   tmp = OPT_CAST(unsigned char) cli_malloc(bsize);
5853
+   tmp = malloc(bsize);
6125 5854
    if (tmp == NULL) {
6126
-      return MP_MEM;
5855
+      return FP_MEM;
6127 5856
    }
6128 5857
 
6129 5858
    /* calc the maskAND value for the MSbyte*/
6130
-   maskAND = ((size&7) == 0) ? 0xFF : (0xFF >> (8 - (size & 7)));
5859
+   maskAND = 0xFF >> (8 - (size & 7));
6131 5860
 
6132 5861
    /* calc the maskOR_msb */
6133 5862
    maskOR_msb        = 0;
6134
-   maskOR_msb_offset = ((size & 7) == 1) ? 1 : 0;
6135
-   if (flags & LTM_PRIME_2MSB_ON) {
6136
-      maskOR_msb       |= 0x80 >> ((9 - size) & 7);
6137
-   }  
5863
+   maskOR_msb_offset = (size - 2) >> 3;
5864
+   if (flags & TFM_PRIME_2MSB_ON) {
5865
+      maskOR_msb     |= 1 << ((size - 2) & 7);
5866
+   } else if (flags & TFM_PRIME_2MSB_OFF) {
5867
+      maskAND        &= ~(1 << ((size - 2) & 7));
5868
+   }
6138 5869
 
6139 5870
    /* get the maskOR_lsb */
6140 5871
    maskOR_lsb         = 1;
6141
-   if (flags & LTM_PRIME_BBS) {
5872
+   if (flags & TFM_PRIME_BBS) {
6142 5873
       maskOR_lsb     |= 3;
6143 5874
    }
6144 5875
 
6145 5876
    do {
6146 5877
       /* read the bytes */
6147 5878
       if (cb(tmp, bsize, dat) != bsize) {
6148
-         err = MP_VAL;
5879
+         err = FP_VAL;
6149 5880
          goto error;
6150 5881
       }
6151 5882
  
... ...
@@ -6158,259 +6941,132 @@ int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback
6158 6158
       tmp[bsize-1]             |= maskOR_lsb;
6159 6159
 
6160 6160
       /* read it in */
6161
-      if ((err = mp_read_unsigned_bin(a, tmp, bsize)) != MP_OKAY)     { goto error; }
6161
+      fp_read_unsigned_bin(a, tmp, bsize);
6162 6162
 
6163 6163
       /* is it prime? */
6164
-      if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY)           { goto error; }
6165
-      if (res == MP_NO) {  
6166
-         continue;
6167
-      }
6164
+      res = fp_isprime(a);
6165
+      if (res == FP_NO) continue;
6168 6166
 
6169
-      if (flags & LTM_PRIME_SAFE) {
6167
+      if (flags & TFM_PRIME_SAFE) {
6170 6168
          /* see if (a-1)/2 is prime */
6171
-         if ((err = mp_sub_d(a, 1, a)) != MP_OKAY)                    { goto error; }
6172
-         if ((err = mp_div_2(a, a)) != MP_OKAY)                       { goto error; }
6169
+         fp_sub_d(a, 1, a);
6170
+         fp_div_2(a, a);
6173 6171
  
6174 6172
          /* is it prime? */
6175
-         if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY)        { goto error; }
6173
+         res = fp_isprime(a);
6176 6174
       }
6177
-   } while (res == MP_NO);
6175
+   } while (res == FP_NO);
6178 6176
 
6179
-   if (flags & LTM_PRIME_SAFE) {
6177
+   if (flags & TFM_PRIME_SAFE) {
6180 6178
       /* restore a to the original value */
6181
-      if ((err = mp_mul_2(a, a)) != MP_OKAY)                          { goto error; }
6182
-      if ((err = mp_add_d(a, 1, a)) != MP_OKAY)                       { goto error; }
6179
+      fp_mul_2(a, a);
6180
+      fp_add_d(a, 1, a);
6183 6181
    }
6184 6182
 
6185
-   err = MP_OKAY;
6183
+   err = FP_OKAY;
6186 6184
 error:
6187 6185
    free(tmp);
6188 6186
    return err;
6189 6187
 }
6190 6188
 
6189
+/* $Source: /cvs/libtom/tomsfastmath/src/numtheory/fp_prime_random_ex.c,v $ */
6190
+/* $Revision: 1.1 $ */
6191
+/* $Date: 2007/01/24 21:25:19 $ */
6191 6192
 
6192
-#endif
6193
-
6194
-/* $Source: /cvs/libtom/libtommath/bn_mp_prime_random_ex.c,v $ */
6195
-/* $Revision: 1.4 $ */
6196
-/* $Date: 2006/03/31 14:18:44 $ */
6197
-
6198
-/* End: bn_mp_prime_random_ex.c */
6193
+/* End: fp_prime_random_ex.c */
6199 6194
 
6200
-/* Start: bn_mp_radix_size.c */
6201
-#include <bignum.h>
6202
-#ifdef BN_MP_RADIX_SIZE_C
6203
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
6204
- *
6205
- * LibTomMath is a library that provides multiple-precision
6206
- * integer arithmetic as well as number theoretic functionality.
6207
- *
6208
- * The library was designed directly after the MPI library by
6209
- * Michael Fromberger but has been written from scratch with
6210
- * additional optimizations in place.
6211
- *
6212
- * The library is free for all purposes without any express
6213
- * guarantee it works.
6195
+/* Start: fp_radix_size.c */
6196
+/* TomsFastMath, a fast ISO C bignum library.
6197
+ * 
6198
+ * This project is meant to fill in where LibTomMath
6199
+ * falls short.  That is speed ;-)
6214 6200
  *
6215
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
6201
+ * This project is public domain and free for all purposes.
6202
+ * 
6203
+ * Tom St Denis, tomstdenis@gmail.com
6216 6204
  */
6205
+#include "bignum_fast.h"
6217 6206
 
6218
-/* returns size of ASCII reprensentation */
6219
-int mp_radix_size (mp_int * a, int radix, int *size)
6207
+int fp_radix_size(fp_int *a, int radix, int *size)
6220 6208
 {
6221
-  int     res, digs;
6222
-  mp_int  t;
6223
-  mp_digit d;
6224
-
6209
+  int     digs;
6210
+  fp_int  t;
6211
+  fp_digit d;
6212
+   
6225 6213
   *size = 0;
6226 6214
 
6227
-  /* special case for binary */
6228
-  if (radix == 2) {
6229
-    *size = mp_count_bits (a) + (a->sign == MP_NEG ? 1 : 0) + 1;
6230
-    return MP_OKAY;
6231
-  }
6232
-
6233
-  /* make sure the radix is in range */
6215
+  /* check range of the radix */
6234 6216
   if (radix < 2 || radix > 64) {
6235
-    return MP_VAL;
6217
+    return FP_VAL;
6236 6218
   }
6237 6219
 
6238
-  if (mp_iszero(a) == MP_YES) {
6239
-    *size = 2;
6240
-    return MP_OKAY;
6241
-  }
6242
-
6243
-  /* digs is the digit count */
6244
-  digs = 0;
6245
-
6246
-  /* if it's negative add one for the sign */
6247
-  if (a->sign == MP_NEG) {
6248
-    ++digs;
6249
-  }
6250
-
6251
-  /* init a copy of the input */
6252
-  if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
6253
-    return res;
6254
-  }
6255
-
6256
-  /* force temp to positive */
6257
-  t.sign = MP_ZPOS; 
6258
-
6259
-  /* fetch out all of the digits */
6260
-  while (mp_iszero (&t) == MP_NO) {
6261
-    if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
6262
-      mp_clear (&t);
6263
-      return res;
6264
-    }
6265
-    ++digs;
6220
+  /* quick out if its zero */
6221
+  if (fp_iszero(a) == 1) {
6222
+     *size = 2;
6223
+     return FP_OKAY;
6266 6224
   }
6267
-  mp_clear (&t);
6268
-
6269
-  /* return digs + 1, the 1 is for the NULL byte that would be required. */
6270
-  *size = digs + 1;
6271
-  return MP_OKAY;
6272
-}
6273
-
6274
-#endif
6275
-
6276
-/* $Source: /cvs/libtom/libtommath/bn_mp_radix_size.c,v $ */
6277
-/* $Revision: 1.4 $ */
6278
-/* $Date: 2006/03/31 14:18:44 $ */
6279
-
6280
-/* End: bn_mp_radix_size.c */
6281
-
6282
-/* Start: bn_mp_radix_smap.c */
6283
-#include <bignum.h>
6284
-#ifdef BN_MP_RADIX_SMAP_C
6285
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
6286
- *
6287
- * LibTomMath is a library that provides multiple-precision
6288
- * integer arithmetic as well as number theoretic functionality.
6289
- *
6290
- * The library was designed directly after the MPI library by
6291
- * Michael Fromberger but has been written from scratch with
6292
- * additional optimizations in place.
6293
- *
6294
- * The library is free for all purposes without any express
6295
- * guarantee it works.
6296
- *
6297
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
6298
- */
6299
-
6300
-/* chars used in radix conversions */
6301
-const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
6302
-#endif
6303
-
6304
-/* $Source: /cvs/libtom/libtommath/bn_mp_radix_smap.c,v $ */
6305
-/* $Revision: 1.3 $ */
6306
-/* $Date: 2006/03/31 14:18:44 $ */
6307
-
6308
-/* End: bn_mp_radix_smap.c */
6309
-
6310
-/* Start: bn_mp_rand.c */
6311
-#include <bignum.h>
6312
-#ifdef BN_MP_RAND_C
6313
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
6314
- *
6315
- * LibTomMath is a library that provides multiple-precision
6316
- * integer arithmetic as well as number theoretic functionality.
6317
- *
6318
- * The library was designed directly after the MPI library by
6319
- * Michael Fromberger but has been written from scratch with
6320
- * additional optimizations in place.
6321
- *
6322
- * The library is free for all purposes without any express
6323
- * guarantee it works.
6324
- *
6325
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
6326
- */
6327 6225
 
6328
-/* makes a pseudo-random int of a given size */
6329
-int
6330
-mp_rand (mp_int * a, int digits)
6331
-{
6332
-  int     res;
6333
-  mp_digit d;
6226
+  fp_init_copy(&t, a);
6334 6227
 
6335
-  mp_zero (a);
6336
-  if (digits <= 0) {
6337
-    return MP_OKAY;
6228
+  /* if it is negative output a - */
6229
+  if (t.sign == FP_NEG) {
6230
+    (*size)++;
6231
+    t.sign = FP_ZPOS;
6338 6232
   }
6339 6233
 
6340
-  /* first place a random non-zero digit */
6341
-  do {
6342
-    d = ((mp_digit) abs (rand ())) & MP_MASK;
6343
-  } while (d == 0);
6344
-
6345
-  if ((res = mp_add_d (a, d, a)) != MP_OKAY) {
6346
-    return res;
6234
+  digs = 0;
6235
+  while (fp_iszero (&t) == FP_NO) {
6236
+    fp_div_d (&t, (fp_digit) radix, &t, &d);
6237
+    (*size)++;
6347 6238
   }
6348 6239
 
6349
-  while (--digits > 0) {
6350
-    if ((res = mp_lshd (a, 1)) != MP_OKAY) {
6351
-      return res;
6352
-    }
6353
-
6354
-    if ((res = mp_add_d (a, ((mp_digit) abs (rand ())), a)) != MP_OKAY) {
6355
-      return res;
6356
-    }
6357
-  }
6240
+  /* append a NULL so the string is properly terminated */
6241
+  (*size)++;
6242
+  return FP_OKAY;
6358 6243
 
6359
-  return MP_OKAY;
6360 6244
 }
6361
-#endif
6362 6245
 
6363
-/* $Source: /cvs/libtom/libtommath/bn_mp_rand.c,v $ */
6364
-/* $Revision: 1.3 $ */
6365
-/* $Date: 2006/03/31 14:18:44 $ */
6246
+/* $Source: /cvs/libtom/tomsfastmath/src/bin/fp_radix_size.c,v $ */
6247
+/* $Revision: 1.1 $ */
6248
+/* $Date: 2006/12/31 21:25:53 $ */
6366 6249
 
6367
-/* End: bn_mp_rand.c */
6250
+/* End: fp_radix_size.c */
6368 6251
 
6369
-/* Start: bn_mp_read_radix.c */
6370
-#include <bignum.h>
6371
-#ifdef BN_MP_READ_RADIX_C
6372
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
6373
- *
6374
- * LibTomMath is a library that provides multiple-precision
6375
- * integer arithmetic as well as number theoretic functionality.
6376
- *
6377
- * The library was designed directly after the MPI library by
6378
- * Michael Fromberger but has been written from scratch with
6379
- * additional optimizations in place.
6380
- *
6381
- * The library is free for all purposes without any express
6382
- * guarantee it works.
6252
+/* Start: fp_read_radix.c */
6253
+/* TomsFastMath, a fast ISO C bignum library.
6254
+ * 
6255
+ * This project is meant to fill in where LibTomMath
6256
+ * falls short.  That is speed ;-)
6383 6257
  *
6384
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
6258
+ * This project is public domain and free for all purposes.
6259
+ * 
6260
+ * Tom St Denis, tomstdenis@gmail.com
6385 6261
  */
6262
+#include "bignum_fast.h"
6386 6263
 
6387
-/* read a string [ASCII] in a given radix */
6388
-int mp_read_radix (mp_int * a, const char *str, int radix)
6264
+int fp_read_radix(fp_int *a, const char *str, int radix)
6389 6265
 {
6390
-  int     y, res, neg;
6266
+  int     y, neg;
6391 6267
   char    ch;
6392 6268
 
6393
-  /* zero the digit bignum */
6394
-  mp_zero(a);
6395
-
6396 6269
   /* make sure the radix is ok */
6397 6270
   if (radix < 2 || radix > 64) {
6398
-    return MP_VAL;
6271
+    return FP_VAL;
6399 6272
   }
6400 6273
 
6401
-  /* if the leading digit is a 
6402
-   * minus set the sign to negative. 
6274
+  /* if the leading digit is a
6275
+   * minus set the sign to negative.
6403 6276
    */
6404 6277
   if (*str == '-') {
6405 6278
     ++str;
6406
-    neg = MP_NEG;
6279
+    neg = FP_NEG;
6407 6280
   } else {
6408
-    neg = MP_ZPOS;
6281
+    neg = FP_ZPOS;
6409 6282
   }
6410 6283
 
6411 6284
   /* set the integer to the default of zero */
6412
-  mp_zero (a);
6413
-  
6285
+  fp_zero (a);
6286
+
6414 6287
   /* process each digit of the string */
6415 6288
   while (*str) {
6416 6289
     /* if the radix < 36 the conversion is case insensitive
... ...
@@ -6419,3102 +7075,6292 @@ int mp_read_radix (mp_int * a, const char *str, int radix)
6419 6419
      */
6420 6420
     ch = (char) ((radix < 36) ? toupper (*str) : *str);
6421 6421
     for (y = 0; y < 64; y++) {
6422
-      if (ch == mp_s_rmap[y]) {
6422
+      if (ch == fp_s_rmap[y]) {
6423 6423
          break;
6424 6424
       }
6425 6425
     }
6426 6426
 
6427
-    /* if the char was found in the map 
6427
+    /* if the char was found in the map
6428 6428
      * and is less than the given radix add it
6429
-     * to the number, otherwise exit the loop. 
6429
+     * to the number, otherwise exit the loop.
6430 6430
      */
6431 6431
     if (y < radix) {
6432
-      if ((res = mp_mul_d (a, (mp_digit) radix, a)) != MP_OKAY) {
6433
-         return res;
6434
-      }
6435
-      if ((res = mp_add_d (a, (mp_digit) y, a)) != MP_OKAY) {
6436
-         return res;
6437
-      }
6432
+      fp_mul_d (a, (fp_digit) radix, a);
6433
+      fp_add_d (a, (fp_digit) y, a);
6438 6434
     } else {
6439 6435
       break;
6440 6436
     }
6441 6437
     ++str;
6442 6438
   }
6443
-  
6439
+
6444 6440
   /* set the sign only if a != 0 */
6445
-  if (mp_iszero(a) != 1) {
6441
+  if (fp_iszero(a) != FP_YES) {
6446 6442
      a->sign = neg;
6447 6443
   }
6448
-  return MP_OKAY;
6444
+  return FP_OKAY;
6449 6445
 }
6450
-#endif
6451 6446
 
6452
-/* $Source: /cvs/libtom/libtommath/bn_mp_read_radix.c,v $ */
6453
-/* $Revision: 1.4 $ */
6454
-/* $Date: 2006/03/31 14:18:44 $ */
6447
+/* $Source: /cvs/libtom/tomsfastmath/src/bin/fp_read_radix.c,v $ */
6448
+/* $Revision: 1.1 $ */
6449
+/* $Date: 2006/12/31 21:25:53 $ */
6455 6450
 
6456
-/* End: bn_mp_read_radix.c */
6451
+/* End: fp_read_radix.c */
6457 6452
 
6458
-/* Start: bn_mp_read_signed_bin.c */
6459
-#include <bignum.h>
6460
-#ifdef BN_MP_READ_SIGNED_BIN_C
6461
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
6462
- *
6463
- * LibTomMath is a library that provides multiple-precision
6464
- * integer arithmetic as well as number theoretic functionality.
6465
- *
6466
- * The library was designed directly after the MPI library by
6467
- * Michael Fromberger but has been written from scratch with
6468
- * additional optimizations in place.
6469
- *
6470
- * The library is free for all purposes without any express
6471
- * guarantee it works.
6453
+/* Start: fp_read_signed_bin.c */
6454
+/* TomsFastMath, a fast ISO C bignum library.
6455
+ * 
6456
+ * This project is meant to fill in where LibTomMath
6457
+ * falls short.  That is speed ;-)
6472 6458
  *
6473
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
6459
+ * This project is public domain and free for all purposes.
6460
+ * 
6461
+ * Tom St Denis, tomstdenis@gmail.com
6474 6462
  */
6463
+#include "bignum_fast.h"
6475 6464
 
6476
-/* read signed bin, big endian, first byte is 0==positive or 1==negative */
6477
-int mp_read_signed_bin (mp_int * a, const unsigned char *b, int c)
6465
+void fp_read_signed_bin(fp_int *a, unsigned char *b, int c)
6478 6466
 {
6479
-  int     res;
6480
-
6481 6467
   /* read magnitude */
6482
-  if ((res = mp_read_unsigned_bin (a, b + 1, c - 1)) != MP_OKAY) {
6483
-    return res;
6484
-  }
6468
+  fp_read_unsigned_bin (a, b + 1, c - 1);
6485 6469
 
6486 6470
   /* first byte is 0 for positive, non-zero for negative */
6487 6471
   if (b[0] == 0) {
6488
-     a->sign = MP_ZPOS;
6472
+     a->sign = FP_ZPOS;
6489 6473
   } else {
6490
-     a->sign = MP_NEG;
6474
+     a->sign = FP_NEG;
6491 6475
   }
6492
-
6493
-  return MP_OKAY;
6494 6476
 }
6495
-#endif
6496 6477
 
6497
-/* $Source: /cvs/libtom/libtommath/bn_mp_read_signed_bin.c,v $ */
6498
-/* $Revision: 1.4 $ */
6499
-/* $Date: 2006/03/31 14:18:44 $ */
6478
+/* $Source: /cvs/libtom/tomsfastmath/src/bin/fp_read_signed_bin.c,v $ */
6479
+/* $Revision: 1.1 $ */
6480
+/* $Date: 2006/12/31 21:25:53 $ */
6500 6481
 
6501
-/* End: bn_mp_read_signed_bin.c */
6482
+/* End: fp_read_signed_bin.c */
6502 6483
 
6503
-/* Start: bn_mp_read_unsigned_bin.c */
6504
-#include <bignum.h>
6505
-#ifdef BN_MP_READ_UNSIGNED_BIN_C
6506
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
6507
- *
6508
- * LibTomMath is a library that provides multiple-precision
6509
- * integer arithmetic as well as number theoretic functionality.
6510
- *
6511
- * The library was designed directly after the MPI library by
6512
- * Michael Fromberger but has been written from scratch with
6513
- * additional optimizations in place.
6514
- *
6515
- * The library is free for all purposes without any express
6516
- * guarantee it works.
6484
+/* Start: fp_read_unsigned_bin.c */
6485
+/* TomsFastMath, a fast ISO C bignum library.
6486
+ * 
6487
+ * This project is meant to fill in where LibTomMath
6488
+ * falls short.  That is speed ;-)
6517 6489
  *
6518
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
6490
+ * This project is public domain and free for all purposes.
6491
+ * 
6492
+ * Tom St Denis, tomstdenis@gmail.com
6519 6493
  */
6494
+#include "bignum_fast.h"
6520 6495
 
6521
-/* reads a unsigned char array, assumes the msb is stored first [big endian] */
6522
-int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
6496
+void fp_read_unsigned_bin(fp_int *a, const unsigned char *b, int c)
6523 6497
 {
6524
-  int     res;
6525
-
6526
-  /* make sure there are at least two digits */
6527
-  if (a->alloc < 2) {
6528
-     if ((res = mp_grow(a, 2)) != MP_OKAY) {
6529
-        return res;
6530
-     }
6531
-  }
6532
-
6533 6498
   /* zero the int */
6534
-  mp_zero (a);
6499
+  fp_zero (a);
6535 6500
 
6536
-  /* read the bytes in */
6537
-  while (c-- > 0) {
6538
-    if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
6539
-      return res;
6540
-    }
6501
+  /* If we know the endianness of this architecture, and we're using
6502
+     32-bit fp_digits, we can optimize this */
6503
+#if (defined(ENDIAN_LITTLE) || defined(ENDIAN_BIG)) && !defined(FP_64BIT)
6504
+  /* But not for both simultaneously */
6505
+#if defined(ENDIAN_LITTLE) && defined(ENDIAN_BIG)
6506
+#error Both ENDIAN_LITTLE and ENDIAN_BIG defined.
6507
+#endif
6508
+  {
6509
+     unsigned char *pd = (unsigned char *)a->dp;
6541 6510
 
6542
-#ifndef MP_8BIT
6543
-      a->dp[0] |= *b++;
6544
-      a->used += 1;
6511
+     if ((unsigned)c > (FP_SIZE * sizeof(fp_digit))) {
6512
+        int excess = c - (FP_SIZE * sizeof(fp_digit));
6513
+        c -= excess;
6514
+        b += excess;
6515
+     }
6516
+     a->used = (c + sizeof(fp_digit) - 1)/sizeof(fp_digit);
6517
+     /* read the bytes in */
6518
+#ifdef ENDIAN_BIG
6519
+     {
6520
+       /* Use Duff's device to unroll the loop. */
6521
+       int idx = (c - 1) & ~3;
6522
+       switch (c % 4) {
6523
+       case 0:	do { pd[idx+0] = *b++;
6524
+       case 3:	     pd[idx+1] = *b++;
6525
+       case 2:	     pd[idx+2] = *b++;
6526
+       case 1:	     pd[idx+3] = *b++;
6527
+                     idx -= 4;
6528
+	 	        } while ((c -= 4) > 0);
6529
+       }
6530
+     }
6545 6531
 #else
6546
-      a->dp[0] = (*b & MP_MASK);
6547
-      a->dp[1] |= ((*b++ >> 7U) & 1);
6548
-      a->used += 2;
6532
+     for (c -= 1; c >= 0; c -= 1) {
6533
+       pd[c] = *b++;
6534
+     }
6549 6535
 #endif
6550 6536
   }
6551
-  mp_clamp (a);
6552
-  return MP_OKAY;
6553
-}
6537
+#else
6538
+  /* read the bytes in */
6539
+  for (; c > 0; c--) {
6540
+     fp_mul_2d (a, 8, a);
6541
+     a->dp[0] |= *b++;
6542
+     a->used += 1;
6543
+  }
6554 6544
 #endif
6545
+  fp_clamp (a);
6546
+}
6555 6547
 
6556
-/* $Source: /cvs/libtom/libtommath/bn_mp_read_unsigned_bin.c,v $ */
6557
-/* $Revision: 1.4 $ */
6558
-/* $Date: 2006/03/31 14:18:44 $ */
6548
+/* $Source: /cvs/libtom/tomsfastmath/src/bin/fp_read_unsigned_bin.c,v $ */
6549
+/* $Revision: 1.2 $ */
6550
+/* $Date: 2007/02/17 02:58:19 $ */
6559 6551
 
6560
-/* End: bn_mp_read_unsigned_bin.c */
6552
+/* End: fp_read_unsigned_bin.c */
6561 6553
 
6562
-/* Start: bn_mp_reduce.c */
6563
-#include <bignum.h>
6564
-#ifdef BN_MP_REDUCE_C
6565
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
6566
- *
6567
- * LibTomMath is a library that provides multiple-precision
6568
- * integer arithmetic as well as number theoretic functionality.
6569
- *
6570
- * The library was designed directly after the MPI library by
6571
- * Michael Fromberger but has been written from scratch with
6572
- * additional optimizations in place.
6573
- *
6574
- * The library is free for all purposes without any express
6575
- * guarantee it works.
6554
+/* Start: fp_reverse.c */
6555
+/* TomsFastMath, a fast ISO C bignum library.
6556
+ * 
6557
+ * This project is meant to fill in where LibTomMath
6558
+ * falls short.  That is speed ;-)
6576 6559
  *
6577
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
6560
+ * This project is public domain and free for all purposes.
6561
+ * 
6562
+ * Tom St Denis, tomstdenis@gmail.com
6578 6563
  */
6564
+#include "bignum_fast.h"
6579 6565
 
6580
-/* reduces x mod m, assumes 0 < x < m**2, mu is 
6581
- * precomputed via mp_reduce_setup.
6582
- * From HAC pp.604 Algorithm 14.42
6583
- */
6584
-int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
6566
+/* reverse an array, used for radix code */
6567
+void fp_reverse (unsigned char *s, int len)
6585 6568
 {
6586
-  mp_int  q;
6587
-  int     res, um = m->used;
6588
-
6589
-  /* q = x */
6590
-  if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
6591
-    return res;
6592
-  }
6593
-
6594
-  /* q1 = x / b**(k-1)  */
6595
-  mp_rshd (&q, um - 1);         
6596
-
6597
-  /* according to HAC this optimization is ok */
6598
-  if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
6599
-    if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
6600
-      goto CLEANUP;
6601
-    }
6602
-  } else {
6603
-#ifdef BN_S_MP_MUL_HIGH_DIGS_C
6604
-    if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
6605
-      goto CLEANUP;
6606
-    }
6607
-#elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
6608
-    if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
6609
-      goto CLEANUP;
6610
-    }
6611
-#else 
6612
-    { 
6613
-      res = MP_VAL;
6614
-      goto CLEANUP;
6615
-    }
6616
-#endif
6617
-  }
6618
-
6619
-  /* q3 = q2 / b**(k+1) */
6620
-  mp_rshd (&q, um + 1);         
6621
-
6622
-  /* x = x mod b**(k+1), quick (no division) */
6623
-  if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
6624
-    goto CLEANUP;
6625
-  }
6626
-
6627
-  /* q = q * m mod b**(k+1), quick (no division) */
6628
-  if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
6629
-    goto CLEANUP;
6630
-  }
6631
-
6632
-  /* x = x - q */
6633
-  if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
6634
-    goto CLEANUP;
6635
-  }
6636
-
6637
-  /* If x < 0, add b**(k+1) to it */
6638
-  if (mp_cmp_d (x, 0) == MP_LT) {
6639
-    mp_set (&q, 1);
6640
-    if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
6641
-      goto CLEANUP;
6642
-    if ((res = mp_add (x, &q, x)) != MP_OKAY)
6643
-      goto CLEANUP;
6644
-  }
6569
+  int     ix, iy;
6570
+  unsigned char t;
6645 6571
 
6646
-  /* Back off if it's too big */
6647
-  while (mp_cmp (x, m) != MP_LT) {
6648
-    if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
6649
-      goto CLEANUP;
6650
-    }
6572
+  ix = 0;
6573
+  iy = len - 1;
6574
+  while (ix < iy) {
6575
+    t     = s[ix];
6576
+    s[ix] = s[iy];
6577
+    s[iy] = t;
6578
+    ++ix;
6579
+    --iy;
6651 6580
   }
6652
-  
6653
-CLEANUP:
6654
-  mp_clear (&q);
6655
-
6656
-  return res;
6657 6581
 }
6658
-#endif
6659 6582
 
6660
-/* $Source: /cvs/libtom/libtommath/bn_mp_reduce.c,v $ */
6661
-/* $Revision: 1.3 $ */
6662
-/* $Date: 2006/03/31 14:18:44 $ */
6583
+/* $Source: /cvs/libtom/tomsfastmath/src/bin/fp_reverse.c,v $ */
6584
+/* $Revision: 1.2 $ */
6585
+/* $Date: 2007/02/27 02:38:44 $ */
6663 6586
 
6664
-/* End: bn_mp_reduce.c */
6587
+/* End: fp_reverse.c */
6665 6588
 
6666
-/* Start: bn_mp_reduce_2k.c */
6667
-#include <bignum.h>
6668
-#ifdef BN_MP_REDUCE_2K_C
6669
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
6670
- *
6671
- * LibTomMath is a library that provides multiple-precision
6672
- * integer arithmetic as well as number theoretic functionality.
6673
- *
6674
- * The library was designed directly after the MPI library by
6675
- * Michael Fromberger but has been written from scratch with
6676
- * additional optimizations in place.
6677
- *
6678
- * The library is free for all purposes without any express
6679
- * guarantee it works.
6589
+/* Start: fp_rshd.c */
6590
+/* TomsFastMath, a fast ISO C bignum library.
6591
+ * 
6592
+ * This project is meant to fill in where LibTomMath
6593
+ * falls short.  That is speed ;-)
6680 6594
  *
6681
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
6595
+ * This project is public domain and free for all purposes.
6596
+ * 
6597
+ * Tom St Denis, tomstdenis@gmail.com
6682 6598
  */
6599
+#include "bignum_fast.h"
6683 6600
 
6684
-/* reduces a modulo n where n is of the form 2**p - d */
6685
-int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d)
6601
+void fp_rshd(fp_int *a, int x)
6686 6602
 {
6687
-   mp_int q;
6688
-   int    p, res;
6689
-   
6690
-   if ((res = mp_init(&q)) != MP_OKAY) {
6691
-      return res;
6692
-   }
6693
-   
6694
-   p = mp_count_bits(n);    
6695
-top:
6696
-   /* q = a/2**p, a = a mod 2**p */
6697
-   if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
6698
-      goto ERR;
6699
-   }
6700
-   
6701
-   if (d != 1) {
6702
-      /* q = q * d */
6703
-      if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) { 
6704
-         goto ERR;
6705
-      }
6706
-   }
6707
-   
6708
-   /* a = a + q */
6709
-   if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
6710
-      goto ERR;
6711
-   }
6712
-   
6713
-   if (mp_cmp_mag(a, n) != MP_LT) {
6714
-      s_mp_sub(a, n, a);
6715
-      goto top;
6716
-   }
6717
-   
6718
-ERR:
6719
-   mp_clear(&q);
6720
-   return res;
6721
-}
6603
+  int y;
6722 6604
 
6723
-#endif
6724
-
6725
-/* $Source: /cvs/libtom/libtommath/bn_mp_reduce_2k.c,v $ */
6726
-/* $Revision: 1.3 $ */
6727
-/* $Date: 2006/03/31 14:18:44 $ */
6728
-
6729
-/* End: bn_mp_reduce_2k.c */
6730
-
6731
-/* Start: bn_mp_reduce_2k_l.c */
6732
-#include <bignum.h>
6733
-#ifdef BN_MP_REDUCE_2K_L_C
6734
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
6735
- *
6736
- * LibTomMath is a library that provides multiple-precision
6737
- * integer arithmetic as well as number theoretic functionality.
6738
- *
6739
- * The library was designed directly after the MPI library by
6740
- * Michael Fromberger but has been written from scratch with
6741
- * additional optimizations in place.
6742
- *
6743
- * The library is free for all purposes without any express
6744
- * guarantee it works.
6745
- *
6746
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
6747
- */
6605
+  /* too many digits just zero and return */
6606
+  if (x >= a->used) {
6607
+     fp_zero(a);
6608
+     return;
6609
+  }
6748 6610
 
6749
-/* reduces a modulo n where n is of the form 2**p - d 
6750
-   This differs from reduce_2k since "d" can be larger
6751
-   than a single digit.
6752
-*/
6753
-int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d)
6754
-{
6755
-   mp_int q;
6756
-   int    p, res;
6757
-   
6758
-   if ((res = mp_init(&q)) != MP_OKAY) {
6759
-      return res;
6760
-   }
6761
-   
6762
-   p = mp_count_bits(n);    
6763
-top:
6764
-   /* q = a/2**p, a = a mod 2**p */
6765
-   if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
6766
-      goto ERR;
6767
-   }
6768
-   
6769
-   /* q = q * d */
6770
-   if ((res = mp_mul(&q, d, &q)) != MP_OKAY) { 
6771
-      goto ERR;
6772
-   }
6773
-   
6774
-   /* a = a + q */
6775
-   if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
6776
-      goto ERR;
6611
+   /* shift */
6612
+   for (y = 0; y < a->used - x; y++) {
6613
+      a->dp[y] = a->dp[y+x];
6777 6614
    }
6778
-   
6779
-   if (mp_cmp_mag(a, n) != MP_LT) {
6780
-      s_mp_sub(a, n, a);
6781
-      goto top;
6615
+
6616
+   /* zero rest */
6617
+   for (; y < a->used; y++) {
6618
+      a->dp[y] = 0;
6782 6619
    }
6783 6620
    
6784
-ERR:
6785
-   mp_clear(&q);
6786
-   return res;
6621
+   /* decrement count */
6622
+   a->used -= x;
6623
+   fp_clamp(a);
6787 6624
 }
6788 6625
 
6789
-#endif
6790 6626
 
6791
-/* $Source: /cvs/libtom/libtommath/bn_mp_reduce_2k_l.c,v $ */
6792
-/* $Revision: 1.3 $ */
6793
-/* $Date: 2006/03/31 14:18:44 $ */
6627
+/* $Source: /cvs/libtom/tomsfastmath/src/bit/fp_rshd.c,v $ */
6628
+/* $Revision: 1.1 $ */
6629
+/* $Date: 2006/12/31 21:25:53 $ */
6794 6630
 
6795
-/* End: bn_mp_reduce_2k_l.c */
6631
+/* End: fp_rshd.c */
6796 6632
 
6797
-/* Start: bn_mp_reduce_2k_setup.c */
6798
-#include <bignum.h>
6799
-#ifdef BN_MP_REDUCE_2K_SETUP_C
6800
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
6801
- *
6802
- * LibTomMath is a library that provides multiple-precision
6803
- * integer arithmetic as well as number theoretic functionality.
6804
- *
6805
- * The library was designed directly after the MPI library by
6806
- * Michael Fromberger but has been written from scratch with
6807
- * additional optimizations in place.
6808
- *
6809
- * The library is free for all purposes without any express
6810
- * guarantee it works.
6633
+/* Start: fp_s_rmap.c */
6634
+/* TomsFastMath, a fast ISO C bignum library.
6635
+ * 
6636
+ * This project is meant to fill in where LibTomMath
6637
+ * falls short.  That is speed ;-)
6811 6638
  *
6812
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
6639
+ * This project is public domain and free for all purposes.
6640
+ * 
6641
+ * Tom St Denis, tomstdenis@gmail.com
6813 6642
  */
6643
+#include "bignum_fast.h"
6814 6644
 
6815
-/* determines the setup value */
6816
-int mp_reduce_2k_setup(mp_int *a, mp_digit *d)
6817
-{
6818
-   int res, p;
6819
-   mp_int tmp;
6820
-   
6821
-   if ((res = mp_init(&tmp)) != MP_OKAY) {
6822
-      return res;
6823
-   }
6824
-   
6825
-   p = mp_count_bits(a);
6826
-   if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
6827
-      mp_clear(&tmp);
6828
-      return res;
6829
-   }
6830
-   
6831
-   if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
6832
-      mp_clear(&tmp);
6833
-      return res;
6834
-   }
6835
-   
6836
-   *d = tmp.dp[0];
6837
-   mp_clear(&tmp);
6838
-   return MP_OKAY;
6839
-}
6840
-#endif
6645
+/* chars used in radix conversions */
6646
+const char *fp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
6841 6647
 
6842
-/* $Source: /cvs/libtom/libtommath/bn_mp_reduce_2k_setup.c,v $ */
6843
-/* $Revision: 1.3 $ */
6844
-/* $Date: 2006/03/31 14:18:44 $ */
6648
+/* $Source: /cvs/libtom/tomsfastmath/src/bin/fp_s_rmap.c,v $ */
6649
+/* $Revision: 1.1 $ */
6650
+/* $Date: 2006/12/31 21:25:53 $ */
6845 6651
 
6846
-/* End: bn_mp_reduce_2k_setup.c */
6652
+/* End: fp_s_rmap.c */
6847 6653
 
6848
-/* Start: bn_mp_reduce_2k_setup_l.c */
6849
-#include <bignum.h>
6850
-#ifdef BN_MP_REDUCE_2K_SETUP_L_C
6851
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
6852
- *
6853
- * LibTomMath is a library that provides multiple-precision
6854
- * integer arithmetic as well as number theoretic functionality.
6855
- *
6856
- * The library was designed directly after the MPI library by
6857
- * Michael Fromberger but has been written from scratch with
6858
- * additional optimizations in place.
6859
- *
6860
- * The library is free for all purposes without any express
6861
- * guarantee it works.
6654
+/* Start: fp_set.c */
6655
+/* TomsFastMath, a fast ISO C bignum library.
6656
+ * 
6657
+ * This project is meant to fill in where LibTomMath
6658
+ * falls short.  That is speed ;-)
6862 6659
  *
6863
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
6660
+ * This project is public domain and free for all purposes.
6661
+ * 
6662
+ * Tom St Denis, tomstdenis@gmail.com
6864 6663
  */
6664
+#include "bignum_fast.h"
6865 6665
 
6866
-/* determines the setup value */
6867
-int mp_reduce_2k_setup_l(mp_int *a, mp_int *d)
6666
+void fp_set(fp_int *a, fp_digit b)
6868 6667
 {
6869
-   int    res;
6870
-   mp_int tmp;
6871
-   
6872
-   if ((res = mp_init(&tmp)) != MP_OKAY) {
6873
-      return res;
6874
-   }
6875
-   
6876
-   if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
6877
-      goto ERR;
6878
-   }
6879
-   
6880
-   if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
6881
-      goto ERR;
6882
-   }
6883
-   
6884
-ERR:
6885
-   mp_clear(&tmp);
6886
-   return res;
6668
+   fp_zero(a);
6669
+   a->dp[0] = b;
6670
+   a->used  = a->dp[0] ? 1 : 0;
6887 6671
 }
6888
-#endif
6889 6672
 
6890
-/* $Source: /cvs/libtom/libtommath/bn_mp_reduce_2k_setup_l.c,v $ */
6891
-/* $Revision: 1.3 $ */
6892
-/* $Date: 2006/03/31 14:18:44 $ */
6673
+/* $Source: /cvs/libtom/tomsfastmath/src/misc/fp_set.c,v $ */
6674
+/* $Revision: 1.1 $ */
6675
+/* $Date: 2006/12/31 21:25:53 $ */
6893 6676
 
6894
-/* End: bn_mp_reduce_2k_setup_l.c */
6677
+/* End: fp_set.c */
6895 6678
 
6896
-/* Start: bn_mp_reduce_is_2k.c */
6897
-#include <bignum.h>
6898
-#ifdef BN_MP_REDUCE_IS_2K_C
6899
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
6900
- *
6901
- * LibTomMath is a library that provides multiple-precision
6902
- * integer arithmetic as well as number theoretic functionality.
6903
- *
6904
- * The library was designed directly after the MPI library by
6905
- * Michael Fromberger but has been written from scratch with
6906
- * additional optimizations in place.
6907
- *
6908
- * The library is free for all purposes without any express
6909
- * guarantee it works.
6679
+/* Start: fp_signed_bin_size.c */
6680
+/* TomsFastMath, a fast ISO C bignum library.
6681
+ * 
6682
+ * This project is meant to fill in where LibTomMath
6683
+ * falls short.  That is speed ;-)
6910 6684
  *
6911
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
6685
+ * This project is public domain and free for all purposes.
6686
+ * 
6687
+ * Tom St Denis, tomstdenis@gmail.com
6912 6688
  */
6689
+#include "bignum_fast.h"
6913 6690
 
6914
-/* determines if mp_reduce_2k can be used */
6915
-int mp_reduce_is_2k(mp_int *a)
6691
+int fp_signed_bin_size(fp_int *a)
6916 6692
 {
6917
-   int ix, iy, iw;
6918
-   mp_digit iz;
6919
-   
6920
-   if (a->used == 0) {
6921
-      return MP_NO;
6922
-   } else if (a->used == 1) {
6923
-      return MP_YES;
6924
-   } else if (a->used > 1) {
6925
-      iy = mp_count_bits(a);
6926
-      iz = 1;
6927
-      iw = 1;
6928
-    
6929
-      /* Test every bit from the second digit up, must be 1 */
6930
-      for (ix = DIGIT_BIT; ix < iy; ix++) {
6931
-          if ((a->dp[iw] & iz) == 0) {
6932
-             return MP_NO;
6933
-          }
6934
-          iz <<= 1;
6935
-          if (iz > (mp_digit)MP_MASK) {
6936
-             ++iw;
6937
-             iz = 1;
6938
-          }
6939
-      }
6940
-   }
6941
-   return MP_YES;
6693
+  return 1 + fp_unsigned_bin_size (a);
6942 6694
 }
6943 6695
 
6944
-#endif
6945
-
6946
-/* $Source: /cvs/libtom/libtommath/bn_mp_reduce_is_2k.c,v $ */
6947
-/* $Revision: 1.3 $ */
6948
-/* $Date: 2006/03/31 14:18:44 $ */
6696
+/* $Source: /cvs/libtom/tomsfastmath/src/bin/fp_signed_bin_size.c,v $ */
6697
+/* $Revision: 1.1 $ */
6698
+/* $Date: 2006/12/31 21:25:53 $ */
6949 6699
 
6950
-/* End: bn_mp_reduce_is_2k.c */
6700
+/* End: fp_signed_bin_size.c */
6951 6701
 
6952
-/* Start: bn_mp_reduce_is_2k_l.c */
6953
-#include <bignum.h>
6954
-#ifdef BN_MP_REDUCE_IS_2K_L_C
6955
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
6956
- *
6957
- * LibTomMath is a library that provides multiple-precision
6958
- * integer arithmetic as well as number theoretic functionality.
6959
- *
6960
- * The library was designed directly after the MPI library by
6961
- * Michael Fromberger but has been written from scratch with
6962
- * additional optimizations in place.
6963
- *
6964
- * The library is free for all purposes without any express
6965
- * guarantee it works.
6702
+/* Start: fp_sqr.c */
6703
+/* TomsFastMath, a fast ISO C bignum library.
6704
+ * 
6705
+ * This project is meant to fill in where LibTomMath
6706
+ * falls short.  That is speed ;-)
6966 6707
  *
6967
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
6708
+ * This project is public domain and free for all purposes.
6709
+ * 
6710
+ * Tom St Denis, tomstdenis@gmail.com
6968 6711
  */
6712
+#include "bignum_fast.h"
6969 6713
 
6970
-/* determines if reduce_2k_l can be used */
6971
-int mp_reduce_is_2k_l(mp_int *a)
6714
+/* b = a*a  */
6715
+void fp_sqr(fp_int *A, fp_int *B)
6972 6716
 {
6973
-   int ix, iy;
6974
-   
6975
-   if (a->used == 0) {
6976
-      return MP_NO;
6977
-   } else if (a->used == 1) {
6978
-      return MP_YES;
6979
-   } else if (a->used > 1) {
6980
-      /* if more than half of the digits are -1 we're sold */
6981
-      for (iy = ix = 0; ix < a->used; ix++) {
6982
-          if (a->dp[ix] == MP_MASK) {
6983
-              ++iy;
6984
-          }
6985
-      }
6986
-      return (iy >= (a->used/2)) ? MP_YES : MP_NO;
6987
-      
6988
-   }
6989
-   return MP_NO;
6990
-}
6991
-
6992
-#endif
6993
-
6994
-/* $Source: /cvs/libtom/libtommath/bn_mp_reduce_is_2k_l.c,v $ */
6995
-/* $Revision: 1.3 $ */
6996
-/* $Date: 2006/03/31 14:18:44 $ */
6997
-
6998
-/* End: bn_mp_reduce_is_2k_l.c */
6717
+    int     y;
6999 6718
 
7000
-/* Start: bn_mp_reduce_setup.c */
7001
-#include <bignum.h>
7002
-#ifdef BN_MP_REDUCE_SETUP_C
7003
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
7004
- *
7005
- * LibTomMath is a library that provides multiple-precision
7006
- * integer arithmetic as well as number theoretic functionality.
7007
- *
7008
- * The library was designed directly after the MPI library by
7009
- * Michael Fromberger but has been written from scratch with
7010
- * additional optimizations in place.
7011
- *
7012
- * The library is free for all purposes without any express
7013
- * guarantee it works.
7014
- *
7015
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
7016
- */
6719
+    /* call generic if we're out of range */
6720
+    if (A->used + A->used > FP_SIZE) {
6721
+       fp_sqr_comba(A, B);
6722
+       return ;
6723
+    }
7017 6724
 
7018
-/* pre-calculate the value required for Barrett reduction
7019
- * For a given modulus "b" it calulates the value required in "a"
7020
- */
7021
-int mp_reduce_setup (mp_int * a, mp_int * b)
7022
-{
7023
-  int     res;
7024
-  
7025
-  if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
7026
-    return res;
7027
-  }
7028
-  return mp_div (a, b, a, NULL);
7029
-}
6725
+    y = A->used;
6726
+#if defined(TFM_SQR3)
6727
+        if (y <= 3) {
6728
+           fp_sqr_comba3(A,B);
6729
+           return;
6730
+        }
6731
+#endif
6732
+#if defined(TFM_SQR4)
6733
+        if (y == 4) {
6734
+           fp_sqr_comba4(A,B);
6735
+           return;
6736
+        }
6737
+#endif
6738
+#if defined(TFM_SQR6)
6739
+        if (y <= 6) {
6740
+           fp_sqr_comba6(A,B);
6741
+           return;
6742
+        }
6743
+#endif
6744
+#if defined(TFM_SQR7)
6745
+        if (y == 7) {
6746
+           fp_sqr_comba7(A,B);
6747
+           return;
6748
+        }
6749
+#endif
6750
+#if defined(TFM_SQR8)
6751
+        if (y == 8) {
6752
+           fp_sqr_comba8(A,B);
6753
+           return;
6754
+        }
6755
+#endif
6756
+#if defined(TFM_SQR9)
6757
+        if (y == 9) {
6758
+           fp_sqr_comba9(A,B);
6759
+           return;
6760
+        }
6761
+#endif
6762
+#if defined(TFM_SQR12)
6763
+        if (y <= 12) {
6764
+           fp_sqr_comba12(A,B);
6765
+           return;
6766
+        }
6767
+#endif
6768
+#if defined(TFM_SQR17)
6769
+        if (y <= 17) {
6770
+           fp_sqr_comba17(A,B);
6771
+           return;
6772
+        }
6773
+#endif
6774
+#if defined(TFM_SMALL_SET)
6775
+        if (y <= 16) {
6776
+           fp_sqr_comba_small(A,B);
6777
+           return;
6778
+        }
6779
+#endif
6780
+#if defined(TFM_SQR20)
6781
+        if (y <= 20) {
6782
+           fp_sqr_comba20(A,B);
6783
+           return;
6784
+        }
6785
+#endif
6786
+#if defined(TFM_SQR24)
6787
+        if (y <= 24) {
6788
+           fp_sqr_comba24(A,B);
6789
+           return;
6790
+        }
6791
+#endif
6792
+#if defined(TFM_SQR28)
6793
+        if (y <= 28) {
6794
+           fp_sqr_comba28(A,B);
6795
+           return;
6796
+        }
6797
+#endif
6798
+#if defined(TFM_SQR32)
6799
+        if (y <= 32) {
6800
+           fp_sqr_comba32(A,B);
6801
+           return;
6802
+        }
6803
+#endif
6804
+#if defined(TFM_SQR48)
6805
+        if (y <= 48) {
6806
+           fp_sqr_comba48(A,B);
6807
+           return;
6808
+        }
7030 6809
 #endif
6810
+#if defined(TFM_SQR64)
6811
+        if (y <= 64) {
6812
+           fp_sqr_comba64(A,B);
6813
+           return;
6814
+        }
6815
+#endif
6816
+       fp_sqr_comba(A, B);
6817
+}
7031 6818
 
7032
-/* $Source: /cvs/libtom/libtommath/bn_mp_reduce_setup.c,v $ */
7033
-/* $Revision: 1.3 $ */
7034
-/* $Date: 2006/03/31 14:18:44 $ */
7035 6819
 
7036
-/* End: bn_mp_reduce_setup.c */
6820
+/* $Source: /cvs/libtom/tomsfastmath/src/sqr/fp_sqr.c,v $ */
6821
+/* $Revision: 1.1 $ */
6822
+/* $Date: 2006/12/31 21:25:53 $ */
7037 6823
 
7038
-/* Start: bn_mp_rshd.c */
7039
-#include <bignum.h>
7040
-#ifdef BN_MP_RSHD_C
7041
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
7042
- *
7043
- * LibTomMath is a library that provides multiple-precision
7044
- * integer arithmetic as well as number theoretic functionality.
7045
- *
7046
- * The library was designed directly after the MPI library by
7047
- * Michael Fromberger but has been written from scratch with
7048
- * additional optimizations in place.
7049
- *
7050
- * The library is free for all purposes without any express
7051
- * guarantee it works.
6824
+/* End: fp_sqr.c */
6825
+
6826
+/* Start: fp_sqr_comba.c */
6827
+/*
6828
+ * 
6829
+ * This project is meant to fill in where LibTomMath
6830
+ * falls short.  That is speed ;-)
7052 6831
  *
7053
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
7054
- */
6832
+ * This project is public domain and free for all purposes.
6833
+ * 
6834
+ * Tom St Denis, tomstdenis@gmail.com
6835
+ */
6836
+#include "bignum_fast.h"
6837
+
6838
+#if defined(TFM_PRESCOTT) && defined(TFM_SSE2)
6839
+   #undef TFM_SSE2
6840
+   #define TFM_X86
6841
+#endif
6842
+
6843
+#if defined(TFM_X86)
6844
+
6845
+/* x86-32 optimized */
6846
+
6847
+#define COMBA_START
6848
+
6849
+#define CLEAR_CARRY \
6850
+   c0 = c1 = c2 = 0;
6851
+
6852
+#define COMBA_STORE(x) \
6853
+   x = c0;
6854
+
6855
+#define COMBA_STORE2(x) \
6856
+   x = c1;
6857
+
6858
+#define CARRY_FORWARD \
6859
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
6860
+
6861
+#define COMBA_FINI
6862
+
6863
+#define SQRADD(i, j)                                      \
6864
+asm(                                            \
6865
+     "movl  %6,%%eax     \n\t"                            \
6866
+     "mull  %%eax        \n\t"                            \
6867
+     "addl  %%eax,%0     \n\t"                            \
6868
+     "adcl  %%edx,%1     \n\t"                            \
6869
+     "adcl  $0,%2        \n\t"                            \
6870
+     :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","%cc");
6871
+
6872
+#define SQRADD2(i, j)                                     \
6873
+asm(                                            \
6874
+     "movl  %6,%%eax     \n\t"                            \
6875
+     "mull  %7           \n\t"                            \
6876
+     "addl  %%eax,%0     \n\t"                            \
6877
+     "adcl  %%edx,%1     \n\t"                            \
6878
+     "adcl  $0,%2        \n\t"                            \
6879
+     "addl  %%eax,%0     \n\t"                            \
6880
+     "adcl  %%edx,%1     \n\t"                            \
6881
+     "adcl  $0,%2        \n\t"                            \
6882
+     :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j)  :"%eax","%edx","%cc");
6883
+
6884
+#define SQRADDSC(i, j)                                    \
6885
+asm(                                                     \
6886
+     "movl  %6,%%eax     \n\t"                            \
6887
+     "mull  %7           \n\t"                            \
6888
+     "movl  %%eax,%0     \n\t"                            \
6889
+     "movl  %%edx,%1     \n\t"                            \
6890
+     "xorl  %2,%2        \n\t"                            \
6891
+     :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc");
6892
+
6893
+#define SQRADDAC(i, j)                                    \
6894
+asm(                                                     \
6895
+     "movl  %6,%%eax     \n\t"                            \
6896
+     "mull  %7           \n\t"                            \
6897
+     "addl  %%eax,%0     \n\t"                            \
6898
+     "adcl  %%edx,%1     \n\t"                            \
6899
+     "adcl  $0,%2        \n\t"                            \
6900
+     :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc");
6901
+
6902
+#define SQRADDDB                                          \
6903
+asm(                                                     \
6904
+     "addl %6,%0         \n\t"                            \
6905
+     "adcl %7,%1         \n\t"                            \
6906
+     "adcl %8,%2         \n\t"                            \
6907
+     "addl %6,%0         \n\t"                            \
6908
+     "adcl %7,%1         \n\t"                            \
6909
+     "adcl %8,%2         \n\t"                            \
6910
+     :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc");
6911
+
6912
+#elif defined(TFM_X86_64)
6913
+/* x86-64 optimized */
6914
+
6915
+#define COMBA_START
6916
+
6917
+#define CLEAR_CARRY \
6918
+   c0 = c1 = c2 = 0;
6919
+
6920
+#define COMBA_STORE(x) \
6921
+   x = c0;
6922
+
6923
+#define COMBA_STORE2(x) \
6924
+   x = c1;
6925
+
6926
+#define CARRY_FORWARD \
6927
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
6928
+
6929
+#define COMBA_FINI
6930
+
6931
+#define SQRADD(i, j)                                      \
6932
+asm(                                                     \
6933
+     "movq  %6,%%rax     \n\t"                            \
6934
+     "mulq  %%rax        \n\t"                            \
6935
+     "addq  %%rax,%0     \n\t"                            \
6936
+     "adcq  %%rdx,%1     \n\t"                            \
6937
+     "adcq  $0,%2        \n\t"                            \
6938
+     :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i) :"%rax","%rdx","%cc");
6939
+
6940
+#define SQRADD2(i, j)                                     \
6941
+asm(                                                     \
6942
+     "movq  %6,%%rax     \n\t"                            \
6943
+     "mulq  %7           \n\t"                            \
6944
+     "addq  %%rax,%0     \n\t"                            \
6945
+     "adcq  %%rdx,%1     \n\t"                            \
6946
+     "adcq  $0,%2        \n\t"                            \
6947
+     "addq  %%rax,%0     \n\t"                            \
6948
+     "adcq  %%rdx,%1     \n\t"                            \
6949
+     "adcq  $0,%2        \n\t"                            \
6950
+     :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j)  :"%rax","%rdx","%cc");
6951
+
6952
+#define SQRADDSC(i, j)                                    \
6953
+asm(                                                     \
6954
+     "movq  %6,%%rax     \n\t"                            \
6955
+     "mulq  %7           \n\t"                            \
6956
+     "movq  %%rax,%0     \n\t"                            \
6957
+     "movq  %%rdx,%1     \n\t"                            \
6958
+     "xorq  %2,%2        \n\t"                            \
6959
+     :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","%cc");
6960
+
6961
+#define SQRADDAC(i, j)                                                         \
6962
+asm(                                                     \
6963
+     "movq  %6,%%rax     \n\t"                            \
6964
+     "mulq  %7           \n\t"                            \
6965
+     "addq  %%rax,%0     \n\t"                            \
6966
+     "adcq  %%rdx,%1     \n\t"                            \
6967
+     "adcq  $0,%2        \n\t"                            \
6968
+     :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","%cc");
6969
+
6970
+#define SQRADDDB                                          \
6971
+asm(                                                     \
6972
+     "addq %6,%0         \n\t"                            \
6973
+     "adcq %7,%1         \n\t"                            \
6974
+     "adcq %8,%2         \n\t"                            \
6975
+     "addq %6,%0         \n\t"                            \
6976
+     "adcq %7,%1         \n\t"                            \
6977
+     "adcq %8,%2         \n\t"                            \
6978
+     :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc");
6979
+
6980
+#elif defined(TFM_SSE2)
6981
+
6982
+/* SSE2 Optimized */
6983
+#define COMBA_START
6984
+
6985
+#define CLEAR_CARRY \
6986
+   c0 = c1 = c2 = 0;
6987
+
6988
+#define COMBA_STORE(x) \
6989
+   x = c0;
6990
+
6991
+#define COMBA_STORE2(x) \
6992
+   x = c1;
6993
+
6994
+#define CARRY_FORWARD \
6995
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
6996
+
6997
+#define COMBA_FINI \
6998
+   asm("emms");
6999
+
7000
+#define SQRADD(i, j)                                      \
7001
+asm(                                            \
7002
+     "movd  %6,%%mm0     \n\t"                            \
7003
+     "pmuludq %%mm0,%%mm0\n\t"                            \
7004
+     "movd  %%mm0,%%eax  \n\t"                            \
7005
+     "psrlq $32,%%mm0    \n\t"                            \
7006
+     "addl  %%eax,%0     \n\t"                            \
7007
+     "movd  %%mm0,%%eax  \n\t"                            \
7008
+     "adcl  %%eax,%1     \n\t"                            \
7009
+     "adcl  $0,%2        \n\t"                            \
7010
+     :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%cc");
7011
+
7012
+#define SQRADD2(i, j)                                     \
7013
+asm(                                            \
7014
+     "movd  %6,%%mm0     \n\t"                            \
7015
+     "movd  %7,%%mm1     \n\t"                            \
7016
+     "pmuludq %%mm1,%%mm0\n\t"                            \
7017
+     "movd  %%mm0,%%eax  \n\t"                            \
7018
+     "psrlq $32,%%mm0    \n\t"                            \
7019
+     "movd  %%mm0,%%edx  \n\t"                            \
7020
+     "addl  %%eax,%0     \n\t"                            \
7021
+     "adcl  %%edx,%1     \n\t"                            \
7022
+     "adcl  $0,%2        \n\t"                            \
7023
+     "addl  %%eax,%0     \n\t"                            \
7024
+     "adcl  %%edx,%1     \n\t"                            \
7025
+     "adcl  $0,%2        \n\t"                            \
7026
+     :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j)  :"%eax","%edx","%cc");
7027
+
7028
+#define SQRADDSC(i, j)                                                         \
7029
+asm(                                            \
7030
+     "movd  %6,%%mm0     \n\t"                            \
7031
+     "movd  %7,%%mm1     \n\t"                            \
7032
+     "pmuludq %%mm1,%%mm0\n\t"                            \
7033
+     "movd  %%mm0,%0     \n\t"                            \
7034
+     "psrlq $32,%%mm0    \n\t"                            \
7035
+     "movd  %%mm0,%1     \n\t"                            \
7036
+     "xorl  %2,%2        \n\t"                            \
7037
+     :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j));
7038
+
7039
+#define SQRADDAC(i, j)                                                         \
7040
+asm(                                            \
7041
+     "movd  %6,%%mm0     \n\t"                            \
7042
+     "movd  %7,%%mm1     \n\t"                            \
7043
+     "pmuludq %%mm1,%%mm0\n\t"                            \
7044
+     "movd  %%mm0,%%eax  \n\t"                            \
7045
+     "psrlq $32,%%mm0    \n\t"                            \
7046
+     "movd  %%mm0,%%edx  \n\t"                            \
7047
+     "addl  %%eax,%0     \n\t"                            \
7048
+     "adcl  %%edx,%1     \n\t"                            \
7049
+     "adcl  $0,%2        \n\t"                            \
7050
+     :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j)  :"%eax","%edx","%cc");
7051
+
7052
+#define SQRADDDB                                          \
7053
+asm(                                                     \
7054
+     "addl %6,%0         \n\t"                            \
7055
+     "adcl %7,%1         \n\t"                            \
7056
+     "adcl %8,%2         \n\t"                            \
7057
+     "addl %6,%0         \n\t"                            \
7058
+     "adcl %7,%1         \n\t"                            \
7059
+     "adcl %8,%2         \n\t"                            \
7060
+     :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc");
7061
+
7062
+#elif defined(TFM_ARM)
7063
+
7064
+/* ARM code */
7065
+
7066
+#define COMBA_START
7067
+
7068
+#define CLEAR_CARRY \
7069
+   c0 = c1 = c2 = 0;
7070
+
7071
+#define COMBA_STORE(x) \
7072
+   x = c0;
7073
+
7074
+#define COMBA_STORE2(x) \
7075
+   x = c1;
7076
+
7077
+#define CARRY_FORWARD \
7078
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
7079
+
7080
+#define COMBA_FINI
7081
+
7082
+/* multiplies point i and j, updates carry "c1" and digit c2 */
7083
+#define SQRADD(i, j)                                             \
7084
+asm(                                                             \
7085
+"  UMULL  r0,r1,%6,%6              \n\t"                         \
7086
+"  ADDS   %0,%0,r0                 \n\t"                         \
7087
+"  ADCS   %1,%1,r1                 \n\t"                         \
7088
+"  ADC    %2,%2,#0                 \n\t"                         \
7089
+:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "%cc");
7090
+	
7091
+/* for squaring some of the terms are doubled... */
7092
+#define SQRADD2(i, j)                                            \
7093
+asm(                                                             \
7094
+"  UMULL  r0,r1,%6,%7              \n\t"                         \
7095
+"  ADDS   %0,%0,r0                 \n\t"                         \
7096
+"  ADCS   %1,%1,r1                 \n\t"                         \
7097
+"  ADC    %2,%2,#0                 \n\t"                         \
7098
+"  ADDS   %0,%0,r0                 \n\t"                         \
7099
+"  ADCS   %1,%1,r1                 \n\t"                         \
7100
+"  ADC    %2,%2,#0                 \n\t"                         \
7101
+:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc");
7102
+
7103
+#define SQRADDSC(i, j)                                           \
7104
+asm(                                                             \
7105
+"  UMULL  %0,%1,%6,%7              \n\t"                         \
7106
+"  SUB    %2,%2,%2                 \n\t"                         \
7107
+:"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "%cc");
7108
+
7109
+#define SQRADDAC(i, j)                                           \
7110
+asm(                                                             \
7111
+"  UMULL  r0,r1,%6,%7              \n\t"                         \
7112
+"  ADDS   %0,%0,r0                 \n\t"                         \
7113
+"  ADCS   %1,%1,r1                 \n\t"                         \
7114
+"  ADC    %2,%2,#0                 \n\t"                         \
7115
+:"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "%cc");
7116
+
7117
+#define SQRADDDB                                                 \
7118
+asm(                                                             \
7119
+"  ADDS  %0,%0,%3                     \n\t"                      \
7120
+"  ADCS  %1,%1,%4                     \n\t"                      \
7121
+"  ADC   %2,%2,%5                     \n\t"                      \
7122
+"  ADDS  %0,%0,%3                     \n\t"                      \
7123
+"  ADCS  %1,%1,%4                     \n\t"                      \
7124
+"  ADC   %2,%2,%5                     \n\t"                      \
7125
+:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc");
7126
+
7127
+#elif defined(TFM_PPC32)
7128
+
7129
+/* PPC32 */
7130
+
7131
+#define COMBA_START
7132
+
7133
+#define CLEAR_CARRY \
7134
+   c0 = c1 = c2 = 0;
7135
+
7136
+#define COMBA_STORE(x) \
7137
+   x = c0;
7138
+
7139
+#define COMBA_STORE2(x) \
7140
+   x = c1;
7141
+
7142
+#define CARRY_FORWARD \
7143
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
7144
+
7145
+#define COMBA_FINI
7146
+
7147
+/* SQRADD(i, j): squares i (both multiply operands are %6, so j is not
+ * referenced by this variant).  mullw/mulhwu produce the low and high
+ * halves of the product, which are added into c0/c1 with addc/adde; addze
+ * folds the final carry into c2.  Register 16 is scratch (clobbered). */
7148
+#define SQRADD(i, j)             \
7149
+asm(                             \
7150
+   " mullw  16,%6,%6       \n\t" \
7151
+   " addc   %0,%0,16       \n\t" \
7152
+   " mulhwu 16,%6,%6       \n\t" \
7153
+   " adde   %1,%1,16       \n\t" \
7154
+   " addze  %2,%2          \n\t" \
7155
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","%cc");
7156
+
7157
+/* for squaring some of the terms are doubled... */
7158
+#define SQRADD2(i, j)            \
7159
+asm(                             \
7160
+   " mullw  16,%6,%7       \n\t" \
7161
+   " mulhwu 17,%6,%7       \n\t" \
7162
+   " addc   %0,%0,16       \n\t" \
7163
+   " adde   %1,%1,17       \n\t" \
7164
+   " addze  %2,%2          \n\t" \
7165
+   " addc   %0,%0,16       \n\t" \
7166
+   " adde   %1,%1,17       \n\t" \
7167
+   " addze  %2,%2          \n\t" \
7168
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","%cc");
7169
+
7170
+#define SQRADDSC(i, j)            \
7171
+asm(                              \
7172
+   " mullw  %0,%6,%7        \n\t" \
7173
+   " mulhwu %1,%6,%7        \n\t" \
7174
+   " xor    %2,%2,%2        \n\t" \
7175
+:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc");
7176
+
7177
+#define SQRADDAC(i, j)           \
7178
+asm(                             \
7179
+   " mullw  16,%6,%7       \n\t" \
7180
+   " addc   %0,%0,16       \n\t" \
7181
+   " mulhwu 16,%6,%7       \n\t" \
7182
+   " adde   %1,%1,16       \n\t" \
7183
+   " addze  %2,%2          \n\t" \
7184
+:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "%cc");
7185
+
7186
+#define SQRADDDB                  \
7187
+asm(                              \
7188
+   " addc   %0,%0,%3        \n\t" \
7189
+   " adde   %1,%1,%4        \n\t" \
7190
+   " adde   %2,%2,%5        \n\t" \
7191
+   " addc   %0,%0,%3        \n\t" \
7192
+   " adde   %1,%1,%4        \n\t" \
7193
+   " adde   %2,%2,%5        \n\t" \
7194
+:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc");
7195
+
7196
+#elif defined(TFM_PPC64)
7197
+/* PPC64 */
7198
+
7199
+#define COMBA_START
7200
+
7201
+#define CLEAR_CARRY \
7202
+   c0 = c1 = c2 = 0;
7203
+
7204
+#define COMBA_STORE(x) \
7205
+   x = c0;
7206
+
7207
+#define COMBA_STORE2(x) \
7208
+   x = c1;
7209
+
7210
+#define CARRY_FORWARD \
7211
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
7212
+
7213
+#define COMBA_FINI
7214
+
7215
+/* SQRADD(i, j): squares i (both multiply operands are %6, so j is not
+ * referenced by this variant).  mulld/mulhdu produce the low and high
+ * halves of the product, which are added into c0/c1 with addc/adde; addze
+ * folds the final carry into c2.  r16 is scratch (clobbered). */
7216
+#define SQRADD(i, j)             \
7217
+asm(                             \
7218
+   " mulld  r16,%6,%6       \n\t" \
7219
+   " addc   %0,%0,r16       \n\t" \
7220
+   " mulhdu r16,%6,%6       \n\t" \
7221
+   " adde   %1,%1,r16       \n\t" \
7222
+   " addze  %2,%2          \n\t" \
7223
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r16","%cc");
7224
+
7225
+/* for squaring some of the terms are doubled... */
7226
+#define SQRADD2(i, j)            \
7227
+asm(                             \
7228
+   " mulld  r16,%6,%7       \n\t" \
7229
+   " mulhdu r17,%6,%7       \n\t" \
7230
+   " addc   %0,%0,r16       \n\t" \
7231
+   " adde   %1,%1,r17       \n\t" \
7232
+   " addze  %2,%2          \n\t" \
7233
+   " addc   %0,%0,r16       \n\t" \
7234
+   " adde   %1,%1,r17       \n\t" \
7235
+   " addze  %2,%2          \n\t" \
7236
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16", "r17","%cc");
7237
+
7238
+#define SQRADDSC(i, j)            \
7239
+asm(                              \
7240
+   " mulld  %0,%6,%7        \n\t" \
7241
+   " mulhdu %1,%6,%7        \n\t" \
7242
+   " xor    %2,%2,%2        \n\t" \
7243
+:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc");
7244
+
7245
+#define SQRADDAC(i, j)           \
7246
+asm(                             \
7247
+   " mulld  r16,%6,%7       \n\t" \
7248
+   " addc   %0,%0,r16       \n\t" \
7249
+   " mulhdu r16,%6,%7       \n\t" \
7250
+   " adde   %1,%1,r16       \n\t" \
7251
+   " addze  %2,%2          \n\t" \
7252
+:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r16", "%cc");
7253
+
7254
+#define SQRADDDB                  \
7255
+asm(                              \
7256
+   " addc   %0,%0,%3        \n\t" \
7257
+   " adde   %1,%1,%4        \n\t" \
7258
+   " adde   %2,%2,%5        \n\t" \
7259
+   " addc   %0,%0,%3        \n\t" \
7260
+   " adde   %1,%1,%4        \n\t" \
7261
+   " adde   %2,%2,%5        \n\t" \
7262
+:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc");
7263
+
7264
+
7265
+#elif defined(TFM_AVR32)
7266
+
7267
+/* AVR32 */
7268
+
7269
+#define COMBA_START
7270
+
7271
+#define CLEAR_CARRY \
7272
+   c0 = c1 = c2 = 0;
7273
+
7274
+#define COMBA_STORE(x) \
7275
+   x = c0;
7276
+
7277
+#define COMBA_STORE2(x) \
7278
+   x = c1;
7279
+
7280
+#define CARRY_FORWARD \
7281
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
7282
+
7283
+#define COMBA_FINI
7284
+
7285
+/* SQRADD(i, j): squares i (mulu.d takes %6 for both factors, so j is not
+ * referenced by this variant) into the r2/r3 scratch registers, then adds
+ * them into c0/c1 with add/adc and folds the remaining carry into c2 with
+ * acr.  r2 and r3 are listed as clobbered. */
7286
+#define SQRADD(i, j)             \
7287
+asm(                             \
7288
+   " mulu.d r2,%6,%6       \n\t" \
7289
+   " add    %0,%0,r2       \n\t" \
7290
+   " adc    %1,%1,r3       \n\t" \
7291
+   " acr    %2             \n\t" \
7292
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r2","r3");
7293
+
7294
+/* SQRADD2(i, j): adds 2*i*j into the c0/c1/c2 accumulator.  The product is computed once into r2/r3 (scratch, clobbered) and accumulated twice; each pass ends with the single-operand "acr %2" to fold the carry into c2, matching SQRADD and SQRADDAC. */
7295
+#define SQRADD2(i, j)            \
7296
+asm(                             \
7297
+   " mulu.d r2,%6,%7       \n\t" \
7298
+   " add    %0,%0,r2       \n\t" \
7299
+   " adc    %1,%1,r3       \n\t" \
7300
+   " acr    %2             \n\t" \
7301
+   " add    %0,%0,r2       \n\t" \
7302
+   " adc    %1,%1,r3       \n\t" \
7303
+   " acr    %2             \n\t" \
7304
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2", "r3");
7305
+
7306
+#define SQRADDSC(i, j)            \
7307
+asm(                              \
7308
+   " mulu.d r2,%6,%7        \n\t" \
7309
+   " mov    %0,r2           \n\t" \
7310
+   " mov    %1,r3           \n\t" \
7311
+   " eor    %2,%2           \n\t" \
7312
+:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "r2", "r3");
7313
+
7314
+#define SQRADDAC(i, j)           \
7315
+asm(                             \
7316
+   " mulu.d r2,%6,%7       \n\t" \
7317
+   " add    %0,%0,r2       \n\t" \
7318
+   " adc    %1,%1,r3       \n\t" \
7319
+   " acr    %2             \n\t" \
7320
+:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r2", "r3");
7321
+
7322
+#define SQRADDDB                  \
7323
+asm(                              \
7324
+   " add    %0,%0,%3        \n\t" \
7325
+   " adc    %1,%1,%4        \n\t" \
7326
+   " adc    %2,%2,%5        \n\t" \
7327
+   " add    %0,%0,%3        \n\t" \
7328
+   " adc    %1,%1,%4        \n\t" \
7329
+   " adc    %2,%2,%5        \n\t" \
7330
+:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc");
7331
+
7332
+#elif defined(TFM_MIPS)
7333
+
7334
+/* MIPS */
7335
+
7336
+#define COMBA_START
7337
+
7338
+#define CLEAR_CARRY \
7339
+   c0 = c1 = c2 = 0;
7340
+
7341
+#define COMBA_STORE(x) \
7342
+   x = c0;
7343
+
7344
+#define COMBA_STORE2(x) \
7345
+   x = c1;
7346
+
7347
+#define CARRY_FORWARD \
7348
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
7349
+
7350
+#define COMBA_FINI
7351
+
7352
+/* SQRADD(i, j): squares i (multu takes %6 for both factors, so j is not
+ * referenced by this variant).  The low/high product words are fetched
+ * with mflo/mfhi into $12/$13 and added into c0/c1; each carry is
+ * recovered with sltu (sum < addend) and accumulated into c1/c2.
+ * $12 and $13 are scratch and listed as clobbered. */
7353
+#define SQRADD(i, j)              \
7354
+asm(                              \
7355
+   " multu  %6,%6          \n\t"  \
7356
+   " mflo   $12            \n\t"  \
7357
+   " mfhi   $13            \n\t"  \
7358
+   " addu    %0,%0,$12     \n\t"  \
7359
+   " sltu   $12,%0,$12     \n\t"  \
7360
+   " addu    %1,%1,$13     \n\t"  \
7361
+   " sltu   $13,%1,$13     \n\t"  \
7362
+   " addu    %1,%1,$12     \n\t"  \
7363
+   " sltu   $12,%1,$12     \n\t"  \
7364
+   " addu    %2,%2,$13     \n\t"  \
7365
+   " addu    %2,%2,$12     \n\t"  \
7366
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"$12","$13");
7367
+
7368
+/* for squaring some of the terms are doubled... */
7369
+#define SQRADD2(i, j)            \
7370
+asm(                             \
7371
+   " multu  %6,%7          \n\t" \
7372
+   " mflo   $12            \n\t" \
7373
+   " mfhi   $13            \n\t" \
7374
+                                 \
7375
+   " addu    %0,%0,$12     \n\t" \
7376
+   " sltu   $14,%0,$12     \n\t" \
7377
+   " addu    %1,%1,$13     \n\t" \
7378
+   " sltu   $15,%1,$13     \n\t" \
7379
+   " addu    %1,%1,$14     \n\t" \
7380
+   " sltu   $14,%1,$14     \n\t" \
7381
+   " addu    %2,%2,$15     \n\t" \
7382
+   " addu    %2,%2,$14     \n\t" \
7383
+                                 \
7384
+   " addu    %0,%0,$12     \n\t" \
7385
+   " sltu   $14,%0,$12     \n\t" \
7386
+   " addu    %1,%1,$13     \n\t" \
7387
+   " sltu   $15,%1,$13     \n\t" \
7388
+   " addu    %1,%1,$14     \n\t" \
7389
+   " sltu   $14,%1,$14     \n\t" \
7390
+   " addu    %2,%2,$15     \n\t" \
7391
+   " addu    %2,%2,$14     \n\t" \
7392
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12", "$13", "$14", "$15");
7393
+
7394
+#define SQRADDSC(i, j)            \
7395
+asm(                              \
7396
+   " multu  %6,%7          \n\t"  \
7397
+   " mflo   %0             \n\t"  \
7398
+   " mfhi   %1             \n\t"  \
7399
+   " xor    %2,%2,%2       \n\t"  \
7400
+:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc");
7401
+
7402
+#define SQRADDAC(i, j)           \
7403
+asm(                             \
7404
+   " multu  %6,%7          \n\t" \
7405
+   " mflo   $12            \n\t" \
7406
+   " mfhi   $13            \n\t" \
7407
+   " addu    %0,%0,$12     \n\t" \
7408
+   " sltu   $12,%0,$12     \n\t" \
7409
+   " addu    %1,%1,$13     \n\t" \
7410
+   " sltu   $13,%1,$13     \n\t" \
7411
+   " addu    %1,%1,$12     \n\t" \
7412
+   " sltu   $12,%1,$12     \n\t" \
7413
+   " addu    %2,%2,$13     \n\t" \
7414
+   " addu    %2,%2,$12     \n\t" \
7415
+:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"$12", "$13", "$14");
7416
+
7417
+#define SQRADDDB                  \
7418
+asm(                              \
7419
+   " addu    %0,%0,%3       \n\t" \
7420
+   " sltu   $10,%0,%3       \n\t" \
7421
+   " addu    %1,%1,$10      \n\t" \
7422
+   " sltu   $10,%1,$10      \n\t" \
7423
+   " addu    %1,%1,%4       \n\t" \
7424
+   " sltu   $11,%1,%4       \n\t" \
7425
+   " addu    %2,%2,$10      \n\t" \
7426
+   " addu    %2,%2,$11      \n\t" \
7427
+   " addu    %2,%2,%5       \n\t" \
7428
+                                  \
7429
+   " addu    %0,%0,%3       \n\t" \
7430
+   " sltu   $10,%0,%3       \n\t" \
7431
+   " addu    %1,%1,$10      \n\t" \
7432
+   " sltu   $10,%1,$10      \n\t" \
7433
+   " addu    %1,%1,%4       \n\t" \
7434
+   " sltu   $11,%1,%4       \n\t" \
7435
+   " addu    %2,%2,$10      \n\t" \
7436
+   " addu    %2,%2,$11      \n\t" \
7437
+   " addu    %2,%2,%5       \n\t" \
7438
+:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "$10", "$11");
7055 7439
 
7056
-/* shift right a certain amount of digits */
7057
-void mp_rshd (mp_int * a, int b)
7058
-{
7059
-  int     x;
7440
+#else
7060 7441
 
7061
-  /* if b <= 0 then ignore it */
7062
-  if (b <= 0) {
7063
-    return;
7064
-  }
7442
+#define TFM_ISO
7065 7443
 
7066
-  /* if b > used then simply zero it and return */
7067
-  if (a->used <= b) {
7068
-    mp_zero (a);
7069
-    return;
7070
-  }
7444
+/* ISO C portable code */
7071 7445
 
7072
-  {
7073
-    register mp_digit *bottom, *top;
7446
+#define COMBA_START
7074 7447
 
7075
-    /* shift the digits down */
7448
+#define CLEAR_CARRY \
7449
+   c0 = c1 = c2 = 0;
7076 7450
 
7077
-    /* bottom */
7078
-    bottom = a->dp;
7451
+#define COMBA_STORE(x) \
7452
+   x = c0;
7079 7453
 
7080
-    /* top [offset into digits] */
7081
-    top = a->dp + b;
7454
+#define COMBA_STORE2(x) \
7455
+   x = c1;
7082 7456
 
7083
-    /* this is implemented as a sliding window where 
7084
-     * the window is b-digits long and digits from 
7085
-     * the top of the window are copied to the bottom
7086
-     *
7087
-     * e.g.
7457
+#define CARRY_FORWARD \
7458
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
7088 7459
 
7089
-     b-2 | b-1 | b0 | b1 | b2 | ... | bb |   ---->
7090
-                 /\                   |      ---->
7091
-                  \-------------------/      ---->
7092
-     */
7093
-    for (x = 0; x < (a->used - b); x++) {
7094
-      *bottom++ = *top++;
7095
-    }
7460
+#define COMBA_FINI
7096 7461
 
7097
-    /* zero the top digits */
7098
-    for (; x < a->used; x++) {
7099
-      *bottom++ = 0;
7100
-    }
7101
-  }
7462
+/* SQRADD(i, j): adds the double-width product i*j into the c0/c1/c2
+ * accumulator.  The low digit of (c0 + i*j) is stored in c0, the part
+ * above DIGIT_BIT is added to c1, and whatever overflows c1 is added to c2.
+ * The temporary t is local to the macro's do/while block. */
7463
+#define SQRADD(i, j)                                 \
7464
+   do { fp_word t;                                   \
7465
+   t = c0 + ((fp_word)i) * ((fp_word)j);  c0 = t;    \
7466
+   t = c1 + (t >> DIGIT_BIT);             c1 = t; c2 += t >> DIGIT_BIT; \
7467
+   } while (0);
7102 7468
   
7103
-  /* remove excess digits */
7104
-  a->used -= b;
7105
-}
7106
-#endif
7107 7469
 
7108
-/* $Source: /cvs/libtom/libtommath/bn_mp_rshd.c,v $ */
7109
-/* $Revision: 1.3 $ */
7110
-/* $Date: 2006/03/31 14:18:44 $ */
7470
+/* SQRADD2(i, j): for squaring some of the terms are doubled, so the
+ * product i*j is computed once into the local t and then added into the
+ * c0/c1/c2 accumulator twice.
+ * NOTE: tt is not declared by this macro; it must already be declared in
+ * the scope where the macro is expanded. */
7471
+#define SQRADD2(i, j)                                                 \
7472
+   do { fp_word t;                                                    \
7473
+   t  = ((fp_word)i) * ((fp_word)j);                                  \
7474
+   tt = (fp_word)c0 + t;                 c0 = tt;                              \
7475
+   tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = tt; c2 += tt >> DIGIT_BIT;       \
7476
+   tt = (fp_word)c0 + t;                 c0 = tt;                              \
7477
+   tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = tt; c2 += tt >> DIGIT_BIT;       \
7478
+   } while (0);
7479
+
7480
+/* SQRADDSC(i, j): (re)initialises the secondary accumulator sc0/sc1/sc2
+ * with the product i*j: sc0 gets the low digit, sc1 the part above
+ * DIGIT_BIT, and sc2 is cleared.  Previous sc0/sc1/sc2 values are discarded. */
+#define SQRADDSC(i, j)                                                         \
7481
+   do { fp_word t;                                                             \
7482
+      t =  ((fp_word)i) * ((fp_word)j);                                        \
7483
+      sc0 = (fp_digit)t; sc1 = (t >> DIGIT_BIT); sc2 = 0;                      \
7484
+   } while (0);
7485
+
7486
+/* SQRADDAC(i, j): adds the product i*j into the secondary accumulator
+ * sc0/sc1/sc2, carrying from sc0 into sc1 and from sc1 into sc2. */
+#define SQRADDAC(i, j)                                                         \
7487
+   do { fp_word t;                                                             \
7488
+   t = sc0 + ((fp_word)i) * ((fp_word)j);  sc0 = t;                            \
7489
+   t = sc1 + (t >> DIGIT_BIT);             sc1 = t; sc2 += t >> DIGIT_BIT;     \
7490
+   } while (0);
7491
+
7492
+/* SQRADDDB: adds twice the secondary accumulator (sc0/sc1/sc2) into the
+ * main accumulator c0/c1/c2, digit by digit, carrying the part above
+ * DIGIT_BIT from each digit into the next.  sc0/sc1/sc2 are only read. */
+#define SQRADDDB                                                               \
7493
+   do { fp_word t;                                                             \
7494
+   t = ((fp_word)sc0) + ((fp_word)sc0) + c0; c0 = t;                                                 \
7495
+   t = ((fp_word)sc1) + ((fp_word)sc1) + c1 + (t >> DIGIT_BIT); c1 = t;                              \
7496
+   c2 = c2 + ((fp_word)sc2) + ((fp_word)sc2) + (t >> DIGIT_BIT);                                     \
7497
+   } while (0);
7498
+
7499
+#endif
7500
+
7501
+/* $Source: /cvs/libtom/tomsfastmath/src/sqr/fp_sqr_comba.c,v $ */
7502
+/* $Revision: 1.4 $ */
7503
+/* $Date: 2007/03/14 23:47:42 $ */
7111 7504
 
7112
-/* End: bn_mp_rshd.c */
7505
+/* End: fp_sqr_comba.c */
7113 7506
 
7114
-/* Start: bn_mp_set.c */
7115
-#include <bignum.h>
7116
-#ifdef BN_MP_SET_C
7117
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
7118
- *
7119
- * LibTomMath is a library that provides multiple-precision
7120
- * integer arithmetic as well as number theoretic functionality.
7121
- *
7122
- * The library was designed directly after the MPI library by
7123
- * Michael Fromberger but has been written from scratch with
7124
- * additional optimizations in place.
7125
- *
7126
- * The library is free for all purposes without any express
7127
- * guarantee it works.
7128
- *
7129
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
7130
- */
7507
+/* Start: fp_sqr_comba_12.c */
7508
+#define TFM_DEFINES
7509
+#include "fp_sqr_comba.c"
7131 7510
 
7132
-/* set to a digit */
7133
-void mp_set (mp_int * a, mp_digit b)
7511
+#ifdef TFM_SQR12
7512
+void fp_sqr_comba12(fp_int *A, fp_int *B)
7134 7513
 {
7135
-  mp_zero (a);
7136
-  a->dp[0] = b & MP_MASK;
7137
-  a->used  = (a->dp[0] != 0) ? 1 : 0;
7514
+   fp_digit *a, b[24], c0, c1, c2, sc0, sc1, sc2;
7515
+#ifdef TFM_ISO
7516
+   fp_word tt;
7517
+#endif
7518
+
7519
+   a = A->dp;
7520
+   COMBA_START; 
7521
+
7522
+   /* clear carries */
7523
+   CLEAR_CARRY;
7524
+
7525
+   /* output 0 */
7526
+   SQRADD(a[0],a[0]);
7527
+   COMBA_STORE(b[0]);
7528
+
7529
+   /* output 1 */
7530
+   CARRY_FORWARD;
7531
+   SQRADD2(a[0], a[1]); 
7532
+   COMBA_STORE(b[1]);
7533
+
7534
+   /* output 2 */
7535
+   CARRY_FORWARD;
7536
+   SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); 
7537
+   COMBA_STORE(b[2]);
7538
+
7539
+   /* output 3 */
7540
+   CARRY_FORWARD;
7541
+   SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); 
7542
+   COMBA_STORE(b[3]);
7543
+
7544
+   /* output 4 */
7545
+   CARRY_FORWARD;
7546
+   SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); 
7547
+   COMBA_STORE(b[4]);
7548
+
7549
+   /* output 5 */
7550
+   CARRY_FORWARD;
7551
+   SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; 
7552
+   COMBA_STORE(b[5]);
7553
+
7554
+   /* output 6 */
7555
+   CARRY_FORWARD;
7556
+   SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); 
7557
+   COMBA_STORE(b[6]);
7558
+
7559
+   /* output 7 */
7560
+   CARRY_FORWARD;
7561
+   SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; 
7562
+   COMBA_STORE(b[7]);
7563
+
7564
+   /* output 8 */
7565
+   CARRY_FORWARD;
7566
+   SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); 
7567
+   COMBA_STORE(b[8]);
7568
+
7569
+   /* output 9 */
7570
+   CARRY_FORWARD;
7571
+   SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; 
7572
+   COMBA_STORE(b[9]);
7573
+
7574
+   /* output 10 */
7575
+   CARRY_FORWARD;
7576
+   SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); 
7577
+   COMBA_STORE(b[10]);
7578
+
7579
+   /* output 11 */
7580
+   CARRY_FORWARD;
7581
+   SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; 
7582
+   COMBA_STORE(b[11]);
7583
+
7584
+   /* output 12 */
7585
+   CARRY_FORWARD;
7586
+   SQRADDSC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); 
7587
+   COMBA_STORE(b[12]);
7588
+
7589
+   /* output 13 */
7590
+   CARRY_FORWARD;
7591
+   SQRADDSC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; 
7592
+   COMBA_STORE(b[13]);
7593
+
7594
+   /* output 14 */
7595
+   CARRY_FORWARD;
7596
+   SQRADDSC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); 
7597
+   COMBA_STORE(b[14]);
7598
+
7599
+   /* output 15 */
7600
+   CARRY_FORWARD;
7601
+   SQRADDSC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; 
7602
+   COMBA_STORE(b[15]);
7603
+
7604
+   /* output 16 */
7605
+   CARRY_FORWARD;
7606
+   SQRADDSC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); 
7607
+   COMBA_STORE(b[16]);
7608
+
7609
+   /* output 17 */
7610
+   CARRY_FORWARD;
7611
+   SQRADDSC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; 
7612
+   COMBA_STORE(b[17]);
7613
+
7614
+   /* output 18 */
7615
+   CARRY_FORWARD;
7616
+   SQRADD2(a[7], a[11]); SQRADD2(a[8], a[10]); SQRADD(a[9], a[9]); 
7617
+   COMBA_STORE(b[18]);
7618
+
7619
+   /* output 19 */
7620
+   CARRY_FORWARD;
7621
+   SQRADD2(a[8], a[11]); SQRADD2(a[9], a[10]); 
7622
+   COMBA_STORE(b[19]);
7623
+
7624
+   /* output 20 */
7625
+   CARRY_FORWARD;
7626
+   SQRADD2(a[9], a[11]); SQRADD(a[10], a[10]); 
7627
+   COMBA_STORE(b[20]);
7628
+
7629
+   /* output 21 */
7630
+   CARRY_FORWARD;
7631
+   SQRADD2(a[10], a[11]); 
7632
+   COMBA_STORE(b[21]);
7633
+
7634
+   /* output 22 */
7635
+   CARRY_FORWARD;
7636
+   SQRADD(a[11], a[11]); 
7637
+   COMBA_STORE(b[22]);
7638
+   COMBA_STORE2(b[23]);
7639
+   COMBA_FINI;
7640
+
7641
+   B->used = 24;
7642
+   B->sign = FP_ZPOS;
7643
+   memcpy(B->dp, b, 24 * sizeof(fp_digit));
7644
+   fp_clamp(B);
7138 7645
 }
7139 7646
 #endif
7140 7647
 
7141
-/* $Source: /cvs/libtom/libtommath/bn_mp_set.c,v $ */
7142
-/* $Revision: 1.3 $ */
7143
-/* $Date: 2006/03/31 14:18:44 $ */
7144 7648
 
7145
-/* End: bn_mp_set.c */
7146
-
7147
-/* Start: bn_mp_set_int.c */
7148
-#include <bignum.h>
7149
-#ifdef BN_MP_SET_INT_C
7150
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
7151
- *
7152
- * LibTomMath is a library that provides multiple-precision
7153
- * integer arithmetic as well as number theoretic functionality.
7154
- *
7155
- * The library was designed directly after the MPI library by
7156
- * Michael Fromberger but has been written from scratch with
7157
- * additional optimizations in place.
7158
- *
7159
- * The library is free for all purposes without any express
7160
- * guarantee it works.
7161
- *
7162
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
7163
- */
7164
-
7165
-/* set a 32-bit const */
7166
-int mp_set_int (mp_int * a, unsigned long b)
7167
-{
7168
-  int     x, res;
7169
-
7170
-  mp_zero (a);
7171
-  
7172
-  /* set four bits at a time */
7173
-  for (x = 0; x < 8; x++) {
7174
-    /* shift the number up four bits */
7175
-    if ((res = mp_mul_2d (a, 4, a)) != MP_OKAY) {
7176
-      return res;
7177
-    }
7649
+/* $Source: /cvs/libtom/tomsfastmath/src/sqr/fp_sqr_comba_12.c,v $ */
7650
+/* $Revision: 1.2 $ */
7651
+/* $Date: 2007/02/17 03:39:01 $ */
7178 7652
 
7179
-    /* OR in the top four bits of the source */
7180
-    a->dp[0] |= (b >> 28) & 15;
7653
+/* End: fp_sqr_comba_12.c */
7181 7654
 
7182
-    /* shift the source up to the next four bits */
7183
-    b <<= 4;
7655
+/* Start: fp_sqr_comba_17.c */
7656
+#define TFM_DEFINES
7657
+#include "fp_sqr_comba.c"
7184 7658
 
7185
-    /* ensure that digits are not clamped off */
7186
-    a->used += 1;
7187
-  }
7188
-  mp_clamp (a);
7189
-  return MP_OKAY;
7659
+#ifdef TFM_SQR17
7660
+void fp_sqr_comba17(fp_int *A, fp_int *B)
7661
+{
7662
+   fp_digit *a, b[34], c0, c1, c2, sc0, sc1, sc2;
7663
+#ifdef TFM_ISO
7664
+   fp_word tt;
7665
+#endif
7666
+
7667
+   a = A->dp;
7668
+   COMBA_START; 
7669
+
7670
+   /* clear carries */
7671
+   CLEAR_CARRY;
7672
+
7673
+   /* output 0 */
7674
+   SQRADD(a[0],a[0]);
7675
+   COMBA_STORE(b[0]);
7676
+
7677
+   /* output 1 */
7678
+   CARRY_FORWARD;
7679
+   SQRADD2(a[0], a[1]); 
7680
+   COMBA_STORE(b[1]);
7681
+
7682
+   /* output 2 */
7683
+   CARRY_FORWARD;
7684
+   SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); 
7685
+   COMBA_STORE(b[2]);
7686
+
7687
+   /* output 3 */
7688
+   CARRY_FORWARD;
7689
+   SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); 
7690
+   COMBA_STORE(b[3]);
7691
+
7692
+   /* output 4 */
7693
+   CARRY_FORWARD;
7694
+   SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); 
7695
+   COMBA_STORE(b[4]);
7696
+
7697
+   /* output 5 */
7698
+   CARRY_FORWARD;
7699
+   SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; 
7700
+   COMBA_STORE(b[5]);
7701
+
7702
+   /* output 6 */
7703
+   CARRY_FORWARD;
7704
+   SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); 
7705
+   COMBA_STORE(b[6]);
7706
+
7707
+   /* output 7 */
7708
+   CARRY_FORWARD;
7709
+   SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; 
7710
+   COMBA_STORE(b[7]);
7711
+
7712
+   /* output 8 */
7713
+   CARRY_FORWARD;
7714
+   SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); 
7715
+   COMBA_STORE(b[8]);
7716
+
7717
+   /* output 9 */
7718
+   CARRY_FORWARD;
7719
+   SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; 
7720
+   COMBA_STORE(b[9]);
7721
+
7722
+   /* output 10 */
7723
+   CARRY_FORWARD;
7724
+   SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); 
7725
+   COMBA_STORE(b[10]);
7726
+
7727
+   /* output 11 */
7728
+   CARRY_FORWARD;
7729
+   SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; 
7730
+   COMBA_STORE(b[11]);
7731
+
7732
+   /* output 12 */
7733
+   CARRY_FORWARD;
7734
+   SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); 
7735
+   COMBA_STORE(b[12]);
7736
+
7737
+   /* output 13 */
7738
+   CARRY_FORWARD;
7739
+   SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; 
7740
+   COMBA_STORE(b[13]);
7741
+
7742
+   /* output 14 */
7743
+   CARRY_FORWARD;
7744
+   SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); 
7745
+   COMBA_STORE(b[14]);
7746
+
7747
+   /* output 15 */
7748
+   CARRY_FORWARD;
7749
+   SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; 
7750
+   COMBA_STORE(b[15]);
7751
+
7752
+   /* output 16 */
7753
+   CARRY_FORWARD;
7754
+   SQRADDSC(a[0], a[16]); SQRADDAC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); 
7755
+   COMBA_STORE(b[16]);
7756
+
7757
+   /* output 17 */
7758
+   CARRY_FORWARD;
7759
+   SQRADDSC(a[1], a[16]); SQRADDAC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; 
7760
+   COMBA_STORE(b[17]);
7761
+
7762
+   /* output 18 */
7763
+   CARRY_FORWARD;
7764
+   SQRADDSC(a[2], a[16]); SQRADDAC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); 
7765
+   COMBA_STORE(b[18]);
7766
+
7767
+   /* output 19 */
7768
+   CARRY_FORWARD;
7769
+   SQRADDSC(a[3], a[16]); SQRADDAC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; 
7770
+   COMBA_STORE(b[19]);
7771
+
7772
+   /* output 20 */
7773
+   CARRY_FORWARD;
7774
+   SQRADDSC(a[4], a[16]); SQRADDAC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]); 
7775
+   COMBA_STORE(b[20]);
7776
+
7777
+   /* output 21 */
7778
+   CARRY_FORWARD;
7779
+   SQRADDSC(a[5], a[16]); SQRADDAC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB; 
7780
+   COMBA_STORE(b[21]);
7781
+
7782
+   /* output 22 */
7783
+   CARRY_FORWARD;
7784
+   SQRADDSC(a[6], a[16]); SQRADDAC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]); 
7785
+   COMBA_STORE(b[22]);
7786
+
7787
+   /* output 23 */
7788
+   CARRY_FORWARD;
7789
+   SQRADDSC(a[7], a[16]); SQRADDAC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB; 
7790
+   COMBA_STORE(b[23]);
7791
+
7792
+   /* output 24 */
7793
+   CARRY_FORWARD;
7794
+   SQRADDSC(a[8], a[16]); SQRADDAC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]); 
7795
+   COMBA_STORE(b[24]);
7796
+
7797
+   /* output 25 */
7798
+   CARRY_FORWARD;
7799
+   SQRADDSC(a[9], a[16]); SQRADDAC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB; 
7800
+   COMBA_STORE(b[25]);
7801
+
7802
+   /* output 26 */
7803
+   CARRY_FORWARD;
7804
+   SQRADDSC(a[10], a[16]); SQRADDAC(a[11], a[15]); SQRADDAC(a[12], a[14]); SQRADDDB; SQRADD(a[13], a[13]); 
7805
+   COMBA_STORE(b[26]);
7806
+
7807
+   /* output 27 */
7808
+   CARRY_FORWARD;
7809
+   SQRADDSC(a[11], a[16]); SQRADDAC(a[12], a[15]); SQRADDAC(a[13], a[14]); SQRADDDB; 
7810
+   COMBA_STORE(b[27]);
7811
+
7812
+   /* output 28 */
7813
+   CARRY_FORWARD;
7814
+   SQRADD2(a[12], a[16]); SQRADD2(a[13], a[15]); SQRADD(a[14], a[14]); 
7815
+   COMBA_STORE(b[28]);
7816
+
7817
+   /* output 29 */
7818
+   CARRY_FORWARD;
7819
+   SQRADD2(a[13], a[16]); SQRADD2(a[14], a[15]); 
7820
+   COMBA_STORE(b[29]);
7821
+
7822
+   /* output 30 */
7823
+   CARRY_FORWARD;
7824
+   SQRADD2(a[14], a[16]); SQRADD(a[15], a[15]); 
7825
+   COMBA_STORE(b[30]);
7826
+
7827
+   /* output 31 */
7828
+   CARRY_FORWARD;
7829
+   SQRADD2(a[15], a[16]); 
7830
+   COMBA_STORE(b[31]);
7831
+
7832
+   /* output 32 */
7833
+   CARRY_FORWARD;
7834
+   SQRADD(a[16], a[16]); 
7835
+   COMBA_STORE(b[32]);
7836
+   COMBA_STORE2(b[33]);
7837
+   COMBA_FINI;
7838
+
7839
+   B->used = 34;
7840
+   B->sign = FP_ZPOS;
7841
+   memcpy(B->dp, b, 34 * sizeof(fp_digit));
7842
+   fp_clamp(B);
7190 7843
 }
7191 7844
 #endif
7192 7845
 
7193
-/* $Source: /cvs/libtom/libtommath/bn_mp_set_int.c,v $ */
7194
-/* $Revision: 1.3 $ */
7195
-/* $Date: 2006/03/31 14:18:44 $ */
7196 7846
 
7197
-/* End: bn_mp_set_int.c */
7847
+/* $Source: /cvs/libtom/tomsfastmath/src/sqr/fp_sqr_comba_17.c,v $ */
7848
+/* $Revision: 1.2 $ */
7849
+/* $Date: 2007/02/17 03:39:01 $ */
7198 7850
 
7199
-/* Start: bn_mp_shrink.c */
7200
-#include <bignum.h>
7201
-#ifdef BN_MP_SHRINK_C
7202
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
7203
- *
7204
- * LibTomMath is a library that provides multiple-precision
7205
- * integer arithmetic as well as number theoretic functionality.
7206
- *
7207
- * The library was designed directly after the MPI library by
7208
- * Michael Fromberger but has been written from scratch with
7209
- * additional optimizations in place.
7210
- *
7211
- * The library is free for all purposes without any express
7212
- * guarantee it works.
7213
- *
7214
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
7215
- */
7851
+/* End: fp_sqr_comba_17.c */
7852
+
7853
+/* Start: fp_sqr_comba_20.c */
7854
+#define TFM_DEFINES
7855
+#include "fp_sqr_comba.c"
7216 7856
 
7217
-/* shrink a bignum */
7218
-int mp_shrink (mp_int * a)
7857
+#ifdef TFM_SQR20
7858
+void fp_sqr_comba20(fp_int *A, fp_int *B) /* Fully unrolled column-wise squaring for a 20-digit operand: reads a[0..19] from A->dp, builds 40 result digits in a local buffer and copies them into B. The SQR*/COMBA_*/CARRY_* macros are not defined in this function; presumably they come from fp_sqr_comba.c included above under TFM_DEFINES -- confirm. */
7219 7859
 {
7220
-  mp_digit *tmp;
7221
-  if (a->alloc != a->used && a->used > 0) {
7222
-    if ((tmp = OPT_CAST(mp_digit) cli_realloc (a->dp, sizeof (mp_digit) * a->used)) == NULL) {
7223
-      return MP_MEM;
7224
-    }
7225
-    a->dp    = tmp;
7226
-    a->alloc = a->used;
7227
-  }
7228
-  return MP_OKAY;
7860
+   fp_digit *a, b[40], c0, c1, c2, sc0, sc1, sc2; /* a: input digits; b: local 40-digit result; c0..c2 and sc0..sc2 are not referenced directly below -- presumably the accumulators used inside the macros (confirm) */
7861
+#ifdef TFM_ISO
7862
+   fp_word tt;
7863
+#endif
7864
+
7865
+   a = A->dp; /* every a[i] below indexes the digit array of A */
7866
+   COMBA_START; 
7867
+
7868
+   /* clear carries */
7869
+   CLEAR_CARRY;
7870
+
7871
+   /* output 0 */
7872
+   SQRADD(a[0],a[0]);
7873
+   COMBA_STORE(b[0]);
7874
+
7875
+   /* output 1 */
7876
+   CARRY_FORWARD;
7877
+   SQRADD2(a[0], a[1]); /* each "output k" block lists the pairs a[i], a[j] with i < j and i + j == k; SQRADD2 presumably adds the product twice (confirm in macro definition) */
7878
+   COMBA_STORE(b[1]);
7879
+
7880
+   /* output 2 */
7881
+   CARRY_FORWARD;
7882
+   SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); 
7883
+   COMBA_STORE(b[2]);
7884
+
7885
+   /* output 3 */
7886
+   CARRY_FORWARD;
7887
+   SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); 
7888
+   COMBA_STORE(b[3]);
7889
+
7890
+   /* output 4 */
7891
+   CARRY_FORWARD;
7892
+   SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); 
7893
+   COMBA_STORE(b[4]);
7894
+
7895
+   /* output 5 */
7896
+   CARRY_FORWARD;
7897
+   SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; /* columns with three or more cross pairs use the SQRADDSC / SQRADDAC... / SQRADDDB sequence; presumably this sums the products separately (sc0..sc2) and then adds the doubled sum -- confirm */
7898
+   COMBA_STORE(b[5]);
7899
+
7900
+   /* output 6 */
7901
+   CARRY_FORWARD;
7902
+   SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); /* even column: the diagonal term a[3], a[3] is added once via SQRADD after the cross pairs */
7903
+   COMBA_STORE(b[6]);
7904
+
7905
+   /* output 7 */
7906
+   CARRY_FORWARD;
7907
+   SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; 
7908
+   COMBA_STORE(b[7]);
7909
+
7910
+   /* output 8 */
7911
+   CARRY_FORWARD;
7912
+   SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); 
7913
+   COMBA_STORE(b[8]);
7914
+
7915
+   /* output 9 */
7916
+   CARRY_FORWARD;
7917
+   SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; 
7918
+   COMBA_STORE(b[9]);
7919
+
7920
+   /* output 10 */
7921
+   CARRY_FORWARD;
7922
+   SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); 
7923
+   COMBA_STORE(b[10]);
7924
+
7925
+   /* output 11 */
7926
+   CARRY_FORWARD;
7927
+   SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; 
7928
+   COMBA_STORE(b[11]);
7929
+
7930
+   /* output 12 */
7931
+   CARRY_FORWARD;
7932
+   SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); 
7933
+   COMBA_STORE(b[12]);
7934
+
7935
+   /* output 13 */
7936
+   CARRY_FORWARD;
7937
+   SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; 
7938
+   COMBA_STORE(b[13]);
7939
+
7940
+   /* output 14 */
7941
+   CARRY_FORWARD;
7942
+   SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); 
7943
+   COMBA_STORE(b[14]);
7944
+
7945
+   /* output 15 */
7946
+   CARRY_FORWARD;
7947
+   SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; 
7948
+   COMBA_STORE(b[15]);
7949
+
7950
+   /* output 16 */
7951
+   CARRY_FORWARD;
7952
+   SQRADDSC(a[0], a[16]); SQRADDAC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); 
7953
+   COMBA_STORE(b[16]);
7954
+
7955
+   /* output 17 */
7956
+   CARRY_FORWARD;
7957
+   SQRADDSC(a[0], a[17]); SQRADDAC(a[1], a[16]); SQRADDAC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; 
7958
+   COMBA_STORE(b[17]);
7959
+
7960
+   /* output 18 */
7961
+   CARRY_FORWARD;
7962
+   SQRADDSC(a[0], a[18]); SQRADDAC(a[1], a[17]); SQRADDAC(a[2], a[16]); SQRADDAC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); 
7963
+   COMBA_STORE(b[18]);
7964
+
7965
+   /* output 19 */
7966
+   CARRY_FORWARD;
7967
+   SQRADDSC(a[0], a[19]); SQRADDAC(a[1], a[18]); SQRADDAC(a[2], a[17]); SQRADDAC(a[3], a[16]); SQRADDAC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; 
7968
+   COMBA_STORE(b[19]);
7969
+
7970
+   /* output 20 */
7971
+   CARRY_FORWARD;
7972
+   SQRADDSC(a[1], a[19]); SQRADDAC(a[2], a[18]); SQRADDAC(a[3], a[17]); SQRADDAC(a[4], a[16]); SQRADDAC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]); /* from this column on the highest index stays at a[19] and the lowest index rises, so each column has fewer pairs */
7973
+   COMBA_STORE(b[20]);
7974
+
7975
+   /* output 21 */
7976
+   CARRY_FORWARD;
7977
+   SQRADDSC(a[2], a[19]); SQRADDAC(a[3], a[18]); SQRADDAC(a[4], a[17]); SQRADDAC(a[5], a[16]); SQRADDAC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB; 
7978
+   COMBA_STORE(b[21]);
7979
+
7980
+   /* output 22 */
7981
+   CARRY_FORWARD;
7982
+   SQRADDSC(a[3], a[19]); SQRADDAC(a[4], a[18]); SQRADDAC(a[5], a[17]); SQRADDAC(a[6], a[16]); SQRADDAC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]); 
7983
+   COMBA_STORE(b[22]);
7984
+
7985
+   /* output 23 */
7986
+   CARRY_FORWARD;
7987
+   SQRADDSC(a[4], a[19]); SQRADDAC(a[5], a[18]); SQRADDAC(a[6], a[17]); SQRADDAC(a[7], a[16]); SQRADDAC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB; 
7988
+   COMBA_STORE(b[23]);
7989
+
7990
+   /* output 24 */
7991
+   CARRY_FORWARD;
7992
+   SQRADDSC(a[5], a[19]); SQRADDAC(a[6], a[18]); SQRADDAC(a[7], a[17]); SQRADDAC(a[8], a[16]); SQRADDAC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]); 
7993
+   COMBA_STORE(b[24]);
7994
+
7995
+   /* output 25 */
7996
+   CARRY_FORWARD;
7997
+   SQRADDSC(a[6], a[19]); SQRADDAC(a[7], a[18]); SQRADDAC(a[8], a[17]); SQRADDAC(a[9], a[16]); SQRADDAC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB; 
7998
+   COMBA_STORE(b[25]);
7999
+
8000
+   /* output 26 */
8001
+   CARRY_FORWARD;
8002
+   SQRADDSC(a[7], a[19]); SQRADDAC(a[8], a[18]); SQRADDAC(a[9], a[17]); SQRADDAC(a[10], a[16]); SQRADDAC(a[11], a[15]); SQRADDAC(a[12], a[14]); SQRADDDB; SQRADD(a[13], a[13]); 
8003
+   COMBA_STORE(b[26]);
8004
+
8005
+   /* output 27 */
8006
+   CARRY_FORWARD;
8007
+   SQRADDSC(a[8], a[19]); SQRADDAC(a[9], a[18]); SQRADDAC(a[10], a[17]); SQRADDAC(a[11], a[16]); SQRADDAC(a[12], a[15]); SQRADDAC(a[13], a[14]); SQRADDDB; 
8008
+   COMBA_STORE(b[27]);
8009
+
8010
+   /* output 28 */
8011
+   CARRY_FORWARD;
8012
+   SQRADDSC(a[9], a[19]); SQRADDAC(a[10], a[18]); SQRADDAC(a[11], a[17]); SQRADDAC(a[12], a[16]); SQRADDAC(a[13], a[15]); SQRADDDB; SQRADD(a[14], a[14]); 
8013
+   COMBA_STORE(b[28]);
8014
+
8015
+   /* output 29 */
8016
+   CARRY_FORWARD;
8017
+   SQRADDSC(a[10], a[19]); SQRADDAC(a[11], a[18]); SQRADDAC(a[12], a[17]); SQRADDAC(a[13], a[16]); SQRADDAC(a[14], a[15]); SQRADDDB; 
8018
+   COMBA_STORE(b[29]);
8019
+
8020
+   /* output 30 */
8021
+   CARRY_FORWARD;
8022
+   SQRADDSC(a[11], a[19]); SQRADDAC(a[12], a[18]); SQRADDAC(a[13], a[17]); SQRADDAC(a[14], a[16]); SQRADDDB; SQRADD(a[15], a[15]); 
8023
+   COMBA_STORE(b[30]);
8024
+
8025
+   /* output 31 */
8026
+   CARRY_FORWARD;
8027
+   SQRADDSC(a[12], a[19]); SQRADDAC(a[13], a[18]); SQRADDAC(a[14], a[17]); SQRADDAC(a[15], a[16]); SQRADDDB; 
8028
+   COMBA_STORE(b[31]);
8029
+
8030
+   /* output 32 */
8031
+   CARRY_FORWARD;
8032
+   SQRADDSC(a[13], a[19]); SQRADDAC(a[14], a[18]); SQRADDAC(a[15], a[17]); SQRADDDB; SQRADD(a[16], a[16]); 
8033
+   COMBA_STORE(b[32]);
8034
+
8035
+   /* output 33 */
8036
+   CARRY_FORWARD;
8037
+   SQRADDSC(a[14], a[19]); SQRADDAC(a[15], a[18]); SQRADDAC(a[16], a[17]); SQRADDDB; 
8038
+   COMBA_STORE(b[33]);
8039
+
8040
+   /* output 34 */
8041
+   CARRY_FORWARD;
8042
+   SQRADD2(a[15], a[19]); SQRADD2(a[16], a[18]); SQRADD(a[17], a[17]); /* with at most two cross pairs left the code goes back to plain SQRADD2 / SQRADD, as in outputs 1..4 */
8043
+   COMBA_STORE(b[34]);
8044
+
8045
+   /* output 35 */
8046
+   CARRY_FORWARD;
8047
+   SQRADD2(a[16], a[19]); SQRADD2(a[17], a[18]); 
8048
+   COMBA_STORE(b[35]);
8049
+
8050
+   /* output 36 */
8051
+   CARRY_FORWARD;
8052
+   SQRADD2(a[17], a[19]); SQRADD(a[18], a[18]); 
8053
+   COMBA_STORE(b[36]);
8054
+
8055
+   /* output 37 */
8056
+   CARRY_FORWARD;
8057
+   SQRADD2(a[18], a[19]); 
8058
+   COMBA_STORE(b[37]);
8059
+
8060
+   /* output 38 */
8061
+   CARRY_FORWARD;
8062
+   SQRADD(a[19], a[19]); 
8063
+   COMBA_STORE(b[38]);
8064
+   COMBA_STORE2(b[39]); /* last digit b[39] is written by COMBA_STORE2 with no further CARRY_FORWARD; presumably it stores the remaining carry -- confirm */
8065
+   COMBA_FINI;
8066
+
8067
+   B->used = 40; /* set to the full buffer length; fp_clamp() below presumably trims unused high digits -- confirm */
8068
+   B->sign = FP_ZPOS; /* sign is set to FP_ZPOS unconditionally */
8069
+   memcpy(B->dp, b, 40 * sizeof(fp_digit)); /* B->dp is written only here, after all reads of a[] above */
8070
+   fp_clamp(B);
7229 8071
 }
7230 8072
 #endif
7231 8073
 
7232
-/* $Source: /cvs/libtom/libtommath/bn_mp_shrink.c,v $ */
7233
-/* $Revision: 1.3 $ */
7234
-/* $Date: 2006/03/31 14:18:44 $ */
7235 8074
 
7236
-/* End: bn_mp_shrink.c */
8075
+/* $Source: /cvs/libtom/tomsfastmath/src/sqr/fp_sqr_comba_20.c,v $ */
8076
+/* $Revision: 1.2 $ */
8077
+/* $Date: 2007/02/17 03:39:01 $ */
7237 8078
 
7238
-/* Start: bn_mp_signed_bin_size.c */
7239
-#include <bignum.h>
7240
-#ifdef BN_MP_SIGNED_BIN_SIZE_C
7241
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
7242
- *
7243
- * LibTomMath is a library that provides multiple-precision
7244
- * integer arithmetic as well as number theoretic functionality.
7245
- *
7246
- * The library was designed directly after the MPI library by
7247
- * Michael Fromberger but has been written from scratch with
7248
- * additional optimizations in place.
7249
- *
7250
- * The library is free for all purposes without any express
7251
- * guarantee it works.
7252
- *
7253
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
7254
- */
8079
+/* End: fp_sqr_comba_20.c */
7255 8080
 
7256
-/* get the size for an signed equivalent */
7257
-int mp_signed_bin_size (mp_int * a)
8081
+/* Start: fp_sqr_comba_24.c */
8082
+#define TFM_DEFINES
8083
+#include "fp_sqr_comba.c"
8084
+
8085
+#ifdef TFM_SQR24
8086
+void fp_sqr_comba24(fp_int *A, fp_int *B) /* Fully unrolled column-wise squaring for a 24-digit operand: reads a[0..23] from A->dp, builds 48 result digits in a local buffer and copies them into B. The SQR*/COMBA_*/CARRY_* macros are not defined in this function; presumably they come from fp_sqr_comba.c included above under TFM_DEFINES -- confirm. */
7258 8087
 {
7259
-  return 1 + mp_unsigned_bin_size (a);
8088
+   fp_digit *a, b[48], c0, c1, c2, sc0, sc1, sc2; /* a: input digits; b: local 48-digit result; c0..c2 and sc0..sc2 are not referenced directly below -- presumably the accumulators used inside the macros (confirm) */
8089
+#ifdef TFM_ISO
8090
+   fp_word tt;
8091
+#endif
8092
+
8093
+   a = A->dp; /* every a[i] below indexes the digit array of A */
8094
+   COMBA_START; 
8095
+
8096
+   /* clear carries */
8097
+   CLEAR_CARRY;
8098
+
8099
+   /* output 0 */
8100
+   SQRADD(a[0],a[0]);
8101
+   COMBA_STORE(b[0]);
8102
+
8103
+   /* output 1 */
8104
+   CARRY_FORWARD;
8105
+   SQRADD2(a[0], a[1]); /* each "output k" block lists the pairs a[i], a[j] with i < j and i + j == k; SQRADD2 presumably adds the product twice (confirm in macro definition) */
8106
+   COMBA_STORE(b[1]);
8107
+
8108
+   /* output 2 */
8109
+   CARRY_FORWARD;
8110
+   SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); 
8111
+   COMBA_STORE(b[2]);
8112
+
8113
+   /* output 3 */
8114
+   CARRY_FORWARD;
8115
+   SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); 
8116
+   COMBA_STORE(b[3]);
8117
+
8118
+   /* output 4 */
8119
+   CARRY_FORWARD;
8120
+   SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); 
8121
+   COMBA_STORE(b[4]);
8122
+
8123
+   /* output 5 */
8124
+   CARRY_FORWARD;
8125
+   SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; /* columns with three or more cross pairs use the SQRADDSC / SQRADDAC... / SQRADDDB sequence; presumably this sums the products separately (sc0..sc2) and then adds the doubled sum -- confirm */
8126
+   COMBA_STORE(b[5]);
8127
+
8128
+   /* output 6 */
8129
+   CARRY_FORWARD;
8130
+   SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); /* even column: the diagonal term a[3], a[3] is added once via SQRADD after the cross pairs */
8131
+   COMBA_STORE(b[6]);
8132
+
8133
+   /* output 7 */
8134
+   CARRY_FORWARD;
8135
+   SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; 
8136
+   COMBA_STORE(b[7]);
8137
+
8138
+   /* output 8 */
8139
+   CARRY_FORWARD;
8140
+   SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); 
8141
+   COMBA_STORE(b[8]);
8142
+
8143
+   /* output 9 */
8144
+   CARRY_FORWARD;
8145
+   SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; 
8146
+   COMBA_STORE(b[9]);
8147
+
8148
+   /* output 10 */
8149
+   CARRY_FORWARD;
8150
+   SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); 
8151
+   COMBA_STORE(b[10]);
8152
+
8153
+   /* output 11 */
8154
+   CARRY_FORWARD;
8155
+   SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; 
8156
+   COMBA_STORE(b[11]);
8157
+
8158
+   /* output 12 */
8159
+   CARRY_FORWARD;
8160
+   SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); 
8161
+   COMBA_STORE(b[12]);
8162
+
8163
+   /* output 13 */
8164
+   CARRY_FORWARD;
8165
+   SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; 
8166
+   COMBA_STORE(b[13]);
8167
+
8168
+   /* output 14 */
8169
+   CARRY_FORWARD;
8170
+   SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); 
8171
+   COMBA_STORE(b[14]);
8172
+
8173
+   /* output 15 */
8174
+   CARRY_FORWARD;
8175
+   SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; 
8176
+   COMBA_STORE(b[15]);
8177
+
8178
+   /* output 16 */
8179
+   CARRY_FORWARD;
8180
+   SQRADDSC(a[0], a[16]); SQRADDAC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); 
8181
+   COMBA_STORE(b[16]);
8182
+
8183
+   /* output 17 */
8184
+   CARRY_FORWARD;
8185
+   SQRADDSC(a[0], a[17]); SQRADDAC(a[1], a[16]); SQRADDAC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; 
8186
+   COMBA_STORE(b[17]);
8187
+
8188
+   /* output 18 */
8189
+   CARRY_FORWARD;
8190
+   SQRADDSC(a[0], a[18]); SQRADDAC(a[1], a[17]); SQRADDAC(a[2], a[16]); SQRADDAC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); 
8191
+   COMBA_STORE(b[18]);
8192
+
8193
+   /* output 19 */
8194
+   CARRY_FORWARD;
8195
+   SQRADDSC(a[0], a[19]); SQRADDAC(a[1], a[18]); SQRADDAC(a[2], a[17]); SQRADDAC(a[3], a[16]); SQRADDAC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; 
8196
+   COMBA_STORE(b[19]);
8197
+
8198
+   /* output 20 */
8199
+   CARRY_FORWARD;
8200
+   SQRADDSC(a[0], a[20]); SQRADDAC(a[1], a[19]); SQRADDAC(a[2], a[18]); SQRADDAC(a[3], a[17]); SQRADDAC(a[4], a[16]); SQRADDAC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]); 
8201
+   COMBA_STORE(b[20]);
8202
+
8203
+   /* output 21 */
8204
+   CARRY_FORWARD;
8205
+   SQRADDSC(a[0], a[21]); SQRADDAC(a[1], a[20]); SQRADDAC(a[2], a[19]); SQRADDAC(a[3], a[18]); SQRADDAC(a[4], a[17]); SQRADDAC(a[5], a[16]); SQRADDAC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB; 
8206
+   COMBA_STORE(b[21]);
8207
+
8208
+   /* output 22 */
8209
+   CARRY_FORWARD;
8210
+   SQRADDSC(a[0], a[22]); SQRADDAC(a[1], a[21]); SQRADDAC(a[2], a[20]); SQRADDAC(a[3], a[19]); SQRADDAC(a[4], a[18]); SQRADDAC(a[5], a[17]); SQRADDAC(a[6], a[16]); SQRADDAC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]); 
8211
+   COMBA_STORE(b[22]);
8212
+
8213
+   /* output 23 */
8214
+   CARRY_FORWARD;
8215
+   SQRADDSC(a[0], a[23]); SQRADDAC(a[1], a[22]); SQRADDAC(a[2], a[21]); SQRADDAC(a[3], a[20]); SQRADDAC(a[4], a[19]); SQRADDAC(a[5], a[18]); SQRADDAC(a[6], a[17]); SQRADDAC(a[7], a[16]); SQRADDAC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB; 
8216
+   COMBA_STORE(b[23]);
8217
+
8218
+   /* output 24 */
8219
+   CARRY_FORWARD;
8220
+   SQRADDSC(a[1], a[23]); SQRADDAC(a[2], a[22]); SQRADDAC(a[3], a[21]); SQRADDAC(a[4], a[20]); SQRADDAC(a[5], a[19]); SQRADDAC(a[6], a[18]); SQRADDAC(a[7], a[17]); SQRADDAC(a[8], a[16]); SQRADDAC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]); /* from this column on the highest index stays at a[23] and the lowest index rises, so each column has fewer pairs */
8221
+   COMBA_STORE(b[24]);
8222
+
8223
+   /* output 25 */
8224
+   CARRY_FORWARD;
8225
+   SQRADDSC(a[2], a[23]); SQRADDAC(a[3], a[22]); SQRADDAC(a[4], a[21]); SQRADDAC(a[5], a[20]); SQRADDAC(a[6], a[19]); SQRADDAC(a[7], a[18]); SQRADDAC(a[8], a[17]); SQRADDAC(a[9], a[16]); SQRADDAC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB; 
8226
+   COMBA_STORE(b[25]);
8227
+
8228
+   /* output 26 */
8229
+   CARRY_FORWARD;
8230
+   SQRADDSC(a[3], a[23]); SQRADDAC(a[4], a[22]); SQRADDAC(a[5], a[21]); SQRADDAC(a[6], a[20]); SQRADDAC(a[7], a[19]); SQRADDAC(a[8], a[18]); SQRADDAC(a[9], a[17]); SQRADDAC(a[10], a[16]); SQRADDAC(a[11], a[15]); SQRADDAC(a[12], a[14]); SQRADDDB; SQRADD(a[13], a[13]); 
8231
+   COMBA_STORE(b[26]);
8232
+
8233
+   /* output 27 */
8234
+   CARRY_FORWARD;
8235
+   SQRADDSC(a[4], a[23]); SQRADDAC(a[5], a[22]); SQRADDAC(a[6], a[21]); SQRADDAC(a[7], a[20]); SQRADDAC(a[8], a[19]); SQRADDAC(a[9], a[18]); SQRADDAC(a[10], a[17]); SQRADDAC(a[11], a[16]); SQRADDAC(a[12], a[15]); SQRADDAC(a[13], a[14]); SQRADDDB; 
8236
+   COMBA_STORE(b[27]);
8237
+
8238
+   /* output 28 */
8239
+   CARRY_FORWARD;
8240
+   SQRADDSC(a[5], a[23]); SQRADDAC(a[6], a[22]); SQRADDAC(a[7], a[21]); SQRADDAC(a[8], a[20]); SQRADDAC(a[9], a[19]); SQRADDAC(a[10], a[18]); SQRADDAC(a[11], a[17]); SQRADDAC(a[12], a[16]); SQRADDAC(a[13], a[15]); SQRADDDB; SQRADD(a[14], a[14]); 
8241
+   COMBA_STORE(b[28]);
8242
+
8243
+   /* output 29 */
8244
+   CARRY_FORWARD;
8245
+   SQRADDSC(a[6], a[23]); SQRADDAC(a[7], a[22]); SQRADDAC(a[8], a[21]); SQRADDAC(a[9], a[20]); SQRADDAC(a[10], a[19]); SQRADDAC(a[11], a[18]); SQRADDAC(a[12], a[17]); SQRADDAC(a[13], a[16]); SQRADDAC(a[14], a[15]); SQRADDDB; 
8246
+   COMBA_STORE(b[29]);
8247
+
8248
+   /* output 30 */
8249
+   CARRY_FORWARD;
8250
+   SQRADDSC(a[7], a[23]); SQRADDAC(a[8], a[22]); SQRADDAC(a[9], a[21]); SQRADDAC(a[10], a[20]); SQRADDAC(a[11], a[19]); SQRADDAC(a[12], a[18]); SQRADDAC(a[13], a[17]); SQRADDAC(a[14], a[16]); SQRADDDB; SQRADD(a[15], a[15]); 
8251
+   COMBA_STORE(b[30]);
8252
+
8253
+   /* output 31 */
8254
+   CARRY_FORWARD;
8255
+   SQRADDSC(a[8], a[23]); SQRADDAC(a[9], a[22]); SQRADDAC(a[10], a[21]); SQRADDAC(a[11], a[20]); SQRADDAC(a[12], a[19]); SQRADDAC(a[13], a[18]); SQRADDAC(a[14], a[17]); SQRADDAC(a[15], a[16]); SQRADDDB; 
8256
+   COMBA_STORE(b[31]);
8257
+
8258
+   /* output 32 */
8259
+   CARRY_FORWARD;
8260
+   SQRADDSC(a[9], a[23]); SQRADDAC(a[10], a[22]); SQRADDAC(a[11], a[21]); SQRADDAC(a[12], a[20]); SQRADDAC(a[13], a[19]); SQRADDAC(a[14], a[18]); SQRADDAC(a[15], a[17]); SQRADDDB; SQRADD(a[16], a[16]); 
8261
+   COMBA_STORE(b[32]);
8262
+
8263
+   /* output 33 */
8264
+   CARRY_FORWARD;
8265
+   SQRADDSC(a[10], a[23]); SQRADDAC(a[11], a[22]); SQRADDAC(a[12], a[21]); SQRADDAC(a[13], a[20]); SQRADDAC(a[14], a[19]); SQRADDAC(a[15], a[18]); SQRADDAC(a[16], a[17]); SQRADDDB; 
8266
+   COMBA_STORE(b[33]);
8267
+
8268
+   /* output 34 */
8269
+   CARRY_FORWARD;
8270
+   SQRADDSC(a[11], a[23]); SQRADDAC(a[12], a[22]); SQRADDAC(a[13], a[21]); SQRADDAC(a[14], a[20]); SQRADDAC(a[15], a[19]); SQRADDAC(a[16], a[18]); SQRADDDB; SQRADD(a[17], a[17]); 
8271
+   COMBA_STORE(b[34]);
8272
+
8273
+   /* output 35 */
8274
+   CARRY_FORWARD;
8275
+   SQRADDSC(a[12], a[23]); SQRADDAC(a[13], a[22]); SQRADDAC(a[14], a[21]); SQRADDAC(a[15], a[20]); SQRADDAC(a[16], a[19]); SQRADDAC(a[17], a[18]); SQRADDDB; 
8276
+   COMBA_STORE(b[35]);
8277
+
8278
+   /* output 36 */
8279
+   CARRY_FORWARD;
8280
+   SQRADDSC(a[13], a[23]); SQRADDAC(a[14], a[22]); SQRADDAC(a[15], a[21]); SQRADDAC(a[16], a[20]); SQRADDAC(a[17], a[19]); SQRADDDB; SQRADD(a[18], a[18]); 
8281
+   COMBA_STORE(b[36]);
8282
+
8283
+   /* output 37 */
8284
+   CARRY_FORWARD;
8285
+   SQRADDSC(a[14], a[23]); SQRADDAC(a[15], a[22]); SQRADDAC(a[16], a[21]); SQRADDAC(a[17], a[20]); SQRADDAC(a[18], a[19]); SQRADDDB; 
8286
+   COMBA_STORE(b[37]);
8287
+
8288
+   /* output 38 */
8289
+   CARRY_FORWARD;
8290
+   SQRADDSC(a[15], a[23]); SQRADDAC(a[16], a[22]); SQRADDAC(a[17], a[21]); SQRADDAC(a[18], a[20]); SQRADDDB; SQRADD(a[19], a[19]); 
8291
+   COMBA_STORE(b[38]);
8292
+
8293
+   /* output 39 */
8294
+   CARRY_FORWARD;
8295
+   SQRADDSC(a[16], a[23]); SQRADDAC(a[17], a[22]); SQRADDAC(a[18], a[21]); SQRADDAC(a[19], a[20]); SQRADDDB; 
8296
+   COMBA_STORE(b[39]);
8297
+
8298
+   /* output 40 */
8299
+   CARRY_FORWARD;
8300
+   SQRADDSC(a[17], a[23]); SQRADDAC(a[18], a[22]); SQRADDAC(a[19], a[21]); SQRADDDB; SQRADD(a[20], a[20]); 
8301
+   COMBA_STORE(b[40]);
8302
+
8303
+   /* output 41 */
8304
+   CARRY_FORWARD;
8305
+   SQRADDSC(a[18], a[23]); SQRADDAC(a[19], a[22]); SQRADDAC(a[20], a[21]); SQRADDDB; 
8306
+   COMBA_STORE(b[41]);
8307
+
8308
+   /* output 42 */
8309
+   CARRY_FORWARD;
8310
+   SQRADD2(a[19], a[23]); SQRADD2(a[20], a[22]); SQRADD(a[21], a[21]); /* with at most two cross pairs left the code goes back to plain SQRADD2 / SQRADD, as in outputs 1..4 */
8311
+   COMBA_STORE(b[42]);
8312
+
8313
+   /* output 43 */
8314
+   CARRY_FORWARD;
8315
+   SQRADD2(a[20], a[23]); SQRADD2(a[21], a[22]); 
8316
+   COMBA_STORE(b[43]);
8317
+
8318
+   /* output 44 */
8319
+   CARRY_FORWARD;
8320
+   SQRADD2(a[21], a[23]); SQRADD(a[22], a[22]); 
8321
+   COMBA_STORE(b[44]);
8322
+
8323
+   /* output 45 */
8324
+   CARRY_FORWARD;
8325
+   SQRADD2(a[22], a[23]); 
8326
+   COMBA_STORE(b[45]);
8327
+
8328
+   /* output 46 */
8329
+   CARRY_FORWARD;
8330
+   SQRADD(a[23], a[23]); 
8331
+   COMBA_STORE(b[46]);
8332
+   COMBA_STORE2(b[47]); /* last digit b[47] is written by COMBA_STORE2 with no further CARRY_FORWARD; presumably it stores the remaining carry -- confirm */
8333
+   COMBA_FINI;
8334
+
8335
+   B->used = 48; /* set to the full buffer length; fp_clamp() below presumably trims unused high digits -- confirm */
8336
+   B->sign = FP_ZPOS; /* sign is set to FP_ZPOS unconditionally */
8337
+   memcpy(B->dp, b, 48 * sizeof(fp_digit)); /* B->dp is written only here, after all reads of a[] above */
8338
+   fp_clamp(B);
7260 8339
 }
7261 8340
 #endif
7262 8341
 
7263
-/* $Source: /cvs/libtom/libtommath/bn_mp_signed_bin_size.c,v $ */
7264
-/* $Revision: 1.3 $ */
7265
-/* $Date: 2006/03/31 14:18:44 $ */
7266 8342
 
7267
-/* End: bn_mp_signed_bin_size.c */
8343
+/* $Source: /cvs/libtom/tomsfastmath/src/sqr/fp_sqr_comba_24.c,v $ */
8344
+/* $Revision: 1.2 $ */
8345
+/* $Date: 2007/02/17 03:39:01 $ */
7268 8346
 
7269
-/* Start: bn_mp_sqr.c */
7270
-#include <bignum.h>
7271
-#ifdef BN_MP_SQR_C
7272
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
7273
- *
7274
- * LibTomMath is a library that provides multiple-precision
7275
- * integer arithmetic as well as number theoretic functionality.
7276
- *
7277
- * The library was designed directly after the MPI library by
7278
- * Michael Fromberger but has been written from scratch with
7279
- * additional optimizations in place.
7280
- *
7281
- * The library is free for all purposes without any express
7282
- * guarantee it works.
7283
- *
7284
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
7285
- */
8347
+/* End: fp_sqr_comba_24.c */
7286 8348
 
7287
-/* computes b = a*a */
7288
-int
7289
-mp_sqr (mp_int * a, mp_int * b)
7290
-{
7291
-  int     res;
8349
+/* Start: fp_sqr_comba_28.c */
8350
+#define TFM_DEFINES
8351
+#include "fp_sqr_comba.c"
7292 8352
 
7293
-#ifdef BN_MP_TOOM_SQR_C
7294
-  /* use Toom-Cook? */
7295
-  if (a->used >= TOOM_SQR_CUTOFF) {
7296
-    res = mp_toom_sqr(a, b);
7297
-  /* Karatsuba? */
7298
-  } else 
7299
-#endif
7300
-#ifdef BN_MP_KARATSUBA_SQR_C
7301
-if (a->used >= KARATSUBA_SQR_CUTOFF) {
7302
-    res = mp_karatsuba_sqr (a, b);
7303
-  } else 
7304
-#endif
7305
-  {
7306
-#ifdef BN_FAST_S_MP_SQR_C
7307
-    /* can we use the fast comba multiplier? */
7308
-    if ((a->used * 2 + 1) < MP_WARRAY && 
7309
-         a->used < 
7310
-         (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
7311
-      res = fast_s_mp_sqr (a, b);
7312
-    } else
7313
-#endif
7314
-#ifdef BN_S_MP_SQR_C
7315
-      res = s_mp_sqr (a, b);
7316
-#else
7317
-      res = MP_VAL;
7318
-#endif
7319
-  }
7320
-  b->sign = MP_ZPOS;
7321
-  return res;
8353
+#ifdef TFM_SQR28
8354
+void fp_sqr_comba28(fp_int *A, fp_int *B)
8355
+{
8356
+   fp_digit *a, b[56], c0, c1, c2, sc0, sc1, sc2;
8357
+#ifdef TFM_ISO
8358
+   fp_word tt;
8359
+#endif
8360
+
8361
+   a = A->dp;
8362
+   COMBA_START; 
8363
+
8364
+   /* clear carries */
8365
+   CLEAR_CARRY;
8366
+
8367
+   /* output 0 */
8368
+   SQRADD(a[0],a[0]);
8369
+   COMBA_STORE(b[0]);
8370
+
8371
+   /* output 1 */
8372
+   CARRY_FORWARD;
8373
+   SQRADD2(a[0], a[1]); 
8374
+   COMBA_STORE(b[1]);
8375
+
8376
+   /* output 2 */
8377
+   CARRY_FORWARD;
8378
+   SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); 
8379
+   COMBA_STORE(b[2]);
8380
+
8381
+   /* output 3 */
8382
+   CARRY_FORWARD;
8383
+   SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); 
8384
+   COMBA_STORE(b[3]);
8385
+
8386
+   /* output 4 */
8387
+   CARRY_FORWARD;
8388
+   SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); 
8389
+   COMBA_STORE(b[4]);
8390
+
8391
+   /* output 5 */
8392
+   CARRY_FORWARD;
8393
+   SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; 
8394
+   COMBA_STORE(b[5]);
8395
+
8396
+   /* output 6 */
8397
+   CARRY_FORWARD;
8398
+   SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); 
8399
+   COMBA_STORE(b[6]);
8400
+
8401
+   /* output 7 */
8402
+   CARRY_FORWARD;
8403
+   SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; 
8404
+   COMBA_STORE(b[7]);
8405
+
8406
+   /* output 8 */
8407
+   CARRY_FORWARD;
8408
+   SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); 
8409
+   COMBA_STORE(b[8]);
8410
+
8411
+   /* output 9 */
8412
+   CARRY_FORWARD;
8413
+   SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; 
8414
+   COMBA_STORE(b[9]);
8415
+
8416
+   /* output 10 */
8417
+   CARRY_FORWARD;
8418
+   SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); 
8419
+   COMBA_STORE(b[10]);
8420
+
8421
+   /* output 11 */
8422
+   CARRY_FORWARD;
8423
+   SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; 
8424
+   COMBA_STORE(b[11]);
8425
+
8426
+   /* output 12 */
8427
+   CARRY_FORWARD;
8428
+   SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); 
8429
+   COMBA_STORE(b[12]);
8430
+
8431
+   /* output 13 */
8432
+   CARRY_FORWARD;
8433
+   SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; 
8434
+   COMBA_STORE(b[13]);
8435
+
8436
+   /* output 14 */
8437
+   CARRY_FORWARD;
8438
+   SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); 
8439
+   COMBA_STORE(b[14]);
8440
+
8441
+   /* output 15 */
8442
+   CARRY_FORWARD;
8443
+   SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; 
8444
+   COMBA_STORE(b[15]);
8445
+
8446
+   /* output 16 */
8447
+   CARRY_FORWARD;
8448
+   SQRADDSC(a[0], a[16]); SQRADDAC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); 
8449
+   COMBA_STORE(b[16]);
8450
+
8451
+   /* output 17 */
8452
+   CARRY_FORWARD;
8453
+   SQRADDSC(a[0], a[17]); SQRADDAC(a[1], a[16]); SQRADDAC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; 
8454
+   COMBA_STORE(b[17]);
8455
+
8456
+   /* output 18 */
8457
+   CARRY_FORWARD;
8458
+   SQRADDSC(a[0], a[18]); SQRADDAC(a[1], a[17]); SQRADDAC(a[2], a[16]); SQRADDAC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); 
8459
+   COMBA_STORE(b[18]);
8460
+
8461
+   /* output 19 */
8462
+   CARRY_FORWARD;
8463
+   SQRADDSC(a[0], a[19]); SQRADDAC(a[1], a[18]); SQRADDAC(a[2], a[17]); SQRADDAC(a[3], a[16]); SQRADDAC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; 
8464
+   COMBA_STORE(b[19]);
8465
+
8466
+   /* output 20 */
8467
+   CARRY_FORWARD;
8468
+   SQRADDSC(a[0], a[20]); SQRADDAC(a[1], a[19]); SQRADDAC(a[2], a[18]); SQRADDAC(a[3], a[17]); SQRADDAC(a[4], a[16]); SQRADDAC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]); 
8469
+   COMBA_STORE(b[20]);
8470
+
8471
+   /* output 21 */
8472
+   CARRY_FORWARD;
8473
+   SQRADDSC(a[0], a[21]); SQRADDAC(a[1], a[20]); SQRADDAC(a[2], a[19]); SQRADDAC(a[3], a[18]); SQRADDAC(a[4], a[17]); SQRADDAC(a[5], a[16]); SQRADDAC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB; 
8474
+   COMBA_STORE(b[21]);
8475
+
8476
+   /* output 22 */
8477
+   CARRY_FORWARD;
8478
+   SQRADDSC(a[0], a[22]); SQRADDAC(a[1], a[21]); SQRADDAC(a[2], a[20]); SQRADDAC(a[3], a[19]); SQRADDAC(a[4], a[18]); SQRADDAC(a[5], a[17]); SQRADDAC(a[6], a[16]); SQRADDAC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]); 
8479
+   COMBA_STORE(b[22]);
8480
+
8481
+   /* output 23 */
8482
+   CARRY_FORWARD;
8483
+   SQRADDSC(a[0], a[23]); SQRADDAC(a[1], a[22]); SQRADDAC(a[2], a[21]); SQRADDAC(a[3], a[20]); SQRADDAC(a[4], a[19]); SQRADDAC(a[5], a[18]); SQRADDAC(a[6], a[17]); SQRADDAC(a[7], a[16]); SQRADDAC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB; 
8484
+   COMBA_STORE(b[23]);
8485
+
8486
+   /* output 24 */
8487
+   CARRY_FORWARD;
8488
+   SQRADDSC(a[0], a[24]); SQRADDAC(a[1], a[23]); SQRADDAC(a[2], a[22]); SQRADDAC(a[3], a[21]); SQRADDAC(a[4], a[20]); SQRADDAC(a[5], a[19]); SQRADDAC(a[6], a[18]); SQRADDAC(a[7], a[17]); SQRADDAC(a[8], a[16]); SQRADDAC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]); 
8489
+   COMBA_STORE(b[24]);
8490
+
8491
+   /* output 25 */
8492
+   CARRY_FORWARD;
8493
+   SQRADDSC(a[0], a[25]); SQRADDAC(a[1], a[24]); SQRADDAC(a[2], a[23]); SQRADDAC(a[3], a[22]); SQRADDAC(a[4], a[21]); SQRADDAC(a[5], a[20]); SQRADDAC(a[6], a[19]); SQRADDAC(a[7], a[18]); SQRADDAC(a[8], a[17]); SQRADDAC(a[9], a[16]); SQRADDAC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB; 
8494
+   COMBA_STORE(b[25]);
8495
+
8496
+   /* output 26 */
8497
+   CARRY_FORWARD;
8498
+   SQRADDSC(a[0], a[26]); SQRADDAC(a[1], a[25]); SQRADDAC(a[2], a[24]); SQRADDAC(a[3], a[23]); SQRADDAC(a[4], a[22]); SQRADDAC(a[5], a[21]); SQRADDAC(a[6], a[20]); SQRADDAC(a[7], a[19]); SQRADDAC(a[8], a[18]); SQRADDAC(a[9], a[17]); SQRADDAC(a[10], a[16]); SQRADDAC(a[11], a[15]); SQRADDAC(a[12], a[14]); SQRADDDB; SQRADD(a[13], a[13]); 
8499
+   COMBA_STORE(b[26]);
8500
+
8501
+   /* output 27 */
8502
+   CARRY_FORWARD;
8503
+   SQRADDSC(a[0], a[27]); SQRADDAC(a[1], a[26]); SQRADDAC(a[2], a[25]); SQRADDAC(a[3], a[24]); SQRADDAC(a[4], a[23]); SQRADDAC(a[5], a[22]); SQRADDAC(a[6], a[21]); SQRADDAC(a[7], a[20]); SQRADDAC(a[8], a[19]); SQRADDAC(a[9], a[18]); SQRADDAC(a[10], a[17]); SQRADDAC(a[11], a[16]); SQRADDAC(a[12], a[15]); SQRADDAC(a[13], a[14]); SQRADDDB; 
8504
+   COMBA_STORE(b[27]);
8505
+
8506
+   /* output 28 */
8507
+   CARRY_FORWARD;
8508
+   SQRADDSC(a[1], a[27]); SQRADDAC(a[2], a[26]); SQRADDAC(a[3], a[25]); SQRADDAC(a[4], a[24]); SQRADDAC(a[5], a[23]); SQRADDAC(a[6], a[22]); SQRADDAC(a[7], a[21]); SQRADDAC(a[8], a[20]); SQRADDAC(a[9], a[19]); SQRADDAC(a[10], a[18]); SQRADDAC(a[11], a[17]); SQRADDAC(a[12], a[16]); SQRADDAC(a[13], a[15]); SQRADDDB; SQRADD(a[14], a[14]); 
8509
+   COMBA_STORE(b[28]);
8510
+
8511
+   /* output 29 */
8512
+   CARRY_FORWARD;
8513
+   SQRADDSC(a[2], a[27]); SQRADDAC(a[3], a[26]); SQRADDAC(a[4], a[25]); SQRADDAC(a[5], a[24]); SQRADDAC(a[6], a[23]); SQRADDAC(a[7], a[22]); SQRADDAC(a[8], a[21]); SQRADDAC(a[9], a[20]); SQRADDAC(a[10], a[19]); SQRADDAC(a[11], a[18]); SQRADDAC(a[12], a[17]); SQRADDAC(a[13], a[16]); SQRADDAC(a[14], a[15]); SQRADDDB; 
8514
+   COMBA_STORE(b[29]);
8515
+
8516
+   /* output 30 */
8517
+   CARRY_FORWARD;
8518
+   SQRADDSC(a[3], a[27]); SQRADDAC(a[4], a[26]); SQRADDAC(a[5], a[25]); SQRADDAC(a[6], a[24]); SQRADDAC(a[7], a[23]); SQRADDAC(a[8], a[22]); SQRADDAC(a[9], a[21]); SQRADDAC(a[10], a[20]); SQRADDAC(a[11], a[19]); SQRADDAC(a[12], a[18]); SQRADDAC(a[13], a[17]); SQRADDAC(a[14], a[16]); SQRADDDB; SQRADD(a[15], a[15]); 
8519
+   COMBA_STORE(b[30]);
8520
+
8521
+   /* output 31 */
8522
+   CARRY_FORWARD;
8523
+   SQRADDSC(a[4], a[27]); SQRADDAC(a[5], a[26]); SQRADDAC(a[6], a[25]); SQRADDAC(a[7], a[24]); SQRADDAC(a[8], a[23]); SQRADDAC(a[9], a[22]); SQRADDAC(a[10], a[21]); SQRADDAC(a[11], a[20]); SQRADDAC(a[12], a[19]); SQRADDAC(a[13], a[18]); SQRADDAC(a[14], a[17]); SQRADDAC(a[15], a[16]); SQRADDDB; 
8524
+   COMBA_STORE(b[31]);
8525
+
8526
+   /* output 32 */
8527
+   CARRY_FORWARD;
8528
+   SQRADDSC(a[5], a[27]); SQRADDAC(a[6], a[26]); SQRADDAC(a[7], a[25]); SQRADDAC(a[8], a[24]); SQRADDAC(a[9], a[23]); SQRADDAC(a[10], a[22]); SQRADDAC(a[11], a[21]); SQRADDAC(a[12], a[20]); SQRADDAC(a[13], a[19]); SQRADDAC(a[14], a[18]); SQRADDAC(a[15], a[17]); SQRADDDB; SQRADD(a[16], a[16]); 
8529
+   COMBA_STORE(b[32]);
8530
+
8531
+   /* output 33 */
8532
+   CARRY_FORWARD;
8533
+   SQRADDSC(a[6], a[27]); SQRADDAC(a[7], a[26]); SQRADDAC(a[8], a[25]); SQRADDAC(a[9], a[24]); SQRADDAC(a[10], a[23]); SQRADDAC(a[11], a[22]); SQRADDAC(a[12], a[21]); SQRADDAC(a[13], a[20]); SQRADDAC(a[14], a[19]); SQRADDAC(a[15], a[18]); SQRADDAC(a[16], a[17]); SQRADDDB; 
8534
+   COMBA_STORE(b[33]);
8535
+
8536
+   /* output 34 */
8537
+   CARRY_FORWARD;
8538
+   SQRADDSC(a[7], a[27]); SQRADDAC(a[8], a[26]); SQRADDAC(a[9], a[25]); SQRADDAC(a[10], a[24]); SQRADDAC(a[11], a[23]); SQRADDAC(a[12], a[22]); SQRADDAC(a[13], a[21]); SQRADDAC(a[14], a[20]); SQRADDAC(a[15], a[19]); SQRADDAC(a[16], a[18]); SQRADDDB; SQRADD(a[17], a[17]); 
8539
+   COMBA_STORE(b[34]);
8540
+
8541
+   /* output 35 */
8542
+   CARRY_FORWARD;
8543
+   SQRADDSC(a[8], a[27]); SQRADDAC(a[9], a[26]); SQRADDAC(a[10], a[25]); SQRADDAC(a[11], a[24]); SQRADDAC(a[12], a[23]); SQRADDAC(a[13], a[22]); SQRADDAC(a[14], a[21]); SQRADDAC(a[15], a[20]); SQRADDAC(a[16], a[19]); SQRADDAC(a[17], a[18]); SQRADDDB; 
8544
+   COMBA_STORE(b[35]);
8545
+
8546
+   /* output 36 */
8547
+   CARRY_FORWARD;
8548
+   SQRADDSC(a[9], a[27]); SQRADDAC(a[10], a[26]); SQRADDAC(a[11], a[25]); SQRADDAC(a[12], a[24]); SQRADDAC(a[13], a[23]); SQRADDAC(a[14], a[22]); SQRADDAC(a[15], a[21]); SQRADDAC(a[16], a[20]); SQRADDAC(a[17], a[19]); SQRADDDB; SQRADD(a[18], a[18]); 
8549
+   COMBA_STORE(b[36]);
8550
+
8551
+   /* output 37 */
8552
+   CARRY_FORWARD;
8553
+   SQRADDSC(a[10], a[27]); SQRADDAC(a[11], a[26]); SQRADDAC(a[12], a[25]); SQRADDAC(a[13], a[24]); SQRADDAC(a[14], a[23]); SQRADDAC(a[15], a[22]); SQRADDAC(a[16], a[21]); SQRADDAC(a[17], a[20]); SQRADDAC(a[18], a[19]); SQRADDDB; 
8554
+   COMBA_STORE(b[37]);
8555
+
8556
+   /* output 38 */
8557
+   CARRY_FORWARD;
8558
+   SQRADDSC(a[11], a[27]); SQRADDAC(a[12], a[26]); SQRADDAC(a[13], a[25]); SQRADDAC(a[14], a[24]); SQRADDAC(a[15], a[23]); SQRADDAC(a[16], a[22]); SQRADDAC(a[17], a[21]); SQRADDAC(a[18], a[20]); SQRADDDB; SQRADD(a[19], a[19]); 
8559
+   COMBA_STORE(b[38]);
8560
+
8561
+   /* output 39 */
8562
+   CARRY_FORWARD;
8563
+   SQRADDSC(a[12], a[27]); SQRADDAC(a[13], a[26]); SQRADDAC(a[14], a[25]); SQRADDAC(a[15], a[24]); SQRADDAC(a[16], a[23]); SQRADDAC(a[17], a[22]); SQRADDAC(a[18], a[21]); SQRADDAC(a[19], a[20]); SQRADDDB; 
8564
+   COMBA_STORE(b[39]);
8565
+
8566
+   /* output 40 */
8567
+   CARRY_FORWARD;
8568
+   SQRADDSC(a[13], a[27]); SQRADDAC(a[14], a[26]); SQRADDAC(a[15], a[25]); SQRADDAC(a[16], a[24]); SQRADDAC(a[17], a[23]); SQRADDAC(a[18], a[22]); SQRADDAC(a[19], a[21]); SQRADDDB; SQRADD(a[20], a[20]); 
8569
+   COMBA_STORE(b[40]);
8570
+
8571
+   /* output 41 */
8572
+   CARRY_FORWARD;
8573
+   SQRADDSC(a[14], a[27]); SQRADDAC(a[15], a[26]); SQRADDAC(a[16], a[25]); SQRADDAC(a[17], a[24]); SQRADDAC(a[18], a[23]); SQRADDAC(a[19], a[22]); SQRADDAC(a[20], a[21]); SQRADDDB; 
8574
+   COMBA_STORE(b[41]);
8575
+
8576
+   /* output 42 */
8577
+   CARRY_FORWARD;
8578
+   SQRADDSC(a[15], a[27]); SQRADDAC(a[16], a[26]); SQRADDAC(a[17], a[25]); SQRADDAC(a[18], a[24]); SQRADDAC(a[19], a[23]); SQRADDAC(a[20], a[22]); SQRADDDB; SQRADD(a[21], a[21]); 
8579
+   COMBA_STORE(b[42]);
8580
+
8581
+   /* output 43 */
8582
+   CARRY_FORWARD;
8583
+   SQRADDSC(a[16], a[27]); SQRADDAC(a[17], a[26]); SQRADDAC(a[18], a[25]); SQRADDAC(a[19], a[24]); SQRADDAC(a[20], a[23]); SQRADDAC(a[21], a[22]); SQRADDDB; 
8584
+   COMBA_STORE(b[43]);
8585
+
8586
+   /* output 44 */
8587
+   CARRY_FORWARD;
8588
+   SQRADDSC(a[17], a[27]); SQRADDAC(a[18], a[26]); SQRADDAC(a[19], a[25]); SQRADDAC(a[20], a[24]); SQRADDAC(a[21], a[23]); SQRADDDB; SQRADD(a[22], a[22]); 
8589
+   COMBA_STORE(b[44]);
8590
+
8591
+   /* output 45 */
8592
+   CARRY_FORWARD;
8593
+   SQRADDSC(a[18], a[27]); SQRADDAC(a[19], a[26]); SQRADDAC(a[20], a[25]); SQRADDAC(a[21], a[24]); SQRADDAC(a[22], a[23]); SQRADDDB; 
8594
+   COMBA_STORE(b[45]);
8595
+
8596
+   /* output 46 */
8597
+   CARRY_FORWARD;
8598
+   SQRADDSC(a[19], a[27]); SQRADDAC(a[20], a[26]); SQRADDAC(a[21], a[25]); SQRADDAC(a[22], a[24]); SQRADDDB; SQRADD(a[23], a[23]); 
8599
+   COMBA_STORE(b[46]);
8600
+
8601
+   /* output 47 */
8602
+   CARRY_FORWARD;
8603
+   SQRADDSC(a[20], a[27]); SQRADDAC(a[21], a[26]); SQRADDAC(a[22], a[25]); SQRADDAC(a[23], a[24]); SQRADDDB; 
8604
+   COMBA_STORE(b[47]);
8605
+
8606
+   /* output 48 */
8607
+   CARRY_FORWARD;
8608
+   SQRADDSC(a[21], a[27]); SQRADDAC(a[22], a[26]); SQRADDAC(a[23], a[25]); SQRADDDB; SQRADD(a[24], a[24]); 
8609
+   COMBA_STORE(b[48]);
8610
+
8611
+   /* output 49 */
8612
+   CARRY_FORWARD;
8613
+   SQRADDSC(a[22], a[27]); SQRADDAC(a[23], a[26]); SQRADDAC(a[24], a[25]); SQRADDDB; 
8614
+   COMBA_STORE(b[49]);
8615
+
8616
+   /* output 50 */
8617
+   CARRY_FORWARD;
8618
+   SQRADD2(a[23], a[27]); SQRADD2(a[24], a[26]); SQRADD(a[25], a[25]); 
8619
+   COMBA_STORE(b[50]);
8620
+
8621
+   /* output 51 */
8622
+   CARRY_FORWARD;
8623
+   SQRADD2(a[24], a[27]); SQRADD2(a[25], a[26]); 
8624
+   COMBA_STORE(b[51]);
8625
+
8626
+   /* output 52 */
8627
+   CARRY_FORWARD;
8628
+   SQRADD2(a[25], a[27]); SQRADD(a[26], a[26]); 
8629
+   COMBA_STORE(b[52]);
8630
+
8631
+   /* output 53 */
8632
+   CARRY_FORWARD;
8633
+   SQRADD2(a[26], a[27]); 
8634
+   COMBA_STORE(b[53]);
8635
+
8636
+   /* output 54 */
8637
+   CARRY_FORWARD;
8638
+   SQRADD(a[27], a[27]); 
8639
+   COMBA_STORE(b[54]);
8640
+   COMBA_STORE2(b[55]);
8641
+   COMBA_FINI;
8642
+
8643
+   B->used = 56;
8644
+   B->sign = FP_ZPOS;
8645
+   memcpy(B->dp, b, 56 * sizeof(fp_digit));
8646
+   fp_clamp(B);
7322 8647
 }
7323 8648
 #endif
7324 8649
 
7325
-/* $Source: /cvs/libtom/libtommath/bn_mp_sqr.c,v $ */
7326
-/* $Revision: 1.3 $ */
7327
-/* $Date: 2006/03/31 14:18:44 $ */
7328 8650
 
7329
-/* End: bn_mp_sqr.c */
8651
+/* $Source: /cvs/libtom/tomsfastmath/src/sqr/fp_sqr_comba_28.c,v $ */
8652
+/* $Revision: 1.2 $ */
8653
+/* $Date: 2007/02/17 03:39:01 $ */
7330 8654
 
7331
-/* Start: bn_mp_sqrmod.c */
7332
-#include <bignum.h>
7333
-#ifdef BN_MP_SQRMOD_C
7334
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
7335
- *
7336
- * LibTomMath is a library that provides multiple-precision
7337
- * integer arithmetic as well as number theoretic functionality.
7338
- *
7339
- * The library was designed directly after the MPI library by
7340
- * Michael Fromberger but has been written from scratch with
7341
- * additional optimizations in place.
7342
- *
7343
- * The library is free for all purposes without any express
7344
- * guarantee it works.
7345
- *
7346
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
7347
- */
7348
-
7349
-/* c = a * a (mod b) */
7350
-int
7351
-mp_sqrmod (mp_int * a, mp_int * b, mp_int * c)
7352
-{
7353
-  int     res;
7354
-  mp_int  t;
8655
+/* End: fp_sqr_comba_28.c */
7355 8656
 
7356
-  if ((res = mp_init (&t)) != MP_OKAY) {
7357
-    return res;
7358
-  }
8657
+/* Start: fp_sqr_comba_3.c */
8658
+#define TFM_DEFINES
8659
+#include "fp_sqr_comba.c"
7359 8660
 
7360
-  if ((res = mp_sqr (a, &t)) != MP_OKAY) {
7361
-    mp_clear (&t);
7362
-    return res;
7363
-  }
7364
-  res = mp_mod (&t, b, c);
7365
-  mp_clear (&t);
7366
-  return res;
8661
+#ifdef TFM_SQR3 /* fixed-size 3-digit squaring is built only when TFM_SQR3 is defined */
8662
+void fp_sqr_comba3(fp_int *A, fp_int *B) /* writes a 6-digit result computed from a[0..2] of A into B; presumably B = A*A (the SQR* / COMBA_* macros are defined outside this block - confirm) */
8663
+{
8664
+   fp_digit *a, b[6], c0, c1, c2, sc0, sc1, sc2; /* a: input digits; b: local result staging; c0..c2 presumably the column accumulator used by the macros; sc0..sc2 are not named directly in this body */
8665
+#ifdef TFM_ISO
8666
+   fp_word tt; /* only declared for TFM_ISO builds; presumably scratch for the portable macro variants - confirm against the macro definitions */
8667
+#endif
8668
+
8669
+   a = A->dp; /* only a[0], a[1] and a[2] are read below */
8670
+   COMBA_START; 
8671
+
8672
+   /* clear carries (CLEAR_CARRY presumably zeroes the c0..c2 accumulator before the first column) */
8673
+   CLEAR_CARRY;
8674
+
8675
+   /* output 0: index sum 0 - the single term a[0]*a[0], stored to b[0] */
8676
+   SQRADD(a[0],a[0]);
8677
+   COMBA_STORE(b[0]);
8678
+
8679
+   /* output 1: index sum 1 - cross term a[0]*a[1] (SQRADD2 presumably adds it twice), stored to b[1] */
8680
+   CARRY_FORWARD;
8681
+   SQRADD2(a[0], a[1]); 
8682
+   COMBA_STORE(b[1]);
8683
+
8684
+   /* output 2: index sum 2 - cross term a[0]*a[2] plus the square term a[1]*a[1], stored to b[2] */
8685
+   CARRY_FORWARD;
8686
+   SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); 
8687
+   COMBA_STORE(b[2]);
8688
+
8689
+   /* output 3: index sum 3 - cross term a[1]*a[2], stored to b[3] */
8690
+   CARRY_FORWARD;
8691
+   SQRADD2(a[1], a[2]); 
8692
+   COMBA_STORE(b[3]);
8693
+
8694
+   /* output 4: index sum 4 - the square term a[2]*a[2], stored to b[4]; COMBA_STORE2 then fills b[5] (presumably the remaining carry) */
8695
+   CARRY_FORWARD;
8696
+   SQRADD(a[2], a[2]); 
8697
+   COMBA_STORE(b[4]);
8698
+   COMBA_STORE2(b[5]);
8699
+   COMBA_FINI;
8700
+
8701
+   B->used = 6; /* all six digits are reported as used; fp_clamp() below is called to adjust this */
8702
+   B->sign = FP_ZPOS; /* the sign of A is never read; the result is always marked FP_ZPOS */
8703
+   memcpy(B->dp, b, 6 * sizeof(fp_digit)); /* B->dp is written only here, after the last read of a[], since results were staged in b */
8704
+   fp_clamp(B); /* presumably trims leading zero digits - confirm against fp_clamp's definition */
7367 8705
 }
7368 8706
 #endif /* TFM_SQR3 */
7369 8707
 
7370
-/* $Source: /cvs/libtom/libtommath/bn_mp_sqrmod.c,v $ */
7371
-/* $Revision: 1.3 $ */
7372
-/* $Date: 2006/03/31 14:18:44 $ */
7373
-
7374
-/* End: bn_mp_sqrmod.c */
7375
-
7376
-/* Start: bn_mp_sqrt.c */
7377
-#include <bignum.h>
7378
-#ifdef BN_MP_SQRT_C
7379
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
7380
- *
7381
- * LibTomMath is a library that provides multiple-precision
7382
- * integer arithmetic as well as number theoretic functionality.
7383
- *
7384
- * The library was designed directly after the MPI library by
7385
- * Michael Fromberger but has been written from scratch with
7386
- * additional optimizations in place.
7387
- *
7388
- * The library is free for all purposes without any express
7389
- * guarantee it works.
7390
- *
7391
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
7392
- */
7393
-
7394
-/* this function is less generic than mp_n_root, simpler and faster */
7395
-int mp_sqrt(mp_int *arg, mp_int *ret) 
7396
-{
7397
-  int res;
7398
-  mp_int t1,t2;
7399
-
7400
-  /* must be positive */
7401
-  if (arg->sign == MP_NEG) {
7402
-    return MP_VAL;
7403
-  }
7404
-
7405
-  /* easy out */
7406
-  if (mp_iszero(arg) == MP_YES) {
7407
-    mp_zero(ret);
7408
-    return MP_OKAY;
7409
-  }
7410
-
7411
-  if ((res = mp_init_copy(&t1, arg)) != MP_OKAY) {
7412
-    return res;
7413
-  }
7414
-
7415
-  if ((res = mp_init(&t2)) != MP_OKAY) {
7416
-    goto E2;
7417
-  }
7418 8708
 
7419
-  /* First approx. (not very bad for large arg) */
7420
-  mp_rshd (&t1,t1.used/2);
8709
+/* $Source: /cvs/libtom/tomsfastmath/src/sqr/fp_sqr_comba_3.c,v $ */
8710
+/* $Revision: 1.2 $ */
8711
+/* $Date: 2007/02/17 03:39:01 $ */
7421 8712
 
7422
-  /* t1 > 0  */ 
7423
-  if ((res = mp_div(arg,&t1,&t2,NULL)) != MP_OKAY) {
7424
-    goto E1;
7425
-  }
7426
-  if ((res = mp_add(&t1,&t2,&t1)) != MP_OKAY) {
7427
-    goto E1;
7428
-  }
7429
-  if ((res = mp_div_2(&t1,&t1)) != MP_OKAY) {
7430
-    goto E1;
7431
-  }
7432
-  /* And now t1 > sqrt(arg) */
7433
-  do { 
7434
-    if ((res = mp_div(arg,&t1,&t2,NULL)) != MP_OKAY) {
7435
-      goto E1;
7436
-    }
7437
-    if ((res = mp_add(&t1,&t2,&t1)) != MP_OKAY) {
7438
-      goto E1;
7439
-    }
7440
-    if ((res = mp_div_2(&t1,&t1)) != MP_OKAY) {
7441
-      goto E1;
7442
-    }
7443
-    /* t1 >= sqrt(arg) >= t2 at this point */
7444
-  } while (mp_cmp_mag(&t1,&t2) == MP_GT);
8713
+/* End: fp_sqr_comba_3.c */
7445 8714
 
7446
-  mp_exch(&t1,ret);
8715
+/* Start: fp_sqr_comba_32.c */
8716
+#define TFM_DEFINES
8717
+#include "fp_sqr_comba.c"
7447 8718
 
7448
-E1: mp_clear(&t2);
7449
-E2: mp_clear(&t1);
7450
-  return res;
8719
+#ifdef TFM_SQR32
8720
+void fp_sqr_comba32(fp_int *A, fp_int *B)
8721
+{
8722
+   fp_digit *a, b[64], c0, c1, c2, sc0, sc1, sc2;
8723
+#ifdef TFM_ISO
8724
+   fp_word tt;
8725
+#endif
8726
+
8727
+   a = A->dp;
8728
+   COMBA_START; 
8729
+
8730
+   /* clear carries */
8731
+   CLEAR_CARRY;
8732
+
8733
+   /* output 0 */
8734
+   SQRADD(a[0],a[0]);
8735
+   COMBA_STORE(b[0]);
8736
+
8737
+   /* output 1 */
8738
+   CARRY_FORWARD;
8739
+   SQRADD2(a[0], a[1]); 
8740
+   COMBA_STORE(b[1]);
8741
+
8742
+   /* output 2 */
8743
+   CARRY_FORWARD;
8744
+   SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); 
8745
+   COMBA_STORE(b[2]);
8746
+
8747
+   /* output 3 */
8748
+   CARRY_FORWARD;
8749
+   SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); 
8750
+   COMBA_STORE(b[3]);
8751
+
8752
+   /* output 4 */
8753
+   CARRY_FORWARD;
8754
+   SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); 
8755
+   COMBA_STORE(b[4]);
8756
+
8757
+   /* output 5 */
8758
+   CARRY_FORWARD;
8759
+   SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; 
8760
+   COMBA_STORE(b[5]);
8761
+
8762
+   /* output 6 */
8763
+   CARRY_FORWARD;
8764
+   SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); 
8765
+   COMBA_STORE(b[6]);
8766
+
8767
+   /* output 7 */
8768
+   CARRY_FORWARD;
8769
+   SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; 
8770
+   COMBA_STORE(b[7]);
8771
+
8772
+   /* output 8 */
8773
+   CARRY_FORWARD;
8774
+   SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); 
8775
+   COMBA_STORE(b[8]);
8776
+
8777
+   /* output 9 */
8778
+   CARRY_FORWARD;
8779
+   SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; 
8780
+   COMBA_STORE(b[9]);
8781
+
8782
+   /* output 10 */
8783
+   CARRY_FORWARD;
8784
+   SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); 
8785
+   COMBA_STORE(b[10]);
8786
+
8787
+   /* output 11 */
8788
+   CARRY_FORWARD;
8789
+   SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; 
8790
+   COMBA_STORE(b[11]);
8791
+
8792
+   /* output 12 */
8793
+   CARRY_FORWARD;
8794
+   SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); 
8795
+   COMBA_STORE(b[12]);
8796
+
8797
+   /* output 13 */
8798
+   CARRY_FORWARD;
8799
+   SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; 
8800
+   COMBA_STORE(b[13]);
8801
+
8802
+   /* output 14 */
8803
+   CARRY_FORWARD;
8804
+   SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); 
8805
+   COMBA_STORE(b[14]);
8806
+
8807
+   /* output 15 */
8808
+   CARRY_FORWARD;
8809
+   SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; 
8810
+   COMBA_STORE(b[15]);
8811
+
8812
+   /* output 16 */
8813
+   CARRY_FORWARD;
8814
+   SQRADDSC(a[0], a[16]); SQRADDAC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); 
8815
+   COMBA_STORE(b[16]);
8816
+
8817
+   /* output 17 */
8818
+   CARRY_FORWARD;
8819
+   SQRADDSC(a[0], a[17]); SQRADDAC(a[1], a[16]); SQRADDAC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; 
8820
+   COMBA_STORE(b[17]);
8821
+
8822
+   /* output 18 */
8823
+   CARRY_FORWARD;
8824
+   SQRADDSC(a[0], a[18]); SQRADDAC(a[1], a[17]); SQRADDAC(a[2], a[16]); SQRADDAC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); 
8825
+   COMBA_STORE(b[18]);
8826
+
8827
+   /* output 19 */
8828
+   CARRY_FORWARD;
8829
+   SQRADDSC(a[0], a[19]); SQRADDAC(a[1], a[18]); SQRADDAC(a[2], a[17]); SQRADDAC(a[3], a[16]); SQRADDAC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; 
8830
+   COMBA_STORE(b[19]);
8831
+
8832
+   /* output 20 */
8833
+   CARRY_FORWARD;
8834
+   SQRADDSC(a[0], a[20]); SQRADDAC(a[1], a[19]); SQRADDAC(a[2], a[18]); SQRADDAC(a[3], a[17]); SQRADDAC(a[4], a[16]); SQRADDAC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]); 
8835
+   COMBA_STORE(b[20]);
8836
+
8837
+   /* output 21 */
8838
+   CARRY_FORWARD;
8839
+   SQRADDSC(a[0], a[21]); SQRADDAC(a[1], a[20]); SQRADDAC(a[2], a[19]); SQRADDAC(a[3], a[18]); SQRADDAC(a[4], a[17]); SQRADDAC(a[5], a[16]); SQRADDAC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB; 
8840
+   COMBA_STORE(b[21]);
8841
+
8842
+   /* output 22 */
8843
+   CARRY_FORWARD;
8844
+   SQRADDSC(a[0], a[22]); SQRADDAC(a[1], a[21]); SQRADDAC(a[2], a[20]); SQRADDAC(a[3], a[19]); SQRADDAC(a[4], a[18]); SQRADDAC(a[5], a[17]); SQRADDAC(a[6], a[16]); SQRADDAC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]); 
8845
+   COMBA_STORE(b[22]);
8846
+
8847
+   /* output 23 */
8848
+   CARRY_FORWARD;
8849
+   SQRADDSC(a[0], a[23]); SQRADDAC(a[1], a[22]); SQRADDAC(a[2], a[21]); SQRADDAC(a[3], a[20]); SQRADDAC(a[4], a[19]); SQRADDAC(a[5], a[18]); SQRADDAC(a[6], a[17]); SQRADDAC(a[7], a[16]); SQRADDAC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB; 
8850
+   COMBA_STORE(b[23]);
8851
+
8852
+   /* output 24 */
8853
+   CARRY_FORWARD;
8854
+   SQRADDSC(a[0], a[24]); SQRADDAC(a[1], a[23]); SQRADDAC(a[2], a[22]); SQRADDAC(a[3], a[21]); SQRADDAC(a[4], a[20]); SQRADDAC(a[5], a[19]); SQRADDAC(a[6], a[18]); SQRADDAC(a[7], a[17]); SQRADDAC(a[8], a[16]); SQRADDAC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]); 
8855
+   COMBA_STORE(b[24]);
8856
+
8857
+   /* output 25 */
8858
+   CARRY_FORWARD;
8859
+   SQRADDSC(a[0], a[25]); SQRADDAC(a[1], a[24]); SQRADDAC(a[2], a[23]); SQRADDAC(a[3], a[22]); SQRADDAC(a[4], a[21]); SQRADDAC(a[5], a[20]); SQRADDAC(a[6], a[19]); SQRADDAC(a[7], a[18]); SQRADDAC(a[8], a[17]); SQRADDAC(a[9], a[16]); SQRADDAC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB; 
8860
+   COMBA_STORE(b[25]);
8861
+
8862
+   /* output 26 */
8863
+   CARRY_FORWARD;
8864
+   SQRADDSC(a[0], a[26]); SQRADDAC(a[1], a[25]); SQRADDAC(a[2], a[24]); SQRADDAC(a[3], a[23]); SQRADDAC(a[4], a[22]); SQRADDAC(a[5], a[21]); SQRADDAC(a[6], a[20]); SQRADDAC(a[7], a[19]); SQRADDAC(a[8], a[18]); SQRADDAC(a[9], a[17]); SQRADDAC(a[10], a[16]); SQRADDAC(a[11], a[15]); SQRADDAC(a[12], a[14]); SQRADDDB; SQRADD(a[13], a[13]); 
8865
+   COMBA_STORE(b[26]);
8866
+
8867
+   /* output 27 */
8868
+   CARRY_FORWARD;
8869
+   SQRADDSC(a[0], a[27]); SQRADDAC(a[1], a[26]); SQRADDAC(a[2], a[25]); SQRADDAC(a[3], a[24]); SQRADDAC(a[4], a[23]); SQRADDAC(a[5], a[22]); SQRADDAC(a[6], a[21]); SQRADDAC(a[7], a[20]); SQRADDAC(a[8], a[19]); SQRADDAC(a[9], a[18]); SQRADDAC(a[10], a[17]); SQRADDAC(a[11], a[16]); SQRADDAC(a[12], a[15]); SQRADDAC(a[13], a[14]); SQRADDDB; 
8870
+   COMBA_STORE(b[27]);
8871
+
8872
+   /* output 28 */
8873
+   CARRY_FORWARD;
8874
+   SQRADDSC(a[0], a[28]); SQRADDAC(a[1], a[27]); SQRADDAC(a[2], a[26]); SQRADDAC(a[3], a[25]); SQRADDAC(a[4], a[24]); SQRADDAC(a[5], a[23]); SQRADDAC(a[6], a[22]); SQRADDAC(a[7], a[21]); SQRADDAC(a[8], a[20]); SQRADDAC(a[9], a[19]); SQRADDAC(a[10], a[18]); SQRADDAC(a[11], a[17]); SQRADDAC(a[12], a[16]); SQRADDAC(a[13], a[15]); SQRADDDB; SQRADD(a[14], a[14]); 
8875
+   COMBA_STORE(b[28]);
8876
+
8877
+   /* output 29 */
8878
+   CARRY_FORWARD;
8879
+   SQRADDSC(a[0], a[29]); SQRADDAC(a[1], a[28]); SQRADDAC(a[2], a[27]); SQRADDAC(a[3], a[26]); SQRADDAC(a[4], a[25]); SQRADDAC(a[5], a[24]); SQRADDAC(a[6], a[23]); SQRADDAC(a[7], a[22]); SQRADDAC(a[8], a[21]); SQRADDAC(a[9], a[20]); SQRADDAC(a[10], a[19]); SQRADDAC(a[11], a[18]); SQRADDAC(a[12], a[17]); SQRADDAC(a[13], a[16]); SQRADDAC(a[14], a[15]); SQRADDDB; 
8880
+   COMBA_STORE(b[29]);
8881
+
8882
+   /* output 30 */
8883
+   CARRY_FORWARD;
8884
+   SQRADDSC(a[0], a[30]); SQRADDAC(a[1], a[29]); SQRADDAC(a[2], a[28]); SQRADDAC(a[3], a[27]); SQRADDAC(a[4], a[26]); SQRADDAC(a[5], a[25]); SQRADDAC(a[6], a[24]); SQRADDAC(a[7], a[23]); SQRADDAC(a[8], a[22]); SQRADDAC(a[9], a[21]); SQRADDAC(a[10], a[20]); SQRADDAC(a[11], a[19]); SQRADDAC(a[12], a[18]); SQRADDAC(a[13], a[17]); SQRADDAC(a[14], a[16]); SQRADDDB; SQRADD(a[15], a[15]); 
8885
+   COMBA_STORE(b[30]);
8886
+
8887
+   /* output 31 */
8888
+   CARRY_FORWARD;
8889
+   SQRADDSC(a[0], a[31]); SQRADDAC(a[1], a[30]); SQRADDAC(a[2], a[29]); SQRADDAC(a[3], a[28]); SQRADDAC(a[4], a[27]); SQRADDAC(a[5], a[26]); SQRADDAC(a[6], a[25]); SQRADDAC(a[7], a[24]); SQRADDAC(a[8], a[23]); SQRADDAC(a[9], a[22]); SQRADDAC(a[10], a[21]); SQRADDAC(a[11], a[20]); SQRADDAC(a[12], a[19]); SQRADDAC(a[13], a[18]); SQRADDAC(a[14], a[17]); SQRADDAC(a[15], a[16]); SQRADDDB; 
8890
+   COMBA_STORE(b[31]);
8891
+
8892
+   /* output 32 */
8893
+   CARRY_FORWARD;
8894
+   SQRADDSC(a[1], a[31]); SQRADDAC(a[2], a[30]); SQRADDAC(a[3], a[29]); SQRADDAC(a[4], a[28]); SQRADDAC(a[5], a[27]); SQRADDAC(a[6], a[26]); SQRADDAC(a[7], a[25]); SQRADDAC(a[8], a[24]); SQRADDAC(a[9], a[23]); SQRADDAC(a[10], a[22]); SQRADDAC(a[11], a[21]); SQRADDAC(a[12], a[20]); SQRADDAC(a[13], a[19]); SQRADDAC(a[14], a[18]); SQRADDAC(a[15], a[17]); SQRADDDB; SQRADD(a[16], a[16]); 
8895
+   COMBA_STORE(b[32]);
8896
+
8897
+   /* output 33 */
8898
+   CARRY_FORWARD;
8899
+   SQRADDSC(a[2], a[31]); SQRADDAC(a[3], a[30]); SQRADDAC(a[4], a[29]); SQRADDAC(a[5], a[28]); SQRADDAC(a[6], a[27]); SQRADDAC(a[7], a[26]); SQRADDAC(a[8], a[25]); SQRADDAC(a[9], a[24]); SQRADDAC(a[10], a[23]); SQRADDAC(a[11], a[22]); SQRADDAC(a[12], a[21]); SQRADDAC(a[13], a[20]); SQRADDAC(a[14], a[19]); SQRADDAC(a[15], a[18]); SQRADDAC(a[16], a[17]); SQRADDDB; 
8900
+   COMBA_STORE(b[33]);
8901
+
8902
+   /* output 34 */
8903
+   CARRY_FORWARD;
8904
+   SQRADDSC(a[3], a[31]); SQRADDAC(a[4], a[30]); SQRADDAC(a[5], a[29]); SQRADDAC(a[6], a[28]); SQRADDAC(a[7], a[27]); SQRADDAC(a[8], a[26]); SQRADDAC(a[9], a[25]); SQRADDAC(a[10], a[24]); SQRADDAC(a[11], a[23]); SQRADDAC(a[12], a[22]); SQRADDAC(a[13], a[21]); SQRADDAC(a[14], a[20]); SQRADDAC(a[15], a[19]); SQRADDAC(a[16], a[18]); SQRADDDB; SQRADD(a[17], a[17]); 
8905
+   COMBA_STORE(b[34]);
8906
+
8907
+   /* output 35 */
8908
+   CARRY_FORWARD;
8909
+   SQRADDSC(a[4], a[31]); SQRADDAC(a[5], a[30]); SQRADDAC(a[6], a[29]); SQRADDAC(a[7], a[28]); SQRADDAC(a[8], a[27]); SQRADDAC(a[9], a[26]); SQRADDAC(a[10], a[25]); SQRADDAC(a[11], a[24]); SQRADDAC(a[12], a[23]); SQRADDAC(a[13], a[22]); SQRADDAC(a[14], a[21]); SQRADDAC(a[15], a[20]); SQRADDAC(a[16], a[19]); SQRADDAC(a[17], a[18]); SQRADDDB; 
8910
+   COMBA_STORE(b[35]);
8911
+
8912
+   /* output 36 */
8913
+   CARRY_FORWARD;
8914
+   SQRADDSC(a[5], a[31]); SQRADDAC(a[6], a[30]); SQRADDAC(a[7], a[29]); SQRADDAC(a[8], a[28]); SQRADDAC(a[9], a[27]); SQRADDAC(a[10], a[26]); SQRADDAC(a[11], a[25]); SQRADDAC(a[12], a[24]); SQRADDAC(a[13], a[23]); SQRADDAC(a[14], a[22]); SQRADDAC(a[15], a[21]); SQRADDAC(a[16], a[20]); SQRADDAC(a[17], a[19]); SQRADDDB; SQRADD(a[18], a[18]); 
8915
+   COMBA_STORE(b[36]);
8916
+
8917
+   /* output 37 */
8918
+   CARRY_FORWARD;
8919
+   SQRADDSC(a[6], a[31]); SQRADDAC(a[7], a[30]); SQRADDAC(a[8], a[29]); SQRADDAC(a[9], a[28]); SQRADDAC(a[10], a[27]); SQRADDAC(a[11], a[26]); SQRADDAC(a[12], a[25]); SQRADDAC(a[13], a[24]); SQRADDAC(a[14], a[23]); SQRADDAC(a[15], a[22]); SQRADDAC(a[16], a[21]); SQRADDAC(a[17], a[20]); SQRADDAC(a[18], a[19]); SQRADDDB; 
8920
+   COMBA_STORE(b[37]);
8921
+
8922
+   /* output 38 */
8923
+   CARRY_FORWARD;
8924
+   SQRADDSC(a[7], a[31]); SQRADDAC(a[8], a[30]); SQRADDAC(a[9], a[29]); SQRADDAC(a[10], a[28]); SQRADDAC(a[11], a[27]); SQRADDAC(a[12], a[26]); SQRADDAC(a[13], a[25]); SQRADDAC(a[14], a[24]); SQRADDAC(a[15], a[23]); SQRADDAC(a[16], a[22]); SQRADDAC(a[17], a[21]); SQRADDAC(a[18], a[20]); SQRADDDB; SQRADD(a[19], a[19]); 
8925
+   COMBA_STORE(b[38]);
8926
+
8927
+   /* output 39 */
8928
+   CARRY_FORWARD;
8929
+   SQRADDSC(a[8], a[31]); SQRADDAC(a[9], a[30]); SQRADDAC(a[10], a[29]); SQRADDAC(a[11], a[28]); SQRADDAC(a[12], a[27]); SQRADDAC(a[13], a[26]); SQRADDAC(a[14], a[25]); SQRADDAC(a[15], a[24]); SQRADDAC(a[16], a[23]); SQRADDAC(a[17], a[22]); SQRADDAC(a[18], a[21]); SQRADDAC(a[19], a[20]); SQRADDDB; 
8930
+   COMBA_STORE(b[39]);
8931
+
8932
+   /* output 40 */
8933
+   CARRY_FORWARD;
8934
+   SQRADDSC(a[9], a[31]); SQRADDAC(a[10], a[30]); SQRADDAC(a[11], a[29]); SQRADDAC(a[12], a[28]); SQRADDAC(a[13], a[27]); SQRADDAC(a[14], a[26]); SQRADDAC(a[15], a[25]); SQRADDAC(a[16], a[24]); SQRADDAC(a[17], a[23]); SQRADDAC(a[18], a[22]); SQRADDAC(a[19], a[21]); SQRADDDB; SQRADD(a[20], a[20]); 
8935
+   COMBA_STORE(b[40]);
8936
+
8937
+   /* output 41 */
8938
+   CARRY_FORWARD;
8939
+   SQRADDSC(a[10], a[31]); SQRADDAC(a[11], a[30]); SQRADDAC(a[12], a[29]); SQRADDAC(a[13], a[28]); SQRADDAC(a[14], a[27]); SQRADDAC(a[15], a[26]); SQRADDAC(a[16], a[25]); SQRADDAC(a[17], a[24]); SQRADDAC(a[18], a[23]); SQRADDAC(a[19], a[22]); SQRADDAC(a[20], a[21]); SQRADDDB; 
8940
+   COMBA_STORE(b[41]);
8941
+
8942
+   /* output 42 */
8943
+   CARRY_FORWARD;
8944
+   SQRADDSC(a[11], a[31]); SQRADDAC(a[12], a[30]); SQRADDAC(a[13], a[29]); SQRADDAC(a[14], a[28]); SQRADDAC(a[15], a[27]); SQRADDAC(a[16], a[26]); SQRADDAC(a[17], a[25]); SQRADDAC(a[18], a[24]); SQRADDAC(a[19], a[23]); SQRADDAC(a[20], a[22]); SQRADDDB; SQRADD(a[21], a[21]); 
8945
+   COMBA_STORE(b[42]);
8946
+
8947
+   /* output 43 */
8948
+   CARRY_FORWARD;
8949
+   SQRADDSC(a[12], a[31]); SQRADDAC(a[13], a[30]); SQRADDAC(a[14], a[29]); SQRADDAC(a[15], a[28]); SQRADDAC(a[16], a[27]); SQRADDAC(a[17], a[26]); SQRADDAC(a[18], a[25]); SQRADDAC(a[19], a[24]); SQRADDAC(a[20], a[23]); SQRADDAC(a[21], a[22]); SQRADDDB; 
8950
+   COMBA_STORE(b[43]);
8951
+
8952
+   /* output 44 */
8953
+   CARRY_FORWARD;
8954
+   SQRADDSC(a[13], a[31]); SQRADDAC(a[14], a[30]); SQRADDAC(a[15], a[29]); SQRADDAC(a[16], a[28]); SQRADDAC(a[17], a[27]); SQRADDAC(a[18], a[26]); SQRADDAC(a[19], a[25]); SQRADDAC(a[20], a[24]); SQRADDAC(a[21], a[23]); SQRADDDB; SQRADD(a[22], a[22]); 
8955
+   COMBA_STORE(b[44]);
8956
+
8957
+   /* output 45 */
8958
+   CARRY_FORWARD;
8959
+   SQRADDSC(a[14], a[31]); SQRADDAC(a[15], a[30]); SQRADDAC(a[16], a[29]); SQRADDAC(a[17], a[28]); SQRADDAC(a[18], a[27]); SQRADDAC(a[19], a[26]); SQRADDAC(a[20], a[25]); SQRADDAC(a[21], a[24]); SQRADDAC(a[22], a[23]); SQRADDDB; 
8960
+   COMBA_STORE(b[45]);
8961
+
8962
+   /* output 46 */
8963
+   CARRY_FORWARD;
8964
+   SQRADDSC(a[15], a[31]); SQRADDAC(a[16], a[30]); SQRADDAC(a[17], a[29]); SQRADDAC(a[18], a[28]); SQRADDAC(a[19], a[27]); SQRADDAC(a[20], a[26]); SQRADDAC(a[21], a[25]); SQRADDAC(a[22], a[24]); SQRADDDB; SQRADD(a[23], a[23]); 
8965
+   COMBA_STORE(b[46]);
8966
+
8967
+   /* output 47 */
8968
+   CARRY_FORWARD;
8969
+   SQRADDSC(a[16], a[31]); SQRADDAC(a[17], a[30]); SQRADDAC(a[18], a[29]); SQRADDAC(a[19], a[28]); SQRADDAC(a[20], a[27]); SQRADDAC(a[21], a[26]); SQRADDAC(a[22], a[25]); SQRADDAC(a[23], a[24]); SQRADDDB; 
8970
+   COMBA_STORE(b[47]);
8971
+
8972
+   /* output 48 */
8973
+   CARRY_FORWARD;
8974
+   SQRADDSC(a[17], a[31]); SQRADDAC(a[18], a[30]); SQRADDAC(a[19], a[29]); SQRADDAC(a[20], a[28]); SQRADDAC(a[21], a[27]); SQRADDAC(a[22], a[26]); SQRADDAC(a[23], a[25]); SQRADDDB; SQRADD(a[24], a[24]); 
8975
+   COMBA_STORE(b[48]);
8976
+
8977
+   /* output 49 */
8978
+   CARRY_FORWARD;
8979
+   SQRADDSC(a[18], a[31]); SQRADDAC(a[19], a[30]); SQRADDAC(a[20], a[29]); SQRADDAC(a[21], a[28]); SQRADDAC(a[22], a[27]); SQRADDAC(a[23], a[26]); SQRADDAC(a[24], a[25]); SQRADDDB; 
8980
+   COMBA_STORE(b[49]);
8981
+
8982
+   /* output 50 */
8983
+   CARRY_FORWARD;
8984
+   SQRADDSC(a[19], a[31]); SQRADDAC(a[20], a[30]); SQRADDAC(a[21], a[29]); SQRADDAC(a[22], a[28]); SQRADDAC(a[23], a[27]); SQRADDAC(a[24], a[26]); SQRADDDB; SQRADD(a[25], a[25]); 
8985
+   COMBA_STORE(b[50]);
8986
+
8987
+   /* output 51 */
8988
+   CARRY_FORWARD;
8989
+   SQRADDSC(a[20], a[31]); SQRADDAC(a[21], a[30]); SQRADDAC(a[22], a[29]); SQRADDAC(a[23], a[28]); SQRADDAC(a[24], a[27]); SQRADDAC(a[25], a[26]); SQRADDDB; 
8990
+   COMBA_STORE(b[51]);
8991
+
8992
+   /* output 52 */
8993
+   CARRY_FORWARD;
8994
+   SQRADDSC(a[21], a[31]); SQRADDAC(a[22], a[30]); SQRADDAC(a[23], a[29]); SQRADDAC(a[24], a[28]); SQRADDAC(a[25], a[27]); SQRADDDB; SQRADD(a[26], a[26]); 
8995
+   COMBA_STORE(b[52]);
8996
+
8997
+   /* output 53 */
8998
+   CARRY_FORWARD;
8999
+   SQRADDSC(a[22], a[31]); SQRADDAC(a[23], a[30]); SQRADDAC(a[24], a[29]); SQRADDAC(a[25], a[28]); SQRADDAC(a[26], a[27]); SQRADDDB; 
9000
+   COMBA_STORE(b[53]);
9001
+
9002
+   /* output 54 */
9003
+   CARRY_FORWARD;
9004
+   SQRADDSC(a[23], a[31]); SQRADDAC(a[24], a[30]); SQRADDAC(a[25], a[29]); SQRADDAC(a[26], a[28]); SQRADDDB; SQRADD(a[27], a[27]); 
9005
+   COMBA_STORE(b[54]);
9006
+
9007
+   /* output 55 */
9008
+   CARRY_FORWARD;
9009
+   SQRADDSC(a[24], a[31]); SQRADDAC(a[25], a[30]); SQRADDAC(a[26], a[29]); SQRADDAC(a[27], a[28]); SQRADDDB; 
9010
+   COMBA_STORE(b[55]);
9011
+
9012
+   /* output 56 */
9013
+   CARRY_FORWARD;
9014
+   SQRADDSC(a[25], a[31]); SQRADDAC(a[26], a[30]); SQRADDAC(a[27], a[29]); SQRADDDB; SQRADD(a[28], a[28]); 
9015
+   COMBA_STORE(b[56]);
9016
+
9017
+   /* output 57 */
9018
+   CARRY_FORWARD;
9019
+   SQRADDSC(a[26], a[31]); SQRADDAC(a[27], a[30]); SQRADDAC(a[28], a[29]); SQRADDDB; 
9020
+   COMBA_STORE(b[57]);
9021
+
9022
+   /* output 58 */
9023
+   CARRY_FORWARD;
9024
+   SQRADD2(a[27], a[31]); SQRADD2(a[28], a[30]); SQRADD(a[29], a[29]); 
9025
+   COMBA_STORE(b[58]);
9026
+
9027
+   /* output 59 */
9028
+   CARRY_FORWARD;
9029
+   SQRADD2(a[28], a[31]); SQRADD2(a[29], a[30]); 
9030
+   COMBA_STORE(b[59]);
9031
+
9032
+   /* output 60 */
9033
+   CARRY_FORWARD;
9034
+   SQRADD2(a[29], a[31]); SQRADD(a[30], a[30]); 
9035
+   COMBA_STORE(b[60]);
9036
+
9037
+   /* output 61 */
9038
+   CARRY_FORWARD;
9039
+   SQRADD2(a[30], a[31]); 
9040
+   COMBA_STORE(b[61]);
9041
+
9042
+   /* output 62 */
9043
+   CARRY_FORWARD;
9044
+   SQRADD(a[31], a[31]); 
9045
+   COMBA_STORE(b[62]);
9046
+   COMBA_STORE2(b[63]);
9047
+   COMBA_FINI;
9048
+
9049
+   B->used = 64;
9050
+   B->sign = FP_ZPOS;
9051
+   memcpy(B->dp, b, 64 * sizeof(fp_digit));
9052
+   fp_clamp(B);
7451 9053
 }
7452
-
7453 9054
 #endif
7454 9055
 
7455
-/* $Source: /cvs/libtom/libtommath/bn_mp_sqrt.c,v $ */
7456
-/* $Revision: 1.3 $ */
7457
-/* $Date: 2006/03/31 14:18:44 $ */
7458 9056
 
7459
-/* End: bn_mp_sqrt.c */
9057
+/* $Source: /cvs/libtom/tomsfastmath/src/sqr/fp_sqr_comba_32.c,v $ */
9058
+/* $Revision: 1.2 $ */
9059
+/* $Date: 2007/02/17 03:39:01 $ */
7460 9060
 
7461
-/* Start: bn_mp_sub.c */
7462
-#include <bignum.h>
7463
-#ifdef BN_MP_SUB_C
7464
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
7465
- *
7466
- * LibTomMath is a library that provides multiple-precision
7467
- * integer arithmetic as well as number theoretic functionality.
7468
- *
7469
- * The library was designed directly after the MPI library by
7470
- * Michael Fromberger but has been written from scratch with
7471
- * additional optimizations in place.
7472
- *
7473
- * The library is free for all purposes without any express
7474
- * guarantee it works.
7475
- *
7476
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
7477
- */
9061
+/* End: fp_sqr_comba_32.c */
7478 9062
 
7479
-/* high level subtraction (handles signs) */
7480
-int
7481
-mp_sub (mp_int * a, mp_int * b, mp_int * c)
7482
-{
7483
-  int     sa, sb, res;
9063
+/* Start: fp_sqr_comba_4.c */
9064
+#define TFM_DEFINES
9065
+#include "fp_sqr_comba.c"
7484 9066
 
7485
-  sa = a->sign;
7486
-  sb = b->sign;
7487
-
7488
-  if (sa != sb) {
7489
-    /* subtract a negative from a positive, OR */
7490
-    /* subtract a positive from a negative. */
7491
-    /* In either case, ADD their magnitudes, */
7492
-    /* and use the sign of the first number. */
7493
-    c->sign = sa;
7494
-    res = s_mp_add (a, b, c);
7495
-  } else {
7496
-    /* subtract a positive from a positive, OR */
7497
-    /* subtract a negative from a negative. */
7498
-    /* First, take the difference between their */
7499
-    /* magnitudes, then... */
7500
-    if (mp_cmp_mag (a, b) != MP_LT) {
7501
-      /* Copy the sign from the first */
7502
-      c->sign = sa;
7503
-      /* The first has a larger or equal magnitude */
7504
-      res = s_mp_sub (a, b, c);
7505
-    } else {
7506
-      /* The result has the *opposite* sign from */
7507
-      /* the first number. */
7508
-      c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
7509
-      /* The second has a larger magnitude */
7510
-      res = s_mp_sub (b, a, c);
7511
-    }
7512
-  }
7513
-  return res;
9067
+#ifdef TFM_SQR4
9068
+void fp_sqr_comba4(fp_int *A, fp_int *B)
9069
+{
9070
+   fp_digit *a, b[8], c0, c1, c2, sc0, sc1, sc2;
9071
+#ifdef TFM_ISO
9072
+   fp_word tt;
9073
+#endif
9074
+
9075
+   a = A->dp;
9076
+   COMBA_START; 
9077
+
9078
+   /* clear carries */
9079
+   CLEAR_CARRY;
9080
+
9081
+   /* output 0 */
9082
+   SQRADD(a[0],a[0]);
9083
+   COMBA_STORE(b[0]);
9084
+
9085
+   /* output 1 */
9086
+   CARRY_FORWARD;
9087
+   SQRADD2(a[0], a[1]); 
9088
+   COMBA_STORE(b[1]);
9089
+
9090
+   /* output 2 */
9091
+   CARRY_FORWARD;
9092
+   SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); 
9093
+   COMBA_STORE(b[2]);
9094
+
9095
+   /* output 3 */
9096
+   CARRY_FORWARD;
9097
+   SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); 
9098
+   COMBA_STORE(b[3]);
9099
+
9100
+   /* output 4 */
9101
+   CARRY_FORWARD;
9102
+   SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); 
9103
+   COMBA_STORE(b[4]);
9104
+
9105
+   /* output 5 */
9106
+   CARRY_FORWARD;
9107
+   SQRADD2(a[2], a[3]); 
9108
+   COMBA_STORE(b[5]);
9109
+
9110
+   /* output 6 */
9111
+   CARRY_FORWARD;
9112
+   SQRADD(a[3], a[3]); 
9113
+   COMBA_STORE(b[6]);
9114
+   COMBA_STORE2(b[7]);
9115
+   COMBA_FINI;
9116
+
9117
+   B->used = 8;
9118
+   B->sign = FP_ZPOS;
9119
+   memcpy(B->dp, b, 8 * sizeof(fp_digit));
9120
+   fp_clamp(B);
7514 9121
 }
7515
-
7516 9122
 #endif
7517 9123
 
7518
-/* $Source: /cvs/libtom/libtommath/bn_mp_sub.c,v $ */
7519
-/* $Revision: 1.3 $ */
7520
-/* $Date: 2006/03/31 14:18:44 $ */
7521
-
7522
-/* End: bn_mp_sub.c */
7523
-
7524
-/* Start: bn_mp_sub_d.c */
7525
-#include <bignum.h>
7526
-#ifdef BN_MP_SUB_D_C
7527
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
7528
- *
7529
- * LibTomMath is a library that provides multiple-precision
7530
- * integer arithmetic as well as number theoretic functionality.
7531
- *
7532
- * The library was designed directly after the MPI library by
7533
- * Michael Fromberger but has been written from scratch with
7534
- * additional optimizations in place.
7535
- *
7536
- * The library is free for all purposes without any express
7537
- * guarantee it works.
7538
- *
7539
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
7540
- */
7541
-
7542
-/* single digit subtraction */
7543
-int
7544
-mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
7545
-{
7546
-  mp_digit *tmpa, *tmpc, mu;
7547
-  int       res, ix, oldused;
7548
-
7549
-  /* grow c as required */
7550
-  if (c->alloc < a->used + 1) {
7551
-     if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
7552
-        return res;
7553
-     }
7554
-  }
7555 9124
 
7556
-  /* if a is negative just do an unsigned
7557
-   * addition [with fudged signs]
7558
-   */
7559
-  if (a->sign == MP_NEG) {
7560
-     a->sign = MP_ZPOS;
7561
-     res     = mp_add_d(a, b, c);
7562
-     a->sign = c->sign = MP_NEG;
9125
+/* $Source: /cvs/libtom/tomsfastmath/src/sqr/fp_sqr_comba_4.c,v $ */
9126
+/* $Revision: 1.2 $ */
9127
+/* $Date: 2007/02/17 03:39:01 $ */
7563 9128
 
7564
-     /* clamp */
7565
-     mp_clamp(c);
9129
+/* End: fp_sqr_comba_4.c */
7566 9130
 
7567
-     return res;
7568
-  }
9131
+/* Start: fp_sqr_comba_48.c */
9132
+#define TFM_DEFINES
9133
+#include "fp_sqr_comba.c"
7569 9134
 
7570
-  /* setup regs */
7571
-  oldused = c->used;
7572
-  tmpa    = a->dp;
7573
-  tmpc    = c->dp;
7574
-
7575
-  /* if a <= b simply fix the single digit */
7576
-  if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) {
7577
-     if (a->used == 1) {
7578
-        *tmpc++ = b - *tmpa;
7579
-     } else {
7580
-        *tmpc++ = b;
7581
-     }
7582
-     ix      = 1;
7583
-
7584
-     /* negative/1digit */
7585
-     c->sign = MP_NEG;
7586
-     c->used = 1;
7587
-  } else {
7588
-     /* positive/size */
7589
-     c->sign = MP_ZPOS;
7590
-     c->used = a->used;
7591
-
7592
-     /* subtract first digit */
7593
-     *tmpc    = *tmpa++ - b;
7594
-     mu       = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
7595
-     *tmpc++ &= MP_MASK;
7596
-
7597
-     /* handle rest of the digits */
7598
-     for (ix = 1; ix < a->used; ix++) {
7599
-        *tmpc    = *tmpa++ - mu;
7600
-        mu       = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
7601
-        *tmpc++ &= MP_MASK;
7602
-     }
7603
-  }
7604
-
7605
-  /* zero excess digits */
7606
-  while (ix++ < oldused) {
7607
-     *tmpc++ = 0;
7608
-  }
7609
-  mp_clamp(c);
7610
-  return MP_OKAY;
9135
+#ifdef TFM_SQR48
9136
+void fp_sqr_comba48(fp_int *A, fp_int *B)
9137
+{
9138
+   fp_digit *a, b[96], c0, c1, c2, sc0, sc1, sc2;
9139
+#ifdef TFM_ISO
9140
+   fp_word tt;
9141
+#endif
9142
+
9143
+   a = A->dp;
9144
+   COMBA_START; 
9145
+
9146
+   /* clear carries */
9147
+   CLEAR_CARRY;
9148
+
9149
+   /* output 0 */
9150
+   SQRADD(a[0],a[0]);
9151
+   COMBA_STORE(b[0]);
9152
+
9153
+   /* output 1 */
9154
+   CARRY_FORWARD;
9155
+   SQRADD2(a[0], a[1]); 
9156
+   COMBA_STORE(b[1]);
9157
+
9158
+   /* output 2 */
9159
+   CARRY_FORWARD;
9160
+   SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); 
9161
+   COMBA_STORE(b[2]);
9162
+
9163
+   /* output 3 */
9164
+   CARRY_FORWARD;
9165
+   SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); 
9166
+   COMBA_STORE(b[3]);
9167
+
9168
+   /* output 4 */
9169
+   CARRY_FORWARD;
9170
+   SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); 
9171
+   COMBA_STORE(b[4]);
9172
+
9173
+   /* output 5 */
9174
+   CARRY_FORWARD;
9175
+   SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; 
9176
+   COMBA_STORE(b[5]);
9177
+
9178
+   /* output 6 */
9179
+   CARRY_FORWARD;
9180
+   SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); 
9181
+   COMBA_STORE(b[6]);
9182
+
9183
+   /* output 7 */
9184
+   CARRY_FORWARD;
9185
+   SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; 
9186
+   COMBA_STORE(b[7]);
9187
+
9188
+   /* output 8 */
9189
+   CARRY_FORWARD;
9190
+   SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); 
9191
+   COMBA_STORE(b[8]);
9192
+
9193
+   /* output 9 */
9194
+   CARRY_FORWARD;
9195
+   SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; 
9196
+   COMBA_STORE(b[9]);
9197
+
9198
+   /* output 10 */
9199
+   CARRY_FORWARD;
9200
+   SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); 
9201
+   COMBA_STORE(b[10]);
9202
+
9203
+   /* output 11 */
9204
+   CARRY_FORWARD;
9205
+   SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; 
9206
+   COMBA_STORE(b[11]);
9207
+
9208
+   /* output 12 */
9209
+   CARRY_FORWARD;
9210
+   SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); 
9211
+   COMBA_STORE(b[12]);
9212
+
9213
+   /* output 13 */
9214
+   CARRY_FORWARD;
9215
+   SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; 
9216
+   COMBA_STORE(b[13]);
9217
+
9218
+   /* output 14 */
9219
+   CARRY_FORWARD;
9220
+   SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); 
9221
+   COMBA_STORE(b[14]);
9222
+
9223
+   /* output 15 */
9224
+   CARRY_FORWARD;
9225
+   SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; 
9226
+   COMBA_STORE(b[15]);
9227
+
9228
+   /* output 16 */
9229
+   CARRY_FORWARD;
9230
+   SQRADDSC(a[0], a[16]); SQRADDAC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); 
9231
+   COMBA_STORE(b[16]);
9232
+
9233
+   /* output 17 */
9234
+   CARRY_FORWARD;
9235
+   SQRADDSC(a[0], a[17]); SQRADDAC(a[1], a[16]); SQRADDAC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; 
9236
+   COMBA_STORE(b[17]);
9237
+
9238
+   /* output 18 */
9239
+   CARRY_FORWARD;
9240
+   SQRADDSC(a[0], a[18]); SQRADDAC(a[1], a[17]); SQRADDAC(a[2], a[16]); SQRADDAC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); 
9241
+   COMBA_STORE(b[18]);
9242
+
9243
+   /* output 19 */
9244
+   CARRY_FORWARD;
9245
+   SQRADDSC(a[0], a[19]); SQRADDAC(a[1], a[18]); SQRADDAC(a[2], a[17]); SQRADDAC(a[3], a[16]); SQRADDAC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; 
9246
+   COMBA_STORE(b[19]);
9247
+
9248
+   /* output 20 */
9249
+   CARRY_FORWARD;
9250
+   SQRADDSC(a[0], a[20]); SQRADDAC(a[1], a[19]); SQRADDAC(a[2], a[18]); SQRADDAC(a[3], a[17]); SQRADDAC(a[4], a[16]); SQRADDAC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]); 
9251
+   COMBA_STORE(b[20]);
9252
+
9253
+   /* output 21 */
9254
+   CARRY_FORWARD;
9255
+   SQRADDSC(a[0], a[21]); SQRADDAC(a[1], a[20]); SQRADDAC(a[2], a[19]); SQRADDAC(a[3], a[18]); SQRADDAC(a[4], a[17]); SQRADDAC(a[5], a[16]); SQRADDAC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB; 
9256
+   COMBA_STORE(b[21]);
9257
+
9258
+   /* output 22 */
9259
+   CARRY_FORWARD;
9260
+   SQRADDSC(a[0], a[22]); SQRADDAC(a[1], a[21]); SQRADDAC(a[2], a[20]); SQRADDAC(a[3], a[19]); SQRADDAC(a[4], a[18]); SQRADDAC(a[5], a[17]); SQRADDAC(a[6], a[16]); SQRADDAC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]); 
9261
+   COMBA_STORE(b[22]);
9262
+
9263
+   /* output 23 */
9264
+   CARRY_FORWARD;
9265
+   SQRADDSC(a[0], a[23]); SQRADDAC(a[1], a[22]); SQRADDAC(a[2], a[21]); SQRADDAC(a[3], a[20]); SQRADDAC(a[4], a[19]); SQRADDAC(a[5], a[18]); SQRADDAC(a[6], a[17]); SQRADDAC(a[7], a[16]); SQRADDAC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB; 
9266
+   COMBA_STORE(b[23]);
9267
+
9268
+   /* output 24 */
9269
+   CARRY_FORWARD;
9270
+   SQRADDSC(a[0], a[24]); SQRADDAC(a[1], a[23]); SQRADDAC(a[2], a[22]); SQRADDAC(a[3], a[21]); SQRADDAC(a[4], a[20]); SQRADDAC(a[5], a[19]); SQRADDAC(a[6], a[18]); SQRADDAC(a[7], a[17]); SQRADDAC(a[8], a[16]); SQRADDAC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]); 
9271
+   COMBA_STORE(b[24]);
9272
+
9273
+   /* output 25 */
9274
+   CARRY_FORWARD;
9275
+   SQRADDSC(a[0], a[25]); SQRADDAC(a[1], a[24]); SQRADDAC(a[2], a[23]); SQRADDAC(a[3], a[22]); SQRADDAC(a[4], a[21]); SQRADDAC(a[5], a[20]); SQRADDAC(a[6], a[19]); SQRADDAC(a[7], a[18]); SQRADDAC(a[8], a[17]); SQRADDAC(a[9], a[16]); SQRADDAC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB; 
9276
+   COMBA_STORE(b[25]);
9277
+
9278
+   /* output 26 */
9279
+   CARRY_FORWARD;
9280
+   SQRADDSC(a[0], a[26]); SQRADDAC(a[1], a[25]); SQRADDAC(a[2], a[24]); SQRADDAC(a[3], a[23]); SQRADDAC(a[4], a[22]); SQRADDAC(a[5], a[21]); SQRADDAC(a[6], a[20]); SQRADDAC(a[7], a[19]); SQRADDAC(a[8], a[18]); SQRADDAC(a[9], a[17]); SQRADDAC(a[10], a[16]); SQRADDAC(a[11], a[15]); SQRADDAC(a[12], a[14]); SQRADDDB; SQRADD(a[13], a[13]); 
9281
+   COMBA_STORE(b[26]);
9282
+
9283
+   /* output 27 */
9284
+   CARRY_FORWARD;
9285
+   SQRADDSC(a[0], a[27]); SQRADDAC(a[1], a[26]); SQRADDAC(a[2], a[25]); SQRADDAC(a[3], a[24]); SQRADDAC(a[4], a[23]); SQRADDAC(a[5], a[22]); SQRADDAC(a[6], a[21]); SQRADDAC(a[7], a[20]); SQRADDAC(a[8], a[19]); SQRADDAC(a[9], a[18]); SQRADDAC(a[10], a[17]); SQRADDAC(a[11], a[16]); SQRADDAC(a[12], a[15]); SQRADDAC(a[13], a[14]); SQRADDDB; 
9286
+   COMBA_STORE(b[27]);
9287
+
9288
+   /* output 28 */
9289
+   CARRY_FORWARD;
9290
+   SQRADDSC(a[0], a[28]); SQRADDAC(a[1], a[27]); SQRADDAC(a[2], a[26]); SQRADDAC(a[3], a[25]); SQRADDAC(a[4], a[24]); SQRADDAC(a[5], a[23]); SQRADDAC(a[6], a[22]); SQRADDAC(a[7], a[21]); SQRADDAC(a[8], a[20]); SQRADDAC(a[9], a[19]); SQRADDAC(a[10], a[18]); SQRADDAC(a[11], a[17]); SQRADDAC(a[12], a[16]); SQRADDAC(a[13], a[15]); SQRADDDB; SQRADD(a[14], a[14]); 
9291
+   COMBA_STORE(b[28]);
9292
+
9293
+   /* output 29 */
9294
+   CARRY_FORWARD;
9295
+   SQRADDSC(a[0], a[29]); SQRADDAC(a[1], a[28]); SQRADDAC(a[2], a[27]); SQRADDAC(a[3], a[26]); SQRADDAC(a[4], a[25]); SQRADDAC(a[5], a[24]); SQRADDAC(a[6], a[23]); SQRADDAC(a[7], a[22]); SQRADDAC(a[8], a[21]); SQRADDAC(a[9], a[20]); SQRADDAC(a[10], a[19]); SQRADDAC(a[11], a[18]); SQRADDAC(a[12], a[17]); SQRADDAC(a[13], a[16]); SQRADDAC(a[14], a[15]); SQRADDDB; 
9296
+   COMBA_STORE(b[29]);
9297
+
9298
+   /* output 30 */
9299
+   CARRY_FORWARD;
9300
+   SQRADDSC(a[0], a[30]); SQRADDAC(a[1], a[29]); SQRADDAC(a[2], a[28]); SQRADDAC(a[3], a[27]); SQRADDAC(a[4], a[26]); SQRADDAC(a[5], a[25]); SQRADDAC(a[6], a[24]); SQRADDAC(a[7], a[23]); SQRADDAC(a[8], a[22]); SQRADDAC(a[9], a[21]); SQRADDAC(a[10], a[20]); SQRADDAC(a[11], a[19]); SQRADDAC(a[12], a[18]); SQRADDAC(a[13], a[17]); SQRADDAC(a[14], a[16]); SQRADDDB; SQRADD(a[15], a[15]); 
9301
+   COMBA_STORE(b[30]);
9302
+
9303
+   /* output 31 */
9304
+   CARRY_FORWARD;
9305
+   SQRADDSC(a[0], a[31]); SQRADDAC(a[1], a[30]); SQRADDAC(a[2], a[29]); SQRADDAC(a[3], a[28]); SQRADDAC(a[4], a[27]); SQRADDAC(a[5], a[26]); SQRADDAC(a[6], a[25]); SQRADDAC(a[7], a[24]); SQRADDAC(a[8], a[23]); SQRADDAC(a[9], a[22]); SQRADDAC(a[10], a[21]); SQRADDAC(a[11], a[20]); SQRADDAC(a[12], a[19]); SQRADDAC(a[13], a[18]); SQRADDAC(a[14], a[17]); SQRADDAC(a[15], a[16]); SQRADDDB; 
9306
+   COMBA_STORE(b[31]);
9307
+
9308
+   /* output 32 */
9309
+   CARRY_FORWARD;
9310
+   SQRADDSC(a[0], a[32]); SQRADDAC(a[1], a[31]); SQRADDAC(a[2], a[30]); SQRADDAC(a[3], a[29]); SQRADDAC(a[4], a[28]); SQRADDAC(a[5], a[27]); SQRADDAC(a[6], a[26]); SQRADDAC(a[7], a[25]); SQRADDAC(a[8], a[24]); SQRADDAC(a[9], a[23]); SQRADDAC(a[10], a[22]); SQRADDAC(a[11], a[21]); SQRADDAC(a[12], a[20]); SQRADDAC(a[13], a[19]); SQRADDAC(a[14], a[18]); SQRADDAC(a[15], a[17]); SQRADDDB; SQRADD(a[16], a[16]); 
9311
+   COMBA_STORE(b[32]);
9312
+
9313
+   /* output 33 */
9314
+   CARRY_FORWARD;
9315
+   SQRADDSC(a[0], a[33]); SQRADDAC(a[1], a[32]); SQRADDAC(a[2], a[31]); SQRADDAC(a[3], a[30]); SQRADDAC(a[4], a[29]); SQRADDAC(a[5], a[28]); SQRADDAC(a[6], a[27]); SQRADDAC(a[7], a[26]); SQRADDAC(a[8], a[25]); SQRADDAC(a[9], a[24]); SQRADDAC(a[10], a[23]); SQRADDAC(a[11], a[22]); SQRADDAC(a[12], a[21]); SQRADDAC(a[13], a[20]); SQRADDAC(a[14], a[19]); SQRADDAC(a[15], a[18]); SQRADDAC(a[16], a[17]); SQRADDDB; 
9316
+   COMBA_STORE(b[33]);
9317
+
9318
+   /* output 34 */
9319
+   CARRY_FORWARD;
9320
+   SQRADDSC(a[0], a[34]); SQRADDAC(a[1], a[33]); SQRADDAC(a[2], a[32]); SQRADDAC(a[3], a[31]); SQRADDAC(a[4], a[30]); SQRADDAC(a[5], a[29]); SQRADDAC(a[6], a[28]); SQRADDAC(a[7], a[27]); SQRADDAC(a[8], a[26]); SQRADDAC(a[9], a[25]); SQRADDAC(a[10], a[24]); SQRADDAC(a[11], a[23]); SQRADDAC(a[12], a[22]); SQRADDAC(a[13], a[21]); SQRADDAC(a[14], a[20]); SQRADDAC(a[15], a[19]); SQRADDAC(a[16], a[18]); SQRADDDB; SQRADD(a[17], a[17]); 
9321
+   COMBA_STORE(b[34]);
9322
+
9323
+   /* output 35 */
9324
+   CARRY_FORWARD;
9325
+   SQRADDSC(a[0], a[35]); SQRADDAC(a[1], a[34]); SQRADDAC(a[2], a[33]); SQRADDAC(a[3], a[32]); SQRADDAC(a[4], a[31]); SQRADDAC(a[5], a[30]); SQRADDAC(a[6], a[29]); SQRADDAC(a[7], a[28]); SQRADDAC(a[8], a[27]); SQRADDAC(a[9], a[26]); SQRADDAC(a[10], a[25]); SQRADDAC(a[11], a[24]); SQRADDAC(a[12], a[23]); SQRADDAC(a[13], a[22]); SQRADDAC(a[14], a[21]); SQRADDAC(a[15], a[20]); SQRADDAC(a[16], a[19]); SQRADDAC(a[17], a[18]); SQRADDDB; 
9326
+   COMBA_STORE(b[35]);
9327
+
9328
+   /* output 36 */
9329
+   CARRY_FORWARD;
9330
+   SQRADDSC(a[0], a[36]); SQRADDAC(a[1], a[35]); SQRADDAC(a[2], a[34]); SQRADDAC(a[3], a[33]); SQRADDAC(a[4], a[32]); SQRADDAC(a[5], a[31]); SQRADDAC(a[6], a[30]); SQRADDAC(a[7], a[29]); SQRADDAC(a[8], a[28]); SQRADDAC(a[9], a[27]); SQRADDAC(a[10], a[26]); SQRADDAC(a[11], a[25]); SQRADDAC(a[12], a[24]); SQRADDAC(a[13], a[23]); SQRADDAC(a[14], a[22]); SQRADDAC(a[15], a[21]); SQRADDAC(a[16], a[20]); SQRADDAC(a[17], a[19]); SQRADDDB; SQRADD(a[18], a[18]); 
9331
+   COMBA_STORE(b[36]);
9332
+
9333
+   /* output 37 */
9334
+   CARRY_FORWARD;
9335
+   SQRADDSC(a[0], a[37]); SQRADDAC(a[1], a[36]); SQRADDAC(a[2], a[35]); SQRADDAC(a[3], a[34]); SQRADDAC(a[4], a[33]); SQRADDAC(a[5], a[32]); SQRADDAC(a[6], a[31]); SQRADDAC(a[7], a[30]); SQRADDAC(a[8], a[29]); SQRADDAC(a[9], a[28]); SQRADDAC(a[10], a[27]); SQRADDAC(a[11], a[26]); SQRADDAC(a[12], a[25]); SQRADDAC(a[13], a[24]); SQRADDAC(a[14], a[23]); SQRADDAC(a[15], a[22]); SQRADDAC(a[16], a[21]); SQRADDAC(a[17], a[20]); SQRADDAC(a[18], a[19]); SQRADDDB; 
9336
+   COMBA_STORE(b[37]);
9337
+
9338
+   /* output 38 */
9339
+   CARRY_FORWARD;
9340
+   SQRADDSC(a[0], a[38]); SQRADDAC(a[1], a[37]); SQRADDAC(a[2], a[36]); SQRADDAC(a[3], a[35]); SQRADDAC(a[4], a[34]); SQRADDAC(a[5], a[33]); SQRADDAC(a[6], a[32]); SQRADDAC(a[7], a[31]); SQRADDAC(a[8], a[30]); SQRADDAC(a[9], a[29]); SQRADDAC(a[10], a[28]); SQRADDAC(a[11], a[27]); SQRADDAC(a[12], a[26]); SQRADDAC(a[13], a[25]); SQRADDAC(a[14], a[24]); SQRADDAC(a[15], a[23]); SQRADDAC(a[16], a[22]); SQRADDAC(a[17], a[21]); SQRADDAC(a[18], a[20]); SQRADDDB; SQRADD(a[19], a[19]); 
9341
+   COMBA_STORE(b[38]);
9342
+
9343
+   /* output 39 */
9344
+   CARRY_FORWARD;
9345
+   SQRADDSC(a[0], a[39]); SQRADDAC(a[1], a[38]); SQRADDAC(a[2], a[37]); SQRADDAC(a[3], a[36]); SQRADDAC(a[4], a[35]); SQRADDAC(a[5], a[34]); SQRADDAC(a[6], a[33]); SQRADDAC(a[7], a[32]); SQRADDAC(a[8], a[31]); SQRADDAC(a[9], a[30]); SQRADDAC(a[10], a[29]); SQRADDAC(a[11], a[28]); SQRADDAC(a[12], a[27]); SQRADDAC(a[13], a[26]); SQRADDAC(a[14], a[25]); SQRADDAC(a[15], a[24]); SQRADDAC(a[16], a[23]); SQRADDAC(a[17], a[22]); SQRADDAC(a[18], a[21]); SQRADDAC(a[19], a[20]); SQRADDDB; 
9346
+   COMBA_STORE(b[39]);
9347
+
9348
+   /* output 40 */
9349
+   CARRY_FORWARD;
9350
+   SQRADDSC(a[0], a[40]); SQRADDAC(a[1], a[39]); SQRADDAC(a[2], a[38]); SQRADDAC(a[3], a[37]); SQRADDAC(a[4], a[36]); SQRADDAC(a[5], a[35]); SQRADDAC(a[6], a[34]); SQRADDAC(a[7], a[33]); SQRADDAC(a[8], a[32]); SQRADDAC(a[9], a[31]); SQRADDAC(a[10], a[30]); SQRADDAC(a[11], a[29]); SQRADDAC(a[12], a[28]); SQRADDAC(a[13], a[27]); SQRADDAC(a[14], a[26]); SQRADDAC(a[15], a[25]); SQRADDAC(a[16], a[24]); SQRADDAC(a[17], a[23]); SQRADDAC(a[18], a[22]); SQRADDAC(a[19], a[21]); SQRADDDB; SQRADD(a[20], a[20]); 
9351
+   COMBA_STORE(b[40]);
9352
+
9353
+   /* output 41 */
9354
+   CARRY_FORWARD;
9355
+   SQRADDSC(a[0], a[41]); SQRADDAC(a[1], a[40]); SQRADDAC(a[2], a[39]); SQRADDAC(a[3], a[38]); SQRADDAC(a[4], a[37]); SQRADDAC(a[5], a[36]); SQRADDAC(a[6], a[35]); SQRADDAC(a[7], a[34]); SQRADDAC(a[8], a[33]); SQRADDAC(a[9], a[32]); SQRADDAC(a[10], a[31]); SQRADDAC(a[11], a[30]); SQRADDAC(a[12], a[29]); SQRADDAC(a[13], a[28]); SQRADDAC(a[14], a[27]); SQRADDAC(a[15], a[26]); SQRADDAC(a[16], a[25]); SQRADDAC(a[17], a[24]); SQRADDAC(a[18], a[23]); SQRADDAC(a[19], a[22]); SQRADDAC(a[20], a[21]); SQRADDDB; 
9356
+   COMBA_STORE(b[41]);
9357
+
9358
+   /* output 42 */
9359
+   CARRY_FORWARD;
9360
+   SQRADDSC(a[0], a[42]); SQRADDAC(a[1], a[41]); SQRADDAC(a[2], a[40]); SQRADDAC(a[3], a[39]); SQRADDAC(a[4], a[38]); SQRADDAC(a[5], a[37]); SQRADDAC(a[6], a[36]); SQRADDAC(a[7], a[35]); SQRADDAC(a[8], a[34]); SQRADDAC(a[9], a[33]); SQRADDAC(a[10], a[32]); SQRADDAC(a[11], a[31]); SQRADDAC(a[12], a[30]); SQRADDAC(a[13], a[29]); SQRADDAC(a[14], a[28]); SQRADDAC(a[15], a[27]); SQRADDAC(a[16], a[26]); SQRADDAC(a[17], a[25]); SQRADDAC(a[18], a[24]); SQRADDAC(a[19], a[23]); SQRADDAC(a[20], a[22]); SQRADDDB; SQRADD(a[21], a[21]); 
9361
+   COMBA_STORE(b[42]);
9362
+
9363
+   /* output 43 */
9364
+   CARRY_FORWARD;
9365
+   SQRADDSC(a[0], a[43]); SQRADDAC(a[1], a[42]); SQRADDAC(a[2], a[41]); SQRADDAC(a[3], a[40]); SQRADDAC(a[4], a[39]); SQRADDAC(a[5], a[38]); SQRADDAC(a[6], a[37]); SQRADDAC(a[7], a[36]); SQRADDAC(a[8], a[35]); SQRADDAC(a[9], a[34]); SQRADDAC(a[10], a[33]); SQRADDAC(a[11], a[32]); SQRADDAC(a[12], a[31]); SQRADDAC(a[13], a[30]); SQRADDAC(a[14], a[29]); SQRADDAC(a[15], a[28]); SQRADDAC(a[16], a[27]); SQRADDAC(a[17], a[26]); SQRADDAC(a[18], a[25]); SQRADDAC(a[19], a[24]); SQRADDAC(a[20], a[23]); SQRADDAC(a[21], a[22]); SQRADDDB; 
9366
+   COMBA_STORE(b[43]);
9367
+
9368
+   /* output 44 */
9369
+   CARRY_FORWARD;
9370
+   SQRADDSC(a[0], a[44]); SQRADDAC(a[1], a[43]); SQRADDAC(a[2], a[42]); SQRADDAC(a[3], a[41]); SQRADDAC(a[4], a[40]); SQRADDAC(a[5], a[39]); SQRADDAC(a[6], a[38]); SQRADDAC(a[7], a[37]); SQRADDAC(a[8], a[36]); SQRADDAC(a[9], a[35]); SQRADDAC(a[10], a[34]); SQRADDAC(a[11], a[33]); SQRADDAC(a[12], a[32]); SQRADDAC(a[13], a[31]); SQRADDAC(a[14], a[30]); SQRADDAC(a[15], a[29]); SQRADDAC(a[16], a[28]); SQRADDAC(a[17], a[27]); SQRADDAC(a[18], a[26]); SQRADDAC(a[19], a[25]); SQRADDAC(a[20], a[24]); SQRADDAC(a[21], a[23]); SQRADDDB; SQRADD(a[22], a[22]); 
9371
+   COMBA_STORE(b[44]);
9372
+
9373
+   /* output 45 */
9374
+   CARRY_FORWARD;
9375
+   SQRADDSC(a[0], a[45]); SQRADDAC(a[1], a[44]); SQRADDAC(a[2], a[43]); SQRADDAC(a[3], a[42]); SQRADDAC(a[4], a[41]); SQRADDAC(a[5], a[40]); SQRADDAC(a[6], a[39]); SQRADDAC(a[7], a[38]); SQRADDAC(a[8], a[37]); SQRADDAC(a[9], a[36]); SQRADDAC(a[10], a[35]); SQRADDAC(a[11], a[34]); SQRADDAC(a[12], a[33]); SQRADDAC(a[13], a[32]); SQRADDAC(a[14], a[31]); SQRADDAC(a[15], a[30]); SQRADDAC(a[16], a[29]); SQRADDAC(a[17], a[28]); SQRADDAC(a[18], a[27]); SQRADDAC(a[19], a[26]); SQRADDAC(a[20], a[25]); SQRADDAC(a[21], a[24]); SQRADDAC(a[22], a[23]); SQRADDDB; 
9376
+   COMBA_STORE(b[45]);
9377
+
9378
+   /* output 46 */
9379
+   CARRY_FORWARD;
9380
+   SQRADDSC(a[0], a[46]); SQRADDAC(a[1], a[45]); SQRADDAC(a[2], a[44]); SQRADDAC(a[3], a[43]); SQRADDAC(a[4], a[42]); SQRADDAC(a[5], a[41]); SQRADDAC(a[6], a[40]); SQRADDAC(a[7], a[39]); SQRADDAC(a[8], a[38]); SQRADDAC(a[9], a[37]); SQRADDAC(a[10], a[36]); SQRADDAC(a[11], a[35]); SQRADDAC(a[12], a[34]); SQRADDAC(a[13], a[33]); SQRADDAC(a[14], a[32]); SQRADDAC(a[15], a[31]); SQRADDAC(a[16], a[30]); SQRADDAC(a[17], a[29]); SQRADDAC(a[18], a[28]); SQRADDAC(a[19], a[27]); SQRADDAC(a[20], a[26]); SQRADDAC(a[21], a[25]); SQRADDAC(a[22], a[24]); SQRADDDB; SQRADD(a[23], a[23]); 
9381
+   COMBA_STORE(b[46]);
9382
+
9383
+   /* output 47 */
9384
+   CARRY_FORWARD;
9385
+   SQRADDSC(a[0], a[47]); SQRADDAC(a[1], a[46]); SQRADDAC(a[2], a[45]); SQRADDAC(a[3], a[44]); SQRADDAC(a[4], a[43]); SQRADDAC(a[5], a[42]); SQRADDAC(a[6], a[41]); SQRADDAC(a[7], a[40]); SQRADDAC(a[8], a[39]); SQRADDAC(a[9], a[38]); SQRADDAC(a[10], a[37]); SQRADDAC(a[11], a[36]); SQRADDAC(a[12], a[35]); SQRADDAC(a[13], a[34]); SQRADDAC(a[14], a[33]); SQRADDAC(a[15], a[32]); SQRADDAC(a[16], a[31]); SQRADDAC(a[17], a[30]); SQRADDAC(a[18], a[29]); SQRADDAC(a[19], a[28]); SQRADDAC(a[20], a[27]); SQRADDAC(a[21], a[26]); SQRADDAC(a[22], a[25]); SQRADDAC(a[23], a[24]); SQRADDDB; 
9386
+   COMBA_STORE(b[47]);
9387
+
9388
+   /* output 48 */
9389
+   CARRY_FORWARD;
9390
+   SQRADDSC(a[1], a[47]); SQRADDAC(a[2], a[46]); SQRADDAC(a[3], a[45]); SQRADDAC(a[4], a[44]); SQRADDAC(a[5], a[43]); SQRADDAC(a[6], a[42]); SQRADDAC(a[7], a[41]); SQRADDAC(a[8], a[40]); SQRADDAC(a[9], a[39]); SQRADDAC(a[10], a[38]); SQRADDAC(a[11], a[37]); SQRADDAC(a[12], a[36]); SQRADDAC(a[13], a[35]); SQRADDAC(a[14], a[34]); SQRADDAC(a[15], a[33]); SQRADDAC(a[16], a[32]); SQRADDAC(a[17], a[31]); SQRADDAC(a[18], a[30]); SQRADDAC(a[19], a[29]); SQRADDAC(a[20], a[28]); SQRADDAC(a[21], a[27]); SQRADDAC(a[22], a[26]); SQRADDAC(a[23], a[25]); SQRADDDB; SQRADD(a[24], a[24]); 
9391
+   COMBA_STORE(b[48]);
9392
+
9393
+   /* output 49 */
9394
+   CARRY_FORWARD;
9395
+   SQRADDSC(a[2], a[47]); SQRADDAC(a[3], a[46]); SQRADDAC(a[4], a[45]); SQRADDAC(a[5], a[44]); SQRADDAC(a[6], a[43]); SQRADDAC(a[7], a[42]); SQRADDAC(a[8], a[41]); SQRADDAC(a[9], a[40]); SQRADDAC(a[10], a[39]); SQRADDAC(a[11], a[38]); SQRADDAC(a[12], a[37]); SQRADDAC(a[13], a[36]); SQRADDAC(a[14], a[35]); SQRADDAC(a[15], a[34]); SQRADDAC(a[16], a[33]); SQRADDAC(a[17], a[32]); SQRADDAC(a[18], a[31]); SQRADDAC(a[19], a[30]); SQRADDAC(a[20], a[29]); SQRADDAC(a[21], a[28]); SQRADDAC(a[22], a[27]); SQRADDAC(a[23], a[26]); SQRADDAC(a[24], a[25]); SQRADDDB; 
9396
+   COMBA_STORE(b[49]);
9397
+
9398
+   /* output 50 */
9399
+   CARRY_FORWARD;
9400
+   SQRADDSC(a[3], a[47]); SQRADDAC(a[4], a[46]); SQRADDAC(a[5], a[45]); SQRADDAC(a[6], a[44]); SQRADDAC(a[7], a[43]); SQRADDAC(a[8], a[42]); SQRADDAC(a[9], a[41]); SQRADDAC(a[10], a[40]); SQRADDAC(a[11], a[39]); SQRADDAC(a[12], a[38]); SQRADDAC(a[13], a[37]); SQRADDAC(a[14], a[36]); SQRADDAC(a[15], a[35]); SQRADDAC(a[16], a[34]); SQRADDAC(a[17], a[33]); SQRADDAC(a[18], a[32]); SQRADDAC(a[19], a[31]); SQRADDAC(a[20], a[30]); SQRADDAC(a[21], a[29]); SQRADDAC(a[22], a[28]); SQRADDAC(a[23], a[27]); SQRADDAC(a[24], a[26]); SQRADDDB; SQRADD(a[25], a[25]); 
9401
+   COMBA_STORE(b[50]);
9402
+
9403
+   /* output 51 */
9404
+   CARRY_FORWARD;
9405
+   SQRADDSC(a[4], a[47]); SQRADDAC(a[5], a[46]); SQRADDAC(a[6], a[45]); SQRADDAC(a[7], a[44]); SQRADDAC(a[8], a[43]); SQRADDAC(a[9], a[42]); SQRADDAC(a[10], a[41]); SQRADDAC(a[11], a[40]); SQRADDAC(a[12], a[39]); SQRADDAC(a[13], a[38]); SQRADDAC(a[14], a[37]); SQRADDAC(a[15], a[36]); SQRADDAC(a[16], a[35]); SQRADDAC(a[17], a[34]); SQRADDAC(a[18], a[33]); SQRADDAC(a[19], a[32]); SQRADDAC(a[20], a[31]); SQRADDAC(a[21], a[30]); SQRADDAC(a[22], a[29]); SQRADDAC(a[23], a[28]); SQRADDAC(a[24], a[27]); SQRADDAC(a[25], a[26]); SQRADDDB; 
9406
+   COMBA_STORE(b[51]);
9407
+
9408
+   /* output 52 */
9409
+   CARRY_FORWARD;
9410
+   SQRADDSC(a[5], a[47]); SQRADDAC(a[6], a[46]); SQRADDAC(a[7], a[45]); SQRADDAC(a[8], a[44]); SQRADDAC(a[9], a[43]); SQRADDAC(a[10], a[42]); SQRADDAC(a[11], a[41]); SQRADDAC(a[12], a[40]); SQRADDAC(a[13], a[39]); SQRADDAC(a[14], a[38]); SQRADDAC(a[15], a[37]); SQRADDAC(a[16], a[36]); SQRADDAC(a[17], a[35]); SQRADDAC(a[18], a[34]); SQRADDAC(a[19], a[33]); SQRADDAC(a[20], a[32]); SQRADDAC(a[21], a[31]); SQRADDAC(a[22], a[30]); SQRADDAC(a[23], a[29]); SQRADDAC(a[24], a[28]); SQRADDAC(a[25], a[27]); SQRADDDB; SQRADD(a[26], a[26]); 
9411
+   COMBA_STORE(b[52]);
9412
+
9413
+   /* output 53 */
9414
+   CARRY_FORWARD;
9415
+   SQRADDSC(a[6], a[47]); SQRADDAC(a[7], a[46]); SQRADDAC(a[8], a[45]); SQRADDAC(a[9], a[44]); SQRADDAC(a[10], a[43]); SQRADDAC(a[11], a[42]); SQRADDAC(a[12], a[41]); SQRADDAC(a[13], a[40]); SQRADDAC(a[14], a[39]); SQRADDAC(a[15], a[38]); SQRADDAC(a[16], a[37]); SQRADDAC(a[17], a[36]); SQRADDAC(a[18], a[35]); SQRADDAC(a[19], a[34]); SQRADDAC(a[20], a[33]); SQRADDAC(a[21], a[32]); SQRADDAC(a[22], a[31]); SQRADDAC(a[23], a[30]); SQRADDAC(a[24], a[29]); SQRADDAC(a[25], a[28]); SQRADDAC(a[26], a[27]); SQRADDDB; 
9416
+   COMBA_STORE(b[53]);
9417
+
9418
+   /* output 54 */
9419
+   CARRY_FORWARD;
9420
+   SQRADDSC(a[7], a[47]); SQRADDAC(a[8], a[46]); SQRADDAC(a[9], a[45]); SQRADDAC(a[10], a[44]); SQRADDAC(a[11], a[43]); SQRADDAC(a[12], a[42]); SQRADDAC(a[13], a[41]); SQRADDAC(a[14], a[40]); SQRADDAC(a[15], a[39]); SQRADDAC(a[16], a[38]); SQRADDAC(a[17], a[37]); SQRADDAC(a[18], a[36]); SQRADDAC(a[19], a[35]); SQRADDAC(a[20], a[34]); SQRADDAC(a[21], a[33]); SQRADDAC(a[22], a[32]); SQRADDAC(a[23], a[31]); SQRADDAC(a[24], a[30]); SQRADDAC(a[25], a[29]); SQRADDAC(a[26], a[28]); SQRADDDB; SQRADD(a[27], a[27]); 
9421
+   COMBA_STORE(b[54]);
9422
+
9423
+   /* output 55 */
9424
+   CARRY_FORWARD;
9425
+   SQRADDSC(a[8], a[47]); SQRADDAC(a[9], a[46]); SQRADDAC(a[10], a[45]); SQRADDAC(a[11], a[44]); SQRADDAC(a[12], a[43]); SQRADDAC(a[13], a[42]); SQRADDAC(a[14], a[41]); SQRADDAC(a[15], a[40]); SQRADDAC(a[16], a[39]); SQRADDAC(a[17], a[38]); SQRADDAC(a[18], a[37]); SQRADDAC(a[19], a[36]); SQRADDAC(a[20], a[35]); SQRADDAC(a[21], a[34]); SQRADDAC(a[22], a[33]); SQRADDAC(a[23], a[32]); SQRADDAC(a[24], a[31]); SQRADDAC(a[25], a[30]); SQRADDAC(a[26], a[29]); SQRADDAC(a[27], a[28]); SQRADDDB; 
9426
+   COMBA_STORE(b[55]);
9427
+
9428
+   /* output 56 */
9429
+   CARRY_FORWARD;
9430
+   SQRADDSC(a[9], a[47]); SQRADDAC(a[10], a[46]); SQRADDAC(a[11], a[45]); SQRADDAC(a[12], a[44]); SQRADDAC(a[13], a[43]); SQRADDAC(a[14], a[42]); SQRADDAC(a[15], a[41]); SQRADDAC(a[16], a[40]); SQRADDAC(a[17], a[39]); SQRADDAC(a[18], a[38]); SQRADDAC(a[19], a[37]); SQRADDAC(a[20], a[36]); SQRADDAC(a[21], a[35]); SQRADDAC(a[22], a[34]); SQRADDAC(a[23], a[33]); SQRADDAC(a[24], a[32]); SQRADDAC(a[25], a[31]); SQRADDAC(a[26], a[30]); SQRADDAC(a[27], a[29]); SQRADDDB; SQRADD(a[28], a[28]); 
9431
+   COMBA_STORE(b[56]);
9432
+
9433
+   /* output 57 */
9434
+   CARRY_FORWARD;
9435
+   SQRADDSC(a[10], a[47]); SQRADDAC(a[11], a[46]); SQRADDAC(a[12], a[45]); SQRADDAC(a[13], a[44]); SQRADDAC(a[14], a[43]); SQRADDAC(a[15], a[42]); SQRADDAC(a[16], a[41]); SQRADDAC(a[17], a[40]); SQRADDAC(a[18], a[39]); SQRADDAC(a[19], a[38]); SQRADDAC(a[20], a[37]); SQRADDAC(a[21], a[36]); SQRADDAC(a[22], a[35]); SQRADDAC(a[23], a[34]); SQRADDAC(a[24], a[33]); SQRADDAC(a[25], a[32]); SQRADDAC(a[26], a[31]); SQRADDAC(a[27], a[30]); SQRADDAC(a[28], a[29]); SQRADDDB; 
9436
+   COMBA_STORE(b[57]);
9437
+
9438
+   /* output 58 */
9439
+   CARRY_FORWARD;
9440
+   SQRADDSC(a[11], a[47]); SQRADDAC(a[12], a[46]); SQRADDAC(a[13], a[45]); SQRADDAC(a[14], a[44]); SQRADDAC(a[15], a[43]); SQRADDAC(a[16], a[42]); SQRADDAC(a[17], a[41]); SQRADDAC(a[18], a[40]); SQRADDAC(a[19], a[39]); SQRADDAC(a[20], a[38]); SQRADDAC(a[21], a[37]); SQRADDAC(a[22], a[36]); SQRADDAC(a[23], a[35]); SQRADDAC(a[24], a[34]); SQRADDAC(a[25], a[33]); SQRADDAC(a[26], a[32]); SQRADDAC(a[27], a[31]); SQRADDAC(a[28], a[30]); SQRADDDB; SQRADD(a[29], a[29]); 
9441
+   COMBA_STORE(b[58]);
9442
+
9443
+   /* output 59 */
9444
+   CARRY_FORWARD;
9445
+   SQRADDSC(a[12], a[47]); SQRADDAC(a[13], a[46]); SQRADDAC(a[14], a[45]); SQRADDAC(a[15], a[44]); SQRADDAC(a[16], a[43]); SQRADDAC(a[17], a[42]); SQRADDAC(a[18], a[41]); SQRADDAC(a[19], a[40]); SQRADDAC(a[20], a[39]); SQRADDAC(a[21], a[38]); SQRADDAC(a[22], a[37]); SQRADDAC(a[23], a[36]); SQRADDAC(a[24], a[35]); SQRADDAC(a[25], a[34]); SQRADDAC(a[26], a[33]); SQRADDAC(a[27], a[32]); SQRADDAC(a[28], a[31]); SQRADDAC(a[29], a[30]); SQRADDDB; 
9446
+   COMBA_STORE(b[59]);
9447
+
9448
+   /* output 60 */
9449
+   CARRY_FORWARD;
9450
+   SQRADDSC(a[13], a[47]); SQRADDAC(a[14], a[46]); SQRADDAC(a[15], a[45]); SQRADDAC(a[16], a[44]); SQRADDAC(a[17], a[43]); SQRADDAC(a[18], a[42]); SQRADDAC(a[19], a[41]); SQRADDAC(a[20], a[40]); SQRADDAC(a[21], a[39]); SQRADDAC(a[22], a[38]); SQRADDAC(a[23], a[37]); SQRADDAC(a[24], a[36]); SQRADDAC(a[25], a[35]); SQRADDAC(a[26], a[34]); SQRADDAC(a[27], a[33]); SQRADDAC(a[28], a[32]); SQRADDAC(a[29], a[31]); SQRADDDB; SQRADD(a[30], a[30]); 
9451
+   COMBA_STORE(b[60]);
9452
+
9453
+   /* output 61 */
9454
+   CARRY_FORWARD;
9455
+   SQRADDSC(a[14], a[47]); SQRADDAC(a[15], a[46]); SQRADDAC(a[16], a[45]); SQRADDAC(a[17], a[44]); SQRADDAC(a[18], a[43]); SQRADDAC(a[19], a[42]); SQRADDAC(a[20], a[41]); SQRADDAC(a[21], a[40]); SQRADDAC(a[22], a[39]); SQRADDAC(a[23], a[38]); SQRADDAC(a[24], a[37]); SQRADDAC(a[25], a[36]); SQRADDAC(a[26], a[35]); SQRADDAC(a[27], a[34]); SQRADDAC(a[28], a[33]); SQRADDAC(a[29], a[32]); SQRADDAC(a[30], a[31]); SQRADDDB; 
9456
+   COMBA_STORE(b[61]);
9457
+
9458
+   /* output 62 */
9459
+   CARRY_FORWARD;
9460
+   SQRADDSC(a[15], a[47]); SQRADDAC(a[16], a[46]); SQRADDAC(a[17], a[45]); SQRADDAC(a[18], a[44]); SQRADDAC(a[19], a[43]); SQRADDAC(a[20], a[42]); SQRADDAC(a[21], a[41]); SQRADDAC(a[22], a[40]); SQRADDAC(a[23], a[39]); SQRADDAC(a[24], a[38]); SQRADDAC(a[25], a[37]); SQRADDAC(a[26], a[36]); SQRADDAC(a[27], a[35]); SQRADDAC(a[28], a[34]); SQRADDAC(a[29], a[33]); SQRADDAC(a[30], a[32]); SQRADDDB; SQRADD(a[31], a[31]); 
9461
+   COMBA_STORE(b[62]);
9462
+
9463
+   /* output 63 */
9464
+   CARRY_FORWARD;
9465
+   SQRADDSC(a[16], a[47]); SQRADDAC(a[17], a[46]); SQRADDAC(a[18], a[45]); SQRADDAC(a[19], a[44]); SQRADDAC(a[20], a[43]); SQRADDAC(a[21], a[42]); SQRADDAC(a[22], a[41]); SQRADDAC(a[23], a[40]); SQRADDAC(a[24], a[39]); SQRADDAC(a[25], a[38]); SQRADDAC(a[26], a[37]); SQRADDAC(a[27], a[36]); SQRADDAC(a[28], a[35]); SQRADDAC(a[29], a[34]); SQRADDAC(a[30], a[33]); SQRADDAC(a[31], a[32]); SQRADDDB; 
9466
+   COMBA_STORE(b[63]);
9467
+
9468
+   /* output 64 */
9469
+   CARRY_FORWARD;
9470
+   SQRADDSC(a[17], a[47]); SQRADDAC(a[18], a[46]); SQRADDAC(a[19], a[45]); SQRADDAC(a[20], a[44]); SQRADDAC(a[21], a[43]); SQRADDAC(a[22], a[42]); SQRADDAC(a[23], a[41]); SQRADDAC(a[24], a[40]); SQRADDAC(a[25], a[39]); SQRADDAC(a[26], a[38]); SQRADDAC(a[27], a[37]); SQRADDAC(a[28], a[36]); SQRADDAC(a[29], a[35]); SQRADDAC(a[30], a[34]); SQRADDAC(a[31], a[33]); SQRADDDB; SQRADD(a[32], a[32]); 
9471
+   COMBA_STORE(b[64]);
9472
+
9473
+   /* output 65 */
9474
+   CARRY_FORWARD;
9475
+   SQRADDSC(a[18], a[47]); SQRADDAC(a[19], a[46]); SQRADDAC(a[20], a[45]); SQRADDAC(a[21], a[44]); SQRADDAC(a[22], a[43]); SQRADDAC(a[23], a[42]); SQRADDAC(a[24], a[41]); SQRADDAC(a[25], a[40]); SQRADDAC(a[26], a[39]); SQRADDAC(a[27], a[38]); SQRADDAC(a[28], a[37]); SQRADDAC(a[29], a[36]); SQRADDAC(a[30], a[35]); SQRADDAC(a[31], a[34]); SQRADDAC(a[32], a[33]); SQRADDDB; 
9476
+   COMBA_STORE(b[65]);
9477
+
9478
+   /* output 66 */
9479
+   CARRY_FORWARD;
9480
+   SQRADDSC(a[19], a[47]); SQRADDAC(a[20], a[46]); SQRADDAC(a[21], a[45]); SQRADDAC(a[22], a[44]); SQRADDAC(a[23], a[43]); SQRADDAC(a[24], a[42]); SQRADDAC(a[25], a[41]); SQRADDAC(a[26], a[40]); SQRADDAC(a[27], a[39]); SQRADDAC(a[28], a[38]); SQRADDAC(a[29], a[37]); SQRADDAC(a[30], a[36]); SQRADDAC(a[31], a[35]); SQRADDAC(a[32], a[34]); SQRADDDB; SQRADD(a[33], a[33]); 
9481
+   COMBA_STORE(b[66]);
9482
+
9483
+   /* output 67 */
9484
+   CARRY_FORWARD;
9485
+   SQRADDSC(a[20], a[47]); SQRADDAC(a[21], a[46]); SQRADDAC(a[22], a[45]); SQRADDAC(a[23], a[44]); SQRADDAC(a[24], a[43]); SQRADDAC(a[25], a[42]); SQRADDAC(a[26], a[41]); SQRADDAC(a[27], a[40]); SQRADDAC(a[28], a[39]); SQRADDAC(a[29], a[38]); SQRADDAC(a[30], a[37]); SQRADDAC(a[31], a[36]); SQRADDAC(a[32], a[35]); SQRADDAC(a[33], a[34]); SQRADDDB; 
9486
+   COMBA_STORE(b[67]);
9487
+
9488
+   /* output 68 */
9489
+   CARRY_FORWARD;
9490
+   SQRADDSC(a[21], a[47]); SQRADDAC(a[22], a[46]); SQRADDAC(a[23], a[45]); SQRADDAC(a[24], a[44]); SQRADDAC(a[25], a[43]); SQRADDAC(a[26], a[42]); SQRADDAC(a[27], a[41]); SQRADDAC(a[28], a[40]); SQRADDAC(a[29], a[39]); SQRADDAC(a[30], a[38]); SQRADDAC(a[31], a[37]); SQRADDAC(a[32], a[36]); SQRADDAC(a[33], a[35]); SQRADDDB; SQRADD(a[34], a[34]); 
9491
+   COMBA_STORE(b[68]);
9492
+
9493
+   /* output 69 */
9494
+   CARRY_FORWARD;
9495
+   SQRADDSC(a[22], a[47]); SQRADDAC(a[23], a[46]); SQRADDAC(a[24], a[45]); SQRADDAC(a[25], a[44]); SQRADDAC(a[26], a[43]); SQRADDAC(a[27], a[42]); SQRADDAC(a[28], a[41]); SQRADDAC(a[29], a[40]); SQRADDAC(a[30], a[39]); SQRADDAC(a[31], a[38]); SQRADDAC(a[32], a[37]); SQRADDAC(a[33], a[36]); SQRADDAC(a[34], a[35]); SQRADDDB; 
9496
+   COMBA_STORE(b[69]);
9497
+
9498
+   /* output 70 */
9499
+   CARRY_FORWARD;
9500
+   SQRADDSC(a[23], a[47]); SQRADDAC(a[24], a[46]); SQRADDAC(a[25], a[45]); SQRADDAC(a[26], a[44]); SQRADDAC(a[27], a[43]); SQRADDAC(a[28], a[42]); SQRADDAC(a[29], a[41]); SQRADDAC(a[30], a[40]); SQRADDAC(a[31], a[39]); SQRADDAC(a[32], a[38]); SQRADDAC(a[33], a[37]); SQRADDAC(a[34], a[36]); SQRADDDB; SQRADD(a[35], a[35]); 
9501
+   COMBA_STORE(b[70]);
9502
+
9503
+   /* output 71 */
9504
+   CARRY_FORWARD;
9505
+   SQRADDSC(a[24], a[47]); SQRADDAC(a[25], a[46]); SQRADDAC(a[26], a[45]); SQRADDAC(a[27], a[44]); SQRADDAC(a[28], a[43]); SQRADDAC(a[29], a[42]); SQRADDAC(a[30], a[41]); SQRADDAC(a[31], a[40]); SQRADDAC(a[32], a[39]); SQRADDAC(a[33], a[38]); SQRADDAC(a[34], a[37]); SQRADDAC(a[35], a[36]); SQRADDDB; 
9506
+   COMBA_STORE(b[71]);
9507
+
9508
+   /* output 72 */
9509
+   CARRY_FORWARD;
9510
+   SQRADDSC(a[25], a[47]); SQRADDAC(a[26], a[46]); SQRADDAC(a[27], a[45]); SQRADDAC(a[28], a[44]); SQRADDAC(a[29], a[43]); SQRADDAC(a[30], a[42]); SQRADDAC(a[31], a[41]); SQRADDAC(a[32], a[40]); SQRADDAC(a[33], a[39]); SQRADDAC(a[34], a[38]); SQRADDAC(a[35], a[37]); SQRADDDB; SQRADD(a[36], a[36]); 
9511
+   COMBA_STORE(b[72]);
9512
+
9513
+   /* output 73 */
9514
+   CARRY_FORWARD;
9515
+   SQRADDSC(a[26], a[47]); SQRADDAC(a[27], a[46]); SQRADDAC(a[28], a[45]); SQRADDAC(a[29], a[44]); SQRADDAC(a[30], a[43]); SQRADDAC(a[31], a[42]); SQRADDAC(a[32], a[41]); SQRADDAC(a[33], a[40]); SQRADDAC(a[34], a[39]); SQRADDAC(a[35], a[38]); SQRADDAC(a[36], a[37]); SQRADDDB; 
9516
+   COMBA_STORE(b[73]);
9517
+
9518
+   /* output 74 */
9519
+   CARRY_FORWARD;
9520
+   SQRADDSC(a[27], a[47]); SQRADDAC(a[28], a[46]); SQRADDAC(a[29], a[45]); SQRADDAC(a[30], a[44]); SQRADDAC(a[31], a[43]); SQRADDAC(a[32], a[42]); SQRADDAC(a[33], a[41]); SQRADDAC(a[34], a[40]); SQRADDAC(a[35], a[39]); SQRADDAC(a[36], a[38]); SQRADDDB; SQRADD(a[37], a[37]); 
9521
+   COMBA_STORE(b[74]);
9522
+
9523
+   /* output 75 */
9524
+   CARRY_FORWARD;
9525
+   SQRADDSC(a[28], a[47]); SQRADDAC(a[29], a[46]); SQRADDAC(a[30], a[45]); SQRADDAC(a[31], a[44]); SQRADDAC(a[32], a[43]); SQRADDAC(a[33], a[42]); SQRADDAC(a[34], a[41]); SQRADDAC(a[35], a[40]); SQRADDAC(a[36], a[39]); SQRADDAC(a[37], a[38]); SQRADDDB; 
9526
+   COMBA_STORE(b[75]);
9527
+
9528
+   /* output 76 */
9529
+   CARRY_FORWARD;
9530
+   SQRADDSC(a[29], a[47]); SQRADDAC(a[30], a[46]); SQRADDAC(a[31], a[45]); SQRADDAC(a[32], a[44]); SQRADDAC(a[33], a[43]); SQRADDAC(a[34], a[42]); SQRADDAC(a[35], a[41]); SQRADDAC(a[36], a[40]); SQRADDAC(a[37], a[39]); SQRADDDB; SQRADD(a[38], a[38]); 
9531
+   COMBA_STORE(b[76]);
9532
+
9533
+   /* output 77 */
9534
+   CARRY_FORWARD;
9535
+   SQRADDSC(a[30], a[47]); SQRADDAC(a[31], a[46]); SQRADDAC(a[32], a[45]); SQRADDAC(a[33], a[44]); SQRADDAC(a[34], a[43]); SQRADDAC(a[35], a[42]); SQRADDAC(a[36], a[41]); SQRADDAC(a[37], a[40]); SQRADDAC(a[38], a[39]); SQRADDDB; 
9536
+   COMBA_STORE(b[77]);
9537
+
9538
+   /* output 78 */
9539
+   CARRY_FORWARD;
9540
+   SQRADDSC(a[31], a[47]); SQRADDAC(a[32], a[46]); SQRADDAC(a[33], a[45]); SQRADDAC(a[34], a[44]); SQRADDAC(a[35], a[43]); SQRADDAC(a[36], a[42]); SQRADDAC(a[37], a[41]); SQRADDAC(a[38], a[40]); SQRADDDB; SQRADD(a[39], a[39]); 
9541
+   COMBA_STORE(b[78]);
9542
+
9543
+   /* output 79 */
9544
+   CARRY_FORWARD;
9545
+   SQRADDSC(a[32], a[47]); SQRADDAC(a[33], a[46]); SQRADDAC(a[34], a[45]); SQRADDAC(a[35], a[44]); SQRADDAC(a[36], a[43]); SQRADDAC(a[37], a[42]); SQRADDAC(a[38], a[41]); SQRADDAC(a[39], a[40]); SQRADDDB; 
9546
+   COMBA_STORE(b[79]);
9547
+
9548
+   /* output 80 */
9549
+   CARRY_FORWARD;
9550
+   SQRADDSC(a[33], a[47]); SQRADDAC(a[34], a[46]); SQRADDAC(a[35], a[45]); SQRADDAC(a[36], a[44]); SQRADDAC(a[37], a[43]); SQRADDAC(a[38], a[42]); SQRADDAC(a[39], a[41]); SQRADDDB; SQRADD(a[40], a[40]); 
9551
+   COMBA_STORE(b[80]);
9552
+
9553
+   /* output 81 */
9554
+   CARRY_FORWARD;
9555
+   SQRADDSC(a[34], a[47]); SQRADDAC(a[35], a[46]); SQRADDAC(a[36], a[45]); SQRADDAC(a[37], a[44]); SQRADDAC(a[38], a[43]); SQRADDAC(a[39], a[42]); SQRADDAC(a[40], a[41]); SQRADDDB; 
9556
+   COMBA_STORE(b[81]);
9557
+
9558
+   /* output 82 */
9559
+   CARRY_FORWARD;
9560
+   SQRADDSC(a[35], a[47]); SQRADDAC(a[36], a[46]); SQRADDAC(a[37], a[45]); SQRADDAC(a[38], a[44]); SQRADDAC(a[39], a[43]); SQRADDAC(a[40], a[42]); SQRADDDB; SQRADD(a[41], a[41]); 
9561
+   COMBA_STORE(b[82]);
9562
+
9563
+   /* output 83 */
9564
+   CARRY_FORWARD;
9565
+   SQRADDSC(a[36], a[47]); SQRADDAC(a[37], a[46]); SQRADDAC(a[38], a[45]); SQRADDAC(a[39], a[44]); SQRADDAC(a[40], a[43]); SQRADDAC(a[41], a[42]); SQRADDDB; 
9566
+   COMBA_STORE(b[83]);
9567
+
9568
+   /* output 84 */
9569
+   CARRY_FORWARD;
9570
+   SQRADDSC(a[37], a[47]); SQRADDAC(a[38], a[46]); SQRADDAC(a[39], a[45]); SQRADDAC(a[40], a[44]); SQRADDAC(a[41], a[43]); SQRADDDB; SQRADD(a[42], a[42]); 
9571
+   COMBA_STORE(b[84]);
9572
+
9573
+   /* output 85 */
9574
+   CARRY_FORWARD;
9575
+   SQRADDSC(a[38], a[47]); SQRADDAC(a[39], a[46]); SQRADDAC(a[40], a[45]); SQRADDAC(a[41], a[44]); SQRADDAC(a[42], a[43]); SQRADDDB; 
9576
+   COMBA_STORE(b[85]);
9577
+
9578
+   /* output 86 */
9579
+   CARRY_FORWARD;
9580
+   SQRADDSC(a[39], a[47]); SQRADDAC(a[40], a[46]); SQRADDAC(a[41], a[45]); SQRADDAC(a[42], a[44]); SQRADDDB; SQRADD(a[43], a[43]); 
9581
+   COMBA_STORE(b[86]);
9582
+
9583
+   /* output 87 */
9584
+   CARRY_FORWARD;
9585
+   SQRADDSC(a[40], a[47]); SQRADDAC(a[41], a[46]); SQRADDAC(a[42], a[45]); SQRADDAC(a[43], a[44]); SQRADDDB; 
9586
+   COMBA_STORE(b[87]);
9587
+
9588
+   /* output 88 */
9589
+   CARRY_FORWARD;
9590
+   SQRADDSC(a[41], a[47]); SQRADDAC(a[42], a[46]); SQRADDAC(a[43], a[45]); SQRADDDB; SQRADD(a[44], a[44]); 
9591
+   COMBA_STORE(b[88]);
9592
+
9593
+   /* output 89 */
9594
+   CARRY_FORWARD;
9595
+   SQRADDSC(a[42], a[47]); SQRADDAC(a[43], a[46]); SQRADDAC(a[44], a[45]); SQRADDDB; 
9596
+   COMBA_STORE(b[89]);
9597
+
9598
+   /* output 90 */
9599
+   CARRY_FORWARD;
9600
+   SQRADD2(a[43], a[47]); SQRADD2(a[44], a[46]); SQRADD(a[45], a[45]); 
9601
+   COMBA_STORE(b[90]);
9602
+
9603
+   /* output 91 */
9604
+   CARRY_FORWARD;
9605
+   SQRADD2(a[44], a[47]); SQRADD2(a[45], a[46]); 
9606
+   COMBA_STORE(b[91]);
9607
+
9608
+   /* output 92 */
9609
+   CARRY_FORWARD;
9610
+   SQRADD2(a[45], a[47]); SQRADD(a[46], a[46]); 
9611
+   COMBA_STORE(b[92]);
9612
+
9613
+   /* output 93 */
9614
+   CARRY_FORWARD;
9615
+   SQRADD2(a[46], a[47]); 
9616
+   COMBA_STORE(b[93]);
9617
+
9618
+   /* output 94 */
9619
+   CARRY_FORWARD;
9620
+   SQRADD(a[47], a[47]); 
9621
+   COMBA_STORE(b[94]);
9622
+   COMBA_STORE2(b[95]);
9623
+   COMBA_FINI;
9624
+
9625
+   B->used = 96;
9626
+   B->sign = FP_ZPOS;
9627
+   memcpy(B->dp, b, 96 * sizeof(fp_digit));
9628
+   fp_clamp(B);
7611 9629
 }
7612
-
7613 9630
 #endif
7614 9631
 
7615
-/* $Source: /cvs/libtom/libtommath/bn_mp_sub_d.c,v $ */
7616
-/* $Revision: 1.5 $ */
7617
-/* $Date: 2006/03/31 14:18:44 $ */
7618 9632
 
7619
-/* End: bn_mp_sub_d.c */
9633
+/* $Source: /cvs/libtom/tomsfastmath/src/sqr/fp_sqr_comba_48.c,v $ */
9634
+/* $Revision: 1.2 $ */
9635
+/* $Date: 2007/02/17 03:39:01 $ */
7620 9636
 
7621
-/* Start: bn_mp_submod.c */
7622
-#include <bignum.h>
7623
-#ifdef BN_MP_SUBMOD_C
7624
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
7625
- *
7626
- * LibTomMath is a library that provides multiple-precision
7627
- * integer arithmetic as well as number theoretic functionality.
7628
- *
7629
- * The library was designed directly after the MPI library by
7630
- * Michael Fromberger but has been written from scratch with
7631
- * additional optimizations in place.
7632
- *
7633
- * The library is free for all purposes without any express
7634
- * guarantee it works.
7635
- *
7636
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
7637
- */
9637
+/* End: fp_sqr_comba_48.c */
7638 9638
 
7639
-/* d = a - b (mod c) */
7640
-int
7641
-mp_submod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
7642
-{
7643
-  int     res;
7644
-  mp_int  t;
7645
-
7646
-
7647
-  if ((res = mp_init (&t)) != MP_OKAY) {
7648
-    return res;
7649
-  }
9639
+/* Start: fp_sqr_comba_6.c */
9640
+#define TFM_DEFINES
9641
+#include "fp_sqr_comba.c"
7650 9642
 
7651
-  if ((res = mp_sub (a, b, &t)) != MP_OKAY) {
7652
-    mp_clear (&t);
7653
-    return res;
7654
-  }
7655
-  res = mp_mod (&t, c, d);
7656
-  mp_clear (&t);
7657
-  return res;
9643
+#ifdef TFM_SQR6
9644
+void fp_sqr_comba6(fp_int *A, fp_int *B)
9645
+{ /* Fully unrolled comba-style squaring for a 6-digit operand.
+   *
+   * A: input; only digits A->dp[0] .. A->dp[5] are read.
+   * B: output; receives 12 digits, sign FP_ZPOS, then is passed to fp_clamp().
+   *
+   * Each "output k" section below lists every index pair (i, j) with
+   * i <= j and i + j == k exactly once; even-numbered outputs end with
+   * the diagonal pair (k/2, k/2). Digits are collected in the local
+   * array b[] and copied into B->dp only after all columns are done.
+   *
+   * NOTE(review): COMBA_START, CLEAR_CARRY, CARRY_FORWARD, COMBA_STORE*,
+   * COMBA_FINI and the SQRADD* macros are not defined in this block;
+   * presumably they come from fp_sqr_comba.c (included with TFM_DEFINES
+   * just before this function) - confirm.
+   */
9646
+   fp_digit *a, b[12], c0, c1, c2, sc0, sc1, sc2; /* c0..c2 / sc0..sc2 are never named directly below; presumably the macros use them as accumulators - confirm */
9647
+#ifdef TFM_ISO
9648
+   fp_word tt; /* declared only under TFM_ISO; not referenced directly in this function - presumably macro scratch, confirm */
9649
+#endif
9650
+
9651
+   a = A->dp; /* every a[i] below reads the input's digit array */
9652
+   COMBA_START; 
9653
+
9654
+   /* clear carries */
9655
+   CLEAR_CARRY;
9656
+
9657
+   /* output 0: diagonal pair (0,0) only; no CARRY_FORWARD before the first column */
9658
+   SQRADD(a[0],a[0]);
9659
+   COMBA_STORE(b[0]);
9660
+
9661
+   /* output 1: pair (0,1) */
9662
+   CARRY_FORWARD;
9663
+   SQRADD2(a[0], a[1]); 
9664
+   COMBA_STORE(b[1]);
9665
+
9666
+   /* output 2: pair (0,2), then diagonal (1,1) */
9667
+   CARRY_FORWARD;
9668
+   SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); 
9669
+   COMBA_STORE(b[2]);
9670
+
9671
+   /* output 3: pairs (0,3), (1,2) */
9672
+   CARRY_FORWARD;
9673
+   SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); 
9674
+   COMBA_STORE(b[3]);
9675
+
9676
+   /* output 4: pairs (0,4), (1,3), then diagonal (2,2) */
9677
+   CARRY_FORWARD;
9678
+   SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); 
9679
+   COMBA_STORE(b[4]);
9680
+
9681
+   /* output 5: pairs (0,5), (1,4), (2,3). This is the only column in this
+    * function that uses the SQRADDSC / SQRADDAC / SQRADDDB sequence instead
+    * of SQRADD2; the statement order within the sequence must be kept. */
9682
+   CARRY_FORWARD;
9683
+   SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; 
9684
+   COMBA_STORE(b[5]);
9685
+
9686
+   /* output 6: pairs (1,5), (2,4), then diagonal (3,3) */
9687
+   CARRY_FORWARD;
9688
+   SQRADD2(a[1], a[5]); SQRADD2(a[2], a[4]); SQRADD(a[3], a[3]); 
9689
+   COMBA_STORE(b[6]);
9690
+
9691
+   /* output 7: pairs (2,5), (3,4) */
9692
+   CARRY_FORWARD;
9693
+   SQRADD2(a[2], a[5]); SQRADD2(a[3], a[4]); 
9694
+   COMBA_STORE(b[7]);
9695
+
9696
+   /* output 8: pair (3,5), then diagonal (4,4) */
9697
+   CARRY_FORWARD;
9698
+   SQRADD2(a[3], a[5]); SQRADD(a[4], a[4]); 
9699
+   COMBA_STORE(b[8]);
9700
+
9701
+   /* output 9: pair (4,5) */
9702
+   CARRY_FORWARD;
9703
+   SQRADD2(a[4], a[5]); 
9704
+   COMBA_STORE(b[9]);
9705
+
9706
+   /* output 10: diagonal pair (5,5) only */
9707
+   CARRY_FORWARD;
9708
+   SQRADD(a[5], a[5]); 
9709
+   COMBA_STORE(b[10]);
9710
+   COMBA_STORE2(b[11]); /* last digit: stored with COMBA_STORE2 and no products of its own - presumably the leftover carry, confirm */
9711
+   COMBA_FINI;
9712
+
9713
+   B->used = 12; /* digit count set to the full 12 before fp_clamp() runs */
9714
+   B->sign = FP_ZPOS;
9715
+   memcpy(B->dp, b, 12 * sizeof(fp_digit)); /* result is copied out of the local buffer only now, after all reads of a[] - presumably so B may alias A, confirm */
9716
+   fp_clamp(B); /* presumably trims leading zero digits from B->used - confirm against fp_clamp's definition */
7658 9717
 }
7659 9718
 #endif
7660 9719
 
7661
-/* $Source: /cvs/libtom/libtommath/bn_mp_submod.c,v $ */
7662
-/* $Revision: 1.3 $ */
7663
-/* $Date: 2006/03/31 14:18:44 $ */
7664 9720
 
7665
-/* End: bn_mp_submod.c */
9721
+/* $Source: /cvs/libtom/tomsfastmath/src/sqr/fp_sqr_comba_6.c,v $ */
9722
+/* $Revision: 1.2 $ */
9723
+/* $Date: 2007/02/17 03:39:01 $ */
7666 9724
 
7667
-/* Start: bn_mp_to_signed_bin.c */
7668
-#include <bignum.h>
7669
-#ifdef BN_MP_TO_SIGNED_BIN_C
7670
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
7671
- *
7672
- * LibTomMath is a library that provides multiple-precision
7673
- * integer arithmetic as well as number theoretic functionality.
7674
- *
7675
- * The library was designed directly after the MPI library by
7676
- * Michael Fromberger but has been written from scratch with
7677
- * additional optimizations in place.
7678
- *
7679
- * The library is free for all purposes without any express
7680
- * guarantee it works.
7681
- *
7682
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
7683
- */
9725
+/* End: fp_sqr_comba_6.c */
7684 9726
 
7685
-/* store in signed [big endian] format */
7686
-int mp_to_signed_bin (mp_int * a, unsigned char *b)
7687
-{
7688
-  int     res;
9727
+/* Start: fp_sqr_comba_64.c */
9728
+#define TFM_DEFINES
9729
+#include "fp_sqr_comba.c"
7689 9730
 
7690
-  if ((res = mp_to_unsigned_bin (a, b + 1)) != MP_OKAY) {
7691
-    return res;
7692
-  }
7693
-  b[0] = (unsigned char) ((a->sign == MP_ZPOS) ? 0 : 1);
7694
-  return MP_OKAY;
9731
+#ifdef TFM_SQR64
9732
+void fp_sqr_comba64(fp_int *A, fp_int *B)
9733
+{
9734
+   fp_digit *a, b[128], c0, c1, c2, sc0, sc1, sc2;
9735
+#ifdef TFM_ISO
9736
+   fp_word tt;
9737
+#endif
9738
+
9739
+   a = A->dp;
9740
+   COMBA_START; 
9741
+
9742
+   /* clear carries */
9743
+   CLEAR_CARRY;
9744
+
9745
+   /* output 0 */
9746
+   SQRADD(a[0],a[0]);
9747
+   COMBA_STORE(b[0]);
9748
+
9749
+   /* output 1 */
9750
+   CARRY_FORWARD;
9751
+   SQRADD2(a[0], a[1]); 
9752
+   COMBA_STORE(b[1]);
9753
+
9754
+   /* output 2 */
9755
+   CARRY_FORWARD;
9756
+   SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); 
9757
+   COMBA_STORE(b[2]);
9758
+
9759
+   /* output 3 */
9760
+   CARRY_FORWARD;
9761
+   SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); 
9762
+   COMBA_STORE(b[3]);
9763
+
9764
+   /* output 4 */
9765
+   CARRY_FORWARD;
9766
+   SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); 
9767
+   COMBA_STORE(b[4]);
9768
+
9769
+   /* output 5 */
9770
+   CARRY_FORWARD;
9771
+   SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; 
9772
+   COMBA_STORE(b[5]);
9773
+
9774
+   /* output 6 */
9775
+   CARRY_FORWARD;
9776
+   SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); 
9777
+   COMBA_STORE(b[6]);
9778
+
9779
+   /* output 7 */
9780
+   CARRY_FORWARD;
9781
+   SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; 
9782
+   COMBA_STORE(b[7]);
9783
+
9784
+   /* output 8 */
9785
+   CARRY_FORWARD;
9786
+   SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); 
9787
+   COMBA_STORE(b[8]);
9788
+
9789
+   /* output 9 */
9790
+   CARRY_FORWARD;
9791
+   SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; 
9792
+   COMBA_STORE(b[9]);
9793
+
9794
+   /* output 10 */
9795
+   CARRY_FORWARD;
9796
+   SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); 
9797
+   COMBA_STORE(b[10]);
9798
+
9799
+   /* output 11 */
9800
+   CARRY_FORWARD;
9801
+   SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; 
9802
+   COMBA_STORE(b[11]);
9803
+
9804
+   /* output 12 */
9805
+   CARRY_FORWARD;
9806
+   SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); 
9807
+   COMBA_STORE(b[12]);
9808
+
9809
+   /* output 13 */
9810
+   CARRY_FORWARD;
9811
+   SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; 
9812
+   COMBA_STORE(b[13]);
9813
+
9814
+   /* output 14 */
9815
+   CARRY_FORWARD;
9816
+   SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); 
9817
+   COMBA_STORE(b[14]);
9818
+
9819
+   /* output 15 */
9820
+   CARRY_FORWARD;
9821
+   SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; 
9822
+   COMBA_STORE(b[15]);
9823
+
9824
+   /* output 16 */
9825
+   CARRY_FORWARD;
9826
+   SQRADDSC(a[0], a[16]); SQRADDAC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); 
9827
+   COMBA_STORE(b[16]);
9828
+
9829
+   /* output 17 */
9830
+   CARRY_FORWARD;
9831
+   SQRADDSC(a[0], a[17]); SQRADDAC(a[1], a[16]); SQRADDAC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; 
9832
+   COMBA_STORE(b[17]);
9833
+
9834
+   /* output 18 */
9835
+   CARRY_FORWARD;
9836
+   SQRADDSC(a[0], a[18]); SQRADDAC(a[1], a[17]); SQRADDAC(a[2], a[16]); SQRADDAC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); 
9837
+   COMBA_STORE(b[18]);
9838
+
9839
+   /* output 19 */
9840
+   CARRY_FORWARD;
9841
+   SQRADDSC(a[0], a[19]); SQRADDAC(a[1], a[18]); SQRADDAC(a[2], a[17]); SQRADDAC(a[3], a[16]); SQRADDAC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; 
9842
+   COMBA_STORE(b[19]);
9843
+
9844
+   /* output 20 */
9845
+   CARRY_FORWARD;
9846
+   SQRADDSC(a[0], a[20]); SQRADDAC(a[1], a[19]); SQRADDAC(a[2], a[18]); SQRADDAC(a[3], a[17]); SQRADDAC(a[4], a[16]); SQRADDAC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]); 
9847
+   COMBA_STORE(b[20]);
9848
+
9849
+   /* output 21 */
9850
+   CARRY_FORWARD;
9851
+   SQRADDSC(a[0], a[21]); SQRADDAC(a[1], a[20]); SQRADDAC(a[2], a[19]); SQRADDAC(a[3], a[18]); SQRADDAC(a[4], a[17]); SQRADDAC(a[5], a[16]); SQRADDAC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB; 
9852
+   COMBA_STORE(b[21]);
9853
+
9854
+   /* output 22 */
9855
+   CARRY_FORWARD;
9856
+   SQRADDSC(a[0], a[22]); SQRADDAC(a[1], a[21]); SQRADDAC(a[2], a[20]); SQRADDAC(a[3], a[19]); SQRADDAC(a[4], a[18]); SQRADDAC(a[5], a[17]); SQRADDAC(a[6], a[16]); SQRADDAC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]); 
9857
+   COMBA_STORE(b[22]);
9858
+
9859
+   /* output 23 */
9860
+   CARRY_FORWARD;
9861
+   SQRADDSC(a[0], a[23]); SQRADDAC(a[1], a[22]); SQRADDAC(a[2], a[21]); SQRADDAC(a[3], a[20]); SQRADDAC(a[4], a[19]); SQRADDAC(a[5], a[18]); SQRADDAC(a[6], a[17]); SQRADDAC(a[7], a[16]); SQRADDAC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB; 
9862
+   COMBA_STORE(b[23]);
9863
+
9864
+   /* output 24 */
9865
+   CARRY_FORWARD;
9866
+   SQRADDSC(a[0], a[24]); SQRADDAC(a[1], a[23]); SQRADDAC(a[2], a[22]); SQRADDAC(a[3], a[21]); SQRADDAC(a[4], a[20]); SQRADDAC(a[5], a[19]); SQRADDAC(a[6], a[18]); SQRADDAC(a[7], a[17]); SQRADDAC(a[8], a[16]); SQRADDAC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]); 
9867
+   COMBA_STORE(b[24]);
9868
+
9869
+   /* output 25 */
9870
+   CARRY_FORWARD;
9871
+   SQRADDSC(a[0], a[25]); SQRADDAC(a[1], a[24]); SQRADDAC(a[2], a[23]); SQRADDAC(a[3], a[22]); SQRADDAC(a[4], a[21]); SQRADDAC(a[5], a[20]); SQRADDAC(a[6], a[19]); SQRADDAC(a[7], a[18]); SQRADDAC(a[8], a[17]); SQRADDAC(a[9], a[16]); SQRADDAC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB; 
9872
+   COMBA_STORE(b[25]);
9873
+
9874
+   /* output 26 */
9875
+   CARRY_FORWARD;
9876
+   SQRADDSC(a[0], a[26]); SQRADDAC(a[1], a[25]); SQRADDAC(a[2], a[24]); SQRADDAC(a[3], a[23]); SQRADDAC(a[4], a[22]); SQRADDAC(a[5], a[21]); SQRADDAC(a[6], a[20]); SQRADDAC(a[7], a[19]); SQRADDAC(a[8], a[18]); SQRADDAC(a[9], a[17]); SQRADDAC(a[10], a[16]); SQRADDAC(a[11], a[15]); SQRADDAC(a[12], a[14]); SQRADDDB; SQRADD(a[13], a[13]); 
9877
+   COMBA_STORE(b[26]);
9878
+
9879
+   /* output 27 */
9880
+   CARRY_FORWARD;
9881
+   SQRADDSC(a[0], a[27]); SQRADDAC(a[1], a[26]); SQRADDAC(a[2], a[25]); SQRADDAC(a[3], a[24]); SQRADDAC(a[4], a[23]); SQRADDAC(a[5], a[22]); SQRADDAC(a[6], a[21]); SQRADDAC(a[7], a[20]); SQRADDAC(a[8], a[19]); SQRADDAC(a[9], a[18]); SQRADDAC(a[10], a[17]); SQRADDAC(a[11], a[16]); SQRADDAC(a[12], a[15]); SQRADDAC(a[13], a[14]); SQRADDDB; 
9882
+   COMBA_STORE(b[27]);
9883
+
9884
+   /* output 28 */
9885
+   CARRY_FORWARD;
9886
+   SQRADDSC(a[0], a[28]); SQRADDAC(a[1], a[27]); SQRADDAC(a[2], a[26]); SQRADDAC(a[3], a[25]); SQRADDAC(a[4], a[24]); SQRADDAC(a[5], a[23]); SQRADDAC(a[6], a[22]); SQRADDAC(a[7], a[21]); SQRADDAC(a[8], a[20]); SQRADDAC(a[9], a[19]); SQRADDAC(a[10], a[18]); SQRADDAC(a[11], a[17]); SQRADDAC(a[12], a[16]); SQRADDAC(a[13], a[15]); SQRADDDB; SQRADD(a[14], a[14]); 
9887
+   COMBA_STORE(b[28]);
9888
+
9889
+   /* output 29 */
9890
+   CARRY_FORWARD;
9891
+   SQRADDSC(a[0], a[29]); SQRADDAC(a[1], a[28]); SQRADDAC(a[2], a[27]); SQRADDAC(a[3], a[26]); SQRADDAC(a[4], a[25]); SQRADDAC(a[5], a[24]); SQRADDAC(a[6], a[23]); SQRADDAC(a[7], a[22]); SQRADDAC(a[8], a[21]); SQRADDAC(a[9], a[20]); SQRADDAC(a[10], a[19]); SQRADDAC(a[11], a[18]); SQRADDAC(a[12], a[17]); SQRADDAC(a[13], a[16]); SQRADDAC(a[14], a[15]); SQRADDDB; 
9892
+   COMBA_STORE(b[29]);
9893
+
9894
+   /* output 30 */
9895
+   CARRY_FORWARD;
9896
+   SQRADDSC(a[0], a[30]); SQRADDAC(a[1], a[29]); SQRADDAC(a[2], a[28]); SQRADDAC(a[3], a[27]); SQRADDAC(a[4], a[26]); SQRADDAC(a[5], a[25]); SQRADDAC(a[6], a[24]); SQRADDAC(a[7], a[23]); SQRADDAC(a[8], a[22]); SQRADDAC(a[9], a[21]); SQRADDAC(a[10], a[20]); SQRADDAC(a[11], a[19]); SQRADDAC(a[12], a[18]); SQRADDAC(a[13], a[17]); SQRADDAC(a[14], a[16]); SQRADDDB; SQRADD(a[15], a[15]); 
9897
+   COMBA_STORE(b[30]);
9898
+
9899
+   /* output 31 */
9900
+   CARRY_FORWARD;
9901
+   SQRADDSC(a[0], a[31]); SQRADDAC(a[1], a[30]); SQRADDAC(a[2], a[29]); SQRADDAC(a[3], a[28]); SQRADDAC(a[4], a[27]); SQRADDAC(a[5], a[26]); SQRADDAC(a[6], a[25]); SQRADDAC(a[7], a[24]); SQRADDAC(a[8], a[23]); SQRADDAC(a[9], a[22]); SQRADDAC(a[10], a[21]); SQRADDAC(a[11], a[20]); SQRADDAC(a[12], a[19]); SQRADDAC(a[13], a[18]); SQRADDAC(a[14], a[17]); SQRADDAC(a[15], a[16]); SQRADDDB; 
9902
+   COMBA_STORE(b[31]);
9903
+
9904
+   /* output 32 */
9905
+   CARRY_FORWARD;
9906
+   SQRADDSC(a[0], a[32]); SQRADDAC(a[1], a[31]); SQRADDAC(a[2], a[30]); SQRADDAC(a[3], a[29]); SQRADDAC(a[4], a[28]); SQRADDAC(a[5], a[27]); SQRADDAC(a[6], a[26]); SQRADDAC(a[7], a[25]); SQRADDAC(a[8], a[24]); SQRADDAC(a[9], a[23]); SQRADDAC(a[10], a[22]); SQRADDAC(a[11], a[21]); SQRADDAC(a[12], a[20]); SQRADDAC(a[13], a[19]); SQRADDAC(a[14], a[18]); SQRADDAC(a[15], a[17]); SQRADDDB; SQRADD(a[16], a[16]); 
9907
+   COMBA_STORE(b[32]);
9908
+
9909
+   /* output 33 */
9910
+   CARRY_FORWARD;
9911
+   SQRADDSC(a[0], a[33]); SQRADDAC(a[1], a[32]); SQRADDAC(a[2], a[31]); SQRADDAC(a[3], a[30]); SQRADDAC(a[4], a[29]); SQRADDAC(a[5], a[28]); SQRADDAC(a[6], a[27]); SQRADDAC(a[7], a[26]); SQRADDAC(a[8], a[25]); SQRADDAC(a[9], a[24]); SQRADDAC(a[10], a[23]); SQRADDAC(a[11], a[22]); SQRADDAC(a[12], a[21]); SQRADDAC(a[13], a[20]); SQRADDAC(a[14], a[19]); SQRADDAC(a[15], a[18]); SQRADDAC(a[16], a[17]); SQRADDDB; 
9912
+   COMBA_STORE(b[33]);
9913
+
9914
+   /* output 34 */
9915
+   CARRY_FORWARD;
9916
+   SQRADDSC(a[0], a[34]); SQRADDAC(a[1], a[33]); SQRADDAC(a[2], a[32]); SQRADDAC(a[3], a[31]); SQRADDAC(a[4], a[30]); SQRADDAC(a[5], a[29]); SQRADDAC(a[6], a[28]); SQRADDAC(a[7], a[27]); SQRADDAC(a[8], a[26]); SQRADDAC(a[9], a[25]); SQRADDAC(a[10], a[24]); SQRADDAC(a[11], a[23]); SQRADDAC(a[12], a[22]); SQRADDAC(a[13], a[21]); SQRADDAC(a[14], a[20]); SQRADDAC(a[15], a[19]); SQRADDAC(a[16], a[18]); SQRADDDB; SQRADD(a[17], a[17]); 
9917
+   COMBA_STORE(b[34]);
9918
+
9919
+   /* output 35 */
9920
+   CARRY_FORWARD;
9921
+   SQRADDSC(a[0], a[35]); SQRADDAC(a[1], a[34]); SQRADDAC(a[2], a[33]); SQRADDAC(a[3], a[32]); SQRADDAC(a[4], a[31]); SQRADDAC(a[5], a[30]); SQRADDAC(a[6], a[29]); SQRADDAC(a[7], a[28]); SQRADDAC(a[8], a[27]); SQRADDAC(a[9], a[26]); SQRADDAC(a[10], a[25]); SQRADDAC(a[11], a[24]); SQRADDAC(a[12], a[23]); SQRADDAC(a[13], a[22]); SQRADDAC(a[14], a[21]); SQRADDAC(a[15], a[20]); SQRADDAC(a[16], a[19]); SQRADDAC(a[17], a[18]); SQRADDDB; 
9922
+   COMBA_STORE(b[35]);
9923
+
9924
+   /* output 36 */
9925
+   CARRY_FORWARD;
9926
+   SQRADDSC(a[0], a[36]); SQRADDAC(a[1], a[35]); SQRADDAC(a[2], a[34]); SQRADDAC(a[3], a[33]); SQRADDAC(a[4], a[32]); SQRADDAC(a[5], a[31]); SQRADDAC(a[6], a[30]); SQRADDAC(a[7], a[29]); SQRADDAC(a[8], a[28]); SQRADDAC(a[9], a[27]); SQRADDAC(a[10], a[26]); SQRADDAC(a[11], a[25]); SQRADDAC(a[12], a[24]); SQRADDAC(a[13], a[23]); SQRADDAC(a[14], a[22]); SQRADDAC(a[15], a[21]); SQRADDAC(a[16], a[20]); SQRADDAC(a[17], a[19]); SQRADDDB; SQRADD(a[18], a[18]); 
9927
+   COMBA_STORE(b[36]);
9928
+
9929
+   /* output 37 */
9930
+   CARRY_FORWARD;
9931
+   SQRADDSC(a[0], a[37]); SQRADDAC(a[1], a[36]); SQRADDAC(a[2], a[35]); SQRADDAC(a[3], a[34]); SQRADDAC(a[4], a[33]); SQRADDAC(a[5], a[32]); SQRADDAC(a[6], a[31]); SQRADDAC(a[7], a[30]); SQRADDAC(a[8], a[29]); SQRADDAC(a[9], a[28]); SQRADDAC(a[10], a[27]); SQRADDAC(a[11], a[26]); SQRADDAC(a[12], a[25]); SQRADDAC(a[13], a[24]); SQRADDAC(a[14], a[23]); SQRADDAC(a[15], a[22]); SQRADDAC(a[16], a[21]); SQRADDAC(a[17], a[20]); SQRADDAC(a[18], a[19]); SQRADDDB; 
9932
+   COMBA_STORE(b[37]);
9933
+
9934
+   /* output 38 */
9935
+   CARRY_FORWARD;
9936
+   SQRADDSC(a[0], a[38]); SQRADDAC(a[1], a[37]); SQRADDAC(a[2], a[36]); SQRADDAC(a[3], a[35]); SQRADDAC(a[4], a[34]); SQRADDAC(a[5], a[33]); SQRADDAC(a[6], a[32]); SQRADDAC(a[7], a[31]); SQRADDAC(a[8], a[30]); SQRADDAC(a[9], a[29]); SQRADDAC(a[10], a[28]); SQRADDAC(a[11], a[27]); SQRADDAC(a[12], a[26]); SQRADDAC(a[13], a[25]); SQRADDAC(a[14], a[24]); SQRADDAC(a[15], a[23]); SQRADDAC(a[16], a[22]); SQRADDAC(a[17], a[21]); SQRADDAC(a[18], a[20]); SQRADDDB; SQRADD(a[19], a[19]); 
9937
+   COMBA_STORE(b[38]);
9938
+
9939
+   /* output 39 */
9940
+   CARRY_FORWARD;
9941
+   SQRADDSC(a[0], a[39]); SQRADDAC(a[1], a[38]); SQRADDAC(a[2], a[37]); SQRADDAC(a[3], a[36]); SQRADDAC(a[4], a[35]); SQRADDAC(a[5], a[34]); SQRADDAC(a[6], a[33]); SQRADDAC(a[7], a[32]); SQRADDAC(a[8], a[31]); SQRADDAC(a[9], a[30]); SQRADDAC(a[10], a[29]); SQRADDAC(a[11], a[28]); SQRADDAC(a[12], a[27]); SQRADDAC(a[13], a[26]); SQRADDAC(a[14], a[25]); SQRADDAC(a[15], a[24]); SQRADDAC(a[16], a[23]); SQRADDAC(a[17], a[22]); SQRADDAC(a[18], a[21]); SQRADDAC(a[19], a[20]); SQRADDDB; 
9942
+   COMBA_STORE(b[39]);
9943
+
9944
+   /* output 40 */
9945
+   CARRY_FORWARD;
9946
+   SQRADDSC(a[0], a[40]); SQRADDAC(a[1], a[39]); SQRADDAC(a[2], a[38]); SQRADDAC(a[3], a[37]); SQRADDAC(a[4], a[36]); SQRADDAC(a[5], a[35]); SQRADDAC(a[6], a[34]); SQRADDAC(a[7], a[33]); SQRADDAC(a[8], a[32]); SQRADDAC(a[9], a[31]); SQRADDAC(a[10], a[30]); SQRADDAC(a[11], a[29]); SQRADDAC(a[12], a[28]); SQRADDAC(a[13], a[27]); SQRADDAC(a[14], a[26]); SQRADDAC(a[15], a[25]); SQRADDAC(a[16], a[24]); SQRADDAC(a[17], a[23]); SQRADDAC(a[18], a[22]); SQRADDAC(a[19], a[21]); SQRADDDB; SQRADD(a[20], a[20]); 
9947
+   COMBA_STORE(b[40]);
9948
+
9949
+   /* output 41 */
9950
+   CARRY_FORWARD;
9951
+   SQRADDSC(a[0], a[41]); SQRADDAC(a[1], a[40]); SQRADDAC(a[2], a[39]); SQRADDAC(a[3], a[38]); SQRADDAC(a[4], a[37]); SQRADDAC(a[5], a[36]); SQRADDAC(a[6], a[35]); SQRADDAC(a[7], a[34]); SQRADDAC(a[8], a[33]); SQRADDAC(a[9], a[32]); SQRADDAC(a[10], a[31]); SQRADDAC(a[11], a[30]); SQRADDAC(a[12], a[29]); SQRADDAC(a[13], a[28]); SQRADDAC(a[14], a[27]); SQRADDAC(a[15], a[26]); SQRADDAC(a[16], a[25]); SQRADDAC(a[17], a[24]); SQRADDAC(a[18], a[23]); SQRADDAC(a[19], a[22]); SQRADDAC(a[20], a[21]); SQRADDDB; 
9952
+   COMBA_STORE(b[41]);
9953
+
9954
+   /* output 42 */
9955
+   CARRY_FORWARD;
9956
+   SQRADDSC(a[0], a[42]); SQRADDAC(a[1], a[41]); SQRADDAC(a[2], a[40]); SQRADDAC(a[3], a[39]); SQRADDAC(a[4], a[38]); SQRADDAC(a[5], a[37]); SQRADDAC(a[6], a[36]); SQRADDAC(a[7], a[35]); SQRADDAC(a[8], a[34]); SQRADDAC(a[9], a[33]); SQRADDAC(a[10], a[32]); SQRADDAC(a[11], a[31]); SQRADDAC(a[12], a[30]); SQRADDAC(a[13], a[29]); SQRADDAC(a[14], a[28]); SQRADDAC(a[15], a[27]); SQRADDAC(a[16], a[26]); SQRADDAC(a[17], a[25]); SQRADDAC(a[18], a[24]); SQRADDAC(a[19], a[23]); SQRADDAC(a[20], a[22]); SQRADDDB; SQRADD(a[21], a[21]); 
9957
+   COMBA_STORE(b[42]);
9958
+
9959
+   /* output 43 */
9960
+   CARRY_FORWARD;
9961
+   SQRADDSC(a[0], a[43]); SQRADDAC(a[1], a[42]); SQRADDAC(a[2], a[41]); SQRADDAC(a[3], a[40]); SQRADDAC(a[4], a[39]); SQRADDAC(a[5], a[38]); SQRADDAC(a[6], a[37]); SQRADDAC(a[7], a[36]); SQRADDAC(a[8], a[35]); SQRADDAC(a[9], a[34]); SQRADDAC(a[10], a[33]); SQRADDAC(a[11], a[32]); SQRADDAC(a[12], a[31]); SQRADDAC(a[13], a[30]); SQRADDAC(a[14], a[29]); SQRADDAC(a[15], a[28]); SQRADDAC(a[16], a[27]); SQRADDAC(a[17], a[26]); SQRADDAC(a[18], a[25]); SQRADDAC(a[19], a[24]); SQRADDAC(a[20], a[23]); SQRADDAC(a[21], a[22]); SQRADDDB; 
9962
+   COMBA_STORE(b[43]);
9963
+
9964
+   /* output 44 */
9965
+   CARRY_FORWARD;
9966
+   SQRADDSC(a[0], a[44]); SQRADDAC(a[1], a[43]); SQRADDAC(a[2], a[42]); SQRADDAC(a[3], a[41]); SQRADDAC(a[4], a[40]); SQRADDAC(a[5], a[39]); SQRADDAC(a[6], a[38]); SQRADDAC(a[7], a[37]); SQRADDAC(a[8], a[36]); SQRADDAC(a[9], a[35]); SQRADDAC(a[10], a[34]); SQRADDAC(a[11], a[33]); SQRADDAC(a[12], a[32]); SQRADDAC(a[13], a[31]); SQRADDAC(a[14], a[30]); SQRADDAC(a[15], a[29]); SQRADDAC(a[16], a[28]); SQRADDAC(a[17], a[27]); SQRADDAC(a[18], a[26]); SQRADDAC(a[19], a[25]); SQRADDAC(a[20], a[24]); SQRADDAC(a[21], a[23]); SQRADDDB; SQRADD(a[22], a[22]); 
9967
+   COMBA_STORE(b[44]);
9968
+
9969
+   /* output 45 */
9970
+   CARRY_FORWARD;
9971
+   SQRADDSC(a[0], a[45]); SQRADDAC(a[1], a[44]); SQRADDAC(a[2], a[43]); SQRADDAC(a[3], a[42]); SQRADDAC(a[4], a[41]); SQRADDAC(a[5], a[40]); SQRADDAC(a[6], a[39]); SQRADDAC(a[7], a[38]); SQRADDAC(a[8], a[37]); SQRADDAC(a[9], a[36]); SQRADDAC(a[10], a[35]); SQRADDAC(a[11], a[34]); SQRADDAC(a[12], a[33]); SQRADDAC(a[13], a[32]); SQRADDAC(a[14], a[31]); SQRADDAC(a[15], a[30]); SQRADDAC(a[16], a[29]); SQRADDAC(a[17], a[28]); SQRADDAC(a[18], a[27]); SQRADDAC(a[19], a[26]); SQRADDAC(a[20], a[25]); SQRADDAC(a[21], a[24]); SQRADDAC(a[22], a[23]); SQRADDDB; 
9972
+   COMBA_STORE(b[45]);
9973
+
9974
+   /* output 46 */
9975
+   CARRY_FORWARD;
9976
+   SQRADDSC(a[0], a[46]); SQRADDAC(a[1], a[45]); SQRADDAC(a[2], a[44]); SQRADDAC(a[3], a[43]); SQRADDAC(a[4], a[42]); SQRADDAC(a[5], a[41]); SQRADDAC(a[6], a[40]); SQRADDAC(a[7], a[39]); SQRADDAC(a[8], a[38]); SQRADDAC(a[9], a[37]); SQRADDAC(a[10], a[36]); SQRADDAC(a[11], a[35]); SQRADDAC(a[12], a[34]); SQRADDAC(a[13], a[33]); SQRADDAC(a[14], a[32]); SQRADDAC(a[15], a[31]); SQRADDAC(a[16], a[30]); SQRADDAC(a[17], a[29]); SQRADDAC(a[18], a[28]); SQRADDAC(a[19], a[27]); SQRADDAC(a[20], a[26]); SQRADDAC(a[21], a[25]); SQRADDAC(a[22], a[24]); SQRADDDB; SQRADD(a[23], a[23]); 
9977
+   COMBA_STORE(b[46]);
9978
+
9979
+   /* output 47 */
9980
+   CARRY_FORWARD;
9981
+   SQRADDSC(a[0], a[47]); SQRADDAC(a[1], a[46]); SQRADDAC(a[2], a[45]); SQRADDAC(a[3], a[44]); SQRADDAC(a[4], a[43]); SQRADDAC(a[5], a[42]); SQRADDAC(a[6], a[41]); SQRADDAC(a[7], a[40]); SQRADDAC(a[8], a[39]); SQRADDAC(a[9], a[38]); SQRADDAC(a[10], a[37]); SQRADDAC(a[11], a[36]); SQRADDAC(a[12], a[35]); SQRADDAC(a[13], a[34]); SQRADDAC(a[14], a[33]); SQRADDAC(a[15], a[32]); SQRADDAC(a[16], a[31]); SQRADDAC(a[17], a[30]); SQRADDAC(a[18], a[29]); SQRADDAC(a[19], a[28]); SQRADDAC(a[20], a[27]); SQRADDAC(a[21], a[26]); SQRADDAC(a[22], a[25]); SQRADDAC(a[23], a[24]); SQRADDDB; 
9982
+   COMBA_STORE(b[47]);
9983
+
9984
+   /* output 48 */
9985
+   CARRY_FORWARD;
9986
+   SQRADDSC(a[0], a[48]); SQRADDAC(a[1], a[47]); SQRADDAC(a[2], a[46]); SQRADDAC(a[3], a[45]); SQRADDAC(a[4], a[44]); SQRADDAC(a[5], a[43]); SQRADDAC(a[6], a[42]); SQRADDAC(a[7], a[41]); SQRADDAC(a[8], a[40]); SQRADDAC(a[9], a[39]); SQRADDAC(a[10], a[38]); SQRADDAC(a[11], a[37]); SQRADDAC(a[12], a[36]); SQRADDAC(a[13], a[35]); SQRADDAC(a[14], a[34]); SQRADDAC(a[15], a[33]); SQRADDAC(a[16], a[32]); SQRADDAC(a[17], a[31]); SQRADDAC(a[18], a[30]); SQRADDAC(a[19], a[29]); SQRADDAC(a[20], a[28]); SQRADDAC(a[21], a[27]); SQRADDAC(a[22], a[26]); SQRADDAC(a[23], a[25]); SQRADDDB; SQRADD(a[24], a[24]); 
9987
+   COMBA_STORE(b[48]);
9988
+
9989
+   /* output 49 */
9990
+   CARRY_FORWARD;
9991
+   SQRADDSC(a[0], a[49]); SQRADDAC(a[1], a[48]); SQRADDAC(a[2], a[47]); SQRADDAC(a[3], a[46]); SQRADDAC(a[4], a[45]); SQRADDAC(a[5], a[44]); SQRADDAC(a[6], a[43]); SQRADDAC(a[7], a[42]); SQRADDAC(a[8], a[41]); SQRADDAC(a[9], a[40]); SQRADDAC(a[10], a[39]); SQRADDAC(a[11], a[38]); SQRADDAC(a[12], a[37]); SQRADDAC(a[13], a[36]); SQRADDAC(a[14], a[35]); SQRADDAC(a[15], a[34]); SQRADDAC(a[16], a[33]); SQRADDAC(a[17], a[32]); SQRADDAC(a[18], a[31]); SQRADDAC(a[19], a[30]); SQRADDAC(a[20], a[29]); SQRADDAC(a[21], a[28]); SQRADDAC(a[22], a[27]); SQRADDAC(a[23], a[26]); SQRADDAC(a[24], a[25]); SQRADDDB; 
9992
+   COMBA_STORE(b[49]);
9993
+
9994
+   /* output 50 */
9995
+   CARRY_FORWARD;
9996
+   SQRADDSC(a[0], a[50]); SQRADDAC(a[1], a[49]); SQRADDAC(a[2], a[48]); SQRADDAC(a[3], a[47]); SQRADDAC(a[4], a[46]); SQRADDAC(a[5], a[45]); SQRADDAC(a[6], a[44]); SQRADDAC(a[7], a[43]); SQRADDAC(a[8], a[42]); SQRADDAC(a[9], a[41]); SQRADDAC(a[10], a[40]); SQRADDAC(a[11], a[39]); SQRADDAC(a[12], a[38]); SQRADDAC(a[13], a[37]); SQRADDAC(a[14], a[36]); SQRADDAC(a[15], a[35]); SQRADDAC(a[16], a[34]); SQRADDAC(a[17], a[33]); SQRADDAC(a[18], a[32]); SQRADDAC(a[19], a[31]); SQRADDAC(a[20], a[30]); SQRADDAC(a[21], a[29]); SQRADDAC(a[22], a[28]); SQRADDAC(a[23], a[27]); SQRADDAC(a[24], a[26]); SQRADDDB; SQRADD(a[25], a[25]); 
9997
+   COMBA_STORE(b[50]);
9998
+
9999
+   /* output 51 */
10000
+   CARRY_FORWARD;
10001
+   SQRADDSC(a[0], a[51]); SQRADDAC(a[1], a[50]); SQRADDAC(a[2], a[49]); SQRADDAC(a[3], a[48]); SQRADDAC(a[4], a[47]); SQRADDAC(a[5], a[46]); SQRADDAC(a[6], a[45]); SQRADDAC(a[7], a[44]); SQRADDAC(a[8], a[43]); SQRADDAC(a[9], a[42]); SQRADDAC(a[10], a[41]); SQRADDAC(a[11], a[40]); SQRADDAC(a[12], a[39]); SQRADDAC(a[13], a[38]); SQRADDAC(a[14], a[37]); SQRADDAC(a[15], a[36]); SQRADDAC(a[16], a[35]); SQRADDAC(a[17], a[34]); SQRADDAC(a[18], a[33]); SQRADDAC(a[19], a[32]); SQRADDAC(a[20], a[31]); SQRADDAC(a[21], a[30]); SQRADDAC(a[22], a[29]); SQRADDAC(a[23], a[28]); SQRADDAC(a[24], a[27]); SQRADDAC(a[25], a[26]); SQRADDDB; 
10002
+   COMBA_STORE(b[51]);
10003
+
10004
+   /* output 52 */
10005
+   CARRY_FORWARD;
10006
+   SQRADDSC(a[0], a[52]); SQRADDAC(a[1], a[51]); SQRADDAC(a[2], a[50]); SQRADDAC(a[3], a[49]); SQRADDAC(a[4], a[48]); SQRADDAC(a[5], a[47]); SQRADDAC(a[6], a[46]); SQRADDAC(a[7], a[45]); SQRADDAC(a[8], a[44]); SQRADDAC(a[9], a[43]); SQRADDAC(a[10], a[42]); SQRADDAC(a[11], a[41]); SQRADDAC(a[12], a[40]); SQRADDAC(a[13], a[39]); SQRADDAC(a[14], a[38]); SQRADDAC(a[15], a[37]); SQRADDAC(a[16], a[36]); SQRADDAC(a[17], a[35]); SQRADDAC(a[18], a[34]); SQRADDAC(a[19], a[33]); SQRADDAC(a[20], a[32]); SQRADDAC(a[21], a[31]); SQRADDAC(a[22], a[30]); SQRADDAC(a[23], a[29]); SQRADDAC(a[24], a[28]); SQRADDAC(a[25], a[27]); SQRADDDB; SQRADD(a[26], a[26]); 
10007
+   COMBA_STORE(b[52]);
10008
+
10009
+   /* output 53 */
10010
+   CARRY_FORWARD;
10011
+   SQRADDSC(a[0], a[53]); SQRADDAC(a[1], a[52]); SQRADDAC(a[2], a[51]); SQRADDAC(a[3], a[50]); SQRADDAC(a[4], a[49]); SQRADDAC(a[5], a[48]); SQRADDAC(a[6], a[47]); SQRADDAC(a[7], a[46]); SQRADDAC(a[8], a[45]); SQRADDAC(a[9], a[44]); SQRADDAC(a[10], a[43]); SQRADDAC(a[11], a[42]); SQRADDAC(a[12], a[41]); SQRADDAC(a[13], a[40]); SQRADDAC(a[14], a[39]); SQRADDAC(a[15], a[38]); SQRADDAC(a[16], a[37]); SQRADDAC(a[17], a[36]); SQRADDAC(a[18], a[35]); SQRADDAC(a[19], a[34]); SQRADDAC(a[20], a[33]); SQRADDAC(a[21], a[32]); SQRADDAC(a[22], a[31]); SQRADDAC(a[23], a[30]); SQRADDAC(a[24], a[29]); SQRADDAC(a[25], a[28]); SQRADDAC(a[26], a[27]); SQRADDDB; 
10012
+   COMBA_STORE(b[53]);
10013
+
10014
+   /* output 54 */
10015
+   CARRY_FORWARD;
10016
+   SQRADDSC(a[0], a[54]); SQRADDAC(a[1], a[53]); SQRADDAC(a[2], a[52]); SQRADDAC(a[3], a[51]); SQRADDAC(a[4], a[50]); SQRADDAC(a[5], a[49]); SQRADDAC(a[6], a[48]); SQRADDAC(a[7], a[47]); SQRADDAC(a[8], a[46]); SQRADDAC(a[9], a[45]); SQRADDAC(a[10], a[44]); SQRADDAC(a[11], a[43]); SQRADDAC(a[12], a[42]); SQRADDAC(a[13], a[41]); SQRADDAC(a[14], a[40]); SQRADDAC(a[15], a[39]); SQRADDAC(a[16], a[38]); SQRADDAC(a[17], a[37]); SQRADDAC(a[18], a[36]); SQRADDAC(a[19], a[35]); SQRADDAC(a[20], a[34]); SQRADDAC(a[21], a[33]); SQRADDAC(a[22], a[32]); SQRADDAC(a[23], a[31]); SQRADDAC(a[24], a[30]); SQRADDAC(a[25], a[29]); SQRADDAC(a[26], a[28]); SQRADDDB; SQRADD(a[27], a[27]); 
10017
+   COMBA_STORE(b[54]);
10018
+
10019
+   /* output 55 */
10020
+   CARRY_FORWARD;
10021
+   SQRADDSC(a[0], a[55]); SQRADDAC(a[1], a[54]); SQRADDAC(a[2], a[53]); SQRADDAC(a[3], a[52]); SQRADDAC(a[4], a[51]); SQRADDAC(a[5], a[50]); SQRADDAC(a[6], a[49]); SQRADDAC(a[7], a[48]); SQRADDAC(a[8], a[47]); SQRADDAC(a[9], a[46]); SQRADDAC(a[10], a[45]); SQRADDAC(a[11], a[44]); SQRADDAC(a[12], a[43]); SQRADDAC(a[13], a[42]); SQRADDAC(a[14], a[41]); SQRADDAC(a[15], a[40]); SQRADDAC(a[16], a[39]); SQRADDAC(a[17], a[38]); SQRADDAC(a[18], a[37]); SQRADDAC(a[19], a[36]); SQRADDAC(a[20], a[35]); SQRADDAC(a[21], a[34]); SQRADDAC(a[22], a[33]); SQRADDAC(a[23], a[32]); SQRADDAC(a[24], a[31]); SQRADDAC(a[25], a[30]); SQRADDAC(a[26], a[29]); SQRADDAC(a[27], a[28]); SQRADDDB; 
10022
+   COMBA_STORE(b[55]);
10023
+
10024
+   /* output 56 */
10025
+   CARRY_FORWARD;
10026
+   SQRADDSC(a[0], a[56]); SQRADDAC(a[1], a[55]); SQRADDAC(a[2], a[54]); SQRADDAC(a[3], a[53]); SQRADDAC(a[4], a[52]); SQRADDAC(a[5], a[51]); SQRADDAC(a[6], a[50]); SQRADDAC(a[7], a[49]); SQRADDAC(a[8], a[48]); SQRADDAC(a[9], a[47]); SQRADDAC(a[10], a[46]); SQRADDAC(a[11], a[45]); SQRADDAC(a[12], a[44]); SQRADDAC(a[13], a[43]); SQRADDAC(a[14], a[42]); SQRADDAC(a[15], a[41]); SQRADDAC(a[16], a[40]); SQRADDAC(a[17], a[39]); SQRADDAC(a[18], a[38]); SQRADDAC(a[19], a[37]); SQRADDAC(a[20], a[36]); SQRADDAC(a[21], a[35]); SQRADDAC(a[22], a[34]); SQRADDAC(a[23], a[33]); SQRADDAC(a[24], a[32]); SQRADDAC(a[25], a[31]); SQRADDAC(a[26], a[30]); SQRADDAC(a[27], a[29]); SQRADDDB; SQRADD(a[28], a[28]); 
10027
+   COMBA_STORE(b[56]);
10028
+
10029
+   /* output 57 */
10030
+   CARRY_FORWARD;
10031
+   SQRADDSC(a[0], a[57]); SQRADDAC(a[1], a[56]); SQRADDAC(a[2], a[55]); SQRADDAC(a[3], a[54]); SQRADDAC(a[4], a[53]); SQRADDAC(a[5], a[52]); SQRADDAC(a[6], a[51]); SQRADDAC(a[7], a[50]); SQRADDAC(a[8], a[49]); SQRADDAC(a[9], a[48]); SQRADDAC(a[10], a[47]); SQRADDAC(a[11], a[46]); SQRADDAC(a[12], a[45]); SQRADDAC(a[13], a[44]); SQRADDAC(a[14], a[43]); SQRADDAC(a[15], a[42]); SQRADDAC(a[16], a[41]); SQRADDAC(a[17], a[40]); SQRADDAC(a[18], a[39]); SQRADDAC(a[19], a[38]); SQRADDAC(a[20], a[37]); SQRADDAC(a[21], a[36]); SQRADDAC(a[22], a[35]); SQRADDAC(a[23], a[34]); SQRADDAC(a[24], a[33]); SQRADDAC(a[25], a[32]); SQRADDAC(a[26], a[31]); SQRADDAC(a[27], a[30]); SQRADDAC(a[28], a[29]); SQRADDDB; 
10032
+   COMBA_STORE(b[57]);
10033
+
10034
+   /* output 58 */
10035
+   CARRY_FORWARD;
10036
+   SQRADDSC(a[0], a[58]); SQRADDAC(a[1], a[57]); SQRADDAC(a[2], a[56]); SQRADDAC(a[3], a[55]); SQRADDAC(a[4], a[54]); SQRADDAC(a[5], a[53]); SQRADDAC(a[6], a[52]); SQRADDAC(a[7], a[51]); SQRADDAC(a[8], a[50]); SQRADDAC(a[9], a[49]); SQRADDAC(a[10], a[48]); SQRADDAC(a[11], a[47]); SQRADDAC(a[12], a[46]); SQRADDAC(a[13], a[45]); SQRADDAC(a[14], a[44]); SQRADDAC(a[15], a[43]); SQRADDAC(a[16], a[42]); SQRADDAC(a[17], a[41]); SQRADDAC(a[18], a[40]); SQRADDAC(a[19], a[39]); SQRADDAC(a[20], a[38]); SQRADDAC(a[21], a[37]); SQRADDAC(a[22], a[36]); SQRADDAC(a[23], a[35]); SQRADDAC(a[24], a[34]); SQRADDAC(a[25], a[33]); SQRADDAC(a[26], a[32]); SQRADDAC(a[27], a[31]); SQRADDAC(a[28], a[30]); SQRADDDB; SQRADD(a[29], a[29]); 
10037
+   COMBA_STORE(b[58]);
10038
+
10039
+   /* output 59 */
10040
+   CARRY_FORWARD;
10041
+   SQRADDSC(a[0], a[59]); SQRADDAC(a[1], a[58]); SQRADDAC(a[2], a[57]); SQRADDAC(a[3], a[56]); SQRADDAC(a[4], a[55]); SQRADDAC(a[5], a[54]); SQRADDAC(a[6], a[53]); SQRADDAC(a[7], a[52]); SQRADDAC(a[8], a[51]); SQRADDAC(a[9], a[50]); SQRADDAC(a[10], a[49]); SQRADDAC(a[11], a[48]); SQRADDAC(a[12], a[47]); SQRADDAC(a[13], a[46]); SQRADDAC(a[14], a[45]); SQRADDAC(a[15], a[44]); SQRADDAC(a[16], a[43]); SQRADDAC(a[17], a[42]); SQRADDAC(a[18], a[41]); SQRADDAC(a[19], a[40]); SQRADDAC(a[20], a[39]); SQRADDAC(a[21], a[38]); SQRADDAC(a[22], a[37]); SQRADDAC(a[23], a[36]); SQRADDAC(a[24], a[35]); SQRADDAC(a[25], a[34]); SQRADDAC(a[26], a[33]); SQRADDAC(a[27], a[32]); SQRADDAC(a[28], a[31]); SQRADDAC(a[29], a[30]); SQRADDDB; 
10042
+   COMBA_STORE(b[59]);
10043
+
10044
+   /* output 60 */
10045
+   CARRY_FORWARD;
10046
+   SQRADDSC(a[0], a[60]); SQRADDAC(a[1], a[59]); SQRADDAC(a[2], a[58]); SQRADDAC(a[3], a[57]); SQRADDAC(a[4], a[56]); SQRADDAC(a[5], a[55]); SQRADDAC(a[6], a[54]); SQRADDAC(a[7], a[53]); SQRADDAC(a[8], a[52]); SQRADDAC(a[9], a[51]); SQRADDAC(a[10], a[50]); SQRADDAC(a[11], a[49]); SQRADDAC(a[12], a[48]); SQRADDAC(a[13], a[47]); SQRADDAC(a[14], a[46]); SQRADDAC(a[15], a[45]); SQRADDAC(a[16], a[44]); SQRADDAC(a[17], a[43]); SQRADDAC(a[18], a[42]); SQRADDAC(a[19], a[41]); SQRADDAC(a[20], a[40]); SQRADDAC(a[21], a[39]); SQRADDAC(a[22], a[38]); SQRADDAC(a[23], a[37]); SQRADDAC(a[24], a[36]); SQRADDAC(a[25], a[35]); SQRADDAC(a[26], a[34]); SQRADDAC(a[27], a[33]); SQRADDAC(a[28], a[32]); SQRADDAC(a[29], a[31]); SQRADDDB; SQRADD(a[30], a[30]); 
10047
+   COMBA_STORE(b[60]);
10048
+
10049
+   /* output 61 */
10050
+   CARRY_FORWARD;
10051
+   SQRADDSC(a[0], a[61]); SQRADDAC(a[1], a[60]); SQRADDAC(a[2], a[59]); SQRADDAC(a[3], a[58]); SQRADDAC(a[4], a[57]); SQRADDAC(a[5], a[56]); SQRADDAC(a[6], a[55]); SQRADDAC(a[7], a[54]); SQRADDAC(a[8], a[53]); SQRADDAC(a[9], a[52]); SQRADDAC(a[10], a[51]); SQRADDAC(a[11], a[50]); SQRADDAC(a[12], a[49]); SQRADDAC(a[13], a[48]); SQRADDAC(a[14], a[47]); SQRADDAC(a[15], a[46]); SQRADDAC(a[16], a[45]); SQRADDAC(a[17], a[44]); SQRADDAC(a[18], a[43]); SQRADDAC(a[19], a[42]); SQRADDAC(a[20], a[41]); SQRADDAC(a[21], a[40]); SQRADDAC(a[22], a[39]); SQRADDAC(a[23], a[38]); SQRADDAC(a[24], a[37]); SQRADDAC(a[25], a[36]); SQRADDAC(a[26], a[35]); SQRADDAC(a[27], a[34]); SQRADDAC(a[28], a[33]); SQRADDAC(a[29], a[32]); SQRADDAC(a[30], a[31]); SQRADDDB; 
10052
+   COMBA_STORE(b[61]);
10053
+
10054
+   /* output 62 */
10055
+   CARRY_FORWARD;
10056
+   SQRADDSC(a[0], a[62]); SQRADDAC(a[1], a[61]); SQRADDAC(a[2], a[60]); SQRADDAC(a[3], a[59]); SQRADDAC(a[4], a[58]); SQRADDAC(a[5], a[57]); SQRADDAC(a[6], a[56]); SQRADDAC(a[7], a[55]); SQRADDAC(a[8], a[54]); SQRADDAC(a[9], a[53]); SQRADDAC(a[10], a[52]); SQRADDAC(a[11], a[51]); SQRADDAC(a[12], a[50]); SQRADDAC(a[13], a[49]); SQRADDAC(a[14], a[48]); SQRADDAC(a[15], a[47]); SQRADDAC(a[16], a[46]); SQRADDAC(a[17], a[45]); SQRADDAC(a[18], a[44]); SQRADDAC(a[19], a[43]); SQRADDAC(a[20], a[42]); SQRADDAC(a[21], a[41]); SQRADDAC(a[22], a[40]); SQRADDAC(a[23], a[39]); SQRADDAC(a[24], a[38]); SQRADDAC(a[25], a[37]); SQRADDAC(a[26], a[36]); SQRADDAC(a[27], a[35]); SQRADDAC(a[28], a[34]); SQRADDAC(a[29], a[33]); SQRADDAC(a[30], a[32]); SQRADDDB; SQRADD(a[31], a[31]); 
10057
+   COMBA_STORE(b[62]);
10058
+
10059
+   /* output 63 */
10060
+   CARRY_FORWARD;
10061
+   SQRADDSC(a[0], a[63]); SQRADDAC(a[1], a[62]); SQRADDAC(a[2], a[61]); SQRADDAC(a[3], a[60]); SQRADDAC(a[4], a[59]); SQRADDAC(a[5], a[58]); SQRADDAC(a[6], a[57]); SQRADDAC(a[7], a[56]); SQRADDAC(a[8], a[55]); SQRADDAC(a[9], a[54]); SQRADDAC(a[10], a[53]); SQRADDAC(a[11], a[52]); SQRADDAC(a[12], a[51]); SQRADDAC(a[13], a[50]); SQRADDAC(a[14], a[49]); SQRADDAC(a[15], a[48]); SQRADDAC(a[16], a[47]); SQRADDAC(a[17], a[46]); SQRADDAC(a[18], a[45]); SQRADDAC(a[19], a[44]); SQRADDAC(a[20], a[43]); SQRADDAC(a[21], a[42]); SQRADDAC(a[22], a[41]); SQRADDAC(a[23], a[40]); SQRADDAC(a[24], a[39]); SQRADDAC(a[25], a[38]); SQRADDAC(a[26], a[37]); SQRADDAC(a[27], a[36]); SQRADDAC(a[28], a[35]); SQRADDAC(a[29], a[34]); SQRADDAC(a[30], a[33]); SQRADDAC(a[31], a[32]); SQRADDDB; 
10062
+   COMBA_STORE(b[63]);
10063
+
10064
+   /* output 64 */
10065
+   CARRY_FORWARD;
10066
+   SQRADDSC(a[1], a[63]); SQRADDAC(a[2], a[62]); SQRADDAC(a[3], a[61]); SQRADDAC(a[4], a[60]); SQRADDAC(a[5], a[59]); SQRADDAC(a[6], a[58]); SQRADDAC(a[7], a[57]); SQRADDAC(a[8], a[56]); SQRADDAC(a[9], a[55]); SQRADDAC(a[10], a[54]); SQRADDAC(a[11], a[53]); SQRADDAC(a[12], a[52]); SQRADDAC(a[13], a[51]); SQRADDAC(a[14], a[50]); SQRADDAC(a[15], a[49]); SQRADDAC(a[16], a[48]); SQRADDAC(a[17], a[47]); SQRADDAC(a[18], a[46]); SQRADDAC(a[19], a[45]); SQRADDAC(a[20], a[44]); SQRADDAC(a[21], a[43]); SQRADDAC(a[22], a[42]); SQRADDAC(a[23], a[41]); SQRADDAC(a[24], a[40]); SQRADDAC(a[25], a[39]); SQRADDAC(a[26], a[38]); SQRADDAC(a[27], a[37]); SQRADDAC(a[28], a[36]); SQRADDAC(a[29], a[35]); SQRADDAC(a[30], a[34]); SQRADDAC(a[31], a[33]); SQRADDDB; SQRADD(a[32], a[32]); 
10067
+   COMBA_STORE(b[64]);
10068
+
10069
+   /* output 65 */
10070
+   CARRY_FORWARD;
10071
+   SQRADDSC(a[2], a[63]); SQRADDAC(a[3], a[62]); SQRADDAC(a[4], a[61]); SQRADDAC(a[5], a[60]); SQRADDAC(a[6], a[59]); SQRADDAC(a[7], a[58]); SQRADDAC(a[8], a[57]); SQRADDAC(a[9], a[56]); SQRADDAC(a[10], a[55]); SQRADDAC(a[11], a[54]); SQRADDAC(a[12], a[53]); SQRADDAC(a[13], a[52]); SQRADDAC(a[14], a[51]); SQRADDAC(a[15], a[50]); SQRADDAC(a[16], a[49]); SQRADDAC(a[17], a[48]); SQRADDAC(a[18], a[47]); SQRADDAC(a[19], a[46]); SQRADDAC(a[20], a[45]); SQRADDAC(a[21], a[44]); SQRADDAC(a[22], a[43]); SQRADDAC(a[23], a[42]); SQRADDAC(a[24], a[41]); SQRADDAC(a[25], a[40]); SQRADDAC(a[26], a[39]); SQRADDAC(a[27], a[38]); SQRADDAC(a[28], a[37]); SQRADDAC(a[29], a[36]); SQRADDAC(a[30], a[35]); SQRADDAC(a[31], a[34]); SQRADDAC(a[32], a[33]); SQRADDDB; 
10072
+   COMBA_STORE(b[65]);
10073
+
10074
+   /* output 66 */
10075
+   CARRY_FORWARD;
10076
+   SQRADDSC(a[3], a[63]); SQRADDAC(a[4], a[62]); SQRADDAC(a[5], a[61]); SQRADDAC(a[6], a[60]); SQRADDAC(a[7], a[59]); SQRADDAC(a[8], a[58]); SQRADDAC(a[9], a[57]); SQRADDAC(a[10], a[56]); SQRADDAC(a[11], a[55]); SQRADDAC(a[12], a[54]); SQRADDAC(a[13], a[53]); SQRADDAC(a[14], a[52]); SQRADDAC(a[15], a[51]); SQRADDAC(a[16], a[50]); SQRADDAC(a[17], a[49]); SQRADDAC(a[18], a[48]); SQRADDAC(a[19], a[47]); SQRADDAC(a[20], a[46]); SQRADDAC(a[21], a[45]); SQRADDAC(a[22], a[44]); SQRADDAC(a[23], a[43]); SQRADDAC(a[24], a[42]); SQRADDAC(a[25], a[41]); SQRADDAC(a[26], a[40]); SQRADDAC(a[27], a[39]); SQRADDAC(a[28], a[38]); SQRADDAC(a[29], a[37]); SQRADDAC(a[30], a[36]); SQRADDAC(a[31], a[35]); SQRADDAC(a[32], a[34]); SQRADDDB; SQRADD(a[33], a[33]); 
10077
+   COMBA_STORE(b[66]);
10078
+
10079
+   /* output 67 */
10080
+   CARRY_FORWARD;
10081
+   SQRADDSC(a[4], a[63]); SQRADDAC(a[5], a[62]); SQRADDAC(a[6], a[61]); SQRADDAC(a[7], a[60]); SQRADDAC(a[8], a[59]); SQRADDAC(a[9], a[58]); SQRADDAC(a[10], a[57]); SQRADDAC(a[11], a[56]); SQRADDAC(a[12], a[55]); SQRADDAC(a[13], a[54]); SQRADDAC(a[14], a[53]); SQRADDAC(a[15], a[52]); SQRADDAC(a[16], a[51]); SQRADDAC(a[17], a[50]); SQRADDAC(a[18], a[49]); SQRADDAC(a[19], a[48]); SQRADDAC(a[20], a[47]); SQRADDAC(a[21], a[46]); SQRADDAC(a[22], a[45]); SQRADDAC(a[23], a[44]); SQRADDAC(a[24], a[43]); SQRADDAC(a[25], a[42]); SQRADDAC(a[26], a[41]); SQRADDAC(a[27], a[40]); SQRADDAC(a[28], a[39]); SQRADDAC(a[29], a[38]); SQRADDAC(a[30], a[37]); SQRADDAC(a[31], a[36]); SQRADDAC(a[32], a[35]); SQRADDAC(a[33], a[34]); SQRADDDB; 
10082
+   COMBA_STORE(b[67]);
10083
+
10084
+   /* output 68 */
10085
+   CARRY_FORWARD;
10086
+   SQRADDSC(a[5], a[63]); SQRADDAC(a[6], a[62]); SQRADDAC(a[7], a[61]); SQRADDAC(a[8], a[60]); SQRADDAC(a[9], a[59]); SQRADDAC(a[10], a[58]); SQRADDAC(a[11], a[57]); SQRADDAC(a[12], a[56]); SQRADDAC(a[13], a[55]); SQRADDAC(a[14], a[54]); SQRADDAC(a[15], a[53]); SQRADDAC(a[16], a[52]); SQRADDAC(a[17], a[51]); SQRADDAC(a[18], a[50]); SQRADDAC(a[19], a[49]); SQRADDAC(a[20], a[48]); SQRADDAC(a[21], a[47]); SQRADDAC(a[22], a[46]); SQRADDAC(a[23], a[45]); SQRADDAC(a[24], a[44]); SQRADDAC(a[25], a[43]); SQRADDAC(a[26], a[42]); SQRADDAC(a[27], a[41]); SQRADDAC(a[28], a[40]); SQRADDAC(a[29], a[39]); SQRADDAC(a[30], a[38]); SQRADDAC(a[31], a[37]); SQRADDAC(a[32], a[36]); SQRADDAC(a[33], a[35]); SQRADDDB; SQRADD(a[34], a[34]); 
10087
+   COMBA_STORE(b[68]);
10088
+
10089
+   /* output 69 */
10090
+   CARRY_FORWARD;
10091
+   SQRADDSC(a[6], a[63]); SQRADDAC(a[7], a[62]); SQRADDAC(a[8], a[61]); SQRADDAC(a[9], a[60]); SQRADDAC(a[10], a[59]); SQRADDAC(a[11], a[58]); SQRADDAC(a[12], a[57]); SQRADDAC(a[13], a[56]); SQRADDAC(a[14], a[55]); SQRADDAC(a[15], a[54]); SQRADDAC(a[16], a[53]); SQRADDAC(a[17], a[52]); SQRADDAC(a[18], a[51]); SQRADDAC(a[19], a[50]); SQRADDAC(a[20], a[49]); SQRADDAC(a[21], a[48]); SQRADDAC(a[22], a[47]); SQRADDAC(a[23], a[46]); SQRADDAC(a[24], a[45]); SQRADDAC(a[25], a[44]); SQRADDAC(a[26], a[43]); SQRADDAC(a[27], a[42]); SQRADDAC(a[28], a[41]); SQRADDAC(a[29], a[40]); SQRADDAC(a[30], a[39]); SQRADDAC(a[31], a[38]); SQRADDAC(a[32], a[37]); SQRADDAC(a[33], a[36]); SQRADDAC(a[34], a[35]); SQRADDDB; 
10092
+   COMBA_STORE(b[69]);
10093
+
10094
+   /* output 70 */
10095
+   CARRY_FORWARD;
10096
+   SQRADDSC(a[7], a[63]); SQRADDAC(a[8], a[62]); SQRADDAC(a[9], a[61]); SQRADDAC(a[10], a[60]); SQRADDAC(a[11], a[59]); SQRADDAC(a[12], a[58]); SQRADDAC(a[13], a[57]); SQRADDAC(a[14], a[56]); SQRADDAC(a[15], a[55]); SQRADDAC(a[16], a[54]); SQRADDAC(a[17], a[53]); SQRADDAC(a[18], a[52]); SQRADDAC(a[19], a[51]); SQRADDAC(a[20], a[50]); SQRADDAC(a[21], a[49]); SQRADDAC(a[22], a[48]); SQRADDAC(a[23], a[47]); SQRADDAC(a[24], a[46]); SQRADDAC(a[25], a[45]); SQRADDAC(a[26], a[44]); SQRADDAC(a[27], a[43]); SQRADDAC(a[28], a[42]); SQRADDAC(a[29], a[41]); SQRADDAC(a[30], a[40]); SQRADDAC(a[31], a[39]); SQRADDAC(a[32], a[38]); SQRADDAC(a[33], a[37]); SQRADDAC(a[34], a[36]); SQRADDDB; SQRADD(a[35], a[35]); 
10097
+   COMBA_STORE(b[70]);
10098
+
10099
+   /* output 71 */
10100
+   CARRY_FORWARD;
10101
+   SQRADDSC(a[8], a[63]); SQRADDAC(a[9], a[62]); SQRADDAC(a[10], a[61]); SQRADDAC(a[11], a[60]); SQRADDAC(a[12], a[59]); SQRADDAC(a[13], a[58]); SQRADDAC(a[14], a[57]); SQRADDAC(a[15], a[56]); SQRADDAC(a[16], a[55]); SQRADDAC(a[17], a[54]); SQRADDAC(a[18], a[53]); SQRADDAC(a[19], a[52]); SQRADDAC(a[20], a[51]); SQRADDAC(a[21], a[50]); SQRADDAC(a[22], a[49]); SQRADDAC(a[23], a[48]); SQRADDAC(a[24], a[47]); SQRADDAC(a[25], a[46]); SQRADDAC(a[26], a[45]); SQRADDAC(a[27], a[44]); SQRADDAC(a[28], a[43]); SQRADDAC(a[29], a[42]); SQRADDAC(a[30], a[41]); SQRADDAC(a[31], a[40]); SQRADDAC(a[32], a[39]); SQRADDAC(a[33], a[38]); SQRADDAC(a[34], a[37]); SQRADDAC(a[35], a[36]); SQRADDDB; 
10102
+   COMBA_STORE(b[71]);
10103
+
10104
+   /* output 72 */
10105
+   CARRY_FORWARD;
10106
+   SQRADDSC(a[9], a[63]); SQRADDAC(a[10], a[62]); SQRADDAC(a[11], a[61]); SQRADDAC(a[12], a[60]); SQRADDAC(a[13], a[59]); SQRADDAC(a[14], a[58]); SQRADDAC(a[15], a[57]); SQRADDAC(a[16], a[56]); SQRADDAC(a[17], a[55]); SQRADDAC(a[18], a[54]); SQRADDAC(a[19], a[53]); SQRADDAC(a[20], a[52]); SQRADDAC(a[21], a[51]); SQRADDAC(a[22], a[50]); SQRADDAC(a[23], a[49]); SQRADDAC(a[24], a[48]); SQRADDAC(a[25], a[47]); SQRADDAC(a[26], a[46]); SQRADDAC(a[27], a[45]); SQRADDAC(a[28], a[44]); SQRADDAC(a[29], a[43]); SQRADDAC(a[30], a[42]); SQRADDAC(a[31], a[41]); SQRADDAC(a[32], a[40]); SQRADDAC(a[33], a[39]); SQRADDAC(a[34], a[38]); SQRADDAC(a[35], a[37]); SQRADDDB; SQRADD(a[36], a[36]); 
10107
+   COMBA_STORE(b[72]);
10108
+
10109
+   /* output 73 */
10110
+   CARRY_FORWARD;
10111
+   SQRADDSC(a[10], a[63]); SQRADDAC(a[11], a[62]); SQRADDAC(a[12], a[61]); SQRADDAC(a[13], a[60]); SQRADDAC(a[14], a[59]); SQRADDAC(a[15], a[58]); SQRADDAC(a[16], a[57]); SQRADDAC(a[17], a[56]); SQRADDAC(a[18], a[55]); SQRADDAC(a[19], a[54]); SQRADDAC(a[20], a[53]); SQRADDAC(a[21], a[52]); SQRADDAC(a[22], a[51]); SQRADDAC(a[23], a[50]); SQRADDAC(a[24], a[49]); SQRADDAC(a[25], a[48]); SQRADDAC(a[26], a[47]); SQRADDAC(a[27], a[46]); SQRADDAC(a[28], a[45]); SQRADDAC(a[29], a[44]); SQRADDAC(a[30], a[43]); SQRADDAC(a[31], a[42]); SQRADDAC(a[32], a[41]); SQRADDAC(a[33], a[40]); SQRADDAC(a[34], a[39]); SQRADDAC(a[35], a[38]); SQRADDAC(a[36], a[37]); SQRADDDB; 
10112
+   COMBA_STORE(b[73]);
10113
+
10114
+   /* output 74 */
10115
+   CARRY_FORWARD;
10116
+   SQRADDSC(a[11], a[63]); SQRADDAC(a[12], a[62]); SQRADDAC(a[13], a[61]); SQRADDAC(a[14], a[60]); SQRADDAC(a[15], a[59]); SQRADDAC(a[16], a[58]); SQRADDAC(a[17], a[57]); SQRADDAC(a[18], a[56]); SQRADDAC(a[19], a[55]); SQRADDAC(a[20], a[54]); SQRADDAC(a[21], a[53]); SQRADDAC(a[22], a[52]); SQRADDAC(a[23], a[51]); SQRADDAC(a[24], a[50]); SQRADDAC(a[25], a[49]); SQRADDAC(a[26], a[48]); SQRADDAC(a[27], a[47]); SQRADDAC(a[28], a[46]); SQRADDAC(a[29], a[45]); SQRADDAC(a[30], a[44]); SQRADDAC(a[31], a[43]); SQRADDAC(a[32], a[42]); SQRADDAC(a[33], a[41]); SQRADDAC(a[34], a[40]); SQRADDAC(a[35], a[39]); SQRADDAC(a[36], a[38]); SQRADDDB; SQRADD(a[37], a[37]); 
10117
+   COMBA_STORE(b[74]);
10118
+
10119
+   /* output 75 */
10120
+   CARRY_FORWARD;
10121
+   SQRADDSC(a[12], a[63]); SQRADDAC(a[13], a[62]); SQRADDAC(a[14], a[61]); SQRADDAC(a[15], a[60]); SQRADDAC(a[16], a[59]); SQRADDAC(a[17], a[58]); SQRADDAC(a[18], a[57]); SQRADDAC(a[19], a[56]); SQRADDAC(a[20], a[55]); SQRADDAC(a[21], a[54]); SQRADDAC(a[22], a[53]); SQRADDAC(a[23], a[52]); SQRADDAC(a[24], a[51]); SQRADDAC(a[25], a[50]); SQRADDAC(a[26], a[49]); SQRADDAC(a[27], a[48]); SQRADDAC(a[28], a[47]); SQRADDAC(a[29], a[46]); SQRADDAC(a[30], a[45]); SQRADDAC(a[31], a[44]); SQRADDAC(a[32], a[43]); SQRADDAC(a[33], a[42]); SQRADDAC(a[34], a[41]); SQRADDAC(a[35], a[40]); SQRADDAC(a[36], a[39]); SQRADDAC(a[37], a[38]); SQRADDDB; 
10122
+   COMBA_STORE(b[75]);
10123
+
10124
+   /* output 76 */
10125
+   CARRY_FORWARD;
10126
+   SQRADDSC(a[13], a[63]); SQRADDAC(a[14], a[62]); SQRADDAC(a[15], a[61]); SQRADDAC(a[16], a[60]); SQRADDAC(a[17], a[59]); SQRADDAC(a[18], a[58]); SQRADDAC(a[19], a[57]); SQRADDAC(a[20], a[56]); SQRADDAC(a[21], a[55]); SQRADDAC(a[22], a[54]); SQRADDAC(a[23], a[53]); SQRADDAC(a[24], a[52]); SQRADDAC(a[25], a[51]); SQRADDAC(a[26], a[50]); SQRADDAC(a[27], a[49]); SQRADDAC(a[28], a[48]); SQRADDAC(a[29], a[47]); SQRADDAC(a[30], a[46]); SQRADDAC(a[31], a[45]); SQRADDAC(a[32], a[44]); SQRADDAC(a[33], a[43]); SQRADDAC(a[34], a[42]); SQRADDAC(a[35], a[41]); SQRADDAC(a[36], a[40]); SQRADDAC(a[37], a[39]); SQRADDDB; SQRADD(a[38], a[38]); 
10127
+   COMBA_STORE(b[76]);
10128
+
10129
+   /* output 77 */
10130
+   CARRY_FORWARD;
10131
+   SQRADDSC(a[14], a[63]); SQRADDAC(a[15], a[62]); SQRADDAC(a[16], a[61]); SQRADDAC(a[17], a[60]); SQRADDAC(a[18], a[59]); SQRADDAC(a[19], a[58]); SQRADDAC(a[20], a[57]); SQRADDAC(a[21], a[56]); SQRADDAC(a[22], a[55]); SQRADDAC(a[23], a[54]); SQRADDAC(a[24], a[53]); SQRADDAC(a[25], a[52]); SQRADDAC(a[26], a[51]); SQRADDAC(a[27], a[50]); SQRADDAC(a[28], a[49]); SQRADDAC(a[29], a[48]); SQRADDAC(a[30], a[47]); SQRADDAC(a[31], a[46]); SQRADDAC(a[32], a[45]); SQRADDAC(a[33], a[44]); SQRADDAC(a[34], a[43]); SQRADDAC(a[35], a[42]); SQRADDAC(a[36], a[41]); SQRADDAC(a[37], a[40]); SQRADDAC(a[38], a[39]); SQRADDDB; 
10132
+   COMBA_STORE(b[77]);
10133
+
10134
+   /* output 78 */
10135
+   CARRY_FORWARD;
10136
+   SQRADDSC(a[15], a[63]); SQRADDAC(a[16], a[62]); SQRADDAC(a[17], a[61]); SQRADDAC(a[18], a[60]); SQRADDAC(a[19], a[59]); SQRADDAC(a[20], a[58]); SQRADDAC(a[21], a[57]); SQRADDAC(a[22], a[56]); SQRADDAC(a[23], a[55]); SQRADDAC(a[24], a[54]); SQRADDAC(a[25], a[53]); SQRADDAC(a[26], a[52]); SQRADDAC(a[27], a[51]); SQRADDAC(a[28], a[50]); SQRADDAC(a[29], a[49]); SQRADDAC(a[30], a[48]); SQRADDAC(a[31], a[47]); SQRADDAC(a[32], a[46]); SQRADDAC(a[33], a[45]); SQRADDAC(a[34], a[44]); SQRADDAC(a[35], a[43]); SQRADDAC(a[36], a[42]); SQRADDAC(a[37], a[41]); SQRADDAC(a[38], a[40]); SQRADDDB; SQRADD(a[39], a[39]); 
10137
+   COMBA_STORE(b[78]);
10138
+
10139
+   /* output 79 */
10140
+   CARRY_FORWARD;
10141
+   SQRADDSC(a[16], a[63]); SQRADDAC(a[17], a[62]); SQRADDAC(a[18], a[61]); SQRADDAC(a[19], a[60]); SQRADDAC(a[20], a[59]); SQRADDAC(a[21], a[58]); SQRADDAC(a[22], a[57]); SQRADDAC(a[23], a[56]); SQRADDAC(a[24], a[55]); SQRADDAC(a[25], a[54]); SQRADDAC(a[26], a[53]); SQRADDAC(a[27], a[52]); SQRADDAC(a[28], a[51]); SQRADDAC(a[29], a[50]); SQRADDAC(a[30], a[49]); SQRADDAC(a[31], a[48]); SQRADDAC(a[32], a[47]); SQRADDAC(a[33], a[46]); SQRADDAC(a[34], a[45]); SQRADDAC(a[35], a[44]); SQRADDAC(a[36], a[43]); SQRADDAC(a[37], a[42]); SQRADDAC(a[38], a[41]); SQRADDAC(a[39], a[40]); SQRADDDB; 
10142
+   COMBA_STORE(b[79]);
10143
+
10144
+   /* output 80 */
10145
+   CARRY_FORWARD;
10146
+   SQRADDSC(a[17], a[63]); SQRADDAC(a[18], a[62]); SQRADDAC(a[19], a[61]); SQRADDAC(a[20], a[60]); SQRADDAC(a[21], a[59]); SQRADDAC(a[22], a[58]); SQRADDAC(a[23], a[57]); SQRADDAC(a[24], a[56]); SQRADDAC(a[25], a[55]); SQRADDAC(a[26], a[54]); SQRADDAC(a[27], a[53]); SQRADDAC(a[28], a[52]); SQRADDAC(a[29], a[51]); SQRADDAC(a[30], a[50]); SQRADDAC(a[31], a[49]); SQRADDAC(a[32], a[48]); SQRADDAC(a[33], a[47]); SQRADDAC(a[34], a[46]); SQRADDAC(a[35], a[45]); SQRADDAC(a[36], a[44]); SQRADDAC(a[37], a[43]); SQRADDAC(a[38], a[42]); SQRADDAC(a[39], a[41]); SQRADDDB; SQRADD(a[40], a[40]); 
10147
+   COMBA_STORE(b[80]);
10148
+
10149
+   /* output 81 */
10150
+   CARRY_FORWARD;
10151
+   SQRADDSC(a[18], a[63]); SQRADDAC(a[19], a[62]); SQRADDAC(a[20], a[61]); SQRADDAC(a[21], a[60]); SQRADDAC(a[22], a[59]); SQRADDAC(a[23], a[58]); SQRADDAC(a[24], a[57]); SQRADDAC(a[25], a[56]); SQRADDAC(a[26], a[55]); SQRADDAC(a[27], a[54]); SQRADDAC(a[28], a[53]); SQRADDAC(a[29], a[52]); SQRADDAC(a[30], a[51]); SQRADDAC(a[31], a[50]); SQRADDAC(a[32], a[49]); SQRADDAC(a[33], a[48]); SQRADDAC(a[34], a[47]); SQRADDAC(a[35], a[46]); SQRADDAC(a[36], a[45]); SQRADDAC(a[37], a[44]); SQRADDAC(a[38], a[43]); SQRADDAC(a[39], a[42]); SQRADDAC(a[40], a[41]); SQRADDDB; 
10152
+   COMBA_STORE(b[81]);
10153
+
10154
+   /* output 82 */
10155
+   CARRY_FORWARD;
10156
+   SQRADDSC(a[19], a[63]); SQRADDAC(a[20], a[62]); SQRADDAC(a[21], a[61]); SQRADDAC(a[22], a[60]); SQRADDAC(a[23], a[59]); SQRADDAC(a[24], a[58]); SQRADDAC(a[25], a[57]); SQRADDAC(a[26], a[56]); SQRADDAC(a[27], a[55]); SQRADDAC(a[28], a[54]); SQRADDAC(a[29], a[53]); SQRADDAC(a[30], a[52]); SQRADDAC(a[31], a[51]); SQRADDAC(a[32], a[50]); SQRADDAC(a[33], a[49]); SQRADDAC(a[34], a[48]); SQRADDAC(a[35], a[47]); SQRADDAC(a[36], a[46]); SQRADDAC(a[37], a[45]); SQRADDAC(a[38], a[44]); SQRADDAC(a[39], a[43]); SQRADDAC(a[40], a[42]); SQRADDDB; SQRADD(a[41], a[41]); 
10157
+   COMBA_STORE(b[82]);
10158
+
10159
+   /* output 83 */
10160
+   CARRY_FORWARD;
10161
+   SQRADDSC(a[20], a[63]); SQRADDAC(a[21], a[62]); SQRADDAC(a[22], a[61]); SQRADDAC(a[23], a[60]); SQRADDAC(a[24], a[59]); SQRADDAC(a[25], a[58]); SQRADDAC(a[26], a[57]); SQRADDAC(a[27], a[56]); SQRADDAC(a[28], a[55]); SQRADDAC(a[29], a[54]); SQRADDAC(a[30], a[53]); SQRADDAC(a[31], a[52]); SQRADDAC(a[32], a[51]); SQRADDAC(a[33], a[50]); SQRADDAC(a[34], a[49]); SQRADDAC(a[35], a[48]); SQRADDAC(a[36], a[47]); SQRADDAC(a[37], a[46]); SQRADDAC(a[38], a[45]); SQRADDAC(a[39], a[44]); SQRADDAC(a[40], a[43]); SQRADDAC(a[41], a[42]); SQRADDDB; 
10162
+   COMBA_STORE(b[83]);
10163
+
10164
+   /* output 84 */
10165
+   CARRY_FORWARD;
10166
+   SQRADDSC(a[21], a[63]); SQRADDAC(a[22], a[62]); SQRADDAC(a[23], a[61]); SQRADDAC(a[24], a[60]); SQRADDAC(a[25], a[59]); SQRADDAC(a[26], a[58]); SQRADDAC(a[27], a[57]); SQRADDAC(a[28], a[56]); SQRADDAC(a[29], a[55]); SQRADDAC(a[30], a[54]); SQRADDAC(a[31], a[53]); SQRADDAC(a[32], a[52]); SQRADDAC(a[33], a[51]); SQRADDAC(a[34], a[50]); SQRADDAC(a[35], a[49]); SQRADDAC(a[36], a[48]); SQRADDAC(a[37], a[47]); SQRADDAC(a[38], a[46]); SQRADDAC(a[39], a[45]); SQRADDAC(a[40], a[44]); SQRADDAC(a[41], a[43]); SQRADDDB; SQRADD(a[42], a[42]); 
10167
+   COMBA_STORE(b[84]);
10168
+
10169
+   /* output 85 */
10170
+   CARRY_FORWARD;
10171
+   SQRADDSC(a[22], a[63]); SQRADDAC(a[23], a[62]); SQRADDAC(a[24], a[61]); SQRADDAC(a[25], a[60]); SQRADDAC(a[26], a[59]); SQRADDAC(a[27], a[58]); SQRADDAC(a[28], a[57]); SQRADDAC(a[29], a[56]); SQRADDAC(a[30], a[55]); SQRADDAC(a[31], a[54]); SQRADDAC(a[32], a[53]); SQRADDAC(a[33], a[52]); SQRADDAC(a[34], a[51]); SQRADDAC(a[35], a[50]); SQRADDAC(a[36], a[49]); SQRADDAC(a[37], a[48]); SQRADDAC(a[38], a[47]); SQRADDAC(a[39], a[46]); SQRADDAC(a[40], a[45]); SQRADDAC(a[41], a[44]); SQRADDAC(a[42], a[43]); SQRADDDB; 
10172
+   COMBA_STORE(b[85]);
10173
+
10174
+   /* output 86 */
10175
+   CARRY_FORWARD;
10176
+   SQRADDSC(a[23], a[63]); SQRADDAC(a[24], a[62]); SQRADDAC(a[25], a[61]); SQRADDAC(a[26], a[60]); SQRADDAC(a[27], a[59]); SQRADDAC(a[28], a[58]); SQRADDAC(a[29], a[57]); SQRADDAC(a[30], a[56]); SQRADDAC(a[31], a[55]); SQRADDAC(a[32], a[54]); SQRADDAC(a[33], a[53]); SQRADDAC(a[34], a[52]); SQRADDAC(a[35], a[51]); SQRADDAC(a[36], a[50]); SQRADDAC(a[37], a[49]); SQRADDAC(a[38], a[48]); SQRADDAC(a[39], a[47]); SQRADDAC(a[40], a[46]); SQRADDAC(a[41], a[45]); SQRADDAC(a[42], a[44]); SQRADDDB; SQRADD(a[43], a[43]); 
10177
+   COMBA_STORE(b[86]);
10178
+
10179
+   /* output 87 */
10180
+   CARRY_FORWARD;
10181
+   SQRADDSC(a[24], a[63]); SQRADDAC(a[25], a[62]); SQRADDAC(a[26], a[61]); SQRADDAC(a[27], a[60]); SQRADDAC(a[28], a[59]); SQRADDAC(a[29], a[58]); SQRADDAC(a[30], a[57]); SQRADDAC(a[31], a[56]); SQRADDAC(a[32], a[55]); SQRADDAC(a[33], a[54]); SQRADDAC(a[34], a[53]); SQRADDAC(a[35], a[52]); SQRADDAC(a[36], a[51]); SQRADDAC(a[37], a[50]); SQRADDAC(a[38], a[49]); SQRADDAC(a[39], a[48]); SQRADDAC(a[40], a[47]); SQRADDAC(a[41], a[46]); SQRADDAC(a[42], a[45]); SQRADDAC(a[43], a[44]); SQRADDDB; 
10182
+   COMBA_STORE(b[87]);
10183
+
10184
+   /* output 88 */
10185
+   CARRY_FORWARD;
10186
+   SQRADDSC(a[25], a[63]); SQRADDAC(a[26], a[62]); SQRADDAC(a[27], a[61]); SQRADDAC(a[28], a[60]); SQRADDAC(a[29], a[59]); SQRADDAC(a[30], a[58]); SQRADDAC(a[31], a[57]); SQRADDAC(a[32], a[56]); SQRADDAC(a[33], a[55]); SQRADDAC(a[34], a[54]); SQRADDAC(a[35], a[53]); SQRADDAC(a[36], a[52]); SQRADDAC(a[37], a[51]); SQRADDAC(a[38], a[50]); SQRADDAC(a[39], a[49]); SQRADDAC(a[40], a[48]); SQRADDAC(a[41], a[47]); SQRADDAC(a[42], a[46]); SQRADDAC(a[43], a[45]); SQRADDDB; SQRADD(a[44], a[44]); 
10187
+   COMBA_STORE(b[88]);
10188
+
10189
+   /* output 89 */
10190
+   CARRY_FORWARD;
10191
+   SQRADDSC(a[26], a[63]); SQRADDAC(a[27], a[62]); SQRADDAC(a[28], a[61]); SQRADDAC(a[29], a[60]); SQRADDAC(a[30], a[59]); SQRADDAC(a[31], a[58]); SQRADDAC(a[32], a[57]); SQRADDAC(a[33], a[56]); SQRADDAC(a[34], a[55]); SQRADDAC(a[35], a[54]); SQRADDAC(a[36], a[53]); SQRADDAC(a[37], a[52]); SQRADDAC(a[38], a[51]); SQRADDAC(a[39], a[50]); SQRADDAC(a[40], a[49]); SQRADDAC(a[41], a[48]); SQRADDAC(a[42], a[47]); SQRADDAC(a[43], a[46]); SQRADDAC(a[44], a[45]); SQRADDDB; 
10192
+   COMBA_STORE(b[89]);
10193
+
10194
+   /* output 90 */
10195
+   CARRY_FORWARD;
10196
+   SQRADDSC(a[27], a[63]); SQRADDAC(a[28], a[62]); SQRADDAC(a[29], a[61]); SQRADDAC(a[30], a[60]); SQRADDAC(a[31], a[59]); SQRADDAC(a[32], a[58]); SQRADDAC(a[33], a[57]); SQRADDAC(a[34], a[56]); SQRADDAC(a[35], a[55]); SQRADDAC(a[36], a[54]); SQRADDAC(a[37], a[53]); SQRADDAC(a[38], a[52]); SQRADDAC(a[39], a[51]); SQRADDAC(a[40], a[50]); SQRADDAC(a[41], a[49]); SQRADDAC(a[42], a[48]); SQRADDAC(a[43], a[47]); SQRADDAC(a[44], a[46]); SQRADDDB; SQRADD(a[45], a[45]); 
10197
+   COMBA_STORE(b[90]);
10198
+
10199
+   /* output 91 */
10200
+   CARRY_FORWARD;
10201
+   SQRADDSC(a[28], a[63]); SQRADDAC(a[29], a[62]); SQRADDAC(a[30], a[61]); SQRADDAC(a[31], a[60]); SQRADDAC(a[32], a[59]); SQRADDAC(a[33], a[58]); SQRADDAC(a[34], a[57]); SQRADDAC(a[35], a[56]); SQRADDAC(a[36], a[55]); SQRADDAC(a[37], a[54]); SQRADDAC(a[38], a[53]); SQRADDAC(a[39], a[52]); SQRADDAC(a[40], a[51]); SQRADDAC(a[41], a[50]); SQRADDAC(a[42], a[49]); SQRADDAC(a[43], a[48]); SQRADDAC(a[44], a[47]); SQRADDAC(a[45], a[46]); SQRADDDB; 
10202
+   COMBA_STORE(b[91]);
10203
+
10204
+   /* output 92 */
10205
+   CARRY_FORWARD;
10206
+   SQRADDSC(a[29], a[63]); SQRADDAC(a[30], a[62]); SQRADDAC(a[31], a[61]); SQRADDAC(a[32], a[60]); SQRADDAC(a[33], a[59]); SQRADDAC(a[34], a[58]); SQRADDAC(a[35], a[57]); SQRADDAC(a[36], a[56]); SQRADDAC(a[37], a[55]); SQRADDAC(a[38], a[54]); SQRADDAC(a[39], a[53]); SQRADDAC(a[40], a[52]); SQRADDAC(a[41], a[51]); SQRADDAC(a[42], a[50]); SQRADDAC(a[43], a[49]); SQRADDAC(a[44], a[48]); SQRADDAC(a[45], a[47]); SQRADDDB; SQRADD(a[46], a[46]); 
10207
+   COMBA_STORE(b[92]);
10208
+
10209
+   /* output 93 */
10210
+   CARRY_FORWARD;
10211
+   SQRADDSC(a[30], a[63]); SQRADDAC(a[31], a[62]); SQRADDAC(a[32], a[61]); SQRADDAC(a[33], a[60]); SQRADDAC(a[34], a[59]); SQRADDAC(a[35], a[58]); SQRADDAC(a[36], a[57]); SQRADDAC(a[37], a[56]); SQRADDAC(a[38], a[55]); SQRADDAC(a[39], a[54]); SQRADDAC(a[40], a[53]); SQRADDAC(a[41], a[52]); SQRADDAC(a[42], a[51]); SQRADDAC(a[43], a[50]); SQRADDAC(a[44], a[49]); SQRADDAC(a[45], a[48]); SQRADDAC(a[46], a[47]); SQRADDDB; 
10212
+   COMBA_STORE(b[93]);
10213
+
10214
+   /* output 94 */
10215
+   CARRY_FORWARD;
10216
+   SQRADDSC(a[31], a[63]); SQRADDAC(a[32], a[62]); SQRADDAC(a[33], a[61]); SQRADDAC(a[34], a[60]); SQRADDAC(a[35], a[59]); SQRADDAC(a[36], a[58]); SQRADDAC(a[37], a[57]); SQRADDAC(a[38], a[56]); SQRADDAC(a[39], a[55]); SQRADDAC(a[40], a[54]); SQRADDAC(a[41], a[53]); SQRADDAC(a[42], a[52]); SQRADDAC(a[43], a[51]); SQRADDAC(a[44], a[50]); SQRADDAC(a[45], a[49]); SQRADDAC(a[46], a[48]); SQRADDDB; SQRADD(a[47], a[47]); 
10217
+   COMBA_STORE(b[94]);
10218
+
10219
+   /* output 95 */
10220
+   CARRY_FORWARD;
10221
+   SQRADDSC(a[32], a[63]); SQRADDAC(a[33], a[62]); SQRADDAC(a[34], a[61]); SQRADDAC(a[35], a[60]); SQRADDAC(a[36], a[59]); SQRADDAC(a[37], a[58]); SQRADDAC(a[38], a[57]); SQRADDAC(a[39], a[56]); SQRADDAC(a[40], a[55]); SQRADDAC(a[41], a[54]); SQRADDAC(a[42], a[53]); SQRADDAC(a[43], a[52]); SQRADDAC(a[44], a[51]); SQRADDAC(a[45], a[50]); SQRADDAC(a[46], a[49]); SQRADDAC(a[47], a[48]); SQRADDDB; 
10222
+   COMBA_STORE(b[95]);
10223
+
10224
+   /* output 96 */
10225
+   CARRY_FORWARD;
10226
+   SQRADDSC(a[33], a[63]); SQRADDAC(a[34], a[62]); SQRADDAC(a[35], a[61]); SQRADDAC(a[36], a[60]); SQRADDAC(a[37], a[59]); SQRADDAC(a[38], a[58]); SQRADDAC(a[39], a[57]); SQRADDAC(a[40], a[56]); SQRADDAC(a[41], a[55]); SQRADDAC(a[42], a[54]); SQRADDAC(a[43], a[53]); SQRADDAC(a[44], a[52]); SQRADDAC(a[45], a[51]); SQRADDAC(a[46], a[50]); SQRADDAC(a[47], a[49]); SQRADDDB; SQRADD(a[48], a[48]); 
10227
+   COMBA_STORE(b[96]);
10228
+
10229
+   /* output 97 */
10230
+   CARRY_FORWARD;
10231
+   SQRADDSC(a[34], a[63]); SQRADDAC(a[35], a[62]); SQRADDAC(a[36], a[61]); SQRADDAC(a[37], a[60]); SQRADDAC(a[38], a[59]); SQRADDAC(a[39], a[58]); SQRADDAC(a[40], a[57]); SQRADDAC(a[41], a[56]); SQRADDAC(a[42], a[55]); SQRADDAC(a[43], a[54]); SQRADDAC(a[44], a[53]); SQRADDAC(a[45], a[52]); SQRADDAC(a[46], a[51]); SQRADDAC(a[47], a[50]); SQRADDAC(a[48], a[49]); SQRADDDB; 
10232
+   COMBA_STORE(b[97]);
10233
+
10234
+   /* output 98 */
10235
+   CARRY_FORWARD;
10236
+   SQRADDSC(a[35], a[63]); SQRADDAC(a[36], a[62]); SQRADDAC(a[37], a[61]); SQRADDAC(a[38], a[60]); SQRADDAC(a[39], a[59]); SQRADDAC(a[40], a[58]); SQRADDAC(a[41], a[57]); SQRADDAC(a[42], a[56]); SQRADDAC(a[43], a[55]); SQRADDAC(a[44], a[54]); SQRADDAC(a[45], a[53]); SQRADDAC(a[46], a[52]); SQRADDAC(a[47], a[51]); SQRADDAC(a[48], a[50]); SQRADDDB; SQRADD(a[49], a[49]); 
10237
+   COMBA_STORE(b[98]);
10238
+
10239
+   /* output 99 */
10240
+   CARRY_FORWARD;
10241
+   SQRADDSC(a[36], a[63]); SQRADDAC(a[37], a[62]); SQRADDAC(a[38], a[61]); SQRADDAC(a[39], a[60]); SQRADDAC(a[40], a[59]); SQRADDAC(a[41], a[58]); SQRADDAC(a[42], a[57]); SQRADDAC(a[43], a[56]); SQRADDAC(a[44], a[55]); SQRADDAC(a[45], a[54]); SQRADDAC(a[46], a[53]); SQRADDAC(a[47], a[52]); SQRADDAC(a[48], a[51]); SQRADDAC(a[49], a[50]); SQRADDDB; 
10242
+   COMBA_STORE(b[99]);
10243
+
10244
+   /* output 100 */
10245
+   CARRY_FORWARD;
10246
+   SQRADDSC(a[37], a[63]); SQRADDAC(a[38], a[62]); SQRADDAC(a[39], a[61]); SQRADDAC(a[40], a[60]); SQRADDAC(a[41], a[59]); SQRADDAC(a[42], a[58]); SQRADDAC(a[43], a[57]); SQRADDAC(a[44], a[56]); SQRADDAC(a[45], a[55]); SQRADDAC(a[46], a[54]); SQRADDAC(a[47], a[53]); SQRADDAC(a[48], a[52]); SQRADDAC(a[49], a[51]); SQRADDDB; SQRADD(a[50], a[50]); 
10247
+   COMBA_STORE(b[100]);
10248
+
10249
+   /* output 101 */
10250
+   CARRY_FORWARD;
10251
+   SQRADDSC(a[38], a[63]); SQRADDAC(a[39], a[62]); SQRADDAC(a[40], a[61]); SQRADDAC(a[41], a[60]); SQRADDAC(a[42], a[59]); SQRADDAC(a[43], a[58]); SQRADDAC(a[44], a[57]); SQRADDAC(a[45], a[56]); SQRADDAC(a[46], a[55]); SQRADDAC(a[47], a[54]); SQRADDAC(a[48], a[53]); SQRADDAC(a[49], a[52]); SQRADDAC(a[50], a[51]); SQRADDDB; 
10252
+   COMBA_STORE(b[101]);
10253
+
10254
+   /* output 102 */
10255
+   CARRY_FORWARD;
10256
+   SQRADDSC(a[39], a[63]); SQRADDAC(a[40], a[62]); SQRADDAC(a[41], a[61]); SQRADDAC(a[42], a[60]); SQRADDAC(a[43], a[59]); SQRADDAC(a[44], a[58]); SQRADDAC(a[45], a[57]); SQRADDAC(a[46], a[56]); SQRADDAC(a[47], a[55]); SQRADDAC(a[48], a[54]); SQRADDAC(a[49], a[53]); SQRADDAC(a[50], a[52]); SQRADDDB; SQRADD(a[51], a[51]); 
10257
+   COMBA_STORE(b[102]);
10258
+
10259
+   /* output 103 */
10260
+   CARRY_FORWARD;
10261
+   SQRADDSC(a[40], a[63]); SQRADDAC(a[41], a[62]); SQRADDAC(a[42], a[61]); SQRADDAC(a[43], a[60]); SQRADDAC(a[44], a[59]); SQRADDAC(a[45], a[58]); SQRADDAC(a[46], a[57]); SQRADDAC(a[47], a[56]); SQRADDAC(a[48], a[55]); SQRADDAC(a[49], a[54]); SQRADDAC(a[50], a[53]); SQRADDAC(a[51], a[52]); SQRADDDB; 
10262
+   COMBA_STORE(b[103]);
10263
+
10264
+   /* output 104 */
10265
+   CARRY_FORWARD;
10266
+   SQRADDSC(a[41], a[63]); SQRADDAC(a[42], a[62]); SQRADDAC(a[43], a[61]); SQRADDAC(a[44], a[60]); SQRADDAC(a[45], a[59]); SQRADDAC(a[46], a[58]); SQRADDAC(a[47], a[57]); SQRADDAC(a[48], a[56]); SQRADDAC(a[49], a[55]); SQRADDAC(a[50], a[54]); SQRADDAC(a[51], a[53]); SQRADDDB; SQRADD(a[52], a[52]); 
10267
+   COMBA_STORE(b[104]);
10268
+
10269
+   /* output 105 */
10270
+   CARRY_FORWARD;
10271
+   SQRADDSC(a[42], a[63]); SQRADDAC(a[43], a[62]); SQRADDAC(a[44], a[61]); SQRADDAC(a[45], a[60]); SQRADDAC(a[46], a[59]); SQRADDAC(a[47], a[58]); SQRADDAC(a[48], a[57]); SQRADDAC(a[49], a[56]); SQRADDAC(a[50], a[55]); SQRADDAC(a[51], a[54]); SQRADDAC(a[52], a[53]); SQRADDDB; 
10272
+   COMBA_STORE(b[105]);
10273
+
10274
+   /* output 106 */
10275
+   CARRY_FORWARD;
10276
+   SQRADDSC(a[43], a[63]); SQRADDAC(a[44], a[62]); SQRADDAC(a[45], a[61]); SQRADDAC(a[46], a[60]); SQRADDAC(a[47], a[59]); SQRADDAC(a[48], a[58]); SQRADDAC(a[49], a[57]); SQRADDAC(a[50], a[56]); SQRADDAC(a[51], a[55]); SQRADDAC(a[52], a[54]); SQRADDDB; SQRADD(a[53], a[53]); 
10277
+   COMBA_STORE(b[106]);
10278
+
10279
+   /* output 107 */
10280
+   CARRY_FORWARD;
10281
+   SQRADDSC(a[44], a[63]); SQRADDAC(a[45], a[62]); SQRADDAC(a[46], a[61]); SQRADDAC(a[47], a[60]); SQRADDAC(a[48], a[59]); SQRADDAC(a[49], a[58]); SQRADDAC(a[50], a[57]); SQRADDAC(a[51], a[56]); SQRADDAC(a[52], a[55]); SQRADDAC(a[53], a[54]); SQRADDDB; 
10282
+   COMBA_STORE(b[107]);
10283
+
10284
+   /* output 108 */
10285
+   CARRY_FORWARD;
10286
+   SQRADDSC(a[45], a[63]); SQRADDAC(a[46], a[62]); SQRADDAC(a[47], a[61]); SQRADDAC(a[48], a[60]); SQRADDAC(a[49], a[59]); SQRADDAC(a[50], a[58]); SQRADDAC(a[51], a[57]); SQRADDAC(a[52], a[56]); SQRADDAC(a[53], a[55]); SQRADDDB; SQRADD(a[54], a[54]); 
10287
+   COMBA_STORE(b[108]);
10288
+
10289
+   /* output 109 */
10290
+   CARRY_FORWARD;
10291
+   SQRADDSC(a[46], a[63]); SQRADDAC(a[47], a[62]); SQRADDAC(a[48], a[61]); SQRADDAC(a[49], a[60]); SQRADDAC(a[50], a[59]); SQRADDAC(a[51], a[58]); SQRADDAC(a[52], a[57]); SQRADDAC(a[53], a[56]); SQRADDAC(a[54], a[55]); SQRADDDB; 
10292
+   COMBA_STORE(b[109]);
10293
+
10294
+   /* output 110 */
10295
+   CARRY_FORWARD;
10296
+   SQRADDSC(a[47], a[63]); SQRADDAC(a[48], a[62]); SQRADDAC(a[49], a[61]); SQRADDAC(a[50], a[60]); SQRADDAC(a[51], a[59]); SQRADDAC(a[52], a[58]); SQRADDAC(a[53], a[57]); SQRADDAC(a[54], a[56]); SQRADDDB; SQRADD(a[55], a[55]); 
10297
+   COMBA_STORE(b[110]);
10298
+
10299
+   /* output 111 */
10300
+   CARRY_FORWARD;
10301
+   SQRADDSC(a[48], a[63]); SQRADDAC(a[49], a[62]); SQRADDAC(a[50], a[61]); SQRADDAC(a[51], a[60]); SQRADDAC(a[52], a[59]); SQRADDAC(a[53], a[58]); SQRADDAC(a[54], a[57]); SQRADDAC(a[55], a[56]); SQRADDDB; 
10302
+   COMBA_STORE(b[111]);
10303
+
10304
+   /* output 112 */
10305
+   CARRY_FORWARD;
10306
+   SQRADDSC(a[49], a[63]); SQRADDAC(a[50], a[62]); SQRADDAC(a[51], a[61]); SQRADDAC(a[52], a[60]); SQRADDAC(a[53], a[59]); SQRADDAC(a[54], a[58]); SQRADDAC(a[55], a[57]); SQRADDDB; SQRADD(a[56], a[56]); 
10307
+   COMBA_STORE(b[112]);
10308
+
10309
+   /* output 113 */
10310
+   CARRY_FORWARD;
10311
+   SQRADDSC(a[50], a[63]); SQRADDAC(a[51], a[62]); SQRADDAC(a[52], a[61]); SQRADDAC(a[53], a[60]); SQRADDAC(a[54], a[59]); SQRADDAC(a[55], a[58]); SQRADDAC(a[56], a[57]); SQRADDDB; 
10312
+   COMBA_STORE(b[113]);
10313
+
10314
+   /* output 114 */
10315
+   CARRY_FORWARD;
10316
+   SQRADDSC(a[51], a[63]); SQRADDAC(a[52], a[62]); SQRADDAC(a[53], a[61]); SQRADDAC(a[54], a[60]); SQRADDAC(a[55], a[59]); SQRADDAC(a[56], a[58]); SQRADDDB; SQRADD(a[57], a[57]); 
10317
+   COMBA_STORE(b[114]);
10318
+
10319
+   /* output 115 */
10320
+   CARRY_FORWARD;
10321
+   SQRADDSC(a[52], a[63]); SQRADDAC(a[53], a[62]); SQRADDAC(a[54], a[61]); SQRADDAC(a[55], a[60]); SQRADDAC(a[56], a[59]); SQRADDAC(a[57], a[58]); SQRADDDB; 
10322
+   COMBA_STORE(b[115]);
10323
+
10324
+   /* output 116 */
10325
+   CARRY_FORWARD;
10326
+   SQRADDSC(a[53], a[63]); SQRADDAC(a[54], a[62]); SQRADDAC(a[55], a[61]); SQRADDAC(a[56], a[60]); SQRADDAC(a[57], a[59]); SQRADDDB; SQRADD(a[58], a[58]); 
10327
+   COMBA_STORE(b[116]);
10328
+
10329
+   /* output 117 */
10330
+   CARRY_FORWARD;
10331
+   SQRADDSC(a[54], a[63]); SQRADDAC(a[55], a[62]); SQRADDAC(a[56], a[61]); SQRADDAC(a[57], a[60]); SQRADDAC(a[58], a[59]); SQRADDDB; 
10332
+   COMBA_STORE(b[117]);
10333
+
10334
+   /* output 118 */
10335
+   CARRY_FORWARD;
10336
+   SQRADDSC(a[55], a[63]); SQRADDAC(a[56], a[62]); SQRADDAC(a[57], a[61]); SQRADDAC(a[58], a[60]); SQRADDDB; SQRADD(a[59], a[59]); 
10337
+   COMBA_STORE(b[118]);
10338
+
10339
+   /* output 119 */
10340
+   CARRY_FORWARD;
10341
+   SQRADDSC(a[56], a[63]); SQRADDAC(a[57], a[62]); SQRADDAC(a[58], a[61]); SQRADDAC(a[59], a[60]); SQRADDDB; 
10342
+   COMBA_STORE(b[119]);
10343
+
10344
+   /* output 120 */
10345
+   CARRY_FORWARD;
10346
+   SQRADDSC(a[57], a[63]); SQRADDAC(a[58], a[62]); SQRADDAC(a[59], a[61]); SQRADDDB; SQRADD(a[60], a[60]); 
10347
+   COMBA_STORE(b[120]);
10348
+
10349
+   /* output 121 */
10350
+   CARRY_FORWARD;
10351
+   SQRADDSC(a[58], a[63]); SQRADDAC(a[59], a[62]); SQRADDAC(a[60], a[61]); SQRADDDB; 
10352
+   COMBA_STORE(b[121]);
10353
+
10354
+   /* output 122 */
10355
+   CARRY_FORWARD;
10356
+   SQRADD2(a[59], a[63]); SQRADD2(a[60], a[62]); SQRADD(a[61], a[61]); 
10357
+   COMBA_STORE(b[122]);
10358
+
10359
+   /* output 123 */
10360
+   CARRY_FORWARD;
10361
+   SQRADD2(a[60], a[63]); SQRADD2(a[61], a[62]); 
10362
+   COMBA_STORE(b[123]);
10363
+
10364
+   /* output 124 */
10365
+   CARRY_FORWARD;
10366
+   SQRADD2(a[61], a[63]); SQRADD(a[62], a[62]); 
10367
+   COMBA_STORE(b[124]);
10368
+
10369
+   /* output 125 */
10370
+   CARRY_FORWARD;
10371
+   SQRADD2(a[62], a[63]); 
10372
+   COMBA_STORE(b[125]);
10373
+
10374
+   /* output 126 */
10375
+   CARRY_FORWARD;
10376
+   SQRADD(a[63], a[63]); 
10377
+   COMBA_STORE(b[126]);
10378
+   COMBA_STORE2(b[127]);
10379
+   COMBA_FINI;
10380
+
10381
+   B->used = 128;
10382
+   B->sign = FP_ZPOS;
10383
+   memcpy(B->dp, b, 128 * sizeof(fp_digit));
10384
+   fp_clamp(B);
7695 10385
 }
7696 10386
 #endif
7697 10387
 
7698
-/* $Source: /cvs/libtom/libtommath/bn_mp_to_signed_bin.c,v $ */
7699
-/* $Revision: 1.3 $ */
7700
-/* $Date: 2006/03/31 14:18:44 $ */
7701 10388
 
7702
-/* End: bn_mp_to_signed_bin.c */
10389
+/* $Source: /cvs/libtom/tomsfastmath/src/sqr/fp_sqr_comba_64.c,v $ */
10390
+/* $Revision: 1.2 $ */
10391
+/* $Date: 2007/02/17 03:39:01 $ */
7703 10392
 
7704
-/* Start: bn_mp_to_signed_bin_n.c */
7705
-#include <bignum.h>
7706
-#ifdef BN_MP_TO_SIGNED_BIN_N_C
7707
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
7708
- *
7709
- * LibTomMath is a library that provides multiple-precision
7710
- * integer arithmetic as well as number theoretic functionality.
7711
- *
7712
- * The library was designed directly after the MPI library by
7713
- * Michael Fromberger but has been written from scratch with
7714
- * additional optimizations in place.
7715
- *
7716
- * The library is free for all purposes without any express
7717
- * guarantee it works.
7718
- *
7719
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
7720
- */
10393
+/* End: fp_sqr_comba_64.c */
10394
+
10395
+/* Start: fp_sqr_comba_7.c */
10396
+#define TFM_DEFINES
10397
+#include "fp_sqr_comba.c"
7721 10398
 
7722
-/* store in signed [big endian] format */
7723
-int mp_to_signed_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen)
10399
+#ifdef TFM_SQR7
10400
+void fp_sqr_comba7(fp_int *A, fp_int *B)
7724 10401
 {
7725
-   if (*outlen < (unsigned long)mp_signed_bin_size(a)) {
7726
-      return MP_VAL;
7727
-   }
7728
-   *outlen = mp_signed_bin_size(a);
7729
-   return mp_to_signed_bin(a, b);
10402
+   fp_digit *a, b[14], c0, c1, c2, sc0, sc1, sc2;
10403
+#ifdef TFM_ISO
10404
+   fp_word tt;
10405
+#endif
10406
+
10407
+   a = A->dp;
10408
+   COMBA_START; 
10409
+
10410
+   /* clear carries */
10411
+   CLEAR_CARRY;
10412
+
10413
+   /* output 0 */
10414
+   SQRADD(a[0],a[0]);
10415
+   COMBA_STORE(b[0]);
10416
+
10417
+   /* output 1 */
10418
+   CARRY_FORWARD;
10419
+   SQRADD2(a[0], a[1]); 
10420
+   COMBA_STORE(b[1]);
10421
+
10422
+   /* output 2 */
10423
+   CARRY_FORWARD;
10424
+   SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); 
10425
+   COMBA_STORE(b[2]);
10426
+
10427
+   /* output 3 */
10428
+   CARRY_FORWARD;
10429
+   SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); 
10430
+   COMBA_STORE(b[3]);
10431
+
10432
+   /* output 4 */
10433
+   CARRY_FORWARD;
10434
+   SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); 
10435
+   COMBA_STORE(b[4]);
10436
+
10437
+   /* output 5 */
10438
+   CARRY_FORWARD;
10439
+   SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; 
10440
+   COMBA_STORE(b[5]);
10441
+
10442
+   /* output 6 */
10443
+   CARRY_FORWARD;
10444
+   SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); 
10445
+   COMBA_STORE(b[6]);
10446
+
10447
+   /* output 7 */
10448
+   CARRY_FORWARD;
10449
+   SQRADDSC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; 
10450
+   COMBA_STORE(b[7]);
10451
+
10452
+   /* output 8 */
10453
+   CARRY_FORWARD;
10454
+   SQRADD2(a[2], a[6]); SQRADD2(a[3], a[5]); SQRADD(a[4], a[4]); 
10455
+   COMBA_STORE(b[8]);
10456
+
10457
+   /* output 9 */
10458
+   CARRY_FORWARD;
10459
+   SQRADD2(a[3], a[6]); SQRADD2(a[4], a[5]); 
10460
+   COMBA_STORE(b[9]);
10461
+
10462
+   /* output 10 */
10463
+   CARRY_FORWARD;
10464
+   SQRADD2(a[4], a[6]); SQRADD(a[5], a[5]); 
10465
+   COMBA_STORE(b[10]);
10466
+
10467
+   /* output 11 */
10468
+   CARRY_FORWARD;
10469
+   SQRADD2(a[5], a[6]); 
10470
+   COMBA_STORE(b[11]);
10471
+
10472
+   /* output 12 */
10473
+   CARRY_FORWARD;
10474
+   SQRADD(a[6], a[6]); 
10475
+   COMBA_STORE(b[12]);
10476
+   COMBA_STORE2(b[13]);
10477
+   COMBA_FINI;
10478
+
10479
+   B->used = 14;
10480
+   B->sign = FP_ZPOS;
10481
+   memcpy(B->dp, b, 14 * sizeof(fp_digit));
10482
+   fp_clamp(B);
7730 10483
 }
7731 10484
 #endif
7732 10485
 
7733
-/* $Source: /cvs/libtom/libtommath/bn_mp_to_signed_bin_n.c,v $ */
7734
-/* $Revision: 1.3 $ */
7735
-/* $Date: 2006/03/31 14:18:44 $ */
7736
-
7737
-/* End: bn_mp_to_signed_bin_n.c */
7738 10486
 
7739
-/* Start: bn_mp_to_unsigned_bin.c */
7740
-#include <bignum.h>
7741
-#ifdef BN_MP_TO_UNSIGNED_BIN_C
7742
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
7743
- *
7744
- * LibTomMath is a library that provides multiple-precision
7745
- * integer arithmetic as well as number theoretic functionality.
7746
- *
7747
- * The library was designed directly after the MPI library by
7748
- * Michael Fromberger but has been written from scratch with
7749
- * additional optimizations in place.
7750
- *
7751
- * The library is free for all purposes without any express
7752
- * guarantee it works.
7753
- *
7754
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
7755
- */
10487
+/* $Source: /cvs/libtom/tomsfastmath/src/sqr/fp_sqr_comba_7.c,v $ */
10488
+/* $Revision: 1.2 $ */
10489
+/* $Date: 2007/02/17 03:39:01 $ */
7756 10490
 
7757
-/* store in unsigned [big endian] format */
7758
-int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
7759
-{
7760
-  int     x, res;
7761
-  mp_int  t;
10491
+/* End: fp_sqr_comba_7.c */
7762 10492
 
7763
-  if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
7764
-    return res;
7765
-  }
10493
+/* Start: fp_sqr_comba_8.c */
10494
+#define TFM_DEFINES
10495
+#include "fp_sqr_comba.c"
7766 10496
 
7767
-  x = 0;
7768
-  while (mp_iszero (&t) == 0) {
7769
-#ifndef MP_8BIT
7770
-      b[x++] = (unsigned char) (t.dp[0] & 255);
7771
-#else
7772
-      b[x++] = (unsigned char) (t.dp[0] | ((t.dp[1] & 0x01) << 7));
7773
-#endif
7774
-    if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) {
7775
-      mp_clear (&t);
7776
-      return res;
7777
-    }
7778
-  }
7779
-  bn_reverse (b, x);
7780
-  mp_clear (&t);
7781
-  return MP_OKAY;
10497
+#ifdef TFM_SQR8
10498
+void fp_sqr_comba8(fp_int *A, fp_int *B)
10499
+{
10500
+   fp_digit *a, b[16], c0, c1, c2, sc0, sc1, sc2;
10501
+#ifdef TFM_ISO
10502
+   fp_word tt;
10503
+#endif
10504
+
10505
+   a = A->dp;
10506
+   COMBA_START; 
10507
+
10508
+   /* clear carries */
10509
+   CLEAR_CARRY;
10510
+
10511
+   /* output 0 */
10512
+   SQRADD(a[0],a[0]);
10513
+   COMBA_STORE(b[0]);
10514
+
10515
+   /* output 1 */
10516
+   CARRY_FORWARD;
10517
+   SQRADD2(a[0], a[1]); 
10518
+   COMBA_STORE(b[1]);
10519
+
10520
+   /* output 2 */
10521
+   CARRY_FORWARD;
10522
+   SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); 
10523
+   COMBA_STORE(b[2]);
10524
+
10525
+   /* output 3 */
10526
+   CARRY_FORWARD;
10527
+   SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); 
10528
+   COMBA_STORE(b[3]);
10529
+
10530
+   /* output 4 */
10531
+   CARRY_FORWARD;
10532
+   SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); 
10533
+   COMBA_STORE(b[4]);
10534
+
10535
+   /* output 5 */
10536
+   CARRY_FORWARD;
10537
+   SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; 
10538
+   COMBA_STORE(b[5]);
10539
+
10540
+   /* output 6 */
10541
+   CARRY_FORWARD;
10542
+   SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); 
10543
+   COMBA_STORE(b[6]);
10544
+
10545
+   /* output 7 */
10546
+   CARRY_FORWARD;
10547
+   SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; 
10548
+   COMBA_STORE(b[7]);
10549
+
10550
+   /* output 8 */
10551
+   CARRY_FORWARD;
10552
+   SQRADDSC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); 
10553
+   COMBA_STORE(b[8]);
10554
+
10555
+   /* output 9 */
10556
+   CARRY_FORWARD;
10557
+   SQRADDSC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; 
10558
+   COMBA_STORE(b[9]);
10559
+
10560
+   /* output 10 */
10561
+   CARRY_FORWARD;
10562
+   SQRADD2(a[3], a[7]); SQRADD2(a[4], a[6]); SQRADD(a[5], a[5]); 
10563
+   COMBA_STORE(b[10]);
10564
+
10565
+   /* output 11 */
10566
+   CARRY_FORWARD;
10567
+   SQRADD2(a[4], a[7]); SQRADD2(a[5], a[6]); 
10568
+   COMBA_STORE(b[11]);
10569
+
10570
+   /* output 12 */
10571
+   CARRY_FORWARD;
10572
+   SQRADD2(a[5], a[7]); SQRADD(a[6], a[6]); 
10573
+   COMBA_STORE(b[12]);
10574
+
10575
+   /* output 13 */
10576
+   CARRY_FORWARD;
10577
+   SQRADD2(a[6], a[7]); 
10578
+   COMBA_STORE(b[13]);
10579
+
10580
+   /* output 14 */
10581
+   CARRY_FORWARD;
10582
+   SQRADD(a[7], a[7]); 
10583
+   COMBA_STORE(b[14]);
10584
+   COMBA_STORE2(b[15]);
10585
+   COMBA_FINI;
10586
+
10587
+   B->used = 16;
10588
+   B->sign = FP_ZPOS;
10589
+   memcpy(B->dp, b, 16 * sizeof(fp_digit));
10590
+   fp_clamp(B);
7782 10591
 }
7783 10592
 #endif
7784 10593
 
7785
-/* $Source: /cvs/libtom/libtommath/bn_mp_to_unsigned_bin.c,v $ */
7786
-/* $Revision: 1.3 $ */
7787
-/* $Date: 2006/03/31 14:18:44 $ */
7788 10594
 
7789
-/* End: bn_mp_to_unsigned_bin.c */
10595
+/* $Source: /cvs/libtom/tomsfastmath/src/sqr/fp_sqr_comba_8.c,v $ */
10596
+/* $Revision: 1.2 $ */
10597
+/* $Date: 2007/02/17 03:39:01 $ */
7790 10598
 
7791
-/* Start: bn_mp_to_unsigned_bin_n.c */
7792
-#include <bignum.h>
7793
-#ifdef BN_MP_TO_UNSIGNED_BIN_N_C
7794
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
7795
- *
7796
- * LibTomMath is a library that provides multiple-precision
7797
- * integer arithmetic as well as number theoretic functionality.
7798
- *
7799
- * The library was designed directly after the MPI library by
7800
- * Michael Fromberger but has been written from scratch with
7801
- * additional optimizations in place.
7802
- *
7803
- * The library is free for all purposes without any express
7804
- * guarantee it works.
7805
- *
7806
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
7807
- */
10599
+/* End: fp_sqr_comba_8.c */
10600
+
10601
+/* Start: fp_sqr_comba_9.c */
10602
+#define TFM_DEFINES
10603
+#include "fp_sqr_comba.c"
7808 10604
 
7809
-/* store in unsigned [big endian] format */
7810
-int mp_to_unsigned_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen)
10605
+#ifdef TFM_SQR9
10606
+void fp_sqr_comba9(fp_int *A, fp_int *B)
7811 10607
 {
7812
-   if (*outlen < (unsigned long)mp_unsigned_bin_size(a)) {
7813
-      return MP_VAL;
7814
-   }
7815
-   *outlen = mp_unsigned_bin_size(a);
7816
-   return mp_to_unsigned_bin(a, b);
10608
+   fp_digit *a, b[18], c0, c1, c2, sc0, sc1, sc2;
10609
+#ifdef TFM_ISO
10610
+   fp_word tt;
10611
+#endif
10612
+
10613
+   a = A->dp;
10614
+   COMBA_START; 
10615
+
10616
+   /* clear carries */
10617
+   CLEAR_CARRY;
10618
+
10619
+   /* output 0 */
10620
+   SQRADD(a[0],a[0]);
10621
+   COMBA_STORE(b[0]);
10622
+
10623
+   /* output 1 */
10624
+   CARRY_FORWARD;
10625
+   SQRADD2(a[0], a[1]); 
10626
+   COMBA_STORE(b[1]);
10627
+
10628
+   /* output 2 */
10629
+   CARRY_FORWARD;
10630
+   SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); 
10631
+   COMBA_STORE(b[2]);
10632
+
10633
+   /* output 3 */
10634
+   CARRY_FORWARD;
10635
+   SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); 
10636
+   COMBA_STORE(b[3]);
10637
+
10638
+   /* output 4 */
10639
+   CARRY_FORWARD;
10640
+   SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); 
10641
+   COMBA_STORE(b[4]);
10642
+
10643
+   /* output 5 */
10644
+   CARRY_FORWARD;
10645
+   SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; 
10646
+   COMBA_STORE(b[5]);
10647
+
10648
+   /* output 6 */
10649
+   CARRY_FORWARD;
10650
+   SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); 
10651
+   COMBA_STORE(b[6]);
10652
+
10653
+   /* output 7 */
10654
+   CARRY_FORWARD;
10655
+   SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; 
10656
+   COMBA_STORE(b[7]);
10657
+
10658
+   /* output 8 */
10659
+   CARRY_FORWARD;
10660
+   SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); 
10661
+   COMBA_STORE(b[8]);
10662
+
10663
+   /* output 9 */
10664
+   CARRY_FORWARD;
10665
+   SQRADDSC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; 
10666
+   COMBA_STORE(b[9]);
10667
+
10668
+   /* output 10 */
10669
+   CARRY_FORWARD;
10670
+   SQRADDSC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); 
10671
+   COMBA_STORE(b[10]);
10672
+
10673
+   /* output 11 */
10674
+   CARRY_FORWARD;
10675
+   SQRADDSC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; 
10676
+   COMBA_STORE(b[11]);
10677
+
10678
+   /* output 12 */
10679
+   CARRY_FORWARD;
10680
+   SQRADD2(a[4], a[8]); SQRADD2(a[5], a[7]); SQRADD(a[6], a[6]); 
10681
+   COMBA_STORE(b[12]);
10682
+
10683
+   /* output 13 */
10684
+   CARRY_FORWARD;
10685
+   SQRADD2(a[5], a[8]); SQRADD2(a[6], a[7]); 
10686
+   COMBA_STORE(b[13]);
10687
+
10688
+   /* output 14 */
10689
+   CARRY_FORWARD;
10690
+   SQRADD2(a[6], a[8]); SQRADD(a[7], a[7]); 
10691
+   COMBA_STORE(b[14]);
10692
+
10693
+   /* output 15 */
10694
+   CARRY_FORWARD;
10695
+   SQRADD2(a[7], a[8]); 
10696
+   COMBA_STORE(b[15]);
10697
+
10698
+   /* output 16 */
10699
+   CARRY_FORWARD;
10700
+   SQRADD(a[8], a[8]); 
10701
+   COMBA_STORE(b[16]);
10702
+   COMBA_STORE2(b[17]);
10703
+   COMBA_FINI;
10704
+
10705
+   B->used = 18;
10706
+   B->sign = FP_ZPOS;
10707
+   memcpy(B->dp, b, 18 * sizeof(fp_digit));
10708
+   fp_clamp(B);
7817 10709
 }
7818 10710
 #endif
7819 10711
 
7820
-/* $Source: /cvs/libtom/libtommath/bn_mp_to_unsigned_bin_n.c,v $ */
7821
-/* $Revision: 1.3 $ */
7822
-/* $Date: 2006/03/31 14:18:44 $ */
7823
-
7824
-/* End: bn_mp_to_unsigned_bin_n.c */
7825
-
7826
-/* Start: bn_mp_toom_mul.c */
7827
-#include <bignum.h>
7828
-#ifdef BN_MP_TOOM_MUL_C
7829
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
7830
- *
7831
- * LibTomMath is a library that provides multiple-precision
7832
- * integer arithmetic as well as number theoretic functionality.
7833
- *
7834
- * The library was designed directly after the MPI library by
7835
- * Michael Fromberger but has been written from scratch with
7836
- * additional optimizations in place.
7837
- *
7838
- * The library is free for all purposes without any express
7839
- * guarantee it works.
7840
- *
7841
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
7842
- */
7843
-
7844
-/* multiplication using the Toom-Cook 3-way algorithm 
7845
- *
7846
- * Much more complicated than Karatsuba but has a lower 
7847
- * asymptotic running time of O(N**1.464).  This algorithm is 
7848
- * only particularly useful on VERY large inputs 
7849
- * (we're talking 1000s of digits here...).
7850
-*/
7851
-int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c)
7852
-{
7853
-    mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2;
7854
-    int res, B;
7855
-        
7856
-    /* init temps */
7857
-    if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, 
7858
-                             &a0, &a1, &a2, &b0, &b1, 
7859
-                             &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) {
7860
-       return res;
7861
-    }
7862
-    
7863
-    /* B */
7864
-    B = MIN(a->used, b->used) / 3;
7865
-    
7866
-    /* a = a2 * B**2 + a1 * B + a0 */
7867
-    if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {
7868
-       goto ERR;
7869
-    }
7870
-
7871
-    if ((res = mp_copy(a, &a1)) != MP_OKAY) {
7872
-       goto ERR;
7873
-    }
7874
-    mp_rshd(&a1, B);
7875
-    mp_mod_2d(&a1, DIGIT_BIT * B, &a1);
7876
-
7877
-    if ((res = mp_copy(a, &a2)) != MP_OKAY) {
7878
-       goto ERR;
7879
-    }
7880
-    mp_rshd(&a2, B*2);
7881
-    
7882
-    /* b = b2 * B**2 + b1 * B + b0 */
7883
-    if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) {
7884
-       goto ERR;
7885
-    }
7886
-
7887
-    if ((res = mp_copy(b, &b1)) != MP_OKAY) {
7888
-       goto ERR;
7889
-    }
7890
-    mp_rshd(&b1, B);
7891
-    mp_mod_2d(&b1, DIGIT_BIT * B, &b1);
7892
-
7893
-    if ((res = mp_copy(b, &b2)) != MP_OKAY) {
7894
-       goto ERR;
7895
-    }
7896
-    mp_rshd(&b2, B*2);
7897
-    
7898
-    /* w0 = a0*b0 */
7899
-    if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) {
7900
-       goto ERR;
7901
-    }
7902
-    
7903
-    /* w4 = a2 * b2 */
7904
-    if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) {
7905
-       goto ERR;
7906
-    }
7907
-    
7908
-    /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */
7909
-    if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) {
7910
-       goto ERR;
7911
-    }
7912
-    if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
7913
-       goto ERR;
7914
-    }
7915
-    if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
7916
-       goto ERR;
7917
-    }
7918
-    if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {
7919
-       goto ERR;
7920
-    }
7921
-    
7922
-    if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) {
7923
-       goto ERR;
7924
-    }
7925
-    if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
7926
-       goto ERR;
7927
-    }
7928
-    if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) {
7929
-       goto ERR;
7930
-    }
7931
-    if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) {
7932
-       goto ERR;
7933
-    }
7934
-    
7935
-    if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) {
7936
-       goto ERR;
7937
-    }
7938
-    
7939
-    /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */
7940
-    if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) {
7941
-       goto ERR;
7942
-    }
7943
-    if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
7944
-       goto ERR;
7945
-    }
7946
-    if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
7947
-       goto ERR;
7948
-    }
7949
-    if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
7950
-       goto ERR;
7951
-    }
7952
-    
7953
-    if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) {
7954
-       goto ERR;
7955
-    }
7956
-    if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
7957
-       goto ERR;
7958
-    }
7959
-    if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) {
7960
-       goto ERR;
7961
-    }
7962
-    if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
7963
-       goto ERR;
7964
-    }
7965
-    
7966
-    if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) {
7967
-       goto ERR;
7968
-    }
7969
-    
7970
-
7971
-    /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */
7972
-    if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) {
7973
-       goto ERR;
7974
-    }
7975
-    if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
7976
-       goto ERR;
7977
-    }
7978
-    if ((res = mp_add(&b2, &b1, &tmp2)) != MP_OKAY) {
7979
-       goto ERR;
7980
-    }
7981
-    if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
7982
-       goto ERR;
7983
-    }
7984
-    if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) {
7985
-       goto ERR;
7986
-    }
7987
-    
7988
-    /* now solve the matrix 
7989
-    
7990
-       0  0  0  0  1
7991
-       1  2  4  8  16
7992
-       1  1  1  1  1
7993
-       16 8  4  2  1
7994
-       1  0  0  0  0
7995
-       
7996
-       using 12 subtractions, 4 shifts, 
7997
-              2 small divisions and 1 small multiplication 
7998
-     */
7999
-     
8000
-     /* r1 - r4 */
8001
-     if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) {
8002
-        goto ERR;
8003
-     }
8004
-     /* r3 - r0 */
8005
-     if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) {
8006
-        goto ERR;
8007
-     }
8008
-     /* r1/2 */
8009
-     if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) {
8010
-        goto ERR;
8011
-     }
8012
-     /* r3/2 */
8013
-     if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) {
8014
-        goto ERR;
8015
-     }
8016
-     /* r2 - r0 - r4 */
8017
-     if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) {
8018
-        goto ERR;
8019
-     }
8020
-     if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) {
8021
-        goto ERR;
8022
-     }
8023
-     /* r1 - r2 */
8024
-     if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
8025
-        goto ERR;
8026
-     }
8027
-     /* r3 - r2 */
8028
-     if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
8029
-        goto ERR;
8030
-     }
8031
-     /* r1 - 8r0 */
8032
-     if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) {
8033
-        goto ERR;
8034
-     }
8035
-     if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) {
8036
-        goto ERR;
8037
-     }
8038
-     /* r3 - 8r4 */
8039
-     if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) {
8040
-        goto ERR;
8041
-     }
8042
-     if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) {
8043
-        goto ERR;
8044
-     }
8045
-     /* 3r2 - r1 - r3 */
8046
-     if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) {
8047
-        goto ERR;
8048
-     }
8049
-     if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) {
8050
-        goto ERR;
8051
-     }
8052
-     if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) {
8053
-        goto ERR;
8054
-     }
8055
-     /* r1 - r2 */
8056
-     if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
8057
-        goto ERR;
8058
-     }
8059
-     /* r3 - r2 */
8060
-     if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
8061
-        goto ERR;
8062
-     }
8063
-     /* r1/3 */
8064
-     if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) {
8065
-        goto ERR;
8066
-     }
8067
-     /* r3/3 */
8068
-     if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) {
8069
-        goto ERR;
8070
-     }
8071
-     
8072
-     /* at this point shift W[n] by B*n */
8073
-     if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) {
8074
-        goto ERR;
8075
-     }
8076
-     if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) {
8077
-        goto ERR;
8078
-     }
8079
-     if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) {
8080
-        goto ERR;
8081
-     }
8082
-     if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) {
8083
-        goto ERR;
8084
-     }     
8085
-     
8086
-     if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) {
8087
-        goto ERR;
8088
-     }
8089
-     if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) {
8090
-        goto ERR;
8091
-     }
8092
-     if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) {
8093
-        goto ERR;
8094
-     }
8095
-     if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) {
8096
-        goto ERR;
8097
-     }     
8098
-     
8099
-ERR:
8100
-     mp_clear_multi(&w0, &w1, &w2, &w3, &w4, 
8101
-                    &a0, &a1, &a2, &b0, &b1, 
8102
-                    &b2, &tmp1, &tmp2, NULL);
8103
-     return res;
8104
-}     
8105
-     
8106
-#endif
8107 10712
 
8108
-/* $Source: /cvs/libtom/libtommath/bn_mp_toom_mul.c,v $ */
8109
-/* $Revision: 1.3 $ */
8110
-/* $Date: 2006/03/31 14:18:44 $ */
10713
+/* $Source: /cvs/libtom/tomsfastmath/src/sqr/fp_sqr_comba_9.c,v $ */
10714
+/* $Revision: 1.2 $ */
10715
+/* $Date: 2007/02/17 03:39:01 $ */
8111 10716
 
8112
-/* End: bn_mp_toom_mul.c */
10717
+/* End: fp_sqr_comba_9.c */
8113 10718
 
8114
-/* Start: bn_mp_toom_sqr.c */
8115
-#include <bignum.h>
8116
-#ifdef BN_MP_TOOM_SQR_C
8117
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
8118
- *
8119
- * LibTomMath is a library that provides multiple-precision
8120
- * integer arithmetic as well as number theoretic functionality.
8121
- *
8122
- * The library was designed directly after the MPI library by
8123
- * Michael Fromberger but has been written from scratch with
8124
- * additional optimizations in place.
8125
- *
8126
- * The library is free for all purposes without any express
8127
- * guarantee it works.
8128
- *
8129
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
8130
- */
10719
+/* Start: fp_sqr_comba_small_set.c */
10720
+#define TFM_DEFINES
10721
+#include "fp_sqr_comba.c"
8131 10722
 
8132
-/* squaring using Toom-Cook 3-way algorithm */
8133
-int
8134
-mp_toom_sqr(mp_int *a, mp_int *b)
10723
+#if defined(TFM_SMALL_SET)
10724
+void fp_sqr_comba_small(fp_int *A, fp_int *B)
8135 10725
 {
8136
-    mp_int w0, w1, w2, w3, w4, tmp1, a0, a1, a2;
8137
-    int res, B;
8138
-
8139
-    /* init temps */
8140
-    if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL)) != MP_OKAY) {
8141
-       return res;
8142
-    }
8143
-
8144
-    /* B */
8145
-    B = a->used / 3;
8146
-
8147
-    /* a = a2 * B**2 + a1 * B + a0 */
8148
-    if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {
8149
-       goto ERR;
8150
-    }
8151
-
8152
-    if ((res = mp_copy(a, &a1)) != MP_OKAY) {
8153
-       goto ERR;
8154
-    }
8155
-    mp_rshd(&a1, B);
8156
-    mp_mod_2d(&a1, DIGIT_BIT * B, &a1);
8157
-
8158
-    if ((res = mp_copy(a, &a2)) != MP_OKAY) {
8159
-       goto ERR;
8160
-    }
8161
-    mp_rshd(&a2, B*2);
10726
+   fp_digit *a, b[32], c0, c1, c2, sc0, sc1, sc2;
10727
+#ifdef TFM_ISO
10728
+   fp_word   tt;   
10729
+#endif   
10730
+   switch (A->used) { 
10731
+   case 1:
10732
+      a = A->dp;
10733
+      COMBA_START; 
10734
+
10735
+      /* clear carries */
10736
+      CLEAR_CARRY;
10737
+
10738
+      /* output 0 */
10739
+      SQRADD(a[0],a[0]);
10740
+      COMBA_STORE(b[0]);
10741
+      COMBA_STORE2(b[1]);
10742
+      COMBA_FINI;
10743
+
10744
+      B->used = 2;
10745
+      B->sign = FP_ZPOS;
10746
+      memcpy(B->dp, b, 2 * sizeof(fp_digit));
10747
+      fp_clamp(B);
10748
+      break;
8162 10749
 
8163
-    /* w0 = a0*a0 */
8164
-    if ((res = mp_sqr(&a0, &w0)) != MP_OKAY) {
8165
-       goto ERR;
8166
-    }
10750
+   case 2:
10751
+      a = A->dp;
10752
+      COMBA_START; 
10753
+
10754
+      /* clear carries */
10755
+      CLEAR_CARRY;
10756
+
10757
+      /* output 0 */
10758
+      SQRADD(a[0],a[0]);
10759
+      COMBA_STORE(b[0]);
10760
+
10761
+      /* output 1 */
10762
+      CARRY_FORWARD;
10763
+      SQRADD2(a[0], a[1]); 
10764
+      COMBA_STORE(b[1]);
10765
+
10766
+      /* output 2 */
10767
+      CARRY_FORWARD;
10768
+      SQRADD(a[1], a[1]); 
10769
+      COMBA_STORE(b[2]);
10770
+      COMBA_STORE2(b[3]);
10771
+      COMBA_FINI;
10772
+
10773
+      B->used = 4;
10774
+      B->sign = FP_ZPOS;
10775
+      memcpy(B->dp, b, 4 * sizeof(fp_digit));
10776
+      fp_clamp(B);
10777
+      break;
8167 10778
 
8168
-    /* w4 = a2 * a2 */
8169
-    if ((res = mp_sqr(&a2, &w4)) != MP_OKAY) {
8170
-       goto ERR;
8171
-    }
10779
+   case 3:
10780
+      a = A->dp;
10781
+      COMBA_START; 
10782
+
10783
+      /* clear carries */
10784
+      CLEAR_CARRY;
10785
+
10786
+      /* output 0 */
10787
+      SQRADD(a[0],a[0]);
10788
+      COMBA_STORE(b[0]);
10789
+
10790
+      /* output 1 */
10791
+      CARRY_FORWARD;
10792
+      SQRADD2(a[0], a[1]); 
10793
+      COMBA_STORE(b[1]);
10794
+
10795
+      /* output 2 */
10796
+      CARRY_FORWARD;
10797
+      SQRADD2(a[0], a[2]);    SQRADD(a[1], a[1]); 
10798
+      COMBA_STORE(b[2]);
10799
+
10800
+      /* output 3 */
10801
+      CARRY_FORWARD;
10802
+      SQRADD2(a[1], a[2]); 
10803
+      COMBA_STORE(b[3]);
10804
+
10805
+      /* output 4 */
10806
+      CARRY_FORWARD;
10807
+      SQRADD(a[2], a[2]); 
10808
+      COMBA_STORE(b[4]);
10809
+      COMBA_STORE2(b[5]);
10810
+      COMBA_FINI;
10811
+
10812
+      B->used = 6;
10813
+      B->sign = FP_ZPOS;
10814
+      memcpy(B->dp, b, 6 * sizeof(fp_digit));
10815
+      fp_clamp(B);
10816
+      break;
8172 10817
 
8173
-    /* w1 = (a2 + 2(a1 + 2a0))**2 */
8174
-    if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) {
8175
-       goto ERR;
8176
-    }
8177
-    if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
8178
-       goto ERR;
8179
-    }
8180
-    if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
8181
-       goto ERR;
8182
-    }
8183
-    if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {
8184
-       goto ERR;
8185
-    }
10818
+   case 4:
10819
+      a = A->dp;
10820
+      COMBA_START; 
10821
+
10822
+      /* clear carries */
10823
+      CLEAR_CARRY;
10824
+
10825
+      /* output 0 */
10826
+      SQRADD(a[0],a[0]);
10827
+      COMBA_STORE(b[0]);
10828
+
10829
+      /* output 1 */
10830
+      CARRY_FORWARD;
10831
+      SQRADD2(a[0], a[1]); 
10832
+      COMBA_STORE(b[1]);
10833
+
10834
+      /* output 2 */
10835
+      CARRY_FORWARD;
10836
+      SQRADD2(a[0], a[2]);    SQRADD(a[1], a[1]); 
10837
+      COMBA_STORE(b[2]);
10838
+
10839
+      /* output 3 */
10840
+      CARRY_FORWARD;
10841
+      SQRADD2(a[0], a[3]);    SQRADD2(a[1], a[2]); 
10842
+      COMBA_STORE(b[3]);
10843
+
10844
+      /* output 4 */
10845
+      CARRY_FORWARD;
10846
+      SQRADD2(a[1], a[3]);    SQRADD(a[2], a[2]); 
10847
+      COMBA_STORE(b[4]);
10848
+
10849
+      /* output 5 */
10850
+      CARRY_FORWARD;
10851
+      SQRADD2(a[2], a[3]); 
10852
+      COMBA_STORE(b[5]);
10853
+
10854
+      /* output 6 */
10855
+      CARRY_FORWARD;
10856
+      SQRADD(a[3], a[3]); 
10857
+      COMBA_STORE(b[6]);
10858
+      COMBA_STORE2(b[7]);
10859
+      COMBA_FINI;
10860
+
10861
+      B->used = 8;
10862
+      B->sign = FP_ZPOS;
10863
+      memcpy(B->dp, b, 8 * sizeof(fp_digit));
10864
+      fp_clamp(B);
10865
+      break;
8186 10866
 
8187
-    if ((res = mp_sqr(&tmp1, &w1)) != MP_OKAY) {
8188
-       goto ERR;
8189
-    }
10867
+   case 5:
10868
+      a = A->dp;
10869
+      COMBA_START; 
10870
+
10871
+      /* clear carries */
10872
+      CLEAR_CARRY;
10873
+
10874
+      /* output 0 */
10875
+      SQRADD(a[0],a[0]);
10876
+      COMBA_STORE(b[0]);
10877
+
10878
+      /* output 1 */
10879
+      CARRY_FORWARD;
10880
+      SQRADD2(a[0], a[1]); 
10881
+      COMBA_STORE(b[1]);
10882
+
10883
+      /* output 2 */
10884
+      CARRY_FORWARD;
10885
+      SQRADD2(a[0], a[2]);    SQRADD(a[1], a[1]); 
10886
+      COMBA_STORE(b[2]);
10887
+
10888
+      /* output 3 */
10889
+      CARRY_FORWARD;
10890
+      SQRADD2(a[0], a[3]);    SQRADD2(a[1], a[2]); 
10891
+      COMBA_STORE(b[3]);
10892
+
10893
+      /* output 4 */
10894
+      CARRY_FORWARD;
10895
+      SQRADD2(a[0], a[4]);    SQRADD2(a[1], a[3]);    SQRADD(a[2], a[2]); 
10896
+      COMBA_STORE(b[4]);
10897
+
10898
+      /* output 5 */
10899
+      CARRY_FORWARD;
10900
+      SQRADD2(a[1], a[4]);    SQRADD2(a[2], a[3]); 
10901
+      COMBA_STORE(b[5]);
10902
+
10903
+      /* output 6 */
10904
+      CARRY_FORWARD;
10905
+      SQRADD2(a[2], a[4]);    SQRADD(a[3], a[3]); 
10906
+      COMBA_STORE(b[6]);
10907
+
10908
+      /* output 7 */
10909
+      CARRY_FORWARD;
10910
+      SQRADD2(a[3], a[4]); 
10911
+      COMBA_STORE(b[7]);
10912
+
10913
+      /* output 8 */
10914
+      CARRY_FORWARD;
10915
+      SQRADD(a[4], a[4]); 
10916
+      COMBA_STORE(b[8]);
10917
+      COMBA_STORE2(b[9]);
10918
+      COMBA_FINI;
10919
+
10920
+      B->used = 10;
10921
+      B->sign = FP_ZPOS;
10922
+      memcpy(B->dp, b, 10 * sizeof(fp_digit));
10923
+      fp_clamp(B);
10924
+      break;
8190 10925
 
8191
-    /* w3 = (a0 + 2(a1 + 2a2))**2 */
8192
-    if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) {
8193
-       goto ERR;
8194
-    }
8195
-    if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
8196
-       goto ERR;
8197
-    }
8198
-    if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
8199
-       goto ERR;
8200
-    }
8201
-    if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
8202
-       goto ERR;
8203
-    }
10926
+   case 6:
10927
+      a = A->dp;
10928
+      COMBA_START; 
10929
+
10930
+      /* clear carries */
10931
+      CLEAR_CARRY;
10932
+
10933
+      /* output 0 */
10934
+      SQRADD(a[0],a[0]);
10935
+      COMBA_STORE(b[0]);
10936
+
10937
+      /* output 1 */
10938
+      CARRY_FORWARD;
10939
+      SQRADD2(a[0], a[1]); 
10940
+      COMBA_STORE(b[1]);
10941
+
10942
+      /* output 2 */
10943
+      CARRY_FORWARD;
10944
+      SQRADD2(a[0], a[2]);    SQRADD(a[1], a[1]); 
10945
+      COMBA_STORE(b[2]);
10946
+
10947
+      /* output 3 */
10948
+      CARRY_FORWARD;
10949
+      SQRADD2(a[0], a[3]);    SQRADD2(a[1], a[2]); 
10950
+      COMBA_STORE(b[3]);
10951
+
10952
+      /* output 4 */
10953
+      CARRY_FORWARD;
10954
+      SQRADD2(a[0], a[4]);    SQRADD2(a[1], a[3]);    SQRADD(a[2], a[2]); 
10955
+      COMBA_STORE(b[4]);
10956
+
10957
+      /* output 5 */
10958
+      CARRY_FORWARD;
10959
+   SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; 
10960
+      COMBA_STORE(b[5]);
10961
+
10962
+      /* output 6 */
10963
+      CARRY_FORWARD;
10964
+      SQRADD2(a[1], a[5]);    SQRADD2(a[2], a[4]);    SQRADD(a[3], a[3]); 
10965
+      COMBA_STORE(b[6]);
10966
+
10967
+      /* output 7 */
10968
+      CARRY_FORWARD;
10969
+      SQRADD2(a[2], a[5]);    SQRADD2(a[3], a[4]); 
10970
+      COMBA_STORE(b[7]);
10971
+
10972
+      /* output 8 */
10973
+      CARRY_FORWARD;
10974
+      SQRADD2(a[3], a[5]);    SQRADD(a[4], a[4]); 
10975
+      COMBA_STORE(b[8]);
10976
+
10977
+      /* output 9 */
10978
+      CARRY_FORWARD;
10979
+      SQRADD2(a[4], a[5]); 
10980
+      COMBA_STORE(b[9]);
10981
+
10982
+      /* output 10 */
10983
+      CARRY_FORWARD;
10984
+      SQRADD(a[5], a[5]); 
10985
+      COMBA_STORE(b[10]);
10986
+      COMBA_STORE2(b[11]);
10987
+      COMBA_FINI;
10988
+
10989
+      B->used = 12;
10990
+      B->sign = FP_ZPOS;
10991
+      memcpy(B->dp, b, 12 * sizeof(fp_digit));
10992
+      fp_clamp(B);
10993
+      break;
8204 10994
 
8205
-    if ((res = mp_sqr(&tmp1, &w3)) != MP_OKAY) {
8206
-       goto ERR;
8207
-    }
10995
+   case 7:
10996
+      a = A->dp;
10997
+      COMBA_START; 
10998
+
10999
+      /* clear carries */
11000
+      CLEAR_CARRY;
11001
+
11002
+      /* output 0 */
11003
+      SQRADD(a[0],a[0]);
11004
+      COMBA_STORE(b[0]);
11005
+
11006
+      /* output 1 */
11007
+      CARRY_FORWARD;
11008
+      SQRADD2(a[0], a[1]); 
11009
+      COMBA_STORE(b[1]);
11010
+
11011
+      /* output 2 */
11012
+      CARRY_FORWARD;
11013
+      SQRADD2(a[0], a[2]);    SQRADD(a[1], a[1]); 
11014
+      COMBA_STORE(b[2]);
11015
+
11016
+      /* output 3 */
11017
+      CARRY_FORWARD;
11018
+      SQRADD2(a[0], a[3]);    SQRADD2(a[1], a[2]); 
11019
+      COMBA_STORE(b[3]);
11020
+
11021
+      /* output 4 */
11022
+      CARRY_FORWARD;
11023
+      SQRADD2(a[0], a[4]);    SQRADD2(a[1], a[3]);    SQRADD(a[2], a[2]); 
11024
+      COMBA_STORE(b[4]);
11025
+
11026
+      /* output 5 */
11027
+      CARRY_FORWARD;
11028
+   SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; 
11029
+      COMBA_STORE(b[5]);
11030
+
11031
+      /* output 6 */
11032
+      CARRY_FORWARD;
11033
+   SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); 
11034
+      COMBA_STORE(b[6]);
11035
+
11036
+      /* output 7 */
11037
+      CARRY_FORWARD;
11038
+   SQRADDSC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; 
11039
+      COMBA_STORE(b[7]);
11040
+
11041
+      /* output 8 */
11042
+      CARRY_FORWARD;
11043
+      SQRADD2(a[2], a[6]);    SQRADD2(a[3], a[5]);    SQRADD(a[4], a[4]); 
11044
+      COMBA_STORE(b[8]);
11045
+
11046
+      /* output 9 */
11047
+      CARRY_FORWARD;
11048
+      SQRADD2(a[3], a[6]);    SQRADD2(a[4], a[5]); 
11049
+      COMBA_STORE(b[9]);
11050
+
11051
+      /* output 10 */
11052
+      CARRY_FORWARD;
11053
+      SQRADD2(a[4], a[6]);    SQRADD(a[5], a[5]); 
11054
+      COMBA_STORE(b[10]);
11055
+
11056
+      /* output 11 */
11057
+      CARRY_FORWARD;
11058
+      SQRADD2(a[5], a[6]); 
11059
+      COMBA_STORE(b[11]);
11060
+
11061
+      /* output 12 */
11062
+      CARRY_FORWARD;
11063
+      SQRADD(a[6], a[6]); 
11064
+      COMBA_STORE(b[12]);
11065
+      COMBA_STORE2(b[13]);
11066
+      COMBA_FINI;
11067
+
11068
+      B->used = 14;
11069
+      B->sign = FP_ZPOS;
11070
+      memcpy(B->dp, b, 14 * sizeof(fp_digit));
11071
+      fp_clamp(B);
11072
+      break;
8208 11073
 
11074
+   case 8:
11075
+      a = A->dp;
11076
+      COMBA_START; 
11077
+
11078
+      /* clear carries */
11079
+      CLEAR_CARRY;
11080
+
11081
+      /* output 0 */
11082
+      SQRADD(a[0],a[0]);
11083
+      COMBA_STORE(b[0]);
11084
+
11085
+      /* output 1 */
11086
+      CARRY_FORWARD;
11087
+      SQRADD2(a[0], a[1]); 
11088
+      COMBA_STORE(b[1]);
11089
+
11090
+      /* output 2 */
11091
+      CARRY_FORWARD;
11092
+      SQRADD2(a[0], a[2]);    SQRADD(a[1], a[1]); 
11093
+      COMBA_STORE(b[2]);
11094
+
11095
+      /* output 3 */
11096
+      CARRY_FORWARD;
11097
+      SQRADD2(a[0], a[3]);    SQRADD2(a[1], a[2]); 
11098
+      COMBA_STORE(b[3]);
11099
+
11100
+      /* output 4 */
11101
+      CARRY_FORWARD;
11102
+      SQRADD2(a[0], a[4]);    SQRADD2(a[1], a[3]);    SQRADD(a[2], a[2]); 
11103
+      COMBA_STORE(b[4]);
11104
+
11105
+      /* output 5 */
11106
+      CARRY_FORWARD;
11107
+   SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; 
11108
+      COMBA_STORE(b[5]);
11109
+
11110
+      /* output 6 */
11111
+      CARRY_FORWARD;
11112
+   SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); 
11113
+      COMBA_STORE(b[6]);
11114
+
11115
+      /* output 7 */
11116
+      CARRY_FORWARD;
11117
+   SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; 
11118
+      COMBA_STORE(b[7]);
11119
+
11120
+      /* output 8 */
11121
+      CARRY_FORWARD;
11122
+   SQRADDSC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); 
11123
+      COMBA_STORE(b[8]);
11124
+
11125
+      /* output 9 */
11126
+      CARRY_FORWARD;
11127
+   SQRADDSC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; 
11128
+      COMBA_STORE(b[9]);
11129
+
11130
+      /* output 10 */
11131
+      CARRY_FORWARD;
11132
+      SQRADD2(a[3], a[7]);    SQRADD2(a[4], a[6]);    SQRADD(a[5], a[5]); 
11133
+      COMBA_STORE(b[10]);
11134
+
11135
+      /* output 11 */
11136
+      CARRY_FORWARD;
11137
+      SQRADD2(a[4], a[7]);    SQRADD2(a[5], a[6]); 
11138
+      COMBA_STORE(b[11]);
11139
+
11140
+      /* output 12 */
11141
+      CARRY_FORWARD;
11142
+      SQRADD2(a[5], a[7]);    SQRADD(a[6], a[6]); 
11143
+      COMBA_STORE(b[12]);
11144
+
11145
+      /* output 13 */
11146
+      CARRY_FORWARD;
11147
+      SQRADD2(a[6], a[7]); 
11148
+      COMBA_STORE(b[13]);
11149
+
11150
+      /* output 14 */
11151
+      CARRY_FORWARD;
11152
+      SQRADD(a[7], a[7]); 
11153
+      COMBA_STORE(b[14]);
11154
+      COMBA_STORE2(b[15]);
11155
+      COMBA_FINI;
11156
+
11157
+      B->used = 16;
11158
+      B->sign = FP_ZPOS;
11159
+      memcpy(B->dp, b, 16 * sizeof(fp_digit));
11160
+      fp_clamp(B);
11161
+      break;
8209 11162
 
8210
-    /* w2 = (a2 + a1 + a0)**2 */
8211
-    if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) {
8212
-       goto ERR;
8213
-    }
8214
-    if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
8215
-       goto ERR;
8216
-    }
8217
-    if ((res = mp_sqr(&tmp1, &w2)) != MP_OKAY) {
8218
-       goto ERR;
8219
-    }
11163
+   case 9:
11164
+      a = A->dp;
11165
+      COMBA_START; 
11166
+
11167
+      /* clear carries */
11168
+      CLEAR_CARRY;
11169
+
11170
+      /* output 0 */
11171
+      SQRADD(a[0],a[0]);
11172
+      COMBA_STORE(b[0]);
11173
+
11174
+      /* output 1 */
11175
+      CARRY_FORWARD;
11176
+      SQRADD2(a[0], a[1]); 
11177
+      COMBA_STORE(b[1]);
11178
+
11179
+      /* output 2 */
11180
+      CARRY_FORWARD;
11181
+      SQRADD2(a[0], a[2]);    SQRADD(a[1], a[1]); 
11182
+      COMBA_STORE(b[2]);
11183
+
11184
+      /* output 3 */
11185
+      CARRY_FORWARD;
11186
+      SQRADD2(a[0], a[3]);    SQRADD2(a[1], a[2]); 
11187
+      COMBA_STORE(b[3]);
11188
+
11189
+      /* output 4 */
11190
+      CARRY_FORWARD;
11191
+      SQRADD2(a[0], a[4]);    SQRADD2(a[1], a[3]);    SQRADD(a[2], a[2]); 
11192
+      COMBA_STORE(b[4]);
11193
+
11194
+      /* output 5 */
11195
+      CARRY_FORWARD;
11196
+   SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; 
11197
+      COMBA_STORE(b[5]);
11198
+
11199
+      /* output 6 */
11200
+      CARRY_FORWARD;
11201
+   SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); 
11202
+      COMBA_STORE(b[6]);
11203
+
11204
+      /* output 7 */
11205
+      CARRY_FORWARD;
11206
+   SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; 
11207
+      COMBA_STORE(b[7]);
11208
+
11209
+      /* output 8 */
11210
+      CARRY_FORWARD;
11211
+   SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); 
11212
+      COMBA_STORE(b[8]);
11213
+
11214
+      /* output 9 */
11215
+      CARRY_FORWARD;
11216
+   SQRADDSC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; 
11217
+      COMBA_STORE(b[9]);
11218
+
11219
+      /* output 10 */
11220
+      CARRY_FORWARD;
11221
+   SQRADDSC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); 
11222
+      COMBA_STORE(b[10]);
11223
+
11224
+      /* output 11 */
11225
+      CARRY_FORWARD;
11226
+   SQRADDSC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; 
11227
+      COMBA_STORE(b[11]);
11228
+
11229
+      /* output 12 */
11230
+      CARRY_FORWARD;
11231
+      SQRADD2(a[4], a[8]);    SQRADD2(a[5], a[7]);    SQRADD(a[6], a[6]); 
11232
+      COMBA_STORE(b[12]);
11233
+
11234
+      /* output 13 */
11235
+      CARRY_FORWARD;
11236
+      SQRADD2(a[5], a[8]);    SQRADD2(a[6], a[7]); 
11237
+      COMBA_STORE(b[13]);
11238
+
11239
+      /* output 14 */
11240
+      CARRY_FORWARD;
11241
+      SQRADD2(a[6], a[8]);    SQRADD(a[7], a[7]); 
11242
+      COMBA_STORE(b[14]);
11243
+
11244
+      /* output 15 */
11245
+      CARRY_FORWARD;
11246
+      SQRADD2(a[7], a[8]); 
11247
+      COMBA_STORE(b[15]);
11248
+
11249
+      /* output 16 */
11250
+      CARRY_FORWARD;
11251
+      SQRADD(a[8], a[8]); 
11252
+      COMBA_STORE(b[16]);
11253
+      COMBA_STORE2(b[17]);
11254
+      COMBA_FINI;
11255
+
11256
+      B->used = 18;
11257
+      B->sign = FP_ZPOS;
11258
+      memcpy(B->dp, b, 18 * sizeof(fp_digit));
11259
+      fp_clamp(B);
11260
+      break;
8220 11261
 
8221
-    /* now solve the matrix
11262
+   case 10:
11263
+      a = A->dp;
11264
+      COMBA_START; 
11265
+
11266
+      /* clear carries */
11267
+      CLEAR_CARRY;
11268
+
11269
+      /* output 0 */
11270
+      SQRADD(a[0],a[0]);
11271
+      COMBA_STORE(b[0]);
11272
+
11273
+      /* output 1 */
11274
+      CARRY_FORWARD;
11275
+      SQRADD2(a[0], a[1]); 
11276
+      COMBA_STORE(b[1]);
11277
+
11278
+      /* output 2 */
11279
+      CARRY_FORWARD;
11280
+      SQRADD2(a[0], a[2]);    SQRADD(a[1], a[1]); 
11281
+      COMBA_STORE(b[2]);
11282
+
11283
+      /* output 3 */
11284
+      CARRY_FORWARD;
11285
+      SQRADD2(a[0], a[3]);    SQRADD2(a[1], a[2]); 
11286
+      COMBA_STORE(b[3]);
11287
+
11288
+      /* output 4 */
11289
+      CARRY_FORWARD;
11290
+      SQRADD2(a[0], a[4]);    SQRADD2(a[1], a[3]);    SQRADD(a[2], a[2]); 
11291
+      COMBA_STORE(b[4]);
11292
+
11293
+      /* output 5 */
11294
+      CARRY_FORWARD;
11295
+   SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; 
11296
+      COMBA_STORE(b[5]);
11297
+
11298
+      /* output 6 */
11299
+      CARRY_FORWARD;
11300
+   SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); 
11301
+      COMBA_STORE(b[6]);
11302
+
11303
+      /* output 7 */
11304
+      CARRY_FORWARD;
11305
+   SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; 
11306
+      COMBA_STORE(b[7]);
11307
+
11308
+      /* output 8 */
11309
+      CARRY_FORWARD;
11310
+   SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); 
11311
+      COMBA_STORE(b[8]);
11312
+
11313
+      /* output 9 */
11314
+      CARRY_FORWARD;
11315
+   SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; 
11316
+      COMBA_STORE(b[9]);
11317
+
11318
+      /* output 10 */
11319
+      CARRY_FORWARD;
11320
+   SQRADDSC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); 
11321
+      COMBA_STORE(b[10]);
11322
+
11323
+      /* output 11 */
11324
+      CARRY_FORWARD;
11325
+   SQRADDSC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; 
11326
+      COMBA_STORE(b[11]);
11327
+
11328
+      /* output 12 */
11329
+      CARRY_FORWARD;
11330
+   SQRADDSC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); 
11331
+      COMBA_STORE(b[12]);
11332
+
11333
+      /* output 13 */
11334
+      CARRY_FORWARD;
11335
+   SQRADDSC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; 
11336
+      COMBA_STORE(b[13]);
11337
+
11338
+      /* output 14 */
11339
+      CARRY_FORWARD;
11340
+      SQRADD2(a[5], a[9]);    SQRADD2(a[6], a[8]);    SQRADD(a[7], a[7]); 
11341
+      COMBA_STORE(b[14]);
11342
+
11343
+      /* output 15 */
11344
+      CARRY_FORWARD;
11345
+      SQRADD2(a[6], a[9]);    SQRADD2(a[7], a[8]); 
11346
+      COMBA_STORE(b[15]);
11347
+
11348
+      /* output 16 */
11349
+      CARRY_FORWARD;
11350
+      SQRADD2(a[7], a[9]);    SQRADD(a[8], a[8]); 
11351
+      COMBA_STORE(b[16]);
11352
+
11353
+      /* output 17 */
11354
+      CARRY_FORWARD;
11355
+      SQRADD2(a[8], a[9]); 
11356
+      COMBA_STORE(b[17]);
11357
+
11358
+      /* output 18 */
11359
+      CARRY_FORWARD;
11360
+      SQRADD(a[9], a[9]); 
11361
+      COMBA_STORE(b[18]);
11362
+      COMBA_STORE2(b[19]);
11363
+      COMBA_FINI;
11364
+
11365
+      B->used = 20;
11366
+      B->sign = FP_ZPOS;
11367
+      memcpy(B->dp, b, 20 * sizeof(fp_digit));
11368
+      fp_clamp(B);
11369
+      break;
8222 11370
 
8223
-       0  0  0  0  1
8224
-       1  2  4  8  16
8225
-       1  1  1  1  1
8226
-       16 8  4  2  1
8227
-       1  0  0  0  0
11371
+   case 11:
11372
+      a = A->dp;
11373
+      COMBA_START; 
11374
+
11375
+      /* clear carries */
11376
+      CLEAR_CARRY;
11377
+
11378
+      /* output 0 */
11379
+      SQRADD(a[0],a[0]);
11380
+      COMBA_STORE(b[0]);
11381
+
11382
+      /* output 1 */
11383
+      CARRY_FORWARD;
11384
+      SQRADD2(a[0], a[1]); 
11385
+      COMBA_STORE(b[1]);
11386
+
11387
+      /* output 2 */
11388
+      CARRY_FORWARD;
11389
+      SQRADD2(a[0], a[2]);    SQRADD(a[1], a[1]); 
11390
+      COMBA_STORE(b[2]);
11391
+
11392
+      /* output 3 */
11393
+      CARRY_FORWARD;
11394
+      SQRADD2(a[0], a[3]);    SQRADD2(a[1], a[2]); 
11395
+      COMBA_STORE(b[3]);
11396
+
11397
+      /* output 4 */
11398
+      CARRY_FORWARD;
11399
+      SQRADD2(a[0], a[4]);    SQRADD2(a[1], a[3]);    SQRADD(a[2], a[2]); 
11400
+      COMBA_STORE(b[4]);
11401
+
11402
+      /* output 5 */
11403
+      CARRY_FORWARD;
11404
+   SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; 
11405
+      COMBA_STORE(b[5]);
11406
+
11407
+      /* output 6 */
11408
+      CARRY_FORWARD;
11409
+   SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); 
11410
+      COMBA_STORE(b[6]);
11411
+
11412
+      /* output 7 */
11413
+      CARRY_FORWARD;
11414
+   SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; 
11415
+      COMBA_STORE(b[7]);
11416
+
11417
+      /* output 8 */
11418
+      CARRY_FORWARD;
11419
+   SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); 
11420
+      COMBA_STORE(b[8]);
11421
+
11422
+      /* output 9 */
11423
+      CARRY_FORWARD;
11424
+   SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; 
11425
+      COMBA_STORE(b[9]);
11426
+
11427
+      /* output 10 */
11428
+      CARRY_FORWARD;
11429
+   SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); 
11430
+      COMBA_STORE(b[10]);
11431
+
11432
+      /* output 11 */
11433
+      CARRY_FORWARD;
11434
+   SQRADDSC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; 
11435
+      COMBA_STORE(b[11]);
11436
+
11437
+      /* output 12 */
11438
+      CARRY_FORWARD;
11439
+   SQRADDSC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); 
11440
+      COMBA_STORE(b[12]);
11441
+
11442
+      /* output 13 */
11443
+      CARRY_FORWARD;
11444
+   SQRADDSC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; 
11445
+      COMBA_STORE(b[13]);
11446
+
11447
+      /* output 14 */
11448
+      CARRY_FORWARD;
11449
+   SQRADDSC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); 
11450
+      COMBA_STORE(b[14]);
11451
+
11452
+      /* output 15 */
11453
+      CARRY_FORWARD;
11454
+   SQRADDSC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; 
11455
+      COMBA_STORE(b[15]);
11456
+
11457
+      /* output 16 */
11458
+      CARRY_FORWARD;
11459
+      SQRADD2(a[6], a[10]);    SQRADD2(a[7], a[9]);    SQRADD(a[8], a[8]); 
11460
+      COMBA_STORE(b[16]);
11461
+
11462
+      /* output 17 */
11463
+      CARRY_FORWARD;
11464
+      SQRADD2(a[7], a[10]);    SQRADD2(a[8], a[9]); 
11465
+      COMBA_STORE(b[17]);
11466
+
11467
+      /* output 18 */
11468
+      CARRY_FORWARD;
11469
+      SQRADD2(a[8], a[10]);    SQRADD(a[9], a[9]); 
11470
+      COMBA_STORE(b[18]);
11471
+
11472
+      /* output 19 */
11473
+      CARRY_FORWARD;
11474
+      SQRADD2(a[9], a[10]); 
11475
+      COMBA_STORE(b[19]);
11476
+
11477
+      /* output 20 */
11478
+      CARRY_FORWARD;
11479
+      SQRADD(a[10], a[10]); 
11480
+      COMBA_STORE(b[20]);
11481
+      COMBA_STORE2(b[21]);
11482
+      COMBA_FINI;
11483
+
11484
+      B->used = 22;
11485
+      B->sign = FP_ZPOS;
11486
+      memcpy(B->dp, b, 22 * sizeof(fp_digit));
11487
+      fp_clamp(B);
11488
+      break;
8228 11489
 
8229
-       using 12 subtractions, 4 shifts, 2 small divisions and 1 small multiplication.
8230
-     */
11490
+   case 12:
11491
+      a = A->dp;
11492
+      COMBA_START; 
11493
+
11494
+      /* clear carries */
11495
+      CLEAR_CARRY;
11496
+
11497
+      /* output 0 */
11498
+      SQRADD(a[0],a[0]);
11499
+      COMBA_STORE(b[0]);
11500
+
11501
+      /* output 1 */
11502
+      CARRY_FORWARD;
11503
+      SQRADD2(a[0], a[1]); 
11504
+      COMBA_STORE(b[1]);
11505
+
11506
+      /* output 2 */
11507
+      CARRY_FORWARD;
11508
+      SQRADD2(a[0], a[2]);    SQRADD(a[1], a[1]); 
11509
+      COMBA_STORE(b[2]);
11510
+
11511
+      /* output 3 */
11512
+      CARRY_FORWARD;
11513
+      SQRADD2(a[0], a[3]);    SQRADD2(a[1], a[2]); 
11514
+      COMBA_STORE(b[3]);
11515
+
11516
+      /* output 4 */
11517
+      CARRY_FORWARD;
11518
+      SQRADD2(a[0], a[4]);    SQRADD2(a[1], a[3]);    SQRADD(a[2], a[2]); 
11519
+      COMBA_STORE(b[4]);
11520
+
11521
+      /* output 5 */
11522
+      CARRY_FORWARD;
11523
+   SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; 
11524
+      COMBA_STORE(b[5]);
11525
+
11526
+      /* output 6 */
11527
+      CARRY_FORWARD;
11528
+   SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); 
11529
+      COMBA_STORE(b[6]);
11530
+
11531
+      /* output 7 */
11532
+      CARRY_FORWARD;
11533
+   SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; 
11534
+      COMBA_STORE(b[7]);
11535
+
11536
+      /* output 8 */
11537
+      CARRY_FORWARD;
11538
+   SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); 
11539
+      COMBA_STORE(b[8]);
11540
+
11541
+      /* output 9 */
11542
+      CARRY_FORWARD;
11543
+   SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; 
11544
+      COMBA_STORE(b[9]);
11545
+
11546
+      /* output 10 */
11547
+      CARRY_FORWARD;
11548
+   SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); 
11549
+      COMBA_STORE(b[10]);
11550
+
11551
+      /* output 11 */
11552
+      CARRY_FORWARD;
11553
+   SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; 
11554
+      COMBA_STORE(b[11]);
11555
+
11556
+      /* output 12 */
11557
+      CARRY_FORWARD;
11558
+   SQRADDSC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); 
11559
+      COMBA_STORE(b[12]);
11560
+
11561
+      /* output 13 */
11562
+      CARRY_FORWARD;
11563
+   SQRADDSC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; 
11564
+      COMBA_STORE(b[13]);
11565
+
11566
+      /* output 14 */
11567
+      CARRY_FORWARD;
11568
+   SQRADDSC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); 
11569
+      COMBA_STORE(b[14]);
11570
+
11571
+      /* output 15 */
11572
+      CARRY_FORWARD;
11573
+   SQRADDSC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; 
11574
+      COMBA_STORE(b[15]);
11575
+
11576
+      /* output 16 */
11577
+      CARRY_FORWARD;
11578
+   SQRADDSC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); 
11579
+      COMBA_STORE(b[16]);
11580
+
11581
+      /* output 17 */
11582
+      CARRY_FORWARD;
11583
+   SQRADDSC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; 
11584
+      COMBA_STORE(b[17]);
11585
+
11586
+      /* output 18 */
11587
+      CARRY_FORWARD;
11588
+      SQRADD2(a[7], a[11]);    SQRADD2(a[8], a[10]);    SQRADD(a[9], a[9]); 
11589
+      COMBA_STORE(b[18]);
11590
+
11591
+      /* output 19 */
11592
+      CARRY_FORWARD;
11593
+      SQRADD2(a[8], a[11]);    SQRADD2(a[9], a[10]); 
11594
+      COMBA_STORE(b[19]);
11595
+
11596
+      /* output 20 */
11597
+      CARRY_FORWARD;
11598
+      SQRADD2(a[9], a[11]);    SQRADD(a[10], a[10]); 
11599
+      COMBA_STORE(b[20]);
11600
+
11601
+      /* output 21 */
11602
+      CARRY_FORWARD;
11603
+      SQRADD2(a[10], a[11]); 
11604
+      COMBA_STORE(b[21]);
11605
+
11606
+      /* output 22 */
11607
+      CARRY_FORWARD;
11608
+      SQRADD(a[11], a[11]); 
11609
+      COMBA_STORE(b[22]);
11610
+      COMBA_STORE2(b[23]);
11611
+      COMBA_FINI;
11612
+
11613
+      B->used = 24;
11614
+      B->sign = FP_ZPOS;
11615
+      memcpy(B->dp, b, 24 * sizeof(fp_digit));
11616
+      fp_clamp(B);
11617
+      break;
8231 11618
 
8232
-     /* r1 - r4 */
8233
-     if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) {
8234
-        goto ERR;
8235
-     }
8236
-     /* r3 - r0 */
8237
-     if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) {
8238
-        goto ERR;
8239
-     }
8240
-     /* r1/2 */
8241
-     if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) {
8242
-        goto ERR;
8243
-     }
8244
-     /* r3/2 */
8245
-     if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) {
8246
-        goto ERR;
8247
-     }
8248
-     /* r2 - r0 - r4 */
8249
-     if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) {
8250
-        goto ERR;
8251
-     }
8252
-     if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) {
8253
-        goto ERR;
8254
-     }
8255
-     /* r1 - r2 */
8256
-     if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
8257
-        goto ERR;
8258
-     }
8259
-     /* r3 - r2 */
8260
-     if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
8261
-        goto ERR;
8262
-     }
8263
-     /* r1 - 8r0 */
8264
-     if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) {
8265
-        goto ERR;
8266
-     }
8267
-     if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) {
8268
-        goto ERR;
8269
-     }
8270
-     /* r3 - 8r4 */
8271
-     if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) {
8272
-        goto ERR;
8273
-     }
8274
-     if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) {
8275
-        goto ERR;
8276
-     }
8277
-     /* 3r2 - r1 - r3 */
8278
-     if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) {
8279
-        goto ERR;
8280
-     }
8281
-     if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) {
8282
-        goto ERR;
8283
-     }
8284
-     if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) {
8285
-        goto ERR;
8286
-     }
8287
-     /* r1 - r2 */
8288
-     if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
8289
-        goto ERR;
8290
-     }
8291
-     /* r3 - r2 */
8292
-     if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
8293
-        goto ERR;
8294
-     }
8295
-     /* r1/3 */
8296
-     if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) {
8297
-        goto ERR;
8298
-     }
8299
-     /* r3/3 */
8300
-     if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) {
8301
-        goto ERR;
8302
-     }
11619
+   case 13:
11620
+      a = A->dp;
11621
+      COMBA_START; 
11622
+
11623
+      /* clear carries */
11624
+      CLEAR_CARRY;
11625
+
11626
+      /* output 0 */
11627
+      SQRADD(a[0],a[0]);
11628
+      COMBA_STORE(b[0]);
11629
+
11630
+      /* output 1 */
11631
+      CARRY_FORWARD;
11632
+      SQRADD2(a[0], a[1]); 
11633
+      COMBA_STORE(b[1]);
11634
+
11635
+      /* output 2 */
11636
+      CARRY_FORWARD;
11637
+      SQRADD2(a[0], a[2]);    SQRADD(a[1], a[1]); 
11638
+      COMBA_STORE(b[2]);
11639
+
11640
+      /* output 3 */
11641
+      CARRY_FORWARD;
11642
+      SQRADD2(a[0], a[3]);    SQRADD2(a[1], a[2]); 
11643
+      COMBA_STORE(b[3]);
11644
+
11645
+      /* output 4 */
11646
+      CARRY_FORWARD;
11647
+      SQRADD2(a[0], a[4]);    SQRADD2(a[1], a[3]);    SQRADD(a[2], a[2]); 
11648
+      COMBA_STORE(b[4]);
11649
+
11650
+      /* output 5 */
11651
+      CARRY_FORWARD;
11652
+   SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; 
11653
+      COMBA_STORE(b[5]);
11654
+
11655
+      /* output 6 */
11656
+      CARRY_FORWARD;
11657
+   SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); 
11658
+      COMBA_STORE(b[6]);
11659
+
11660
+      /* output 7 */
11661
+      CARRY_FORWARD;
11662
+   SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; 
11663
+      COMBA_STORE(b[7]);
11664
+
11665
+      /* output 8 */
11666
+      CARRY_FORWARD;
11667
+   SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); 
11668
+      COMBA_STORE(b[8]);
11669
+
11670
+      /* output 9 */
11671
+      CARRY_FORWARD;
11672
+   SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; 
11673
+      COMBA_STORE(b[9]);
11674
+
11675
+      /* output 10 */
11676
+      CARRY_FORWARD;
11677
+   SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); 
11678
+      COMBA_STORE(b[10]);
11679
+
11680
+      /* output 11 */
11681
+      CARRY_FORWARD;
11682
+   SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; 
11683
+      COMBA_STORE(b[11]);
11684
+
11685
+      /* output 12 */
11686
+      CARRY_FORWARD;
11687
+   SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); 
11688
+      COMBA_STORE(b[12]);
11689
+
11690
+      /* output 13 */
11691
+      CARRY_FORWARD;
11692
+   SQRADDSC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; 
11693
+      COMBA_STORE(b[13]);
11694
+
11695
+      /* output 14 */
11696
+      CARRY_FORWARD;
11697
+   SQRADDSC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); 
11698
+      COMBA_STORE(b[14]);
11699
+
11700
+      /* output 15 */
11701
+      CARRY_FORWARD;
11702
+   SQRADDSC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; 
11703
+      COMBA_STORE(b[15]);
11704
+
11705
+      /* output 16 */
11706
+      CARRY_FORWARD;
11707
+   SQRADDSC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); 
11708
+      COMBA_STORE(b[16]);
11709
+
11710
+      /* output 17 */
11711
+      CARRY_FORWARD;
11712
+   SQRADDSC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; 
11713
+      COMBA_STORE(b[17]);
11714
+
11715
+      /* output 18 */
11716
+      CARRY_FORWARD;
11717
+   SQRADDSC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); 
11718
+      COMBA_STORE(b[18]);
11719
+
11720
+      /* output 19 */
11721
+      CARRY_FORWARD;
11722
+   SQRADDSC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; 
11723
+      COMBA_STORE(b[19]);
11724
+
11725
+      /* output 20 */
11726
+      CARRY_FORWARD;
11727
+      SQRADD2(a[8], a[12]);    SQRADD2(a[9], a[11]);    SQRADD(a[10], a[10]); 
11728
+      COMBA_STORE(b[20]);
11729
+
11730
+      /* output 21 */
11731
+      CARRY_FORWARD;
11732
+      SQRADD2(a[9], a[12]);    SQRADD2(a[10], a[11]); 
11733
+      COMBA_STORE(b[21]);
11734
+
11735
+      /* output 22 */
11736
+      CARRY_FORWARD;
11737
+      SQRADD2(a[10], a[12]);    SQRADD(a[11], a[11]); 
11738
+      COMBA_STORE(b[22]);
11739
+
11740
+      /* output 23 */
11741
+      CARRY_FORWARD;
11742
+      SQRADD2(a[11], a[12]); 
11743
+      COMBA_STORE(b[23]);
11744
+
11745
+      /* output 24 */
11746
+      CARRY_FORWARD;
11747
+      SQRADD(a[12], a[12]); 
11748
+      COMBA_STORE(b[24]);
11749
+      COMBA_STORE2(b[25]);
11750
+      COMBA_FINI;
11751
+
11752
+      B->used = 26;
11753
+      B->sign = FP_ZPOS;
11754
+      memcpy(B->dp, b, 26 * sizeof(fp_digit));
11755
+      fp_clamp(B);
11756
+      break;
8303 11757
 
8304
-     /* at this point shift W[n] by B*n */
8305
-     if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) {
8306
-        goto ERR;
8307
-     }
8308
-     if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) {
8309
-        goto ERR;
8310
-     }
8311
-     if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) {
8312
-        goto ERR;
8313
-     }
8314
-     if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) {
8315
-        goto ERR;
8316
-     }
11758
+   case 14:
11759
+      a = A->dp;
11760
+      COMBA_START; 
11761
+
11762
+      /* clear carries */
11763
+      CLEAR_CARRY;
11764
+
11765
+      /* output 0 */
11766
+      SQRADD(a[0],a[0]);
11767
+      COMBA_STORE(b[0]);
11768
+
11769
+      /* output 1 */
11770
+      CARRY_FORWARD;
11771
+      SQRADD2(a[0], a[1]); 
11772
+      COMBA_STORE(b[1]);
11773
+
11774
+      /* output 2 */
11775
+      CARRY_FORWARD;
11776
+      SQRADD2(a[0], a[2]);    SQRADD(a[1], a[1]); 
11777
+      COMBA_STORE(b[2]);
11778
+
11779
+      /* output 3 */
11780
+      CARRY_FORWARD;
11781
+      SQRADD2(a[0], a[3]);    SQRADD2(a[1], a[2]); 
11782
+      COMBA_STORE(b[3]);
11783
+
11784
+      /* output 4 */
11785
+      CARRY_FORWARD;
11786
+      SQRADD2(a[0], a[4]);    SQRADD2(a[1], a[3]);    SQRADD(a[2], a[2]); 
11787
+      COMBA_STORE(b[4]);
11788
+
11789
+      /* output 5 */
11790
+      CARRY_FORWARD;
11791
+   SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; 
11792
+      COMBA_STORE(b[5]);
11793
+
11794
+      /* output 6 */
11795
+      CARRY_FORWARD;
11796
+   SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); 
11797
+      COMBA_STORE(b[6]);
11798
+
11799
+      /* output 7 */
11800
+      CARRY_FORWARD;
11801
+   SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; 
11802
+      COMBA_STORE(b[7]);
11803
+
11804
+      /* output 8 */
11805
+      CARRY_FORWARD;
11806
+   SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); 
11807
+      COMBA_STORE(b[8]);
11808
+
11809
+      /* output 9 */
11810
+      CARRY_FORWARD;
11811
+   SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; 
11812
+      COMBA_STORE(b[9]);
11813
+
11814
+      /* output 10 */
11815
+      CARRY_FORWARD;
11816
+   SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); 
11817
+      COMBA_STORE(b[10]);
11818
+
11819
+      /* output 11 */
11820
+      CARRY_FORWARD;
11821
+   SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; 
11822
+      COMBA_STORE(b[11]);
11823
+
11824
+      /* output 12 */
11825
+      CARRY_FORWARD;
11826
+   SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); 
11827
+      COMBA_STORE(b[12]);
11828
+
11829
+      /* output 13 */
11830
+      CARRY_FORWARD;
11831
+   SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; 
11832
+      COMBA_STORE(b[13]);
11833
+
11834
+      /* output 14 */
11835
+      CARRY_FORWARD;
11836
+   SQRADDSC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); 
11837
+      COMBA_STORE(b[14]);
11838
+
11839
+      /* output 15 */
11840
+      CARRY_FORWARD;
11841
+   SQRADDSC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; 
11842
+      COMBA_STORE(b[15]);
11843
+
11844
+      /* output 16 */
11845
+      CARRY_FORWARD;
11846
+   SQRADDSC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); 
11847
+      COMBA_STORE(b[16]);
11848
+
11849
+      /* output 17 */
11850
+      CARRY_FORWARD;
11851
+   SQRADDSC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; 
11852
+      COMBA_STORE(b[17]);
11853
+
11854
+      /* output 18 */
11855
+      CARRY_FORWARD;
11856
+   SQRADDSC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); 
11857
+      COMBA_STORE(b[18]);
11858
+
11859
+      /* output 19 */
11860
+      CARRY_FORWARD;
11861
+   SQRADDSC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; 
11862
+      COMBA_STORE(b[19]);
11863
+
11864
+      /* output 20 */
11865
+      CARRY_FORWARD;
11866
+   SQRADDSC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]); 
11867
+      COMBA_STORE(b[20]);
11868
+
11869
+      /* output 21 */
11870
+      CARRY_FORWARD;
11871
+   SQRADDSC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB; 
11872
+      COMBA_STORE(b[21]);
11873
+
11874
+      /* output 22 */
11875
+      CARRY_FORWARD;
11876
+      SQRADD2(a[9], a[13]);    SQRADD2(a[10], a[12]);    SQRADD(a[11], a[11]); 
11877
+      COMBA_STORE(b[22]);
11878
+
11879
+      /* output 23 */
11880
+      CARRY_FORWARD;
11881
+      SQRADD2(a[10], a[13]);    SQRADD2(a[11], a[12]); 
11882
+      COMBA_STORE(b[23]);
11883
+
11884
+      /* output 24 */
11885
+      CARRY_FORWARD;
11886
+      SQRADD2(a[11], a[13]);    SQRADD(a[12], a[12]); 
11887
+      COMBA_STORE(b[24]);
11888
+
11889
+      /* output 25 */
11890
+      CARRY_FORWARD;
11891
+      SQRADD2(a[12], a[13]); 
11892
+      COMBA_STORE(b[25]);
11893
+
11894
+      /* output 26 */
11895
+      CARRY_FORWARD;
11896
+      SQRADD(a[13], a[13]); 
11897
+      COMBA_STORE(b[26]);
11898
+      COMBA_STORE2(b[27]);
11899
+      COMBA_FINI;
11900
+
11901
+      B->used = 28;
11902
+      B->sign = FP_ZPOS;
11903
+      memcpy(B->dp, b, 28 * sizeof(fp_digit));
11904
+      fp_clamp(B);
11905
+      break;
8317 11906
 
8318
-     if ((res = mp_add(&w0, &w1, b)) != MP_OKAY) {
8319
-        goto ERR;
8320
-     }
8321
-     if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) {
8322
-        goto ERR;
8323
-     }
8324
-     if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) {
8325
-        goto ERR;
8326
-     }
8327
-     if ((res = mp_add(&tmp1, b, b)) != MP_OKAY) {
8328
-        goto ERR;
8329
-     }
11907
+   case 15:
11908
+      a = A->dp;
11909
+      COMBA_START; 
11910
+
11911
+      /* clear carries */
11912
+      CLEAR_CARRY;
11913
+
11914
+      /* output 0 */
11915
+      SQRADD(a[0],a[0]);
11916
+      COMBA_STORE(b[0]);
11917
+
11918
+      /* output 1 */
11919
+      CARRY_FORWARD;
11920
+      SQRADD2(a[0], a[1]); 
11921
+      COMBA_STORE(b[1]);
11922
+
11923
+      /* output 2 */
11924
+      CARRY_FORWARD;
11925
+      SQRADD2(a[0], a[2]);    SQRADD(a[1], a[1]); 
11926
+      COMBA_STORE(b[2]);
11927
+
11928
+      /* output 3 */
11929
+      CARRY_FORWARD;
11930
+      SQRADD2(a[0], a[3]);    SQRADD2(a[1], a[2]); 
11931
+      COMBA_STORE(b[3]);
11932
+
11933
+      /* output 4 */
11934
+      CARRY_FORWARD;
11935
+      SQRADD2(a[0], a[4]);    SQRADD2(a[1], a[3]);    SQRADD(a[2], a[2]); 
11936
+      COMBA_STORE(b[4]);
11937
+
11938
+      /* output 5 */
11939
+      CARRY_FORWARD;
11940
+   SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; 
11941
+      COMBA_STORE(b[5]);
11942
+
11943
+      /* output 6 */
11944
+      CARRY_FORWARD;
11945
+   SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); 
11946
+      COMBA_STORE(b[6]);
11947
+
11948
+      /* output 7 */
11949
+      CARRY_FORWARD;
11950
+   SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; 
11951
+      COMBA_STORE(b[7]);
11952
+
11953
+      /* output 8 */
11954
+      CARRY_FORWARD;
11955
+   SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); 
11956
+      COMBA_STORE(b[8]);
11957
+
11958
+      /* output 9 */
11959
+      CARRY_FORWARD;
11960
+   SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; 
11961
+      COMBA_STORE(b[9]);
11962
+
11963
+      /* output 10 */
11964
+      CARRY_FORWARD;
11965
+   SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); 
11966
+      COMBA_STORE(b[10]);
11967
+
11968
+      /* output 11 */
11969
+      CARRY_FORWARD;
11970
+   SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; 
11971
+      COMBA_STORE(b[11]);
11972
+
11973
+      /* output 12 */
11974
+      CARRY_FORWARD;
11975
+   SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); 
11976
+      COMBA_STORE(b[12]);
11977
+
11978
+      /* output 13 */
11979
+      CARRY_FORWARD;
11980
+   SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; 
11981
+      COMBA_STORE(b[13]);
11982
+
11983
+      /* output 14 */
11984
+      CARRY_FORWARD;
11985
+   SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); 
11986
+      COMBA_STORE(b[14]);
11987
+
11988
+      /* output 15 */
11989
+      CARRY_FORWARD;
11990
+   SQRADDSC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; 
11991
+      COMBA_STORE(b[15]);
11992
+
11993
+      /* output 16 */
11994
+      CARRY_FORWARD;
11995
+   SQRADDSC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); 
11996
+      COMBA_STORE(b[16]);
11997
+
11998
+      /* output 17 */
11999
+      CARRY_FORWARD;
12000
+   SQRADDSC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; 
12001
+      COMBA_STORE(b[17]);
12002
+
12003
+      /* output 18 */
12004
+      CARRY_FORWARD;
12005
+   SQRADDSC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); 
12006
+      COMBA_STORE(b[18]);
12007
+
12008
+      /* output 19 */
12009
+      CARRY_FORWARD;
12010
+   SQRADDSC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; 
12011
+      COMBA_STORE(b[19]);
12012
+
12013
+      /* output 20 */
12014
+      CARRY_FORWARD;
12015
+   SQRADDSC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]); 
12016
+      COMBA_STORE(b[20]);
12017
+
12018
+      /* output 21 */
12019
+      CARRY_FORWARD;
12020
+   SQRADDSC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB; 
12021
+      COMBA_STORE(b[21]);
12022
+
12023
+      /* output 22 */
12024
+      CARRY_FORWARD;
12025
+   SQRADDSC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]); 
12026
+      COMBA_STORE(b[22]);
12027
+
12028
+      /* output 23 */
12029
+      CARRY_FORWARD;
12030
+   SQRADDSC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB; 
12031
+      COMBA_STORE(b[23]);
12032
+
12033
+      /* output 24 */
12034
+      CARRY_FORWARD;
12035
+      SQRADD2(a[10], a[14]);    SQRADD2(a[11], a[13]);    SQRADD(a[12], a[12]); 
12036
+      COMBA_STORE(b[24]);
12037
+
12038
+      /* output 25 */
12039
+      CARRY_FORWARD;
12040
+      SQRADD2(a[11], a[14]);    SQRADD2(a[12], a[13]); 
12041
+      COMBA_STORE(b[25]);
12042
+
12043
+      /* output 26 */
12044
+      CARRY_FORWARD;
12045
+      SQRADD2(a[12], a[14]);    SQRADD(a[13], a[13]); 
12046
+      COMBA_STORE(b[26]);
12047
+
12048
+      /* output 27 */
12049
+      CARRY_FORWARD;
12050
+      SQRADD2(a[13], a[14]); 
12051
+      COMBA_STORE(b[27]);
12052
+
12053
+      /* output 28 */
12054
+      CARRY_FORWARD;
12055
+      SQRADD(a[14], a[14]); 
12056
+      COMBA_STORE(b[28]);
12057
+      COMBA_STORE2(b[29]);
12058
+      COMBA_FINI;
12059
+
12060
+      B->used = 30;
12061
+      B->sign = FP_ZPOS;
12062
+      memcpy(B->dp, b, 30 * sizeof(fp_digit));
12063
+      fp_clamp(B);
12064
+      break;
8330 12065
 
8331
-ERR:
8332
-     mp_clear_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL);
8333
-     return res;
12066
+   case 16:
12067
+      a = A->dp;
12068
+      COMBA_START; 
12069
+
12070
+      /* clear carries */
12071
+      CLEAR_CARRY;
12072
+
12073
+      /* output 0 */
12074
+      SQRADD(a[0],a[0]);
12075
+      COMBA_STORE(b[0]);
12076
+
12077
+      /* output 1 */
12078
+      CARRY_FORWARD;
12079
+      SQRADD2(a[0], a[1]); 
12080
+      COMBA_STORE(b[1]);
12081
+
12082
+      /* output 2 */
12083
+      CARRY_FORWARD;
12084
+      SQRADD2(a[0], a[2]);    SQRADD(a[1], a[1]); 
12085
+      COMBA_STORE(b[2]);
12086
+
12087
+      /* output 3 */
12088
+      CARRY_FORWARD;
12089
+      SQRADD2(a[0], a[3]);    SQRADD2(a[1], a[2]); 
12090
+      COMBA_STORE(b[3]);
12091
+
12092
+      /* output 4 */
12093
+      CARRY_FORWARD;
12094
+      SQRADD2(a[0], a[4]);    SQRADD2(a[1], a[3]);    SQRADD(a[2], a[2]); 
12095
+      COMBA_STORE(b[4]);
12096
+
12097
+      /* output 5 */
12098
+      CARRY_FORWARD;
12099
+   SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; 
12100
+      COMBA_STORE(b[5]);
12101
+
12102
+      /* output 6 */
12103
+      CARRY_FORWARD;
12104
+   SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); 
12105
+      COMBA_STORE(b[6]);
12106
+
12107
+      /* output 7 */
12108
+      CARRY_FORWARD;
12109
+   SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; 
12110
+      COMBA_STORE(b[7]);
12111
+
12112
+      /* output 8 */
12113
+      CARRY_FORWARD;
12114
+   SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); 
12115
+      COMBA_STORE(b[8]);
12116
+
12117
+      /* output 9 */
12118
+      CARRY_FORWARD;
12119
+   SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; 
12120
+      COMBA_STORE(b[9]);
12121
+
12122
+      /* output 10 */
12123
+      CARRY_FORWARD;
12124
+   SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); 
12125
+      COMBA_STORE(b[10]);
12126
+
12127
+      /* output 11 */
12128
+      CARRY_FORWARD;
12129
+   SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; 
12130
+      COMBA_STORE(b[11]);
12131
+
12132
+      /* output 12 */
12133
+      CARRY_FORWARD;
12134
+   SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); 
12135
+      COMBA_STORE(b[12]);
12136
+
12137
+      /* output 13 */
12138
+      CARRY_FORWARD;
12139
+   SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; 
12140
+      COMBA_STORE(b[13]);
12141
+
12142
+      /* output 14 */
12143
+      CARRY_FORWARD;
12144
+   SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); 
12145
+      COMBA_STORE(b[14]);
12146
+
12147
+      /* output 15 */
12148
+      CARRY_FORWARD;
12149
+   SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; 
12150
+      COMBA_STORE(b[15]);
12151
+
12152
+      /* output 16 */
12153
+      CARRY_FORWARD;
12154
+   SQRADDSC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); 
12155
+      COMBA_STORE(b[16]);
12156
+
12157
+      /* output 17 */
12158
+      CARRY_FORWARD;
12159
+   SQRADDSC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; 
12160
+      COMBA_STORE(b[17]);
12161
+
12162
+      /* output 18 */
12163
+      CARRY_FORWARD;
12164
+   SQRADDSC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); 
12165
+      COMBA_STORE(b[18]);
12166
+
12167
+      /* output 19 */
12168
+      CARRY_FORWARD;
12169
+   SQRADDSC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; 
12170
+      COMBA_STORE(b[19]);
12171
+
12172
+      /* output 20 */
12173
+      CARRY_FORWARD;
12174
+   SQRADDSC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]); 
12175
+      COMBA_STORE(b[20]);
12176
+
12177
+      /* output 21 */
12178
+      CARRY_FORWARD;
12179
+   SQRADDSC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB; 
12180
+      COMBA_STORE(b[21]);
12181
+
12182
+      /* output 22 */
12183
+      CARRY_FORWARD;
12184
+   SQRADDSC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]); 
12185
+      COMBA_STORE(b[22]);
12186
+
12187
+      /* output 23 */
12188
+      CARRY_FORWARD;
12189
+   SQRADDSC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB; 
12190
+      COMBA_STORE(b[23]);
12191
+
12192
+      /* output 24 */
12193
+      CARRY_FORWARD;
12194
+   SQRADDSC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]); 
12195
+      COMBA_STORE(b[24]);
12196
+
12197
+      /* output 25 */
12198
+      CARRY_FORWARD;
12199
+   SQRADDSC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB; 
12200
+      COMBA_STORE(b[25]);
12201
+
12202
+      /* output 26 */
12203
+      CARRY_FORWARD;
12204
+      SQRADD2(a[11], a[15]);    SQRADD2(a[12], a[14]);    SQRADD(a[13], a[13]); 
12205
+      COMBA_STORE(b[26]);
12206
+
12207
+      /* output 27 */
12208
+      CARRY_FORWARD;
12209
+      SQRADD2(a[12], a[15]);    SQRADD2(a[13], a[14]); 
12210
+      COMBA_STORE(b[27]);
12211
+
12212
+      /* output 28 */
12213
+      CARRY_FORWARD;
12214
+      SQRADD2(a[13], a[15]);    SQRADD(a[14], a[14]); 
12215
+      COMBA_STORE(b[28]);
12216
+
12217
+      /* output 29 */
12218
+      CARRY_FORWARD;
12219
+      SQRADD2(a[14], a[15]); 
12220
+      COMBA_STORE(b[29]);
12221
+
12222
+      /* output 30 */
12223
+      CARRY_FORWARD;
12224
+      SQRADD(a[15], a[15]); 
12225
+      COMBA_STORE(b[30]);
12226
+      COMBA_STORE2(b[31]);
12227
+      COMBA_FINI;
12228
+
12229
+      B->used = 32;
12230
+      B->sign = FP_ZPOS;
12231
+      memcpy(B->dp, b, 32 * sizeof(fp_digit));
12232
+      fp_clamp(B);
12233
+      break;
12234
+}
8334 12235
 }
8335 12236
 
8336
-#endif
12237
+#endif /* TFM_SMALL_SET */
8337 12238
 
8338
-/* $Source: /cvs/libtom/libtommath/bn_mp_toom_sqr.c,v $ */
8339
-/* $Revision: 1.3 $ */
8340
-/* $Date: 2006/03/31 14:18:44 $ */
12239
+/* $Source: /cvs/libtom/tomsfastmath/src/sqr/fp_sqr_comba_small_set.c,v $ */
12240
+/* $Revision: 1.1 $ */
12241
+/* $Date: 2007/02/15 00:31:32 $ */
8341 12242
 
8342
-/* End: bn_mp_toom_sqr.c */
12243
+/* End: fp_sqr_comba_small_set.c */
8343 12244
 
8344
-/* Start: bn_mp_toradix.c */
8345
-#include <bignum.h>
8346
-#ifdef BN_MP_TORADIX_C
8347
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
8348
- *
8349
- * LibTomMath is a library that provides multiple-precision
8350
- * integer arithmetic as well as number theoretic functionality.
8351
- *
8352
- * The library was designed directly after the MPI library by
8353
- * Michael Fromberger but has been written from scratch with
8354
- * additional optimizations in place.
8355
- *
8356
- * The library is free for all purposes without any express
8357
- * guarantee it works.
12245
+/* Start: fp_sqrmod.c */
12246
+/* TomsFastMath, a fast ISO C bignum library.
12247
+ * 
12248
+ * This project is meant to fill in where LibTomMath
12249
+ * falls short.  That is speed ;-)
8358 12250
  *
8359
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
12251
+ * This project is public domain and free for all purposes.
12252
+ * 
12253
+ * Tom St Denis, tomstdenis@gmail.com
8360 12254
  */
12255
+#include "bignum_fast.h"
8361 12256
 
8362
-/* stores a bignum as a ASCII string in a given radix (2..64) */
8363
-int mp_toradix (mp_int * a, char *str, int radix)
12257
+/* c = a * a (mod b) */
12258
+int fp_sqrmod(fp_int *a, fp_int *b, fp_int *c)
8364 12259
 {
8365
-  int     res, digs;
8366
-  mp_int  t;
8367
-  mp_digit d;
8368
-  char   *_s = str;
8369
-
8370
-  /* check range of the radix */
8371
-  if (radix < 2 || radix > 64) {
8372
-    return MP_VAL;
8373
-  }
8374
-
8375
-  /* quick out if its zero */
8376
-  if (mp_iszero(a) == 1) {
8377
-     *str++ = '0';
8378
-     *str = '\0';
8379
-     return MP_OKAY;
8380
-  }
8381
-
8382
-  if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
8383
-    return res;
8384
-  }
8385
-
8386
-  /* if it is negative output a - */
8387
-  if (t.sign == MP_NEG) {
8388
-    ++_s;
8389
-    *str++ = '-';
8390
-    t.sign = MP_ZPOS;
8391
-  }
8392
-
8393
-  digs = 0;
8394
-  while (mp_iszero (&t) == 0) {
8395
-    if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
8396
-      mp_clear (&t);
8397
-      return res;
8398
-    }
8399
-    *str++ = mp_s_rmap[d];
8400
-    ++digs;
8401
-  }
8402
-
8403
-  /* reverse the digits of the string.  In this case _s points
8404
-   * to the first digit [exluding the sign] of the number]
8405
-   */
8406
-  bn_reverse ((unsigned char *)_s, digs);
8407
-
8408
-  /* append a NULL so the string is properly terminated */
8409
-  *str = '\0';
8410
-
8411
-  mp_clear (&t);
8412
-  return MP_OKAY;
12260
+  fp_int tmp;
12261
+  fp_zero(&tmp);
12262
+  fp_sqr(a, &tmp);
12263
+  return fp_mod(&tmp, b, c);
8413 12264
 }
8414 12265
 
8415
-#endif
12266
+/* $Source: /cvs/libtom/tomsfastmath/src/sqr/fp_sqrmod.c,v $ */
12267
+/* $Revision: 1.1 $ */
12268
+/* $Date: 2006/12/31 21:25:53 $ */
8416 12269
 
8417
-/* $Source: /cvs/libtom/libtommath/bn_mp_toradix.c,v $ */
8418
-/* $Revision: 1.3 $ */
8419
-/* $Date: 2006/03/31 14:18:44 $ */
8420
-
8421
-/* End: bn_mp_toradix.c */
12270
+/* End: fp_sqrmod.c */
8422 12271
 
8423
-/* Start: bn_mp_toradix_n.c */
8424
-#include <bignum.h>
8425
-#ifdef BN_MP_TORADIX_N_C
8426
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
8427
- *
8428
- * LibTomMath is a library that provides multiple-precision
8429
- * integer arithmetic as well as number theoretic functionality.
8430
- *
8431
- * The library was designed directly after the MPI library by
8432
- * Michael Fromberger but has been written from scratch with
8433
- * additional optimizations in place.
8434
- *
8435
- * The library is free for all purposes without any express
8436
- * guarantee it works.
12272
+/* Start: fp_sub.c */
12273
+/* TomsFastMath, a fast ISO C bignum library.
12274
+ * 
12275
+ * This project is meant to fill in where LibTomMath
12276
+ * falls short.  That is speed ;-)
8437 12277
  *
8438
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
12278
+ * This project is public domain and free for all purposes.
12279
+ * 
12280
+ * Tom St Denis, tomstdenis@gmail.com
8439 12281
  */
12282
+#include "bignum_fast.h"
8440 12283
 
8441
-/* stores a bignum as a ASCII string in a given radix (2..64) 
8442
- *
8443
- * Stores upto maxlen-1 chars and always a NULL byte 
8444
- */
8445
-int mp_toradix_n(mp_int * a, char *str, int radix, int maxlen)
12284
+/* c = a - b */
12285
+void fp_sub(fp_int *a, fp_int *b, fp_int *c)
8446 12286
 {
8447
-  int     res, digs;
8448
-  mp_int  t;
8449
-  mp_digit d;
8450
-  char   *_s = str;
8451
-
8452
-  /* check range of the maxlen, radix */
8453
-  if (maxlen < 2 || radix < 2 || radix > 64) {
8454
-    return MP_VAL;
8455
-  }
12287
+  int     sa, sb;
8456 12288
 
8457
-  /* quick out if its zero */
8458
-  if (mp_iszero(a) == MP_YES) {
8459
-     *str++ = '0';
8460
-     *str = '\0';
8461
-     return MP_OKAY;
8462
-  }
8463
-
8464
-  if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
8465
-    return res;
8466
-  }
8467
-
8468
-  /* if it is negative output a - */
8469
-  if (t.sign == MP_NEG) {
8470
-    /* we have to reverse our digits later... but not the - sign!! */
8471
-    ++_s;
8472
-
8473
-    /* store the flag and mark the number as positive */
8474
-    *str++ = '-';
8475
-    t.sign = MP_ZPOS;
8476
- 
8477
-    /* subtract a char */
8478
-    --maxlen;
8479
-  }
12289
+  sa = a->sign;
12290
+  sb = b->sign;
8480 12291
 
8481
-  digs = 0;
8482
-  while (mp_iszero (&t) == 0) {
8483
-    if (--maxlen < 1) {
8484
-       /* no more room */
8485
-       break;
8486
-    }
8487
-    if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
8488
-      mp_clear (&t);
8489
-      return res;
12292
+  if (sa != sb) {
12293
+    /* subtract a negative from a positive, OR */
12294
+    /* subtract a positive from a negative. */
12295
+    /* In either case, ADD their magnitudes, */
12296
+    /* and use the sign of the first number. */
12297
+    c->sign = sa;
12298
+    s_fp_add (a, b, c);
12299
+  } else {
12300
+    /* subtract a positive from a positive, OR */
12301
+    /* subtract a negative from a negative. */
12302
+    /* First, take the difference between their */
12303
+    /* magnitudes, then... */
12304
+    if (fp_cmp_mag (a, b) != FP_LT) {
12305
+      /* Copy the sign from the first */
12306
+      c->sign = sa;
12307
+      /* The first has a larger or equal magnitude */
12308
+      s_fp_sub (a, b, c);
12309
+    } else {
12310
+      /* The result has the *opposite* sign from */
12311
+      /* the first number. */
12312
+      c->sign = (sa == FP_ZPOS) ? FP_NEG : FP_ZPOS;
12313
+      /* The second has a larger magnitude */
12314
+      s_fp_sub (b, a, c);
8490 12315
     }
8491
-    *str++ = mp_s_rmap[d];
8492
-    ++digs;
8493 12316
   }
8494
-
8495
-  /* reverse the digits of the string.  In this case _s points
8496
-   * to the first digit [exluding the sign] of the number
8497
-   */
8498
-  bn_reverse ((unsigned char *)_s, digs);
8499
-
8500
-  /* append a NULL so the string is properly terminated */
8501
-  *str = '\0';
8502
-
8503
-  mp_clear (&t);
8504
-  return MP_OKAY;
8505 12317
 }
8506 12318
 
8507
-#endif
8508
-
8509
-/* $Source: /cvs/libtom/libtommath/bn_mp_toradix_n.c,v $ */
8510
-/* $Revision: 1.4 $ */
8511
-/* $Date: 2006/03/31 14:18:44 $ */
8512
-
8513
-/* End: bn_mp_toradix_n.c */
8514
-
8515
-/* Start: bn_mp_unsigned_bin_size.c */
8516
-#include <bignum.h>
8517
-#ifdef BN_MP_UNSIGNED_BIN_SIZE_C
8518
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
8519
- *
8520
- * LibTomMath is a library that provides multiple-precision
8521
- * integer arithmetic as well as number theoretic functionality.
8522
- *
8523
- * The library was designed directly after the MPI library by
8524
- * Michael Fromberger but has been written from scratch with
8525
- * additional optimizations in place.
8526
- *
8527
- * The library is free for all purposes without any express
8528
- * guarantee it works.
8529
- *
8530
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
8531
- */
8532
-
8533
-/* get the size for an unsigned equivalent */
8534
-int mp_unsigned_bin_size (mp_int * a)
8535
-{
8536
-  int     size = mp_count_bits (a);
8537
-  return (size / 8 + ((size & 7) != 0 ? 1 : 0));
8538
-}
8539
-#endif
8540 12319
 
8541
-/* $Source: /cvs/libtom/libtommath/bn_mp_unsigned_bin_size.c,v $ */
8542
-/* $Revision: 1.3 $ */
8543
-/* $Date: 2006/03/31 14:18:44 $ */
12320
+/* $Source: /cvs/libtom/tomsfastmath/src/addsub/fp_sub.c,v $ */
12321
+/* $Revision: 1.1 $ */
12322
+/* $Date: 2006/12/31 21:25:53 $ */
8544 12323
 
8545
-/* End: bn_mp_unsigned_bin_size.c */
12324
+/* End: fp_sub.c */
8546 12325
 
8547
-/* Start: bn_mp_xor.c */
8548
-#include <bignum.h>
8549
-#ifdef BN_MP_XOR_C
8550
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
8551
- *
8552
- * LibTomMath is a library that provides multiple-precision
8553
- * integer arithmetic as well as number theoretic functionality.
8554
- *
8555
- * The library was designed directly after the MPI library by
8556
- * Michael Fromberger but has been written from scratch with
8557
- * additional optimizations in place.
8558
- *
8559
- * The library is free for all purposes without any express
8560
- * guarantee it works.
12326
+/* Start: fp_sub_d.c */
12327
+/* TomsFastMath, a fast ISO C bignum library.
12328
+ * 
12329
+ * This project is meant to fill in where LibTomMath
12330
+ * falls short.  That is speed ;-)
8561 12331
  *
8562
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
12332
+ * This project is public domain and free for all purposes.
12333
+ * 
12334
+ * Tom St Denis, tomstdenis@gmail.com
8563 12335
  */
12336
+#include "bignum_fast.h"
8564 12337
 
8565
-/* XOR two ints together */
8566
-int
8567
-mp_xor (mp_int * a, mp_int * b, mp_int * c)
12338
+/* c = a - b, where b is a single digit */
12339
+void fp_sub_d(fp_int *a, fp_digit b, fp_int *c)
8568 12340
 {
8569
-  int     res, ix, px;
8570
-  mp_int  t, *x;
8571
-
8572
-  if (a->used > b->used) {
8573
-    if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
8574
-      return res;
8575
-    }
8576
-    px = b->used;
8577
-    x = b;
8578
-  } else {
8579
-    if ((res = mp_init_copy (&t, b)) != MP_OKAY) {
8580
-      return res;
8581
-    }
8582
-    px = a->used;
8583
-    x = a;
8584
-  }
8585
-
8586
-  for (ix = 0; ix < px; ix++) {
8587
-     t.dp[ix] ^= x->dp[ix];
8588
-  }
8589
-  mp_clamp (&t);
8590
-  mp_exch (c, &t);
8591
-  mp_clear (&t);
8592
-  return MP_OKAY;
12341
+   fp_int tmp;
12342
+   fp_set(&tmp, b);
12343
+   fp_sub(a, &tmp, c);
8593 12344
 }
8594
-#endif
8595 12345
 
8596
-/* $Source: /cvs/libtom/libtommath/bn_mp_xor.c,v $ */
8597
-/* $Revision: 1.3 $ */
8598
-/* $Date: 2006/03/31 14:18:44 $ */
12346
+/* $Source: /cvs/libtom/tomsfastmath/src/addsub/fp_sub_d.c,v $ */
12347
+/* $Revision: 1.1 $ */
12348
+/* $Date: 2006/12/31 21:25:53 $ */
8599 12349
 
8600
-/* End: bn_mp_xor.c */
12350
+/* End: fp_sub_d.c */
8601 12351
 
8602
-/* Start: bn_mp_zero.c */
8603
-#include <bignum.h>
8604
-#ifdef BN_MP_ZERO_C
8605
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
8606
- *
8607
- * LibTomMath is a library that provides multiple-precision
8608
- * integer arithmetic as well as number theoretic functionality.
8609
- *
8610
- * The library was designed directly after the MPI library by
8611
- * Michael Fromberger but has been written from scratch with
8612
- * additional optimizations in place.
8613
- *
8614
- * The library is free for all purposes without any express
8615
- * guarantee it works.
12352
+/* Start: fp_submod.c */
12353
+/* TomsFastMath, a fast ISO C bignum library.
12354
+ * 
12355
+ * This project is meant to fill in where LibTomMath
12356
+ * falls short.  That is speed ;-)
8616 12357
  *
8617
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
12358
+ * This project is public domain and free for all purposes.
12359
+ * 
12360
+ * Tom St Denis, tomstdenis@gmail.com
8618 12361
  */
12362
+#include "bignum_fast.h"
8619 12363
 
8620
-/* set to zero */
8621
-void mp_zero (mp_int * a)
12364
+/* d = a - b (mod c) */
12365
+int fp_submod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
8622 12366
 {
8623
-  int       n;
8624
-  mp_digit *tmp;
8625
-
8626
-  a->sign = MP_ZPOS;
8627
-  a->used = 0;
8628
-
8629
-  tmp = a->dp;
8630
-  for (n = 0; n < a->alloc; n++) {
8631
-     *tmp++ = 0;
8632
-  }
12367
+  fp_int tmp;
12368
+  fp_zero(&tmp);
12369
+  fp_sub(a, b, &tmp);
12370
+  return fp_mod(&tmp, c, d);
8633 12371
 }
8634
-#endif
8635
-
8636
-/* $Source: /cvs/libtom/libtommath/bn_mp_zero.c,v $ */
8637
-/* $Revision: 1.3 $ */
8638
-/* $Date: 2006/03/31 14:18:44 $ */
8639
-
8640
-/* End: bn_mp_zero.c */
8641
-
8642
-/* Start: bn_prime_tab.c */
8643
-#include <bignum.h>
8644
-#ifdef BN_PRIME_TAB_C
8645
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
8646
- *
8647
- * LibTomMath is a library that provides multiple-precision
8648
- * integer arithmetic as well as number theoretic functionality.
8649
- *
8650
- * The library was designed directly after the MPI library by
8651
- * Michael Fromberger but has been written from scratch with
8652
- * additional optimizations in place.
8653
- *
8654
- * The library is free for all purposes without any express
8655
- * guarantee it works.
8656
- *
8657
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
8658
- */
8659
-const mp_digit ltm_prime_tab[] = {
8660
-  0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
8661
-  0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
8662
-  0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
8663
-  0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F,
8664
-#ifndef MP_8BIT
8665
-  0x0083,
8666
-  0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
8667
-  0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
8668
-  0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
8669
-  0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
8670 12372
 
8671
-  0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
8672
-  0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
8673
-  0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
8674
-  0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
8675
-  0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
8676
-  0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
8677
-  0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
8678
-  0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
8679 12373
 
8680
-  0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
8681
-  0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
8682
-  0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
8683
-  0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
8684
-  0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
8685
-  0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
8686
-  0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
8687
-  0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
12374
+/* $Source: /cvs/libtom/tomsfastmath/src/addsub/fp_submod.c,v $ */
12375
+/* $Revision: 1.1 $ */
12376
+/* $Date: 2006/12/31 21:25:53 $ */
8688 12377
 
8689
-  0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
8690
-  0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
8691
-  0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
8692
-  0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
8693
-  0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
8694
-  0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
8695
-  0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
8696
-  0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
8697
-#endif
8698
-};
8699
-#endif
8700
-
8701
-/* $Source: /cvs/libtom/libtommath/bn_prime_tab.c,v $ */
8702
-/* $Revision: 1.3 $ */
8703
-/* $Date: 2006/03/31 14:18:44 $ */
8704
-
8705
-/* End: bn_prime_tab.c */
12378
+/* End: fp_submod.c */
8706 12379
 
8707
-/* Start: bn_reverse.c */
8708
-#include <bignum.h>
8709
-#ifdef BN_REVERSE_C
8710
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
8711
- *
8712
- * LibTomMath is a library that provides multiple-precision
8713
- * integer arithmetic as well as number theoretic functionality.
8714
- *
8715
- * The library was designed directly after the MPI library by
8716
- * Michael Fromberger but has been written from scratch with
8717
- * additional optimizations in place.
8718
- *
8719
- * The library is free for all purposes without any express
8720
- * guarantee it works.
12380
+/* Start: fp_to_signed_bin.c */
12381
+/* TomsFastMath, a fast ISO C bignum library.
12382
+ * 
12383
+ * This project is meant to fill in where LibTomMath
12384
+ * falls short.  That is speed ;-)
8721 12385
  *
8722
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
12386
+ * This project is public domain and free for all purposes.
12387
+ * 
12388
+ * Tom St Denis, tomstdenis@gmail.com
8723 12389
  */
12390
+#include "bignum_fast.h"
8724 12391
 
8725
-/* reverse an array, used for radix code */
8726
-void
8727
-bn_reverse (unsigned char *s, int len)
12392
+void fp_to_signed_bin(fp_int *a, unsigned char *b)
8728 12393
 {
8729
-  int     ix, iy;
8730
-  unsigned char t;
8731
-
8732
-  ix = 0;
8733
-  iy = len - 1;
8734
-  while (ix < iy) {
8735
-    t     = s[ix];
8736
-    s[ix] = s[iy];
8737
-    s[iy] = t;
8738
-    ++ix;
8739
-    --iy;
8740
-  }
12394
+  fp_to_unsigned_bin (a, b + 1);
12395
+  b[0] = (unsigned char) ((a->sign == FP_ZPOS) ? 0 : 1);
8741 12396
 }
8742
-#endif
8743 12397
 
8744
-/* $Source: /cvs/libtom/libtommath/bn_reverse.c,v $ */
8745
-/* $Revision: 1.3 $ */
8746
-/* $Date: 2006/03/31 14:18:44 $ */
12398
+/* $Source: /cvs/libtom/tomsfastmath/src/bin/fp_to_signed_bin.c,v $ */
12399
+/* $Revision: 1.1 $ */
12400
+/* $Date: 2006/12/31 21:25:53 $ */
8747 12401
 
8748
-/* End: bn_reverse.c */
12402
+/* End: fp_to_signed_bin.c */
8749 12403
 
8750
-/* Start: bn_s_mp_add.c */
8751
-#include <bignum.h>
8752
-#ifdef BN_S_MP_ADD_C
8753
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
8754
- *
8755
- * LibTomMath is a library that provides multiple-precision
8756
- * integer arithmetic as well as number theoretic functionality.
8757
- *
8758
- * The library was designed directly after the MPI library by
8759
- * Michael Fromberger but has been written from scratch with
8760
- * additional optimizations in place.
8761
- *
8762
- * The library is free for all purposes without any express
8763
- * guarantee it works.
12404
+/* Start: fp_to_unsigned_bin.c */
12405
+/* TomsFastMath, a fast ISO C bignum library.
12406
+ * 
12407
+ * This project is meant to fill in where LibTomMath
12408
+ * falls short.  That is speed ;-)
8764 12409
  *
8765
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
12410
+ * This project is public domain and free for all purposes.
12411
+ * 
12412
+ * Tom St Denis, tomstdenis@gmail.com
8766 12413
  */
12414
+#include "bignum_fast.h"
8767 12415
 
8768
-/* low level addition, based on HAC pp.594, Algorithm 14.7 */
8769
-int
8770
-s_mp_add (mp_int * a, mp_int * b, mp_int * c)
12416
+void fp_to_unsigned_bin(fp_int *a, unsigned char *b)
8771 12417
 {
8772
-  mp_int *x;
8773
-  int     olduse, res, min, max;
8774
-
8775
-  /* find sizes, we let |a| <= |b| which means we have to sort
8776
-   * them.  "x" will point to the input with the most digits
8777
-   */
8778
-  if (a->used > b->used) {
8779
-    min = b->used;
8780
-    max = a->used;
8781
-    x = a;
8782
-  } else {
8783
-    min = a->used;
8784
-    max = b->used;
8785
-    x = b;
8786
-  }
8787
-
8788
-  /* init result */
8789
-  if (c->alloc < max + 1) {
8790
-    if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
8791
-      return res;
8792
-    }
8793
-  }
8794
-
8795
-  /* get old used digit count and set new one */
8796
-  olduse = c->used;
8797
-  c->used = max + 1;
8798
-
8799
-  {
8800
-    register mp_digit u, *tmpa, *tmpb, *tmpc;
8801
-    register int i;
8802
-
8803
-    /* alias for digit pointers */
8804
-
8805
-    /* first input */
8806
-    tmpa = a->dp;
8807
-
8808
-    /* second input */
8809
-    tmpb = b->dp;
8810
-
8811
-    /* destination */
8812
-    tmpc = c->dp;
8813
-
8814
-    /* zero the carry */
8815
-    u = 0;
8816
-    for (i = 0; i < min; i++) {
8817
-      /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
8818
-      *tmpc = *tmpa++ + *tmpb++ + u;
8819
-
8820
-      /* U = carry bit of T[i] */
8821
-      u = *tmpc >> ((mp_digit)DIGIT_BIT);
8822
-
8823
-      /* take away carry bit from T[i] */
8824
-      *tmpc++ &= MP_MASK;
8825
-    }
8826
-
8827
-    /* now copy higher words if any, that is in A+B 
8828
-     * if A or B has more digits add those in 
8829
-     */
8830
-    if (min != max) {
8831
-      for (; i < max; i++) {
8832
-        /* T[i] = X[i] + U */
8833
-        *tmpc = x->dp[i] + u;
8834
-
8835
-        /* U = carry bit of T[i] */
8836
-        u = *tmpc >> ((mp_digit)DIGIT_BIT);
8837
-
8838
-        /* take away carry bit from T[i] */
8839
-        *tmpc++ &= MP_MASK;
8840
-      }
8841
-    }
12418
+  int     x;
12419
+  fp_int  t;
8842 12420
 
8843
-    /* add carry */
8844
-    *tmpc++ = u;
12421
+  fp_init_copy(&t, a);
8845 12422
 
8846
-    /* clear digits above oldused */
8847
-    for (i = c->used; i < olduse; i++) {
8848
-      *tmpc++ = 0;
8849
-    }
12423
+  x = 0;
12424
+  while (fp_iszero (&t) == FP_NO) {
12425
+      b[x++] = (unsigned char) (t.dp[0] & 255);
12426
+      fp_div_2d (&t, 8, &t, NULL);
8850 12427
   }
8851
-
8852
-  mp_clamp (c);
8853
-  return MP_OKAY;
12428
+  fp_reverse (b, x);
8854 12429
 }
8855
-#endif
8856 12430
 
8857
-/* $Source: /cvs/libtom/libtommath/bn_s_mp_add.c,v $ */
8858
-/* $Revision: 1.3 $ */
8859
-/* $Date: 2006/03/31 14:18:44 $ */
12431
+/* $Source: /cvs/libtom/tomsfastmath/src/bin/fp_to_unsigned_bin.c,v $ */
12432
+/* $Revision: 1.2 $ */
12433
+/* $Date: 2007/02/27 02:38:44 $ */
8860 12434
 
8861
-/* End: bn_s_mp_add.c */
12435
+/* End: fp_to_unsigned_bin.c */
8862 12436
 
8863
-/* Start: bn_s_mp_exptmod.c */
8864
-#include <bignum.h>
8865
-#ifdef BN_S_MP_EXPTMOD_C
8866
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
8867
- *
8868
- * LibTomMath is a library that provides multiple-precision
8869
- * integer arithmetic as well as number theoretic functionality.
8870
- *
8871
- * The library was designed directly after the MPI library by
8872
- * Michael Fromberger but has been written from scratch with
8873
- * additional optimizations in place.
8874
- *
8875
- * The library is free for all purposes without any express
8876
- * guarantee it works.
12437
+/* Start: fp_toradix.c */
12438
+/* TomsFastMath, a fast ISO C bignum library.
12439
+ * 
12440
+ * This project is meant to fill in where LibTomMath
12441
+ * falls short.  That is speed ;-)
8877 12442
  *
8878
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
12443
+ * This project is public domain and free for all purposes.
12444
+ * 
12445
+ * Tom St Denis, tomstdenis@gmail.com
8879 12446
  */
8880
-#ifdef MP_LOW_MEM
8881
-   #define TAB_SIZE 32
8882
-#else
8883
-   #define TAB_SIZE 256
8884
-#endif
12447
+#include "bignum_fast.h"
8885 12448
 
8886
-int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
12449
+int fp_toradix(fp_int *a, char *str, int radix)
8887 12450
 {
8888
-  mp_int  M[TAB_SIZE], res, mu;
8889
-  mp_digit buf;
8890
-  int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
8891
-  int (*redux)(mp_int*,mp_int*,mp_int*);
8892
-
8893
-  /* find window size */
8894
-  x = mp_count_bits (X);
8895
-  if (x <= 7) {
8896
-    winsize = 2;
8897
-  } else if (x <= 36) {
8898
-    winsize = 3;
8899
-  } else if (x <= 140) {
8900
-    winsize = 4;
8901
-  } else if (x <= 450) {
8902
-    winsize = 5;
8903
-  } else if (x <= 1303) {
8904
-    winsize = 6;
8905
-  } else if (x <= 3529) {
8906
-    winsize = 7;
8907
-  } else {
8908
-    winsize = 8;
8909
-  }
8910
-
8911
-#ifdef MP_LOW_MEM
8912
-    if (winsize > 5) {
8913
-       winsize = 5;
8914
-    }
8915
-#endif
12451
+  int     digs;
12452
+  fp_int  t;
12453
+  fp_digit d;
12454
+  char   *_s = str;
8916 12455
 
8917
-  /* init M array */
8918
-  /* init first cell */
8919
-  if ((err = mp_init(&M[1])) != MP_OKAY) {
8920
-     return err; 
12456
+  /* check range of the radix */
12457
+  if (radix < 2 || radix > 64) {
12458
+    return FP_VAL;
8921 12459
   }
8922 12460
 
8923
-  /* now init the second half of the array */
8924
-  for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
8925
-    if ((err = mp_init(&M[x])) != MP_OKAY) {
8926
-      for (y = 1<<(winsize-1); y < x; y++) {
8927
-        mp_clear (&M[y]);
8928
-      }
8929
-      mp_clear(&M[1]);
8930
-      return err;
8931
-    }
12461
+  /* quick out if it's zero */
12462
+  if (fp_iszero(a) == 1) {
12463
+     *str++ = '0';
12464
+     *str = '\0';
12465
+     return FP_OKAY;
8932 12466
   }
8933 12467
 
8934
-  /* create mu, used for Barrett reduction */
8935
-  if ((err = mp_init (&mu)) != MP_OKAY) {
8936
-    goto LBL_M;
8937
-  }
8938
-  
8939
-  if (redmode == 0) {
8940
-     if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
8941
-        goto LBL_MU;
8942
-     }
8943
-     redux = mp_reduce;
8944
-  } else {
8945
-     if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
8946
-        goto LBL_MU;
8947
-     }
8948
-     redux = mp_reduce_2k_l;
8949
-  }    
12468
+  fp_init_copy(&t, a);
8950 12469
 
8951
-  /* create M table
8952
-   *
8953
-   * The M table contains powers of the base, 
8954
-   * e.g. M[x] = G**x mod P
8955
-   *
8956
-   * The first half of the table is not 
8957
-   * computed though accept for M[0] and M[1]
8958
-   */
8959
-  if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
8960
-    goto LBL_MU;
8961
-  }
8962
-
8963
-  /* compute the value at M[1<<(winsize-1)] by squaring 
8964
-   * M[1] (winsize-1) times 
8965
-   */
8966
-  if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
8967
-    goto LBL_MU;
12470
+  /* if it is negative output a - */
12471
+  if (t.sign == FP_NEG) {
12472
+    ++_s;
12473
+    *str++ = '-';
12474
+    t.sign = FP_ZPOS;
8968 12475
   }
8969 12476
 
8970
-  for (x = 0; x < (winsize - 1); x++) {
8971
-    /* square it */
8972
-    if ((err = mp_sqr (&M[1 << (winsize - 1)], 
8973
-                       &M[1 << (winsize - 1)])) != MP_OKAY) {
8974
-      goto LBL_MU;
8975
-    }
8976
-
8977
-    /* reduce modulo P */
8978
-    if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
8979
-      goto LBL_MU;
8980
-    }
12477
+  digs = 0;
12478
+  while (fp_iszero (&t) == FP_NO) {
12479
+    fp_div_d (&t, (fp_digit) radix, &t, &d);
12480
+    *str++ = fp_s_rmap[d];
12481
+    ++digs;
8981 12482
   }
8982 12483
 
8983
-  /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
8984
-   * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
12484
+  /* reverse the digits of the string.  In this case _s points
12485
+   * to the first digit [exluding the sign] of the number]
8985 12486
    */
8986
-  for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
8987
-    if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
8988
-      goto LBL_MU;
8989
-    }
8990
-    if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
8991
-      goto LBL_MU;
8992
-    }
8993
-  }
8994
-
8995
-  /* setup result */
8996
-  if ((err = mp_init (&res)) != MP_OKAY) {
8997
-    goto LBL_MU;
8998
-  }
8999
-  mp_set (&res, 1);
9000
-
9001
-  /* set initial mode and bit cnt */
9002
-  mode   = 0;
9003
-  bitcnt = 1;
9004
-  buf    = 0;
9005
-  digidx = X->used - 1;
9006
-  bitcpy = 0;
9007
-  bitbuf = 0;
9008
-
9009
-  for (;;) {
9010
-    /* grab next digit as required */
9011
-    if (--bitcnt == 0) {
9012
-      /* if digidx == -1 we are out of digits */
9013
-      if (digidx == -1) {
9014
-        break;
9015
-      }
9016
-      /* read next digit and reset the bitcnt */
9017
-      buf    = X->dp[digidx--];
9018
-      bitcnt = (int) DIGIT_BIT;
9019
-    }
9020
-
9021
-    /* grab the next msb from the exponent */
9022
-    y     = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
9023
-    buf <<= (mp_digit)1;
9024
-
9025
-    /* if the bit is zero and mode == 0 then we ignore it
9026
-     * These represent the leading zero bits before the first 1 bit
9027
-     * in the exponent.  Technically this opt is not required but it
9028
-     * does lower the # of trivial squaring/reductions used
9029
-     */
9030
-    if (mode == 0 && y == 0) {
9031
-      continue;
9032
-    }
9033
-
9034
-    /* if the bit is zero and mode == 1 then we square */
9035
-    if (mode == 1 && y == 0) {
9036
-      if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
9037
-        goto LBL_RES;
9038
-      }
9039
-      if ((err = redux (&res, P, &mu)) != MP_OKAY) {
9040
-        goto LBL_RES;
9041
-      }
9042
-      continue;
9043
-    }
9044
-
9045
-    /* else we add it to the window */
9046
-    bitbuf |= (y << (winsize - ++bitcpy));
9047
-    mode    = 2;
9048
-
9049
-    if (bitcpy == winsize) {
9050
-      /* ok window is filled so square as required and multiply  */
9051
-      /* square first */
9052
-      for (x = 0; x < winsize; x++) {
9053
-        if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
9054
-          goto LBL_RES;
9055
-        }
9056
-        if ((err = redux (&res, P, &mu)) != MP_OKAY) {
9057
-          goto LBL_RES;
9058
-        }
9059
-      }
9060
-
9061
-      /* then multiply */
9062
-      if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
9063
-        goto LBL_RES;
9064
-      }
9065
-      if ((err = redux (&res, P, &mu)) != MP_OKAY) {
9066
-        goto LBL_RES;
9067
-      }
9068
-
9069
-      /* empty window and reset */
9070
-      bitcpy = 0;
9071
-      bitbuf = 0;
9072
-      mode   = 1;
9073
-    }
9074
-  }
9075
-
9076
-  /* if bits remain then square/multiply */
9077
-  if (mode == 2 && bitcpy > 0) {
9078
-    /* square then multiply if the bit is set */
9079
-    for (x = 0; x < bitcpy; x++) {
9080
-      if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
9081
-        goto LBL_RES;
9082
-      }
9083
-      if ((err = redux (&res, P, &mu)) != MP_OKAY) {
9084
-        goto LBL_RES;
9085
-      }
9086
-
9087
-      bitbuf <<= 1;
9088
-      if ((bitbuf & (1 << winsize)) != 0) {
9089
-        /* then multiply */
9090
-        if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
9091
-          goto LBL_RES;
9092
-        }
9093
-        if ((err = redux (&res, P, &mu)) != MP_OKAY) {
9094
-          goto LBL_RES;
9095
-        }
9096
-      }
9097
-    }
9098
-  }
12487
+  fp_reverse ((unsigned char *)_s, digs);
9099 12488
 
9100
-  mp_exch (&res, Y);
9101
-  err = MP_OKAY;
9102
-LBL_RES:mp_clear (&res);
9103
-LBL_MU:mp_clear (&mu);
9104
-LBL_M:
9105
-  mp_clear(&M[1]);
9106
-  for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
9107
-    mp_clear (&M[x]);
9108
-  }
9109
-  return err;
12489
+  /* append a NULL so the string is properly terminated */
12490
+  *str = '\0';
12491
+  return FP_OKAY;
9110 12492
 }
9111
-#endif
9112 12493
 
9113
-/* $Source: /cvs/libtom/libtommath/bn_s_mp_exptmod.c,v $ */
9114
-/* $Revision: 1.4 $ */
9115
-/* $Date: 2006/03/31 14:18:44 $ */
12494
+/* $Source: /cvs/libtom/tomsfastmath/src/bin/fp_toradix.c,v $ */
12495
+/* $Revision: 1.2 $ */
12496
+/* $Date: 2007/02/27 02:38:44 $ */
9116 12497
 
9117
-/* End: bn_s_mp_exptmod.c */
12498
+/* End: fp_toradix.c */
9118 12499
 
9119
-/* Start: bn_s_mp_mul_digs.c */
9120
-#include <bignum.h>
9121
-#ifdef BN_S_MP_MUL_DIGS_C
9122
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
9123
- *
9124
- * LibTomMath is a library that provides multiple-precision
9125
- * integer arithmetic as well as number theoretic functionality.
9126
- *
9127
- * The library was designed directly after the MPI library by
9128
- * Michael Fromberger but has been written from scratch with
9129
- * additional optimizations in place.
9130
- *
9131
- * The library is free for all purposes without any express
9132
- * guarantee it works.
12500
+/* Start: fp_unsigned_bin_size.c */
12501
+/* TomsFastMath, a fast ISO C bignum library.
12502
+ * 
12503
+ * This project is meant to fill in where LibTomMath
12504
+ * falls short.  That is speed ;-)
9133 12505
  *
9134
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
12506
+ * This project is public domain and free for all purposes.
12507
+ * 
12508
+ * Tom St Denis, tomstdenis@gmail.com
9135 12509
  */
12510
+#include "bignum_fast.h"
9136 12511
 
9137
-/* multiplies |a| * |b| and only computes upto digs digits of result
9138
- * HAC pp. 595, Algorithm 14.12  Modified so you can control how 
9139
- * many digits of output are created.
9140
- */
9141
-int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
12512
+int fp_unsigned_bin_size(fp_int *a)
9142 12513
 {
9143
-  mp_int  t;
9144
-  int     res, pa, pb, ix, iy;
9145
-  mp_digit u;
9146
-  mp_word r;
9147
-  mp_digit tmpx, *tmpt, *tmpy;
9148
-
9149
-  /* can we use the fast multiplier? */
9150
-  if (((digs) < MP_WARRAY) &&
9151
-      MIN (a->used, b->used) < 
9152
-          (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
9153
-    return fast_s_mp_mul_digs (a, b, c, digs);
9154
-  }
9155
-
9156
-  if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
9157
-    return res;
9158
-  }
9159
-  t.used = digs;
9160
-
9161
-  /* compute the digits of the product directly */
9162
-  pa = a->used;
9163
-  for (ix = 0; ix < pa; ix++) {
9164
-    /* set the carry to zero */
9165
-    u = 0;
9166
-
9167
-    /* limit ourselves to making digs digits of output */
9168
-    pb = MIN (b->used, digs - ix);
9169
-
9170
-    /* setup some aliases */
9171
-    /* copy of the digit from a used within the nested loop */
9172
-    tmpx = a->dp[ix];
9173
-    
9174
-    /* an alias for the destination shifted ix places */
9175
-    tmpt = t.dp + ix;
9176
-    
9177
-    /* an alias for the digits of b */
9178
-    tmpy = b->dp;
9179
-
9180
-    /* compute the columns of the output and propagate the carry */
9181
-    for (iy = 0; iy < pb; iy++) {
9182
-      /* compute the column as a mp_word */
9183
-      r       = ((mp_word)*tmpt) +
9184
-                ((mp_word)tmpx) * ((mp_word)*tmpy++) +
9185
-                ((mp_word) u);
9186
-
9187
-      /* the new column is the lower part of the result */
9188
-      *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
9189
-
9190
-      /* get the carry word from the result */
9191
-      u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
9192
-    }
9193
-    /* set carry if it is placed below digs */
9194
-    if (ix + iy < digs) {
9195
-      *tmpt = u;
9196
-    }
9197
-  }
9198
-
9199
-  mp_clamp (&t);
9200
-  mp_exch (&t, c);
9201
-
9202
-  mp_clear (&t);
9203
-  return MP_OKAY;
12514
+  int     size = fp_count_bits (a);
12515
+  return (size / 8 + ((size & 7) != 0 ? 1 : 0));
9204 12516
 }
9205
-#endif
9206 12517
 
9207
-/* $Source: /cvs/libtom/libtommath/bn_s_mp_mul_digs.c,v $ */
9208
-/* $Revision: 1.3 $ */
9209
-/* $Date: 2006/03/31 14:18:44 $ */
12518
+/* $Source: /cvs/libtom/tomsfastmath/src/bin/fp_unsigned_bin_size.c,v $ */
12519
+/* $Revision: 1.1 $ */
12520
+/* $Date: 2006/12/31 21:25:53 $ */
9210 12521
 
9211
-/* End: bn_s_mp_mul_digs.c */
12522
+/* End: fp_unsigned_bin_size.c */
9212 12523
 
9213
-/* Start: bn_s_mp_mul_high_digs.c */
9214
-#include <bignum.h>
9215
-#ifdef BN_S_MP_MUL_HIGH_DIGS_C
9216
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
9217
- *
9218
- * LibTomMath is a library that provides multiple-precision
9219
- * integer arithmetic as well as number theoretic functionality.
9220
- *
9221
- * The library was designed directly after the MPI library by
9222
- * Michael Fromberger but has been written from scratch with
9223
- * additional optimizations in place.
9224
- *
9225
- * The library is free for all purposes without any express
9226
- * guarantee it works.
12524
+/* Start: s_fp_add.c */
12525
+/* TomsFastMath, a fast ISO C bignum library.
12526
+ * 
12527
+ * This project is meant to fill in where LibTomMath
12528
+ * falls short.  That is speed ;-)
9227 12529
  *
9228
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
12530
+ * This project is public domain and free for all purposes.
12531
+ * 
12532
+ * Tom St Denis, tomstdenis@gmail.com
9229 12533
  */
12534
+#include "bignum_fast.h"
9230 12535
 
9231
-/* multiplies |a| * |b| and does not compute the lower digs digits
9232
- * [meant to get the higher part of the product]
9233
- */
9234
-int
9235
-s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
12536
+/* unsigned addition */
12537
+void s_fp_add(fp_int *a, fp_int *b, fp_int *c)
9236 12538
 {
9237
-  mp_int  t;
9238
-  int     res, pa, pb, ix, iy;
9239
-  mp_digit u;
9240
-  mp_word r;
9241
-  mp_digit tmpx, *tmpt, *tmpy;
9242
-
9243
-  /* can we use the fast multiplier? */
9244
-#ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
9245
-  if (((a->used + b->used + 1) < MP_WARRAY)
9246
-      && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
9247
-    return fast_s_mp_mul_high_digs (a, b, c, digs);
9248
-  }
9249
-#endif
12539
+  int      x, y, oldused;
12540
+  register fp_word  t;
9250 12541
 
9251
-  if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
9252
-    return res;
12542
+  y       = MAX(a->used, b->used);
12543
+  oldused = c->used;
12544
+  c->used = y;
12545
+ 
12546
+  t = 0;
12547
+  for (x = 0; x < y; x++) {
12548
+      t         += ((fp_word)a->dp[x]) + ((fp_word)b->dp[x]);
12549
+      c->dp[x]   = (fp_digit)t;
12550
+      t        >>= DIGIT_BIT;
12551
+  }
12552
+  if (t != 0 && x < FP_SIZE) {
12553
+     c->dp[c->used++] = (fp_digit)t;
12554
+     ++x;
9253 12555
   }
9254
-  t.used = a->used + b->used + 1;
9255
-
9256
-  pa = a->used;
9257
-  pb = b->used;
9258
-  for (ix = 0; ix < pa; ix++) {
9259
-    /* clear the carry */
9260
-    u = 0;
9261
-
9262
-    /* left hand side of A[ix] * B[iy] */
9263
-    tmpx = a->dp[ix];
9264
-
9265
-    /* alias to the address of where the digits will be stored */
9266
-    tmpt = &(t.dp[digs]);
9267
-
9268
-    /* alias for where to read the right hand side from */
9269
-    tmpy = b->dp + (digs - ix);
9270
-
9271
-    for (iy = digs - ix; iy < pb; iy++) {
9272
-      /* calculate the double precision result */
9273
-      r       = ((mp_word)*tmpt) +
9274
-                ((mp_word)tmpx) * ((mp_word)*tmpy++) +
9275
-                ((mp_word) u);
9276
-
9277
-      /* get the lower part */
9278
-      *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
9279 12556
 
9280
-      /* carry the carry */
9281
-      u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
9282
-    }
9283
-    *tmpt = u;
12557
+  c->used = x;
12558
+  for (; x < oldused; x++) {
12559
+     c->dp[x] = 0;
9284 12560
   }
9285
-  mp_clamp (&t);
9286
-  mp_exch (&t, c);
9287
-  mp_clear (&t);
9288
-  return MP_OKAY;
12561
+  fp_clamp(c);
9289 12562
 }
9290
-#endif
9291 12563
 
9292
-/* $Source: /cvs/libtom/libtommath/bn_s_mp_mul_high_digs.c,v $ */
9293
-/* $Revision: 1.3 $ */
9294
-/* $Date: 2006/03/31 14:18:44 $ */
12564
+/* $Source: /cvs/libtom/tomsfastmath/src/addsub/s_fp_add.c,v $ */
12565
+/* $Revision: 1.1 $ */
12566
+/* $Date: 2006/12/31 21:25:53 $ */
9295 12567
 
9296
-/* End: bn_s_mp_mul_high_digs.c */
12568
+/* End: s_fp_add.c */
9297 12569
 
9298
-/* Start: bn_s_mp_sqr.c */
9299
-#include <bignum.h>
9300
-#ifdef BN_S_MP_SQR_C
9301
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
9302
- *
9303
- * LibTomMath is a library that provides multiple-precision
9304
- * integer arithmetic as well as number theoretic functionality.
9305
- *
9306
- * The library was designed directly after the MPI library by
9307
- * Michael Fromberger but has been written from scratch with
9308
- * additional optimizations in place.
9309
- *
9310
- * The library is free for all purposes without any express
9311
- * guarantee it works.
12570
+/* Start: s_fp_sub.c */
12571
+/* TomsFastMath, a fast ISO C bignum library.
12572
+ * 
12573
+ * This project is meant to fill in where LibTomMath
12574
+ * falls short.  That is speed ;-)
9312 12575
  *
9313
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
12576
+ * This project is public domain and free for all purposes.
12577
+ * 
12578
+ * Tom St Denis, tomstdenis@gmail.com
9314 12579
  */
12580
+#include "bignum_fast.h"
9315 12581
 
9316
-/* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
9317
-int s_mp_sqr (mp_int * a, mp_int * b)
12582
+/* unsigned subtraction ||a|| >= ||b|| ALWAYS! */
12583
+void s_fp_sub(fp_int *a, fp_int *b, fp_int *c)
9318 12584
 {
9319
-  mp_int  t;
9320
-  int     res, ix, iy, pa;
9321
-  mp_word r;
9322
-  mp_digit u, tmpx, *tmpt;
9323
-
9324
-  pa = a->used;
9325
-  if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
9326
-    return res;
9327
-  }
9328
-
9329
-  /* default used is maximum possible size */
9330
-  t.used = 2*pa + 1;
9331
-
9332
-  for (ix = 0; ix < pa; ix++) {
9333
-    /* first calculate the digit at 2*ix */
9334
-    /* calculate double precision result */
9335
-    r = ((mp_word) t.dp[2*ix]) +
9336
-        ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
9337
-
9338
-    /* store lower part in result */
9339
-    t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
9340
-
9341
-    /* get the carry */
9342
-    u           = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
9343
-
9344
-    /* left hand side of A[ix] * A[iy] */
9345
-    tmpx        = a->dp[ix];
9346
-
9347
-    /* alias for where to store the results */
9348
-    tmpt        = t.dp + (2*ix + 1);
9349
-    
9350
-    for (iy = ix + 1; iy < pa; iy++) {
9351
-      /* first calculate the product */
9352
-      r       = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
9353
-
9354
-      /* now calculate the double precision result, note we use
9355
-       * addition instead of *2 since it's easier to optimize
9356
-       */
9357
-      r       = ((mp_word) *tmpt) + r + r + ((mp_word) u);
9358
-
9359
-      /* store lower part */
9360
-      *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
9361
-
9362
-      /* get carry */
9363
-      u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
9364
-    }
9365
-    /* propagate upwards */
9366
-    while (u != ((mp_digit) 0)) {
9367
-      r       = ((mp_word) *tmpt) + ((mp_word) u);
9368
-      *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
9369
-      u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
9370
-    }
12585
+  int      x, oldbused, oldused;
12586
+  fp_word  t;
12587
+
12588
+  oldused  = c->used;
12589
+  oldbused = b->used;
12590
+  c->used  = a->used;
12591
+  t       = 0;
12592
+  for (x = 0; x < oldbused; x++) {
12593
+     t         = ((fp_word)a->dp[x]) - (((fp_word)b->dp[x]) + t);
12594
+     c->dp[x]  = (fp_digit)t;
12595
+     t         = (t >> DIGIT_BIT)&1;
12596
+  }
12597
+  for (; x < a->used; x++) {
12598
+     t         = ((fp_word)a->dp[x]) - t;
12599
+     c->dp[x]  = (fp_digit)t;
12600
+     t         = (t >> DIGIT_BIT);
12601
+   }
12602
+  for (; x < oldused; x++) {
12603
+     c->dp[x] = 0;
9371 12604
   }
9372
-
9373
-  mp_clamp (&t);
9374
-  mp_exch (&t, b);
9375
-  mp_clear (&t);
9376
-  return MP_OKAY;
12605
+  fp_clamp(c);
9377 12606
 }
9378
-#endif
9379 12607
 
9380
-/* $Source: /cvs/libtom/libtommath/bn_s_mp_sqr.c,v $ */
9381
-/* $Revision: 1.3 $ */
9382
-/* $Date: 2006/03/31 14:18:44 $ */
12608
+/* $Source: /cvs/libtom/tomsfastmath/src/addsub/s_fp_sub.c,v $ */
12609
+/* $Revision: 1.1 $ */
12610
+/* $Date: 2006/12/31 21:25:53 $ */
9383 12611
 
9384
-/* End: bn_s_mp_sqr.c */
12612
+/* End: s_fp_sub.c */
9385 12613
 
9386
-/* Start: bn_s_mp_sub.c */
9387
-#include <bignum.h>
9388
-#ifdef BN_S_MP_SUB_C
9389
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
9390
- *
9391
- * LibTomMath is a library that provides multiple-precision
9392
- * integer arithmetic as well as number theoretic functionality.
9393
- *
9394
- * The library was designed directly after the MPI library by
9395
- * Michael Fromberger but has been written from scratch with
9396
- * additional optimizations in place.
9397
- *
9398
- * The library is free for all purposes without any express
9399
- * guarantee it works.
12614
+
12615
+/* EOF */
12616
+/* TomsFastMath, a fast ISO C bignum library.
12617
+ * 
12618
+ * This project is meant to fill in where LibTomMath
12619
+ * falls short.  That is speed ;-)
9400 12620
  *
9401
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
12621
+ * This project is public domain and free for all purposes.
12622
+ * 
12623
+ * Tom St Denis, tomstdenis@gmail.com
9402 12624
  */
9403 12625
 
9404
-/* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
9405
-int
9406
-s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
9407
-{
9408
-  int     olduse, res, min, max;
12626
+#define TFM_DEFINES
12627
+#include "fp_sqr_comba.c"
9409 12628
 
9410
-  /* find sizes */
9411
-  min = b->used;
9412
-  max = a->used;
12629
+/* generic comba squarer */
12630
+void fp_sqr_comba(fp_int *A, fp_int *B)
12631
+{
12632
+  int       pa, ix, iz;
12633
+  fp_digit  c0, c1, c2;
12634
+  fp_int    tmp, *dst;
12635
+#ifdef TFM_ISO
12636
+  fp_word   tt;
12637
+#endif    
9413 12638
 
9414
-  /* init result */
9415
-  if (c->alloc < max) {
9416
-    if ((res = mp_grow (c, max)) != MP_OKAY) {
9417
-      return res;
9418
-    }
12639
+  /* get size of output and trim */
12640
+  pa = A->used + A->used;
12641
+  if (pa >= FP_SIZE) {
12642
+     pa = FP_SIZE-1;
9419 12643
   }
9420
-  olduse = c->used;
9421
-  c->used = max;
9422
-
9423
-  {
9424
-    register mp_digit u, *tmpa, *tmpb, *tmpc;
9425
-    register int i;
9426 12644
 
9427
-    /* alias for digit pointers */
9428
-    tmpa = a->dp;
9429
-    tmpb = b->dp;
9430
-    tmpc = c->dp;
9431
-
9432
-    /* set carry to zero */
9433
-    u = 0;
9434
-    for (i = 0; i < min; i++) {
9435
-      /* T[i] = A[i] - B[i] - U */
9436
-      *tmpc = *tmpa++ - *tmpb++ - u;
9437
-
9438
-      /* U = carry bit of T[i]
9439
-       * Note this saves performing an AND operation since
9440
-       * if a carry does occur it will propagate all the way to the
9441
-       * MSB.  As a result a single shift is enough to get the carry
9442
-       */
9443
-      u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
9444
-
9445
-      /* Clear carry from T[i] */
9446
-      *tmpc++ &= MP_MASK;
9447
-    }
9448
-
9449
-    /* now copy higher words if any, e.g. if A has more digits than B  */
9450
-    for (; i < max; i++) {
9451
-      /* T[i] = A[i] - U */
9452
-      *tmpc = *tmpa++ - u;
9453
-
9454
-      /* U = carry bit of T[i] */
9455
-      u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
9456
-
9457
-      /* Clear carry from T[i] */
9458
-      *tmpc++ &= MP_MASK;
9459
-    }
12645
+  /* number of output digits to produce */
12646
+  COMBA_START;
12647
+  CLEAR_CARRY;
9460 12648
 
9461
-    /* clear digits above used (since we may not have grown result above) */
9462
-    for (i = c->used; i < olduse; i++) {
9463
-      *tmpc++ = 0;
9464
-    }
12649
+  if (A == B) {
12650
+     fp_zero(&tmp);
12651
+     dst = &tmp;
12652
+  } else {
12653
+     fp_zero(B);
12654
+     dst = B;
9465 12655
   }
9466 12656
 
9467
-  mp_clamp (c);
9468
-  return MP_OKAY;
9469
-}
12657
+  for (ix = 0; ix < pa; ix++) { 
12658
+      int      tx, ty, iy;
12659
+      fp_digit *tmpy, *tmpx;
9470 12660
 
9471
-#endif
12661
+      /* get offsets into the two bignums */
12662
+      ty = MIN(A->used-1, ix);
12663
+      tx = ix - ty;
9472 12664
 
9473
-/* $Source: /cvs/libtom/libtommath/bn_s_mp_sub.c,v $ */
9474
-/* $Revision: 1.3 $ */
9475
-/* $Date: 2006/03/31 14:18:44 $ */
12665
+      /* setup temp aliases */
12666
+      tmpx = A->dp + tx;
12667
+      tmpy = A->dp + ty;
9476 12668
 
9477
-/* End: bn_s_mp_sub.c */
12669
+      /* this is the number of times the loop will iterrate,
12670
+         while (tx++ < a->used && ty-- >= 0) { ... }
12671
+       */
12672
+      iy = MIN(A->used-tx, ty+1);
9478 12673
 
9479
-/* Start: bncore.c */
9480
-#include <bignum.h>
9481
-#ifdef BNCORE_C
9482
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
9483
- *
9484
- * LibTomMath is a library that provides multiple-precision
9485
- * integer arithmetic as well as number theoretic functionality.
9486
- *
9487
- * The library was designed directly after the MPI library by
9488
- * Michael Fromberger but has been written from scratch with
9489
- * additional optimizations in place.
9490
- *
9491
- * The library is free for all purposes without any express
9492
- * guarantee it works.
9493
- *
9494
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
9495
- */
12674
+      /* now for squaring tx can never equal ty 
12675
+       * we halve the distance since they approach 
12676
+       * at a rate of 2x and we have to round because 
12677
+       * odd cases need to be executed
12678
+       */
12679
+      iy = MIN(iy, (ty-tx+1)>>1);
9496 12680
 
9497
-/* Known optimal configurations
12681
+      /* forward carries */
12682
+      CARRY_FORWARD;
9498 12683
 
9499
- CPU                    /Compiler     /MUL CUTOFF/SQR CUTOFF
9500
- Intel P4 Northwood     /GCC v3.4.1   /        88/       128/LTM 0.32 ;-)
9501
- AMD Athlon64           /GCC v3.4.4   /        80/       120/LTM 0.35
9502
- 
9503
-*/
12684
+      /* execute loop */
12685
+      for (iz = 0; iz < iy; iz++) {
12686
+          SQRADD2(*tmpx++, *tmpy--);
12687
+      }
9504 12688
 
9505
-int     KARATSUBA_MUL_CUTOFF = 80,      /* Min. number of digits before Karatsuba multiplication is used. */
9506
-        KARATSUBA_SQR_CUTOFF = 120,     /* Min. number of digits before Karatsuba squaring is used. */
9507
-        
9508
-        TOOM_MUL_CUTOFF      = 350,      /* no optimal values of these are known yet so set em high */
9509
-        TOOM_SQR_CUTOFF      = 400; 
9510
-#endif
12689
+      /* even columns have the square term in them */
12690
+      if ((ix&1) == 0) {
12691
+          SQRADD(A->dp[ix>>1], A->dp[ix>>1]);
12692
+      }
9511 12693
 
9512
-/* $Source: /cvs/libtom/libtommath/bncore.c,v $ */
9513
-/* $Revision: 1.4 $ */
9514
-/* $Date: 2006/03/31 14:18:44 $ */
12694
+      /* store it */
12695
+      COMBA_STORE(dst->dp[ix]);
12696
+  }
9515 12697
 
9516
-/* End: bncore.c */
12698
+  COMBA_FINI;
9517 12699
 
12700
+  /* setup dest */
12701
+  dst->used = pa;
12702
+  fp_clamp (dst);
12703
+  if (dst != B) {
12704
+     fp_copy(dst, B);
12705
+  }
12706
+}
9518 12707
 
9519
-/* EOF */
12708
+/* $Source: /cvs/libtom/tomsfastmath/src/sqr/Attic/fp_sqr_comba_generic.c,v $ */
12709
+/* $Revision: 1.3 $ */
12710
+/* $Date: 2007/02/15 00:31:32 $ */
... ...
@@ -1,586 +1,32 @@
1
-/* LibTomMath, multiple-precision integer library -- Tom St Denis
2
- *
3
- * LibTomMath is a library that provides multiple-precision
4
- * integer arithmetic as well as number theoretic functionality.
5
- *
6
- * The library was designed directly after the MPI library by
7
- * Michael Fromberger but has been written from scratch with
8
- * additional optimizations in place.
9
- *
10
- * The library is free for all purposes without any express
11
- * guarantee it works.
12
- *
13
- * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
14
- */
15
-#ifndef __BIGNUM_H
16
-#define __BIGNUM_H
17
-
18
-#ifdef HAVE_SYSTEM_TOMMATH
19
-#include <tommath.h>
20
-#else
21
-#include <stdio.h>
22
-#include <string.h>
23
-#include <stdlib.h>
24
-#include <ctype.h>
25
-#include <limits.h>
26
-
27
-/*
28
-#define BN_MP_INIT_C
29
-#define BN_MP_ZERO_C
30
-#define BN_MP_READ_RADIX_C
31
-#define BN_MP_RADIX_SMAP_C
32
-#define BN_MP_SET_INT_C
33
-#define BN_MP_MUL_2D_C
34
-#define BN_MP_MUL_D_C
35
-#define BN_MP_CLAMP_C
36
-#define BN_MP_ADD_D_C
37
-#define BN_S_MP_ADD_C
38
-#define BN_MP_LSHD_C
39
-#define BN_MP_GROW_C
40
-#define BN_MP_CMP_MAG_C
41
-#define BN_MP_COPY_C
42
-*/
43
-
44
-#define LTM_ALL /* FIXME: tk: limit to the above class */
45
-#include "bignum_class.h"
46
-
47
-#ifdef __cplusplus
48
-extern "C" {
49
-
50
-/* C++ compilers don't like assigning void * to mp_digit * */
51
-#define  OPT_CAST(x)  (x *)
52
-
53
-#else
54
-
55
-/* C on the other hand doesn't care */
56
-#define  OPT_CAST(x)
57
-
58
-#endif
59
-
60
-#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)
61
-/*Don't enable this everywhere , some old gcc's have broken udivti3.
62
-Also divisions get translated into libcalls, not worth using this mode.*/
63
-
64
-/* detect 64-bit mode if possible */
65
-#if defined(__x86_64__) 
66
-   #if !(defined(MP_64BIT) && defined(MP_16BIT) && defined(MP_8BIT))
67
-      #define MP_64BIT
68
-   #endif
69
-#endif
70
-#endif
71
-
72
-/* some default configurations.
73
- *
74
- * A "mp_digit" must be able to hold DIGIT_BIT + 1 bits
75
- * A "mp_word" must be able to hold 2*DIGIT_BIT + 1 bits
76
- *
77
- * At the very least a mp_digit must be able to hold 7 bits
78
- * [any size beyond that is ok provided it doesn't overflow the data type]
79
- */
80
-#ifdef MP_8BIT
81
-   typedef unsigned char      mp_digit;
82
-   typedef unsigned short     mp_word;
83
-#elif defined(MP_16BIT)
84
-   typedef unsigned short     mp_digit;
85
-   typedef unsigned long      mp_word;
86
-#elif defined(MP_64BIT)
87
-   /* for GCC only on supported platforms */
88
-#ifndef CRYPT
89
-   typedef unsigned long long ulong64;
90
-   typedef signed long long   long64;
91
-#endif
92
-
93
-   typedef unsigned long      mp_digit;
94
-   typedef unsigned long      mp_word __attribute__ ((mode(TI)));
95
-
96
-   #define DIGIT_BIT          60
97
-#else
98
-   /* this is the default case, 28-bit digits */
99
-   
100
-   /* this is to make porting into LibTomCrypt easier :-) */
101
-#ifndef CRYPT
102
-   #if defined(_MSC_VER) || defined(__BORLANDC__) 
103
-      typedef unsigned __int64   ulong64;
104
-      typedef signed __int64     long64;
105
-   #else
106
-      typedef unsigned long long ulong64;
107
-      typedef signed long long   long64;
108
-   #endif
109
-#endif
110
-
111
-   typedef unsigned long      mp_digit;
112
-   typedef ulong64            mp_word;
113
-
114
-#ifdef MP_31BIT   
115
-   /* this is an extension that uses 31-bit digits */
116
-   #define DIGIT_BIT          31
117
-#else
118
-   /* default case is 28-bit digits, defines MP_28BIT as a handy macro to test */
119
-   #define DIGIT_BIT          28
120
-   #define MP_28BIT
121
-#endif   
1
+#ifndef BIGNUM_H_
2
+#define BIGNUM_H_
3
+
4
+#define TFM_CHECK
5
+
6
+#include "bignum_fast.h"
7
+typedef fp_int mp_int;
8
+#define mp_cmp fp_cmp
9
+#define mp_toradix_n(a,b,c,d) fp_toradix(a,b,c)
10
+#define mp_init(a) (fp_init(a), 0)
11
+#define mp_add fp_add
12
+
13
+#define mp_init_multi(a,b,c,d) (mp_init(a), mp_init(b), mp_init(c), 0)
14
+
15
+#define mp_read_unsigned_bin(a,b,c) (fp_read_unsigned_bin(a, b, c), 0)
16
+
17
+#define mp_div fp_div
18
+#define mp_clear_multi(...)
19
+#define mp_copy(a,b) (fp_copy(a,b), 0)
20
+#define mp_unsigned_bin_size fp_unsigned_bin_size
21
+#define mp_to_unsigned_bin(a,b) (fp_to_unsigned_bin(a,b), 0)
22
+#define mp_read_radix fp_read_radix
23
+#define mp_exptmod fp_exptmod
24
+#define mp_get_int(a) cli_readint32(a)
25
+
26
+static void mp_set_int(fp_int *a, int b)
27
+{
28
+    fp_read_unsigned_bin(a, (char*)&b, sizeof(b));
29
+}
30
+#define mp_mul_2d fp_mul_2d
31
+#define mp_clear(x)
122 32
 #endif
123
-
124
-
125
-/* otherwise the bits per digit is calculated automatically from the size of a mp_digit */
126
-#ifndef DIGIT_BIT
127
-   #define DIGIT_BIT     ((int)((CHAR_BIT * sizeof(mp_digit) - 1)))  /* bits per digit */
128
-#endif
129
-
130
-#define MP_DIGIT_BIT     DIGIT_BIT
131
-#define MP_MASK          ((((mp_digit)1)<<((mp_digit)DIGIT_BIT))-((mp_digit)1))
132
-#define MP_DIGIT_MAX     MP_MASK
133
-
134
-/* equalities */
135
-#define MP_LT        -1   /* less than */
136
-#define MP_EQ         0   /* equal to */
137
-#define MP_GT         1   /* greater than */
138
-
139
-#define MP_ZPOS       0   /* positive integer */
140
-#define MP_NEG        1   /* negative */
141
-
142
-#define MP_OKAY       0   /* ok result */
143
-#define MP_MEM        -2  /* out of mem */
144
-#define MP_VAL        -3  /* invalid input */
145
-#define MP_RANGE      MP_VAL
146
-
147
-#define MP_YES        1   /* yes response */
148
-#define MP_NO         0   /* no response */
149
-
150
-/* Primality generation flags */
151
-#define LTM_PRIME_BBS      0x0001 /* BBS style prime */
152
-#define LTM_PRIME_SAFE     0x0002 /* Safe prime (p-1)/2 == prime */
153
-#define LTM_PRIME_2MSB_ON  0x0008 /* force 2nd MSB to 1 */
154
-
155
-typedef int           mp_err;
156
-
157
-/* you'll have to tune these... */
158
-extern int KARATSUBA_MUL_CUTOFF,
159
-           KARATSUBA_SQR_CUTOFF,
160
-           TOOM_MUL_CUTOFF,
161
-           TOOM_SQR_CUTOFF;
162
-
163
-/* define this to use lower memory usage routines (exptmods mostly) */
164
-/* #define MP_LOW_MEM */
165
-
166
-/* default precision */
167
-#ifndef MP_PREC
168
-   #ifndef MP_LOW_MEM
169
-      #define MP_PREC                 32     /* default digits of precision */
170
-   #else
171
-      #define MP_PREC                 8      /* default digits of precision */
172
-   #endif   
173
-#endif
174
-
175
-/* size of comba arrays, should be at least 2 * 2**(BITS_PER_WORD - BITS_PER_DIGIT*2) */
176
-#define MP_WARRAY               (1 << (sizeof(mp_word) * CHAR_BIT - 2 * DIGIT_BIT + 1))
177
-
178
-/* the infamous mp_int structure */
179
-typedef struct  {
180
-    int used, alloc, sign;
181
-    mp_digit *dp;
182
-} mp_int;
183
-
184
-/* callback for mp_prime_random, should fill dst with random bytes and return how many read [upto len] */
185
-typedef int ltm_prime_callback(unsigned char *dst, int len, void *dat);
186
-
187
-
188
-#define USED(m)    ((m)->used)
189
-#define DIGIT(m,k) ((m)->dp[(k)])
190
-#define SIGN(m)    ((m)->sign)
191
-
192
-/* error code to char* string */
193
-const char *mp_error_to_string(int code);
194
-
195
-/* ---> init and deinit bignum functions <--- */
196
-/* init a bignum */
197
-int mp_init(mp_int *a);
198
-
199
-/* free a bignum */
200
-void mp_clear(mp_int *a);
201
-
202
-/* init a null terminated series of arguments */
203
-int mp_init_multi(mp_int *mp, ...);
204
-
205
-/* clear a null terminated series of arguments */
206
-void mp_clear_multi(mp_int *mp, ...);
207
-
208
-/* exchange two ints */
209
-void mp_exch(mp_int *a, mp_int *b);
210
-
211
-/* shrink ram required for a bignum */
212
-int mp_shrink(mp_int *a);
213
-
214
-/* grow an int to a given size */
215
-int mp_grow(mp_int *a, int size);
216
-
217
-/* init to a given number of digits */
218
-int mp_init_size(mp_int *a, int size);
219
-
220
-/* ---> Basic Manipulations <--- */
221
-#define mp_iszero(a) (((a)->used == 0) ? MP_YES : MP_NO)
222
-#define mp_iseven(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 0)) ? MP_YES : MP_NO)
223
-#define mp_isodd(a)  (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? MP_YES : MP_NO)
224
-
225
-/* set to zero */
226
-void mp_zero(mp_int *a);
227
-
228
-/* set to a digit */
229
-void mp_set(mp_int *a, mp_digit b);
230
-
231
-/* set a 32-bit const */
232
-int mp_set_int(mp_int *a, unsigned long b);
233
-
234
-/* get a 32-bit value */
235
-unsigned long mp_get_int(mp_int * a);
236
-
237
-/* initialize and set a digit */
238
-int mp_init_set (mp_int * a, mp_digit b);
239
-
240
-/* initialize and set 32-bit value */
241
-int mp_init_set_int (mp_int * a, unsigned long b);
242
-
243
-/* copy, b = a */
244
-int mp_copy(mp_int *a, mp_int *b);
245
-
246
-/* inits and copies, a = b */
247
-int mp_init_copy(mp_int *a, mp_int *b);
248
-
249
-/* trim unused digits */
250
-void mp_clamp(mp_int *a);
251
-
252
-/* ---> digit manipulation <--- */
253
-
254
-/* right shift by "b" digits */
255
-void mp_rshd(mp_int *a, int b);
256
-
257
-/* left shift by "b" digits */
258
-int mp_lshd(mp_int *a, int b);
259
-
260
-/* c = a / 2**b */
261
-int mp_div_2d(mp_int *a, int b, mp_int *c, mp_int *d);
262
-
263
-/* b = a/2 */
264
-int mp_div_2(mp_int *a, mp_int *b);
265
-
266
-/* c = a * 2**b */
267
-int mp_mul_2d(mp_int *a, int b, mp_int *c);
268
-
269
-/* b = a*2 */
270
-int mp_mul_2(mp_int *a, mp_int *b);
271
-
272
-/* c = a mod 2**d */
273
-int mp_mod_2d(mp_int *a, int b, mp_int *c);
274
-
275
-/* computes a = 2**b */
276
-int mp_2expt(mp_int *a, int b);
277
-
278
-/* Counts the number of lsbs which are zero before the first zero bit */
279
-int mp_cnt_lsb(mp_int *a);
280
-
281
-/* I Love Earth! */
282
-
283
-/* makes a pseudo-random int of a given size */
284
-int mp_rand(mp_int *a, int digits);
285
-
286
-/* ---> binary operations <--- */
287
-/* c = a XOR b  */
288
-int mp_xor(mp_int *a, mp_int *b, mp_int *c);
289
-
290
-/* c = a OR b */
291
-int mp_or(mp_int *a, mp_int *b, mp_int *c);
292
-
293
-/* c = a AND b */
294
-int mp_and(mp_int *a, mp_int *b, mp_int *c);
295
-
296
-/* ---> Basic arithmetic <--- */
297
-
298
-/* b = -a */
299
-int mp_neg(mp_int *a, mp_int *b);
300
-
301
-/* b = |a| */
302
-int mp_abs(mp_int *a, mp_int *b);
303
-
304
-/* compare a to b */
305
-int mp_cmp(mp_int *a, mp_int *b);
306
-
307
-/* compare |a| to |b| */
308
-int mp_cmp_mag(mp_int *a, mp_int *b);
309
-
310
-/* c = a + b */
311
-int mp_add(mp_int *a, mp_int *b, mp_int *c);
312
-
313
-/* c = a - b */
314
-int mp_sub(mp_int *a, mp_int *b, mp_int *c);
315
-
316
-/* c = a * b */
317
-int mp_mul(mp_int *a, mp_int *b, mp_int *c);
318
-
319
-/* b = a*a  */
320
-int mp_sqr(mp_int *a, mp_int *b);
321
-
322
-/* a/b => cb + d == a */
323
-int mp_div(mp_int *a, mp_int *b, mp_int *c, mp_int *d);
324
-
325
-/* c = a mod b, 0 <= c < b  */
326
-int mp_mod(mp_int *a, mp_int *b, mp_int *c);
327
-
328
-/* ---> single digit functions <--- */
329
-
330
-/* compare against a single digit */
331
-int mp_cmp_d(mp_int *a, mp_digit b);
332
-
333
-/* c = a + b */
334
-int mp_add_d(mp_int *a, mp_digit b, mp_int *c);
335
-
336
-/* c = a - b */
337
-int mp_sub_d(mp_int *a, mp_digit b, mp_int *c);
338
-
339
-/* c = a * b */
340
-int mp_mul_d(mp_int *a, mp_digit b, mp_int *c);
341
-
342
-/* a/b => cb + d == a */
343
-int mp_div_d(mp_int *a, mp_digit b, mp_int *c, mp_digit *d);
344
-
345
-/* a/3 => 3c + d == a */
346
-int mp_div_3(mp_int *a, mp_int *c, mp_digit *d);
347
-
348
-/* c = a**b */
349
-int mp_expt_d(mp_int *a, mp_digit b, mp_int *c);
350
-
351
-/* c = a mod b, 0 <= c < b  */
352
-int mp_mod_d(mp_int *a, mp_digit b, mp_digit *c);
353
-
354
-/* ---> number theory <--- */
355
-
356
-/* d = a + b (mod c) */
357
-int mp_addmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d);
358
-
359
-/* d = a - b (mod c) */
360
-int mp_submod(mp_int *a, mp_int *b, mp_int *c, mp_int *d);
361
-
362
-/* d = a * b (mod c) */
363
-int mp_mulmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d);
364
-
365
-/* c = a * a (mod b) */
366
-int mp_sqrmod(mp_int *a, mp_int *b, mp_int *c);
367
-
368
-/* c = 1/a (mod b) */
369
-int mp_invmod(mp_int *a, mp_int *b, mp_int *c);
370
-
371
-/* c = (a, b) */
372
-int mp_gcd(mp_int *a, mp_int *b, mp_int *c);
373
-
374
-/* produces value such that U1*a + U2*b = U3 */
375
-int mp_exteuclid(mp_int *a, mp_int *b, mp_int *U1, mp_int *U2, mp_int *U3);
376
-
377
-/* c = [a, b] or (a*b)/(a, b) */
378
-int mp_lcm(mp_int *a, mp_int *b, mp_int *c);
379
-
380
-/* finds one of the b'th root of a, such that |c|**b <= |a|
381
- *
382
- * returns error if a < 0 and b is even
383
- */
384
-int mp_n_root(mp_int *a, mp_digit b, mp_int *c);
385
-
386
-/* special sqrt algo */
387
-int mp_sqrt(mp_int *arg, mp_int *ret);
388
-
389
-/* is number a square? */
390
-int mp_is_square(mp_int *arg, int *ret);
391
-
392
-/* computes the jacobi c = (a | n) (or Legendre if b is prime)  */
393
-int mp_jacobi(mp_int *a, mp_int *n, int *c);
394
-
395
-/* used to setup the Barrett reduction for a given modulus b */
396
-int mp_reduce_setup(mp_int *a, mp_int *b);
397
-
398
-/* Barrett Reduction, computes a (mod b) with a precomputed value c
399
- *
400
- * Assumes that 0 < a <= b*b, note if 0 > a > -(b*b) then you can merely
401
- * compute the reduction as -1 * mp_reduce(mp_abs(a)) [pseudo code].
402
- */
403
-int mp_reduce(mp_int *a, mp_int *b, mp_int *c);
404
-
405
-/* setups the montgomery reduction */
406
-int mp_montgomery_setup(mp_int *a, mp_digit *mp);
407
-
408
-/* computes a = B**n mod b without division or multiplication useful for
409
- * normalizing numbers in a Montgomery system.
410
- */
411
-int mp_montgomery_calc_normalization(mp_int *a, mp_int *b);
412
-
413
-/* computes x/R == x (mod N) via Montgomery Reduction */
414
-int mp_montgomery_reduce(mp_int *a, mp_int *m, mp_digit mp);
415
-
416
-/* returns 1 if a is a valid DR modulus */
417
-int mp_dr_is_modulus(mp_int *a);
418
-
419
-/* sets the value of "d" required for mp_dr_reduce */
420
-void mp_dr_setup(mp_int *a, mp_digit *d);
421
-
422
-/* reduces a modulo b using the Diminished Radix method */
423
-int mp_dr_reduce(mp_int *a, mp_int *b, mp_digit mp);
424
-
425
-/* returns true if a can be reduced with mp_reduce_2k */
426
-int mp_reduce_is_2k(mp_int *a);
427
-
428
-/* determines k value for 2k reduction */
429
-int mp_reduce_2k_setup(mp_int *a, mp_digit *d);
430
-
431
-/* reduces a modulo b where b is of the form 2**p - k [0 <= a] */
432
-int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d);
433
-
434
-/* returns true if a can be reduced with mp_reduce_2k_l */
435
-int mp_reduce_is_2k_l(mp_int *a);
436
-
437
-/* determines k value for 2k reduction */
438
-int mp_reduce_2k_setup_l(mp_int *a, mp_int *d);
439
-
440
-/* reduces a modulo b where b is of the form 2**p - k [0 <= a] */
441
-int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d);
442
-
443
-/* d = a**b (mod c) */
444
-int mp_exptmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d);
445
-
446
-/* ---> Primes <--- */
447
-
448
-/* number of primes */
449
-#ifdef MP_8BIT
450
-   #define PRIME_SIZE      31
451
-#else
452
-   #define PRIME_SIZE      256
453
-#endif
454
-
455
-/* table of first PRIME_SIZE primes */
456
-extern const mp_digit ltm_prime_tab[];
457
-
458
-/* result=1 if a is divisible by one of the first PRIME_SIZE primes */
459
-int mp_prime_is_divisible(mp_int *a, int *result);
460
-
461
-/* performs one Fermat test of "a" using base "b".
462
- * Sets result to 0 if composite or 1 if probable prime
463
- */
464
-int mp_prime_fermat(mp_int *a, mp_int *b, int *result);
465
-
466
-/* performs one Miller-Rabin test of "a" using base "b".
467
- * Sets result to 0 if composite or 1 if probable prime
468
- */
469
-int mp_prime_miller_rabin(mp_int *a, mp_int *b, int *result);
470
-
471
-/* This gives [for a given bit size] the number of trials required
472
- * such that Miller-Rabin gives a prob of failure lower than 2^-96 
473
- */
474
-int mp_prime_rabin_miller_trials(int size);
475
-
476
-/* performs t rounds of Miller-Rabin on "a" using the first
477
- * t prime bases.  Also performs an initial sieve of trial
478
- * division.  Determines if "a" is prime with probability
479
- * of error no more than (1/4)**t.
480
- *
481
- * Sets result to 1 if probably prime, 0 otherwise
482
- */
483
-int mp_prime_is_prime(mp_int *a, int t, int *result);
484
-
485
-/* finds the next prime after the number "a" using "t" trials
486
- * of Miller-Rabin.
487
- *
488
- * bbs_style = 1 means the prime must be congruent to 3 mod 4
489
- */
490
-int mp_prime_next_prime(mp_int *a, int t, int bbs_style);
491
-
492
-/* makes a truly random prime of a given size (bytes),
493
- * call with bbs = 1 if you want it to be congruent to 3 mod 4 
494
- *
495
- * You have to supply a callback which fills in a buffer with random bytes.  "dat" is a parameter you can
496
- * have passed to the callback (e.g. a state or something).  This function doesn't use "dat" itself
497
- * so it can be NULL
498
- *
499
- * The prime generated will be larger than 2^(8*size).
500
- */
501
-#define mp_prime_random(a, t, size, bbs, cb, dat) mp_prime_random_ex(a, t, ((size) * 8) + 1, (bbs==1)?LTM_PRIME_BBS:0, cb, dat)
502
-
503
-/* makes a truly random prime of a given size (bits),
504
- *
505
- * Flags are as follows:
506
- * 
507
- *   LTM_PRIME_BBS      - make prime congruent to 3 mod 4
508
- *   LTM_PRIME_SAFE     - make sure (p-1)/2 is prime as well (implies LTM_PRIME_BBS)
509
- *   LTM_PRIME_2MSB_OFF - make the 2nd highest bit zero
510
- *   LTM_PRIME_2MSB_ON  - make the 2nd highest bit one
511
- *
512
- * You have to supply a callback which fills in a buffer with random bytes.  "dat" is a parameter you can
513
- * have passed to the callback (e.g. a state or something).  This function doesn't use "dat" itself
514
- * so it can be NULL
515
- *
516
- */
517
-int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback cb, void *dat);
518
-
519
-/* ---> radix conversion <--- */
520
-int mp_count_bits(mp_int *a);
521
-
522
-int mp_unsigned_bin_size(mp_int *a);
523
-int mp_read_unsigned_bin(mp_int *a, const unsigned char *b, int c);
524
-int mp_to_unsigned_bin(mp_int *a, unsigned char *b);
525
-int mp_to_unsigned_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen);
526
-
527
-int mp_signed_bin_size(mp_int *a);
528
-int mp_read_signed_bin(mp_int *a, const unsigned char *b, int c);
529
-int mp_to_signed_bin(mp_int *a,  unsigned char *b);
530
-int mp_to_signed_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen);
531
-
532
-int mp_read_radix(mp_int *a, const char *str, int radix);
533
-int mp_toradix(mp_int *a, char *str, int radix);
534
-int mp_toradix_n(mp_int * a, char *str, int radix, int maxlen);
535
-int mp_radix_size(mp_int *a, int radix, int *size);
536
-
537
-int mp_fread(mp_int *a, int radix, FILE *stream);
538
-int mp_fwrite(mp_int *a, int radix, FILE *stream);
539
-
540
-#define mp_read_raw(mp, str, len) mp_read_signed_bin((mp), (str), (len))
541
-#define mp_raw_size(mp)           mp_signed_bin_size(mp)
542
-#define mp_toraw(mp, str)         mp_to_signed_bin((mp), (str))
543
-#define mp_read_mag(mp, str, len) mp_read_unsigned_bin((mp), (str), (len))
544
-#define mp_mag_size(mp)           mp_unsigned_bin_size(mp)
545
-#define mp_tomag(mp, str)         mp_to_unsigned_bin((mp), (str))
546
-
547
-#define mp_tobinary(M, S)  mp_toradix((M), (S), 2)
548
-#define mp_tooctal(M, S)   mp_toradix((M), (S), 8)
549
-#define mp_todecimal(M, S) mp_toradix((M), (S), 10)
550
-#define mp_tohex(M, S)     mp_toradix((M), (S), 16)
551
-
552
-/* lowlevel functions, do not call! */
553
-int s_mp_add(mp_int *a, mp_int *b, mp_int *c);
554
-int s_mp_sub(mp_int *a, mp_int *b, mp_int *c);
555
-#define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1)
556
-int fast_s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs);
557
-int s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs);
558
-int fast_s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs);
559
-int s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs);
560
-int fast_s_mp_sqr(mp_int *a, mp_int *b);
561
-int s_mp_sqr(mp_int *a, mp_int *b);
562
-int mp_karatsuba_mul(mp_int *a, mp_int *b, mp_int *c);
563
-int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c);
564
-int mp_karatsuba_sqr(mp_int *a, mp_int *b);
565
-int mp_toom_sqr(mp_int *a, mp_int *b);
566
-int fast_mp_invmod(mp_int *a, mp_int *b, mp_int *c);
567
-int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c);
568
-int fast_mp_montgomery_reduce(mp_int *a, mp_int *m, mp_digit mp);
569
-int mp_exptmod_fast(mp_int *G, mp_int *X, mp_int *P, mp_int *Y, int mode);
570
-int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int mode);
571
-void bn_reverse(unsigned char *s, int len);
572
-
573
-extern const char *mp_s_rmap;
574
-
575
-#ifdef __cplusplus
576
-   }
577
-#endif
578
-
579
-#endif /* HAVE_SYSTEM_TOMMATH */
580
-
581
-#endif
582
-
583
-
584
-/* $Source: /cvs/libtom/libtommath/tommath.h,v $ */
585
-/* $Revision: 1.8 $ */
586
-/* $Date: 2006/03/31 14:18:44 $ */
587 33
new file mode 100644
... ...
@@ -0,0 +1,572 @@
0
+/* TomsFastMath, a fast ISO C bignum library.
1
+ * 
2
+ * This project is meant to fill in where LibTomMath
3
+ * falls short.  That is speed ;-)
4
+ *
5
+ * This project is public domain and free for all purposes.
6
+ * 
7
+ * Tom St Denis, tomstdenis@gmail.com
8
+ */
9
+#ifndef TFM_H_
10
+#define TFM_H_
11
+
12
+#ifndef __GNUC__
13
+#define TFM_NO_ASM
14
+#endif
15
+
16
+#include <stdio.h>
17
+#include <string.h>
18
+#include <stdlib.h>
19
+#include <ctype.h>
20
+#include <limits.h>
21
+
22
+#ifndef MIN
23
+   #define MIN(x,y) ((x)<(y)?(x):(y))
24
+#endif
25
+
26
+#ifndef MAX
27
+   #define MAX(x,y) ((x)>(y)?(x):(y))
28
+#endif
29
+
30
+/* externally define this symbol to ignore the default settings, useful for changing the build from the make process */
31
+#ifndef TFM_ALREADY_SET
32
+
33
+/* do we want the large set of small multiplications ? 
34
+   Enable these if you are going to be doing a lot of small (<= 16 digit) multiplications say in ECC
35
+   Or if you're on a 64-bit machine doing RSA as a 1024-bit integer == 16 digits ;-)
36
+ */
37
+#define TFM_SMALL_SET
38
+
39
+/* do we want huge code 
40
+   Enable these if you are doing 20, 24, 28, 32, 48, 64 digit multiplications (useful for RSA)
41
+   Less important on 64-bit machines as 32 digits == 2048 bits
42
+ */
43
+#if 0
44
+#define TFM_MUL3
45
+#define TFM_MUL4
46
+#define TFM_MUL6
47
+#define TFM_MUL7
48
+#define TFM_MUL8
49
+#define TFM_MUL9
50
+#define TFM_MUL12
51
+#define TFM_MUL17
52
+#endif
53
+#define TFM_MUL20
54
+#define TFM_MUL24
55
+#define TFM_MUL28
56
+#define TFM_MUL32
57
+#define TFM_MUL48
58
+#define TFM_MUL64
59
+
60
+#if 0
61
+#define TFM_SQR3
62
+#define TFM_SQR4
63
+#define TFM_SQR6
64
+#define TFM_SQR7
65
+#define TFM_SQR8
66
+#define TFM_SQR9
67
+#define TFM_SQR12
68
+#define TFM_SQR17
69
+#endif
70
+#define TFM_SQR20
71
+#define TFM_SQR24
72
+#define TFM_SQR28
73
+#define TFM_SQR32
74
+#define TFM_SQR48
75
+#define TFM_SQR64
76
+
77
+/* do we want some overflow checks
78
+   Not required if you make sure your numbers are within range (e.g. by default a modulus for fp_exptmod() can only be upto 2048 bits long)
79
+ */
80
+#define TFM_CHECK
81
+
82
+/* Is the target a P4 Prescott
83
+ */
84
+/* #define TFM_PRESCOTT */
85
+
86
+/* Do we want timing resistant fp_exptmod() ?
87
+ * This makes it slower but also timing invariant with respect to the exponent 
88
+ */
89
+/* #define TFM_TIMING_RESISTANT */
90
+
91
+#endif
92
+
93
+/* Max size of any number in bits.  Basically the largest size you will be multiplying
94
+ * should be half [or smaller] of FP_MAX_SIZE-four_digit
95
+ *
96
+ * You can externally define this or it defaults to 8192 bits plus 8 digits of headroom [allowing multiplications up to 4096x4096 bits]
97
+ */
98
+#ifndef FP_MAX_SIZE
99
+   #define FP_MAX_SIZE           (8192+(8*DIGIT_BIT))
100
+#endif
101
+
102
+/* will this lib work? */
103
+#if (CHAR_BIT & 7)
104
+   #error CHAR_BIT must be a multiple of eight.
105
+#endif
106
+#if FP_MAX_SIZE % CHAR_BIT
107
+   #error FP_MAX_SIZE must be a multiple of CHAR_BIT
108
+#endif
109
+
110
+/* autodetect x86-64 and make sure we are using 64-bit digits with x86-64 asm */
111
+#if defined(__x86_64__)
112
+   #if defined(TFM_X86) || defined(TFM_SSE2) || defined(TFM_ARM) 
113
+       #error x86-64 detected, x86-32/SSE2/ARM optimizations are not valid!
114
+   #endif
115
+   #if !defined(TFM_X86_64) && !defined(TFM_NO_ASM)
116
+      #define TFM_X86_64
117
+   #endif
118
+#endif
119
+#if defined(TFM_X86_64)
120
+    #if !defined(FP_64BIT)
121
+       #define FP_64BIT
122
+    #endif
123
+#endif
124
+
125
+/* try to detect x86-32 */
126
+#if defined(__i386__) && !defined(TFM_SSE2)
127
+   #if defined(TFM_X86_64) || defined(TFM_ARM) 
128
+       #error x86-32 detected, x86-64/ARM optimizations are not valid!
129
+   #endif
130
+   #if !defined(TFM_X86) && !defined(TFM_NO_ASM)
131
+      #define TFM_X86
132
+   #endif
133
+#endif
134
+
135
+/* make sure we're 32-bit for x86-32/sse/arm/ppc32 */
136
+#if (defined(TFM_X86) || defined(TFM_SSE2) || defined(TFM_ARM) || defined(TFM_PPC32)) && defined(FP_64BIT)
137
+   #warning x86-32, SSE2 and ARM, PPC32 optimizations require 32-bit digits (undefining)
138
+   #undef FP_64BIT
139
+#endif
140
+
141
+/* multi asms? */
142
+#ifdef TFM_X86
143
+   #define TFM_ASM
144
+#endif
145
+#ifdef TFM_X86_64
146
+   #ifdef TFM_ASM
147
+      #error TFM_ASM already defined!
148
+   #endif
149
+   #define TFM_ASM
150
+#endif
151
+#ifdef TFM_SSE2
152
+   #ifdef TFM_ASM
153
+      #error TFM_ASM already defined!
154
+   #endif
155
+   #define TFM_ASM
156
+#endif
157
+#ifdef TFM_ARM
158
+   #ifdef TFM_ASM
159
+      #error TFM_ASM already defined!
160
+   #endif
161
+   #define TFM_ASM
162
+#endif
163
+#ifdef TFM_PPC32
164
+   #ifdef TFM_ASM
165
+      #error TFM_ASM already defined!
166
+   #endif
167
+   #define TFM_ASM
168
+#endif
169
+#ifdef TFM_PPC64
170
+   #ifdef TFM_ASM
171
+      #error TFM_ASM already defined!
172
+   #endif
173
+   #define TFM_ASM
174
+#endif
175
+#ifdef TFM_AVR32
176
+   #ifdef TFM_ASM
177
+      #error TFM_ASM already defined!
178
+   #endif
179
+   #define TFM_ASM
180
+#endif
181
+
182
+/* we want no asm? */
183
+#ifdef TFM_NO_ASM
184
+   #undef TFM_X86
185
+   #undef TFM_X86_64
186
+   #undef TFM_SSE2
187
+   #undef TFM_ARM
188
+   #undef TFM_PPC32
189
+   #undef TFM_PPC64
190
+   #undef TFM_AVR32
191
+   #undef TFM_ASM   
192
+#endif
193
+
194
+/* ECC helpers */
195
+#ifdef TFM_ECC192
196
+   #ifdef FP_64BIT
197
+       #define TFM_MUL3
198
+       #define TFM_SQR3
199
+   #else
200
+       #define TFM_MUL6
201
+       #define TFM_SQR6
202
+   #endif
203
+#endif
204
+
205
+#ifdef TFM_ECC224
206
+   #ifdef FP_64BIT
207
+       #define TFM_MUL4
208
+       #define TFM_SQR4
209
+   #else
210
+       #define TFM_MUL7
211
+       #define TFM_SQR7
212
+   #endif
213
+#endif
214
+
215
+#ifdef TFM_ECC256
216
+   #ifdef FP_64BIT
217
+       #define TFM_MUL4
218
+       #define TFM_SQR4
219
+   #else
220
+       #define TFM_MUL8
221
+       #define TFM_SQR8
222
+   #endif
223
+#endif
224
+
225
+#ifdef TFM_ECC384
226
+   #ifdef FP_64BIT
227
+       #define TFM_MUL6
228
+       #define TFM_SQR6
229
+   #else
230
+       #define TFM_MUL12
231
+       #define TFM_SQR12
232
+   #endif
233
+#endif
234
+
235
+#ifdef TFM_ECC521
236
+   #ifdef FP_64BIT
237
+       #define TFM_MUL9
238
+       #define TFM_SQR9
239
+   #else
240
+       #define TFM_MUL17
241
+       #define TFM_SQR17
242
+   #endif
243
+#endif
244
+
245
+
246
+/* some default configurations.
247
+ */
248
+#if defined(FP_64BIT)
249
+   /* for GCC only on supported platforms */
250
+#ifndef CRYPT
251
+   typedef unsigned long ulong64;
252
+#endif
253
+   typedef ulong64            fp_digit;
254
+   typedef unsigned long      fp_word __attribute__ ((mode(TI)));
255
+#else
256
+   /* this is to make porting into LibTomCrypt easier :-) */
257
+#ifndef CRYPT
258
+   #if defined(_MSC_VER) || defined(__BORLANDC__) 
259
+      typedef unsigned __int64   ulong64;
260
+      typedef signed __int64     long64;
261
+   #else
262
+      typedef unsigned long long ulong64;
263
+      typedef signed long long   long64;
264
+   #endif
265
+#endif
266
+   typedef unsigned long      fp_digit;
267
+   typedef ulong64            fp_word;
268
+#endif
269
+
270
+/* bits per digit, all-ones digit mask, and number of digits in an fp_int */
271
+#define DIGIT_BIT  (int)((CHAR_BIT) * sizeof(fp_digit))
272
+#define FP_MASK    (fp_digit)(-1)
273
+#define FP_SIZE    (FP_MAX_SIZE/DIGIT_BIT)
274
+
275
+/* signs */
276
+#define FP_ZPOS     0
277
+#define FP_NEG      1
278
+
279
+/* return codes */
280
+#define FP_OKAY     0
281
+#define FP_VAL      1
282
+#define FP_MEM      2
283
+
284
+/* equalities */
285
+#define FP_LT        -1   /* less than */
286
+#define FP_EQ         0   /* equal to */
287
+#define FP_GT         1   /* greater than */
288
+
289
+/* replies */
290
+#define FP_YES        1   /* yes response */
291
+#define FP_NO         0   /* no response */
292
+
293
+/* a FP type */
294
+typedef struct {
295
+    fp_digit dp[FP_SIZE];
296
+    int      used, 
297
+             sign;
298
+} fp_int;
299
+
300
+/* functions */
301
+
302
+/* returns a TFM ident string useful for debugging... */
303
+const char *fp_ident(void);
304
+
305
+/* initialize [or zero] an fp int */
306
+#define fp_init(a)  (void)memset((a), 0, sizeof(fp_int))
307
+#define fp_zero(a)  fp_init(a)
308
+
309
+/* zero/even/odd ? */
310
+#define fp_iszero(a) (((a)->used == 0) ? FP_YES : FP_NO)
311
+#define fp_iseven(a) (((a)->used >= 0 && (((a)->dp[0] & 1) == 0)) ? FP_YES : FP_NO)
312
+#define fp_isodd(a)  (((a)->used > 0  && (((a)->dp[0] & 1) == 1)) ? FP_YES : FP_NO)
313
+
314
+/* set to a small digit */
315
+void fp_set(fp_int *a, fp_digit b);
316
+
317
+/* copy from a to b */
318
+#define fp_copy(a, b)      (void)(((a) != (b)) && memcpy((b), (a), sizeof(fp_int)))
319
+#define fp_init_copy(a, b) fp_copy(b, a)
320
+
321
+/* clamp digits */
322
+#define fp_clamp(a)   { while ((a)->used && (a)->dp[(a)->used-1] == 0) --((a)->used); (a)->sign = (a)->used ? (a)->sign : FP_ZPOS; }
323
+
324
+/* negate and absolute */
325
+#define fp_neg(a, b)  { fp_copy(a, b); (b)->sign ^= 1; fp_clamp(b); }
326
+#define fp_abs(a, b)  { fp_copy(a, b); (b)->sign  = 0; }
327
+
328
+/* right shift x digits */
329
+void fp_rshd(fp_int *a, int x);
330
+
331
+/* left shift x digits */
332
+void fp_lshd(fp_int *a, int x);
333
+
334
+/* signed comparison */
335
+int fp_cmp(fp_int *a, fp_int *b);
336
+
337
+/* unsigned comparison */
338
+int fp_cmp_mag(fp_int *a, fp_int *b);
339
+
340
+/* power of 2 operations */
341
+void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d);
342
+void fp_mod_2d(fp_int *a, int b, fp_int *c);
343
+void fp_mul_2d(fp_int *a, int b, fp_int *c);
344
+void fp_2expt (fp_int *a, int b);
345
+void fp_mul_2(fp_int *a, fp_int *c);
346
+void fp_div_2(fp_int *a, fp_int *c);
347
+
348
+/* Counts the number of least-significant bits which are zero before the first set (one) bit */
349
+int fp_cnt_lsb(fp_int *a);
350
+
351
+/* c = a + b */
352
+void fp_add(fp_int *a, fp_int *b, fp_int *c);
353
+
354
+/* c = a - b */
355
+void fp_sub(fp_int *a, fp_int *b, fp_int *c);
356
+
357
+/* c = a * b */
358
+void fp_mul(fp_int *a, fp_int *b, fp_int *c);
359
+
360
+/* b = a*a  */
361
+void fp_sqr(fp_int *a, fp_int *b);
362
+
363
+/* a/b => cb + d == a */
364
+int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d);
365
+
366
+/* c = a mod b, 0 <= c < b  */
367
+int fp_mod(fp_int *a, fp_int *b, fp_int *c);
368
+
369
+/* compare against a single digit */
370
+int fp_cmp_d(fp_int *a, fp_digit b);
371
+
372
+/* c = a + b */
373
+void fp_add_d(fp_int *a, fp_digit b, fp_int *c);
374
+
375
+/* c = a - b */
376
+void fp_sub_d(fp_int *a, fp_digit b, fp_int *c);
377
+
378
+/* c = a * b */
379
+void fp_mul_d(fp_int *a, fp_digit b, fp_int *c);
380
+
381
+/* a/b => cb + d == a */
382
+int fp_div_d(fp_int *a, fp_digit b, fp_int *c, fp_digit *d);
383
+
384
+/* c = a mod b, 0 <= c < b  */
385
+int fp_mod_d(fp_int *a, fp_digit b, fp_digit *c);
386
+
387
+/* ---> number theory <--- */
388
+/* d = a + b (mod c) */
389
+int fp_addmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d);
390
+
391
+/* d = a - b (mod c) */
392
+int fp_submod(fp_int *a, fp_int *b, fp_int *c, fp_int *d);
393
+
394
+/* d = a * b (mod c) */
395
+int fp_mulmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d);
396
+
397
+/* c = a * a (mod b) */
398
+int fp_sqrmod(fp_int *a, fp_int *b, fp_int *c);
399
+
400
+/* c = 1/a (mod b) */
401
+int fp_invmod(fp_int *a, fp_int *b, fp_int *c);
402
+
403
+/* c = (a, b) */
404
+void fp_gcd(fp_int *a, fp_int *b, fp_int *c);
405
+
406
+/* c = [a, b] */
407
+void fp_lcm(fp_int *a, fp_int *b, fp_int *c);
408
+
409
+/* setups the montgomery reduction */
410
+int fp_montgomery_setup(fp_int *a, fp_digit *mp);
411
+
412
+/* computes a = B**n mod b without division or multiplication useful for
413
+ * normalizing numbers in a Montgomery system.
414
+ */
415
+void fp_montgomery_calc_normalization(fp_int *a, fp_int *b);
416
+
417
+/* computes x/R == x (mod N) via Montgomery Reduction */
418
+void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp);
419
+
420
+/* d = a**b (mod c) */
421
+int fp_exptmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d);
422
+
423
+/* primality stuff */
424
+
425
+/* perform a Miller-Rabin test of a to the base b and store result in "result" */
426
+void fp_prime_miller_rabin (fp_int * a, fp_int * b, int *result);
427
+
428
+/* 256 trial divisions + 8 Miller-Rabins, returns FP_YES if probable prime  */
429
+int fp_isprime(fp_int *a);
430
+
431
+/* Primality generation flags */
432
+#define TFM_PRIME_BBS      0x0001 /* BBS style prime */
433
+#define TFM_PRIME_SAFE     0x0002 /* Safe prime (p-1)/2 == prime */
434
+#define TFM_PRIME_2MSB_OFF 0x0004 /* force 2nd MSB to 0 */
435
+#define TFM_PRIME_2MSB_ON  0x0008 /* force 2nd MSB to 1 */
436
+
437
+/* callback for fp_prime_random, should fill dst with random bytes and return how many were read [up to len] */
438
+typedef int tfm_prime_callback(unsigned char *dst, int len, void *dat);
439
+
440
+#define fp_prime_random(a, t, size, bbs, cb, dat) fp_prime_random_ex(a, t, ((size) * 8) + 1, (bbs==1)?TFM_PRIME_BBS:0, cb, dat)
441
+
442
+int fp_prime_random_ex(fp_int *a, int t, int size, int flags, tfm_prime_callback cb, void *dat);
443
+
444
+/* radix conversions */
445
+int fp_count_bits(fp_int *a);
446
+
447
+int fp_unsigned_bin_size(fp_int *a);
448
+void fp_read_unsigned_bin(fp_int *a, const unsigned char *b, int c);
449
+void fp_to_unsigned_bin(fp_int *a, unsigned char *b);
450
+
451
+int fp_signed_bin_size(fp_int *a);
452
+void fp_read_signed_bin(fp_int *a, unsigned char *b, int c);
453
+void fp_to_signed_bin(fp_int *a, unsigned char *b);
454
+
455
+int fp_read_radix(fp_int *a, const char *str, int radix);
456
+int fp_toradix(fp_int *a, char *str, int radix);
457
+int fp_toradix_n(fp_int * a, char *str, int radix, int maxlen);
458
+
459
+
460
+/* VARIOUS LOW LEVEL STUFFS */
461
+void s_fp_add(fp_int *a, fp_int *b, fp_int *c);
462
+void s_fp_sub(fp_int *a, fp_int *b, fp_int *c);
463
+void fp_reverse(unsigned char *s, int len);
464
+
465
+void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C);
466
+
467
+#ifdef TFM_SMALL_SET
468
+void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C);
469
+#endif
470
+
471
+#ifdef TFM_MUL3
472
+void fp_mul_comba3(fp_int *A, fp_int *B, fp_int *C);
473
+#endif
474
+#ifdef TFM_MUL4
475
+void fp_mul_comba4(fp_int *A, fp_int *B, fp_int *C);
476
+#endif
477
+#ifdef TFM_MUL6
478
+void fp_mul_comba6(fp_int *A, fp_int *B, fp_int *C);
479
+#endif
480
+#ifdef TFM_MUL7
481
+void fp_mul_comba7(fp_int *A, fp_int *B, fp_int *C);
482
+#endif
483
+#ifdef TFM_MUL8
484
+void fp_mul_comba8(fp_int *A, fp_int *B, fp_int *C);
485
+#endif
486
+#ifdef TFM_MUL9
487
+void fp_mul_comba9(fp_int *A, fp_int *B, fp_int *C);
488
+#endif
489
+#ifdef TFM_MUL12
490
+void fp_mul_comba12(fp_int *A, fp_int *B, fp_int *C);
491
+#endif
492
+#ifdef TFM_MUL17
493
+void fp_mul_comba17(fp_int *A, fp_int *B, fp_int *C);
494
+#endif
495
+
496
+#ifdef TFM_MUL20
497
+void fp_mul_comba20(fp_int *A, fp_int *B, fp_int *C);
498
+#endif
499
+#ifdef TFM_MUL24
500
+void fp_mul_comba24(fp_int *A, fp_int *B, fp_int *C);
501
+#endif
502
+#ifdef TFM_MUL28
503
+void fp_mul_comba28(fp_int *A, fp_int *B, fp_int *C);
504
+#endif
505
+#ifdef TFM_MUL32
506
+void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C);
507
+#endif
508
+#ifdef TFM_MUL48
509
+void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C);
510
+#endif
511
+#ifdef TFM_MUL64
512
+void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C);
513
+#endif
514
+
515
+void fp_sqr_comba(fp_int *A, fp_int *B);
516
+
517
+#ifdef TFM_SMALL_SET
518
+void fp_sqr_comba_small(fp_int *A, fp_int *B);
519
+#endif
520
+
521
+#ifdef TFM_SQR3
522
+void fp_sqr_comba3(fp_int *A, fp_int *B);
523
+#endif
524
+#ifdef TFM_SQR4
525
+void fp_sqr_comba4(fp_int *A, fp_int *B);
526
+#endif
527
+#ifdef TFM_SQR6
528
+void fp_sqr_comba6(fp_int *A, fp_int *B);
529
+#endif
530
+#ifdef TFM_SQR7
531
+void fp_sqr_comba7(fp_int *A, fp_int *B);
532
+#endif
533
+#ifdef TFM_SQR8
534
+void fp_sqr_comba8(fp_int *A, fp_int *B);
535
+#endif
536
+#ifdef TFM_SQR9
537
+void fp_sqr_comba9(fp_int *A, fp_int *B);
538
+#endif
539
+#ifdef TFM_SQR12
540
+void fp_sqr_comba12(fp_int *A, fp_int *B);
541
+#endif
542
+#ifdef TFM_SQR17
543
+void fp_sqr_comba17(fp_int *A, fp_int *B);
544
+#endif
545
+
546
+#ifdef TFM_SQR20
547
+void fp_sqr_comba20(fp_int *A, fp_int *B);
548
+#endif
549
+#ifdef TFM_SQR24
550
+void fp_sqr_comba24(fp_int *A, fp_int *B);
551
+#endif
552
+#ifdef TFM_SQR28
553
+void fp_sqr_comba28(fp_int *A, fp_int *B);
554
+#endif
555
+#ifdef TFM_SQR32
556
+void fp_sqr_comba32(fp_int *A, fp_int *B);
557
+#endif
558
+#ifdef TFM_SQR48
559
+void fp_sqr_comba48(fp_int *A, fp_int *B);
560
+#endif
561
+#ifdef TFM_SQR64
562
+void fp_sqr_comba64(fp_int *A, fp_int *B);
563
+#endif
564
+extern const char *fp_s_rmap;
565
+
566
+#endif
567
+
568
+
569
+/* $Source: /cvs/libtom/tomsfastmath/src/headers/tfm.h,v $ */
570
+/* $Revision: 1.3 $ */
571
+/* $Date: 2007/02/27 02:38:44 $ */
0 572
new file mode 100755
... ...
@@ -0,0 +1,76 @@
0
+/* super class file for PK algos */
1
+
2
+/* default ... include all MPI */
3
+#define LTM_ALL
4
+
5
+/* RSA only (does not support DH/DSA/ECC) */
6
+/* #define SC_RSA_1 */
7
+
8
+/* For reference.... On an Athlon64 optimizing for speed...
9
+
10
+   LTM's mpi.o with all functions [stripped] is 142KiB in size.
11
+
12
+*/
13
+
14
+/* Works for RSA only, mpi.o is 68KiB */
15
+#ifdef SC_RSA_1
16
+   #define BN_MP_SHRINK_C
17
+   #define BN_MP_LCM_C
18
+   #define BN_MP_PRIME_RANDOM_EX_C
19
+   #define BN_MP_INVMOD_C
20
+   #define BN_MP_GCD_C
21
+   #define BN_MP_MOD_C
22
+   #define BN_MP_MULMOD_C
23
+   #define BN_MP_ADDMOD_C
24
+   #define BN_MP_EXPTMOD_C
25
+   #define BN_MP_SET_INT_C
26
+   #define BN_MP_INIT_MULTI_C
27
+   #define BN_MP_CLEAR_MULTI_C
28
+   #define BN_MP_UNSIGNED_BIN_SIZE_C
29
+   #define BN_MP_TO_UNSIGNED_BIN_C
30
+   #define BN_MP_MOD_D_C
31
+   #define BN_MP_PRIME_RABIN_MILLER_TRIALS_C
32
+   #define BN_REVERSE_C
33
+   #define BN_PRIME_TAB_C
34
+
35
+   /* other modifiers */
36
+   #define BN_MP_DIV_SMALL                    /* Slower division, not critical */
37
+
38
+   /* here we are on the last pass so we turn things off.  The functions classes are still there
39
+    * but we remove them specifically from the build.  This also invokes tweaks in functions
40
+    * like removing support for even moduli, etc...
41
+    */
42
+#ifdef LTM_LAST
43
+   #undef  BN_MP_TOOM_MUL_C
44
+   #undef  BN_MP_TOOM_SQR_C
45
+   #undef  BN_MP_KARATSUBA_MUL_C
46
+   #undef  BN_MP_KARATSUBA_SQR_C
47
+   #undef  BN_MP_REDUCE_C
48
+   #undef  BN_MP_REDUCE_SETUP_C
49
+   #undef  BN_MP_DR_IS_MODULUS_C
50
+   #undef  BN_MP_DR_SETUP_C
51
+   #undef  BN_MP_DR_REDUCE_C
52
+   #undef  BN_MP_REDUCE_IS_2K_C
53
+   #undef  BN_MP_REDUCE_2K_SETUP_C
54
+   #undef  BN_MP_REDUCE_2K_C
55
+   #undef  BN_S_MP_EXPTMOD_C
56
+   #undef  BN_MP_DIV_3_C
57
+   #undef  BN_S_MP_MUL_HIGH_DIGS_C
58
+   #undef  BN_FAST_S_MP_MUL_HIGH_DIGS_C
59
+   #undef  BN_FAST_MP_INVMOD_C
60
+
61
+   /* To safely undefine these you have to make sure your RSA key won't exceed the Comba threshold
62
+    * which is roughly 255 digits [7140 bits for 32-bit machines, 15300 bits for 64-bit machines] 
63
+    * which means roughly speaking you can handle up to 2536-bit RSA keys with these defined without
64
+    * trouble.  
65
+    */
66
+   #undef  BN_S_MP_MUL_DIGS_C
67
+   #undef  BN_S_MP_SQR_C
68
+   #undef  BN_MP_MONTGOMERY_REDUCE_C
69
+#endif
70
+
71
+#endif
72
+
73
+/* $Source$ */
74
+/* $Revision: 0.36 $ */
75
+/* $Date: 2005-08-01 16:37:28 +0000 $ */
0 76
new file mode 100644
... ...
@@ -0,0 +1,368 @@
0
+/* TomsFastMath, a fast ISO C bignum library.
1
+ * 
2
+ * This project is meant to fill in where LibTomMath
3
+ * falls short.  That is speed ;-)
4
+ *
5
+ * This project is public domain and free for all purposes.
6
+ * 
7
+ * Tom St Denis, tomstdenis@gmail.com
8
+ */
9
+
10
+/* About this file...
11
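+   Per-architecture COMBA_* and MULADD macros (inline assembly with an ISO C fallback), plus the generic fp_mul_comba() multiplier built on top of them.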
12
+*/
13
+
14
+#include <tfm.h>
15
+
16
+#if defined(TFM_PRESCOTT) && defined(TFM_SSE2)
17
+   #undef TFM_SSE2
18
+   #define TFM_X86
19
+#endif
20
+
21
+/* these are the combas.  Worship them. */
22
+#if defined(TFM_X86)
23
+/* Generic x86 optimized code */
24
+
25
+/* anything you need at the start */
26
+#define COMBA_START
27
+
28
+/* clear the chaining variables */
29
+#define COMBA_CLEAR \
30
+   c0 = c1 = c2 = 0;
31
+
32
+/* forward the carry to the next digit */
33
+#define COMBA_FORWARD \
34
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
35
+
36
+/* store the first sum */
37
+#define COMBA_STORE(x) \
38
+   x = c0;
39
+
40
+/* store the second sum [carry] */
41
+#define COMBA_STORE2(x) \
42
+   x = c1;
43
+
44
+/* anything you need at the end */
45
+#define COMBA_FINI
46
+
47
+/* c2:c1:c0 += i * j: multiplies i by j and adds the double-width product (edx:eax) into the three-word accumulator */
48
+#define MULADD(i, j)                                      \
49
+asm(                                                      \
50
+     "movl  %6,%%eax     \n\t"                            \
51
+     "mull  %7           \n\t"                            \
52
+     "addl  %%eax,%0     \n\t"                            \
53
+     "adcl  %%edx,%1     \n\t"                            \
54
+     "adcl  $0,%2        \n\t"                            \
55
+     :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j)  :"%eax","%edx","%cc");
56
+
57
+#elif defined(TFM_X86_64)
58
+/* x86-64 optimized */
59
+
60
+/* anything you need at the start */
61
+#define COMBA_START
62
+
63
+/* clear the chaining variables */
64
+#define COMBA_CLEAR \
65
+   c0 = c1 = c2 = 0;
66
+
67
+/* forward the carry to the next digit */
68
+#define COMBA_FORWARD \
69
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
70
+
71
+/* store the first sum */
72
+#define COMBA_STORE(x) \
73
+   x = c0;
74
+
75
+/* store the second sum [carry] */
76
+#define COMBA_STORE2(x) \
77
+   x = c1;
78
+
79
+/* anything you need at the end */
80
+#define COMBA_FINI
81
+
82
+/* c2:c1:c0 += i * j: multiplies i by j and adds the double-width product (rdx:rax) into the three-word accumulator */
83
+#define MULADD(i, j)                                      \
84
+asm  (                                                    \
85
+     "movq  %6,%%rax     \n\t"                            \
86
+     "mulq  %7           \n\t"                            \
87
+     "addq  %%rax,%0     \n\t"                            \
88
+     "adcq  %%rdx,%1     \n\t"                            \
89
+     "adcq  $0,%2        \n\t"                            \
90
+     :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j)  :"%rax","%rdx","%cc");
91
+
92
+#elif defined(TFM_SSE2)
93
+/* use SSE2 optimizations */
94
+
95
+/* anything you need at the start */
96
+#define COMBA_START
97
+
98
+/* clear the chaining variables */
99
+#define COMBA_CLEAR \
100
+   c0 = c1 = c2 = 0;
101
+
102
+/* forward the carry to the next digit */
103
+#define COMBA_FORWARD \
104
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
105
+
106
+/* store the first sum */
107
+#define COMBA_STORE(x) \
108
+   x = c0;
109
+
110
+/* store the second sum [carry] */
111
+#define COMBA_STORE2(x) \
112
+   x = c1;
113
+
114
+/* anything you need at the end */
115
+#define COMBA_FINI \
116
+   asm("emms");
117
+
118
+/* c2:c1:c0 += i * j: the product is formed with pmuludq in mm0 and its two 32-bit halves are moved out through eax and added with carry */
119
+#define MULADD(i, j)                                     \
120
+asm(                                                     \
121
+    "movd  %6,%%mm0     \n\t"                            \
122
+    "movd  %7,%%mm1     \n\t"                            \
123
+    "pmuludq %%mm1,%%mm0\n\t"                            \
124
+    "movd  %%mm0,%%eax  \n\t"                            \
125
+    "psrlq $32,%%mm0    \n\t"                            \
126
+    "addl  %%eax,%0     \n\t"                            \
127
+    "movd  %%mm0,%%eax  \n\t"                            \
128
+    "adcl  %%eax,%1     \n\t"                            \
129
+    "adcl  $0,%2        \n\t"                            \
130
+    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j)  :"%eax","%cc");
131
+
132
+#elif defined(TFM_ARM)
133
+/* ARM code */
134
+
135
+#define COMBA_START 
136
+
137
+#define COMBA_CLEAR \
138
+   c0 = c1 = c2 = 0;
139
+
140
+#define COMBA_FORWARD \
141
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
142
+
143
+#define COMBA_STORE(x) \
144
+   x = c0;
145
+
146
+#define COMBA_STORE2(x) \
147
+   x = c1;
148
+
149
+#define COMBA_FINI
150
+
151
+#define MULADD(i, j)   /* c2:c1:c0 += i * j; r0/r1 receive the UMULL product and are listed as clobbers */   \
152
+asm(                                                          \
153
+"  UMULL  r0,r1,%6,%7           \n\t"                         \
154
+"  ADDS   %0,%0,r0              \n\t"                         \
155
+"  ADCS   %1,%1,r1              \n\t"                         \
156
+"  ADC    %2,%2,#0              \n\t"                         \
157
+:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc");
158
+
159
+#elif defined(TFM_PPC32)
160
+/* For 32-bit PPC */
161
+
162
+#define COMBA_START
163
+
164
+#define COMBA_CLEAR \
165
+   c0 = c1 = c2 = 0;
166
+
167
+#define COMBA_FORWARD \
168
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
169
+
170
+#define COMBA_STORE(x) \
171
+   x = c0;
172
+
173
+#define COMBA_STORE2(x) \
174
+   x = c1;
175
+
176
+#define COMBA_FINI 
177
+   
178
+/* c2:c1:c0 += i * j, with GPR 16 as scratch for the low (mullw) and then the high (mulhwu) product word.  untested: will mulhwu change the flags?  Docs say no */
179
+#define MULADD(i, j)              \
180
+asm(                              \
181
+   " mullw  16,%6,%7       \n\t" \
182
+   " addc   %0,%0,16       \n\t" \
183
+   " mulhwu 16,%6,%7       \n\t" \
184
+   " adde   %1,%1,16       \n\t" \
185
+   " addze  %2,%2          \n\t" \
186
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");
187
+
188
+#elif defined(TFM_PPC64)
189
+/* For 64-bit PPC */
190
+
191
+#define COMBA_START
192
+
193
+#define COMBA_CLEAR \
194
+   c0 = c1 = c2 = 0;
195
+
196
+#define COMBA_FORWARD \
197
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
198
+
199
+#define COMBA_STORE(x) \
200
+   x = c0;
201
+
202
+#define COMBA_STORE2(x) \
203
+   x = c1;
204
+
205
+#define COMBA_FINI 
206
+   
207
+/* c2:c1:c0 += i * j, with GPR 16 as scratch for the low (mulld) and then the high (mulhdu) product word.  The scratch register is spelled as a bare number throughout (as in the PPC32 variant) because GNU as only accepts the r16 spelling when -mregnames is given.  untested: will mulhdu change the flags?  Docs say no */
208
+#define MULADD(i, j)              \
209
+asm(                              \
210
+   " mulld  16,%6,%7       \n\t" \
211
+   " addc   %0,%0,16       \n\t" \
212
+   " mulhdu 16,%6,%7       \n\t" \
213
+   " adde   %1,%1,16       \n\t" \
214
+   " addze  %2,%2          \n\t" \
215
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");
216
+
217
+#elif defined(TFM_AVR32)
218
+
219
+/* AVR32 code */
220
+
221
+#define COMBA_START
222
+
223
+#define COMBA_CLEAR \
224
+   c0 = c1 = c2 = 0;
225
+
226
+#define COMBA_FORWARD \
227
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
228
+
229
+#define COMBA_STORE(x) \
230
+   x = c0;
231
+
232
+#define COMBA_STORE2(x) \
233
+   x = c1;
234
+
235
+#define COMBA_FINI 
236
+   
237
+#define MULADD(i, j)   /* accumulates i * j into c2:c1:c0; r2 and r3 are scratch (listed as clobbers) */   \
238
+asm(                             \
239
+   " mulu.d r2,%6,%7        \n\t"\
240
+   " add    %0,r2           \n\t"\
241
+   " adc    %1,%1,r3        \n\t"\
242
+   " acr    %2              \n\t"\
243
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2","r3");
244
+
245
+#elif defined(TFM_MIPS)
246
+
247
+#define COMBA_START
248
+
249
+#define COMBA_CLEAR \
250
+   c0 = c1 = c2 = 0;
251
+
252
+#define COMBA_FORWARD \
253
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
254
+
255
+#define COMBA_STORE(x) \
256
+   x = c0;
257
+
258
+#define COMBA_STORE2(x) \
259
+   x = c1;
260
+
261
+#define COMBA_FINI 
262
+   
263
+#define MULADD(i, j)   /* c2:c1:c0 += i * j; $12/$13 take the lo/hi product words and are then reused for the carries recovered with sltu */   \
264
+asm(                              \
265
+   " multu  %6,%7          \n\t"  \
266
+   " mflo   $12            \n\t"  \
267
+   " mfhi   $13            \n\t"  \
268
+   " addu    %0,%0,$12     \n\t"  \
269
+   " sltu   $12,%0,$12     \n\t"  \
270
+   " addu    %1,%1,$13     \n\t"  \
271
+   " sltu   $13,%1,$13     \n\t"  \
272
+   " addu    %1,%1,$12     \n\t"  \
273
+   " sltu   $12,%1,$12     \n\t"  \
274
+   " addu    %2,%2,$13     \n\t"  \
275
+   " addu    %2,%2,$12     \n\t"  \
276
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12","$13");
277
+
278
+#else
279
+/* ISO C code */
280
+
281
+#define COMBA_START
282
+
283
+#define COMBA_CLEAR \
284
+   c0 = c1 = c2 = 0;
285
+
286
+#define COMBA_FORWARD \
287
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
288
+
289
+#define COMBA_STORE(x) \
290
+   x = c0;
291
+
292
+#define COMBA_STORE2(x) \
293
+   x = c1;
294
+
295
+#define COMBA_FINI 
296
+   
297
+#define MULADD(i, j)   /* c2:c1:c0 += i * j in portable C; t is the double-width (fp_word) intermediate */   \
298
+   do { fp_word t;                                                                \
299
+   t = (fp_word)c0 + ((fp_word)i) * ((fp_word)j); c0 = t;                         \
300
+   t = (fp_word)c1 + (t >> DIGIT_BIT);            c1 = t; c2 += t >> DIGIT_BIT;   \
301
+   } while (0);
302
+
303
+#endif
304
+
305
+#ifndef TFM_DEFINES
306
+
307
+/* generic PxQ comba multiplier: computes C = A * B one output column at a time.  C may alias A or B; in that case the product is built in a local temporary and copied into C at the end. */
308
+void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
309
+{
310
+   int       ix, iy, iz, tx, ty, pa;
311
+   fp_digit  c0, c1, c2, *tmpx, *tmpy;
312
+   fp_int    tmp, *dst;
313
+
314
+   COMBA_START;
315
+   COMBA_CLEAR;
316
+   
317
+   /* get size of output and trim: at most FP_SIZE-1 result digits are produced */
318
+   pa = A->used + B->used;
319
+   if (pa >= FP_SIZE) {
320
+      pa = FP_SIZE-1;
321
+   }
322
+
323
+   if (A == C || B == C) {
324
+      fp_zero(&tmp);
325
+      dst = &tmp;
326
+   } else {
327
+      fp_zero(C);
328
+      dst = C;
329
+   }
330
+
331
+   for (ix = 0; ix < pa; ix++) {
332
+      /* get offsets into the two bignums */
333
+      ty = MIN(ix, B->used-1);
334
+      tx = ix - ty;
335
+
336
+      /* setup temp aliases: tmpx walks upwards through A while tmpy walks downwards through B */
337
+      tmpx = A->dp + tx;
338
+      tmpy = B->dp + ty;
339
+
340
+      /* this is the number of times the loop will iterate; essentially it's
341
+         while (tx++ < a->used && ty-- >= 0) { ... }
342
+       */
343
+      iy = MIN(A->used-tx, ty+1);
344
+
345
+      /* shift the carry words down for the new column, then accumulate its partial products */
346
+      COMBA_FORWARD;
347
+      for (iz = 0; iz < iy; ++iz) {
348
+          MULADD(*tmpx++, *tmpy--);
349
+      }
350
+
351
+      /* store the low accumulator word as result digit ix */
352
+      COMBA_STORE(dst->dp[ix]);
353
+  }
354
+  COMBA_FINI;
355
+
356
+  dst->used = pa;
357
+  dst->sign = A->sign ^ B->sign;
358
+  fp_clamp(dst);
359
+  fp_copy(dst, C);
360
+}
361
+
362
+#endif
363
+
364
+/* $Source: /cvs/libtom/tomsfastmath/src/mul/fp_mul_comba.c,v $ */
365
+/* $Revision: 1.4 $ */
366
+/* $Date: 2007/03/14 23:47:42 $ */
367
+
0 368
new file mode 100644
... ...
@@ -0,0 +1,677 @@
0
+/*
1
+ * 
2
+ * This project is meant to fill in where LibTomMath
3
+ * falls short.  That is speed ;-)
4
+ *
5
+ * This project is public domain and free for all purposes.
6
+ * 
7
+ * Tom St Denis, tomstdenis@gmail.com
8
+ */
9
+#include "bignum_fast.h"
10
+
11
+#if defined(TFM_PRESCOTT) && defined(TFM_SSE2)
12
+   #undef TFM_SSE2
13
+   #define TFM_X86
14
+#endif
15
+
16
+#if defined(TFM_X86)
17
+
18
+/* x86-32 optimized */
19
+
20
+#define COMBA_START
21
+
22
+#define CLEAR_CARRY \
23
+   c0 = c1 = c2 = 0;
24
+
25
+#define COMBA_STORE(x) \
26
+   x = c0;
27
+
28
+#define COMBA_STORE2(x) \
29
+   x = c1;
30
+
31
+#define CARRY_FORWARD \
32
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
33
+
34
+#define COMBA_FINI
35
+
36
+#define SQRADD(i, j)   /* c2:c1:c0 += i * i (j is not referenced) */   \
37
+asm(                                            \
38
+     "movl  %6,%%eax     \n\t"                            \
39
+     "mull  %%eax        \n\t"                            \
40
+     "addl  %%eax,%0     \n\t"                            \
41
+     "adcl  %%edx,%1     \n\t"                            \
42
+     "adcl  $0,%2        \n\t"                            \
43
+     :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","%cc");
44
+
45
+#define SQRADD2(i, j)   /* c2:c1:c0 += 2 * i * j: the product edx:eax is added in twice */   \
45
+asm(                                            \
46
+     "movl  %6,%%eax     \n\t"                            \
47
+     "mull  %7           \n\t"                            \
48
+     "addl  %%eax,%0     \n\t"                            \
49
+     "adcl  %%edx,%1     \n\t"                            \
50
+     "adcl  $0,%2        \n\t"                            \
51
+     "addl  %%eax,%0     \n\t"                            \
52
+     "adcl  %%edx,%1     \n\t"                            \
53
+     "adcl  $0,%2        \n\t"                            \
54
+     :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j)  :"%eax","%edx","%cc");
56
+
57
+#define SQRADDSC(i, j)   /* sc2:sc1:sc0 = i * j: starts the secondary accumulator (sc2 is cleared) */   \
57
+asm(                                                     \
58
+     "movl  %6,%%eax     \n\t"                            \
59
+     "mull  %7           \n\t"                            \
60
+     "movl  %%eax,%0     \n\t"                            \
61
+     "movl  %%edx,%1     \n\t"                            \
62
+     "xorl  %2,%2        \n\t"                            \
63
+     :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc");
65
+
66
+#define SQRADDAC(i, j)   /* sc2:sc1:sc0 += i * j: adds one more product to the secondary accumulator */   \
66
+asm(                                                     \
67
+     "movl  %6,%%eax     \n\t"                            \
68
+     "mull  %7           \n\t"                            \
69
+     "addl  %%eax,%0     \n\t"                            \
70
+     "adcl  %%edx,%1     \n\t"                            \
71
+     "adcl  $0,%2        \n\t"                            \
72
+     :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc");
74
+
75
+#define SQRADDDB   /* c2:c1:c0 += 2 * sc2:sc1:sc0: the secondary accumulator is added in twice */   \
75
+asm(                                                     \
76
+     "addl %6,%0         \n\t"                            \
77
+     "adcl %7,%1         \n\t"                            \
78
+     "adcl %8,%2         \n\t"                            \
79
+     "addl %6,%0         \n\t"                            \
80
+     "adcl %7,%1         \n\t"                            \
81
+     "adcl %8,%2         \n\t"                            \
82
+     :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc");
84
+
85
+#elif defined(TFM_X86_64)
86
+/* x86-64 optimized */
87
+
88
+#define COMBA_START
89
+
90
+#define CLEAR_CARRY \
91
+   c0 = c1 = c2 = 0;
92
+
93
+#define COMBA_STORE(x) \
94
+   x = c0;
95
+
96
+#define COMBA_STORE2(x) \
97
+   x = c1;
98
+
99
+#define CARRY_FORWARD \
100
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
101
+
102
+#define COMBA_FINI
103
+
104
+#define SQRADD(i, j)   /* c2:c1:c0 += i * i (j is not referenced) */   \
104
+asm(                                                     \
105
+     "movq  %6,%%rax     \n\t"                            \
106
+     "mulq  %%rax        \n\t"                            \
107
+     "addq  %%rax,%0     \n\t"                            \
108
+     "adcq  %%rdx,%1     \n\t"                            \
109
+     "adcq  $0,%2        \n\t"                            \
110
+     :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i) :"%rax","%rdx","%cc");
112
+
113
+#define SQRADD2(i, j)   /* c2:c1:c0 += 2 * i * j: the product rdx:rax is added in twice */   \
113
+asm(                                                     \
114
+     "movq  %6,%%rax     \n\t"                            \
115
+     "mulq  %7           \n\t"                            \
116
+     "addq  %%rax,%0     \n\t"                            \
117
+     "adcq  %%rdx,%1     \n\t"                            \
118
+     "adcq  $0,%2        \n\t"                            \
119
+     "addq  %%rax,%0     \n\t"                            \
120
+     "adcq  %%rdx,%1     \n\t"                            \
121
+     "adcq  $0,%2        \n\t"                            \
122
+     :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j)  :"%rax","%rdx","%cc");
124
+
125
+#define SQRADDSC(i, j)   /* sc2:sc1:sc0 = i * j: starts the secondary accumulator (sc2 is cleared) */   \
125
+asm(                                                     \
126
+     "movq  %6,%%rax     \n\t"                            \
127
+     "mulq  %7           \n\t"                            \
128
+     "movq  %%rax,%0     \n\t"                            \
129
+     "movq  %%rdx,%1     \n\t"                            \
130
+     "xorq  %2,%2        \n\t"                            \
131
+     :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","%cc");
133
+
134
+#define SQRADDAC(i, j)   /* sc2:sc1:sc0 += i * j: adds one more product to the secondary accumulator */   \
134
+asm(                                                     \
135
+     "movq  %6,%%rax     \n\t"                            \
136
+     "mulq  %7           \n\t"                            \
137
+     "addq  %%rax,%0     \n\t"                            \
138
+     "adcq  %%rdx,%1     \n\t"                            \
139
+     "adcq  $0,%2        \n\t"                            \
140
+     :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","%cc");
142
+
143
+#define SQRADDDB   /* c2:c1:c0 += 2 * sc2:sc1:sc0: the secondary accumulator is added in twice */   \
143
+asm(                                                     \
144
+     "addq %6,%0         \n\t"                            \
145
+     "adcq %7,%1         \n\t"                            \
146
+     "adcq %8,%2         \n\t"                            \
147
+     "addq %6,%0         \n\t"                            \
148
+     "adcq %7,%1         \n\t"                            \
149
+     "adcq %8,%2         \n\t"                            \
150
+     :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc");
152
+
153
+#elif defined(TFM_SSE2)
154
+
155
+/* SSE2 Optimized */
156
+#define COMBA_START
157
+
158
+#define CLEAR_CARRY \
159
+   c0 = c1 = c2 = 0;
160
+
161
+#define COMBA_STORE(x) \
162
+   x = c0;
163
+
164
+#define COMBA_STORE2(x) \
165
+   x = c1;
166
+
167
+#define CARRY_FORWARD \
168
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
169
+
170
+#define COMBA_FINI \
171
+   asm("emms");
172
+
173
+#define SQRADD(i, j)   /* c2:c1:c0 += i * i (j is not referenced); the pmuludq product in mm0 is moved out through eax in two halves */   \
173
+asm(                                            \
174
+     "movd  %6,%%mm0     \n\t"                            \
175
+     "pmuludq %%mm0,%%mm0\n\t"                            \
176
+     "movd  %%mm0,%%eax  \n\t"                            \
177
+     "psrlq $32,%%mm0    \n\t"                            \
178
+     "addl  %%eax,%0     \n\t"                            \
179
+     "movd  %%mm0,%%eax  \n\t"                            \
180
+     "adcl  %%eax,%1     \n\t"                            \
181
+     "adcl  $0,%2        \n\t"                            \
182
+     :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%cc");
184
+
185
+#define SQRADD2(i, j)   /* c2:c1:c0 += 2 * i * j: the product is unpacked into edx:eax and added in twice */   \
185
+asm(                                            \
186
+     "movd  %6,%%mm0     \n\t"                            \
187
+     "movd  %7,%%mm1     \n\t"                            \
188
+     "pmuludq %%mm1,%%mm0\n\t"                            \
189
+     "movd  %%mm0,%%eax  \n\t"                            \
190
+     "psrlq $32,%%mm0    \n\t"                            \
191
+     "movd  %%mm0,%%edx  \n\t"                            \
192
+     "addl  %%eax,%0     \n\t"                            \
193
+     "adcl  %%edx,%1     \n\t"                            \
194
+     "adcl  $0,%2        \n\t"                            \
195
+     "addl  %%eax,%0     \n\t"                            \
196
+     "adcl  %%edx,%1     \n\t"                            \
197
+     "adcl  $0,%2        \n\t"                            \
198
+     :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j)  :"%eax","%edx","%cc");
200
+
201
+#define SQRADDSC(i, j)   /* sc2:sc1:sc0 = i * j: starts the secondary accumulator (sc2 is cleared); xorl rewrites the flags, so "%cc" is declared as in the sibling macros */   \
201
+asm(                                            \
202
+     "movd  %6,%%mm0     \n\t"                            \
203
+     "movd  %7,%%mm1     \n\t"                            \
204
+     "pmuludq %%mm1,%%mm0\n\t"                            \
205
+     "movd  %%mm0,%0     \n\t"                            \
206
+     "psrlq $32,%%mm0    \n\t"                            \
207
+     "movd  %%mm0,%1     \n\t"                            \
208
+     "xorl  %2,%2        \n\t"                            \
209
+     :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) :"%cc");
211
+
212
+#define SQRADDAC(i, j)   /* sc2:sc1:sc0 += i * j: adds one more product to the secondary accumulator */   \
212
+asm(                                            \
213
+     "movd  %6,%%mm0     \n\t"                            \
214
+     "movd  %7,%%mm1     \n\t"                            \
215
+     "pmuludq %%mm1,%%mm0\n\t"                            \
216
+     "movd  %%mm0,%%eax  \n\t"                            \
217
+     "psrlq $32,%%mm0    \n\t"                            \
218
+     "movd  %%mm0,%%edx  \n\t"                            \
219
+     "addl  %%eax,%0     \n\t"                            \
220
+     "adcl  %%edx,%1     \n\t"                            \
221
+     "adcl  $0,%2        \n\t"                            \
222
+     :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j)  :"%eax","%edx","%cc");
224
+
225
+#define SQRADDDB   /* c2:c1:c0 += 2 * sc2:sc1:sc0: the secondary accumulator is added in twice */   \
225
+asm(                                                     \
226
+     "addl %6,%0         \n\t"                            \
227
+     "adcl %7,%1         \n\t"                            \
228
+     "adcl %8,%2         \n\t"                            \
229
+     "addl %6,%0         \n\t"                            \
230
+     "adcl %7,%1         \n\t"                            \
231
+     "adcl %8,%2         \n\t"                            \
232
+     :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc");
234
+
235
+#elif defined(TFM_ARM)
236
+
237
+/* ARM code */
238
+
239
+#define COMBA_START
240
+
241
+#define CLEAR_CARRY \
242
+   c0 = c1 = c2 = 0;
243
+
244
+#define COMBA_STORE(x) \
245
+   x = c0;
246
+
247
+#define COMBA_STORE2(x) \
248
+   x = c1;
249
+
250
+#define CARRY_FORWARD \
251
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
252
+
253
+#define COMBA_FINI
254
+
255
+/* squares i and adds the product into the accumulator: c2:c1:c0 += i * i (j is not referenced); r0/r1 are scratch */
255
+#define SQRADD(i, j)                                             \
256
+asm(                                                             \
257
+"  UMULL  r0,r1,%6,%6              \n\t"                         \
258
+"  ADDS   %0,%0,r0                 \n\t"                         \
259
+"  ADCS   %1,%1,r1                 \n\t"                         \
260
+"  ADC    %2,%2,#0                 \n\t"                         \
261
+:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "%cc");
263
+	
264
+/* for squaring some of the terms are doubled: c2:c1:c0 += 2 * i * j, done by adding the r1:r0 product twice */
264
+#define SQRADD2(i, j)                                            \
265
+asm(                                                             \
266
+"  UMULL  r0,r1,%6,%7              \n\t"                         \
267
+"  ADDS   %0,%0,r0                 \n\t"                         \
268
+"  ADCS   %1,%1,r1                 \n\t"                         \
269
+"  ADC    %2,%2,#0                 \n\t"                         \
270
+"  ADDS   %0,%0,r0                 \n\t"                         \
271
+"  ADCS   %1,%1,r1                 \n\t"                         \
272
+"  ADC    %2,%2,#0                 \n\t"                         \
273
+:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc");
275
+
276
+#define SQRADDSC(i, j)   /* sc2:sc1:sc0 = i * j: UMULL writes sc0/sc1 directly, SUB clears sc2 */   \
276
+asm(                                                             \
277
+"  UMULL  %0,%1,%6,%7              \n\t"                         \
278
+"  SUB    %2,%2,%2                 \n\t"                         \
279
+:"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "%cc");
281
+
282
+#define SQRADDAC(i, j)   /* sc2:sc1:sc0 += i * j: adds one more product to the secondary accumulator; r0/r1 are scratch */   \
282
+asm(                                                             \
283
+"  UMULL  r0,r1,%6,%7              \n\t"                         \
284
+"  ADDS   %0,%0,r0                 \n\t"                         \
285
+"  ADCS   %1,%1,r1                 \n\t"                         \
286
+"  ADC    %2,%2,#0                 \n\t"                         \
287
+:"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "%cc");
289
+
290
+#define SQRADDDB   /* c2:c1:c0 += 2 * sc2:sc1:sc0; note that operands %3..%5 are sc0..sc2 here */   \
290
+asm(                                                             \
291
+"  ADDS  %0,%0,%3                     \n\t"                      \
292
+"  ADCS  %1,%1,%4                     \n\t"                      \
293
+"  ADC   %2,%2,%5                     \n\t"                      \
294
+"  ADDS  %0,%0,%3                     \n\t"                      \
295
+"  ADCS  %1,%1,%4                     \n\t"                      \
296
+"  ADC   %2,%2,%5                     \n\t"                      \
297
+:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc");
299
+
300
+#elif defined(TFM_PPC32)
301
+
302
+/* PPC32 */
303
+
304
+#define COMBA_START
305
+
306
+#define CLEAR_CARRY \
307
+   c0 = c1 = c2 = 0;
308
+
309
+#define COMBA_STORE(x) \
310
+   x = c0;
311
+
312
+#define COMBA_STORE2(x) \
313
+   x = c1;
314
+
315
+#define CARRY_FORWARD \
316
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
317
+
318
+#define COMBA_FINI
319
+
320
+/* squares i and adds the product into the accumulator: c2:c1:c0 += i * i (j is not referenced); GPR 16 is scratch */
320
+#define SQRADD(i, j)             \
321
+asm(                             \
322
+   " mullw  16,%6,%6       \n\t" \
323
+   " addc   %0,%0,16       \n\t" \
324
+   " mulhwu 16,%6,%6       \n\t" \
325
+   " adde   %1,%1,16       \n\t" \
326
+   " addze  %2,%2          \n\t" \
327
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","%cc");
329
+
330
+/* for squaring some of the terms are doubled: c2:c1:c0 += 2 * i * j, done by adding the product held in GPRs 17:16 twice */
330
+#define SQRADD2(i, j)            \
331
+asm(                             \
332
+   " mullw  16,%6,%7       \n\t" \
333
+   " mulhwu 17,%6,%7       \n\t" \
334
+   " addc   %0,%0,16       \n\t" \
335
+   " adde   %1,%1,17       \n\t" \
336
+   " addze  %2,%2          \n\t" \
337
+   " addc   %0,%0,16       \n\t" \
338
+   " adde   %1,%1,17       \n\t" \
339
+   " addze  %2,%2          \n\t" \
340
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","%cc");
342
+
343
+#define SQRADDSC(i, j)   /* sc2:sc1:sc0 = i * j: low/high product words go straight to sc0/sc1, xor clears sc2 */   \
343
+asm(                              \
344
+   " mullw  %0,%6,%7        \n\t" \
345
+   " mulhwu %1,%6,%7        \n\t" \
346
+   " xor    %2,%2,%2        \n\t" \
347
+:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc");
349
+
350
+#define SQRADDAC(i, j)   /* sc2:sc1:sc0 += i * j: adds one more product to the secondary accumulator; GPR 16 is scratch */   \
350
+asm(                             \
351
+   " mullw  16,%6,%7       \n\t" \
352
+   " addc   %0,%0,16       \n\t" \
353
+   " mulhwu 16,%6,%7       \n\t" \
354
+   " adde   %1,%1,16       \n\t" \
355
+   " addze  %2,%2          \n\t" \
356
+:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "%cc");
358
+
359
+#define SQRADDDB   /* c2:c1:c0 += 2 * sc2:sc1:sc0; note that operands %3..%5 are sc0..sc2 here */   \
359
+asm(                              \
360
+   " addc   %0,%0,%3        \n\t" \
361
+   " adde   %1,%1,%4        \n\t" \
362
+   " adde   %2,%2,%5        \n\t" \
363
+   " addc   %0,%0,%3        \n\t" \
364
+   " adde   %1,%1,%4        \n\t" \
365
+   " adde   %2,%2,%5        \n\t" \
366
+:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc");
368
+
369
+#elif defined(TFM_PPC64)
370
+/* PPC64 */
371
+
372
+#define COMBA_START
373
+
374
+#define CLEAR_CARRY \
375
+   c0 = c1 = c2 = 0;
376
+
377
+#define COMBA_STORE(x) \
378
+   x = c0;
379
+
380
+#define COMBA_STORE2(x) \
381
+   x = c1;
382
+
383
+#define CARRY_FORWARD \
384
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
385
+
386
+#define COMBA_FINI
387
+
388
+/* squares i and adds the product into the accumulator: c2:c1:c0 += i * i (j is not referenced).  GPR 16 is scratch and is spelled as a bare register number because GNU as only accepts the r16 spelling when -mregnames is given. */
388
+#define SQRADD(i, j)             \
389
+asm(                             \
390
+   " mulld  16,%6,%6       \n\t" \
391
+   " addc   %0,%0,16       \n\t" \
392
+   " mulhdu 16,%6,%6       \n\t" \
393
+   " adde   %1,%1,16       \n\t" \
394
+   " addze  %2,%2          \n\t" \
395
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","%cc");
397
+
398
+/* for squaring some of the terms are doubled: c2:c1:c0 += 2 * i * j, done by adding the product held in GPRs 17:16 twice.  The scratch registers are spelled as bare numbers because GNU as only accepts r16/r17 when -mregnames is given. */
398
+#define SQRADD2(i, j)            \
399
+asm(                             \
400
+   " mulld  16,%6,%7       \n\t" \
401
+   " mulhdu 17,%6,%7       \n\t" \
402
+   " addc   %0,%0,16       \n\t" \
403
+   " adde   %1,%1,17       \n\t" \
404
+   " addze  %2,%2          \n\t" \
405
+   " addc   %0,%0,16       \n\t" \
406
+   " adde   %1,%1,17       \n\t" \
407
+   " addze  %2,%2          \n\t" \
408
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","%cc");
410
+
411
+#define SQRADDSC(i, j)            /* start a fresh side sum: sc2:sc1:sc0 = i * j */ \
412
+asm(                              \
413
+   " mulld  %0,%6,%7        \n\t" /* sc0 = low doubleword of i*j */ \
414
+   " mulhdu %1,%6,%7        \n\t" /* sc1 = high doubleword of i*j (unsigned) */ \
415
+   " xor    %2,%2,%2        \n\t" /* sc2 = 0 */ \
416
+:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc");
417
+
418
+#define SQRADDAC(i, j)           /* accumulate into the side sum: sc2:sc1:sc0 += i * j */ \
419
+asm(                             \
420
+   " mulld  r16,%6,%7       \n\t" /* r16 (scratch, listed as clobbered) = low doubleword of i*j */ \
421
+   " addc   %0,%0,r16       \n\t" /* sc0 += r16, recording the carry */ \
422
+   " mulhdu r16,%6,%7       \n\t" /* r16 = high doubleword of i*j (unsigned) */ \
423
+   " adde   %1,%1,r16       \n\t" /* sc1 += r16 + carry */ \
424
+   " addze  %2,%2          \n\t" /* sc2 += carry */ \
425
+:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r16", "%cc");
426
+
427
+#define SQRADDDB                  /* c2:c1:c0 += 2 * sc2:sc1:sc0, done as two identical add passes */ \
428
+asm(                              \
429
+   " addc   %0,%0,%3        \n\t" /* pass 1: c0 += sc0, recording the carry */ \
430
+   " adde   %1,%1,%4        \n\t" /* c1 += sc1 + carry */ \
431
+   " adde   %2,%2,%5        \n\t" /* c2 += sc2 + carry */ \
432
+   " addc   %0,%0,%3        \n\t" /* pass 2: same three-word add again */ \
433
+   " adde   %1,%1,%4        \n\t" \
434
+   " adde   %2,%2,%5        \n\t" \
435
+:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc");
436
+
437
+
438
+#elif defined(TFM_AVR32)
439
+
440
+/* AVR32 */
441
+
442
+#define COMBA_START /* expands to nothing in this branch */
443
+
444
+#define CLEAR_CARRY /* zero all three accumulator words; the expansion carries its own ';' */ \
445
+   c0 = c1 = c2 = 0;
446
+
447
+#define COMBA_STORE(x) /* write the lowest accumulator word, c0, to x */ \
448
+   x = c0;
449
+
450
+#define COMBA_STORE2(x) /* write the middle accumulator word, c1, to x */ \
451
+   x = c1;
452
+
453
+#define CARRY_FORWARD /* shift the accumulator down one word: c0 <- c1, c1 <- c2, c2 <- 0 */ \
454
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
455
+
456
+#define COMBA_FINI /* expands to nothing in this branch */
457
+
458
+/* c2:c1:c0 += i * i; only i is handed to the asm, j is accepted but never referenced */
459
+#define SQRADD(i, j)             \
460
+asm(                             \
461
+   " mulu.d r2,%6,%6       \n\t" /* r3:r2 (scratch pair, listed as clobbered) = i * i */ \
462
+   " add    %0,%0,r2       \n\t" /* c0 += low word, setting the carry flag */ \
463
+   " adc    %1,%1,r3       \n\t" /* c1 += high word + carry */ \
464
+   " acr    %2             \n\t" /* c2 += carry */ \
465
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r2","r3","%cc"); /* add/adc alter the flags, so declare them clobbered as SQRADDDB does */
466
+
467
+/* c2:c1:c0 += 2 * i * j: the product is computed once and added to the accumulator twice */
468
+#define SQRADD2(i, j)            \
469
+asm(                             \
470
+   " mulu.d r2,%6,%7       \n\t" /* r3:r2 (scratch pair, listed as clobbered) = i * j */ \
471
+   " add    %0,%0,r2       \n\t" /* pass 1: c0 += low word, setting the carry flag */ \
472
+   " adc    %1,%1,r3       \n\t" /* c1 += high word + carry */ \
473
+   " acr    %2             \n\t" /* c2 += carry (single operand, written as in SQRADD; stray comma removed) */ \
474
+   " add    %0,%0,r2       \n\t" /* pass 2: add the same product again */ \
475
+   " adc    %1,%1,r3       \n\t" \
476
+   " acr    %2             \n\t" \
477
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2", "r3", "%cc"); /* add/adc alter the flags, so declare them clobbered as SQRADDDB does */
478
+
479
+#define SQRADDSC(i, j)            /* start a fresh side sum: sc2:sc1:sc0 = i * j */ \
480
+asm(                              \
481
+   " mulu.d r2,%6,%7        \n\t" /* r3:r2 (scratch pair, listed as clobbered) = i * j */ \
482
+   " mov    %0,r2           \n\t" /* sc0 = low word */ \
483
+   " mov    %1,r3           \n\t" /* sc1 = high word */ \
484
+   " eor    %2,%2           \n\t" /* sc2 = 0 */ \
485
+:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "r2", "r3", "%cc"); /* eor alters the flags, so declare them clobbered as SQRADDDB does */
486
+
487
+#define SQRADDAC(i, j)           /* accumulate into the side sum: sc2:sc1:sc0 += i * j */ \
488
+asm(                             \
489
+   " mulu.d r2,%6,%7       \n\t" /* r3:r2 (scratch pair, listed as clobbered) = i * j */ \
490
+   " add    %0,%0,r2       \n\t" /* sc0 += low word, setting the carry flag */ \
491
+   " adc    %1,%1,r3       \n\t" /* sc1 += high word + carry */ \
492
+   " acr    %2             \n\t" /* sc2 += carry */ \
493
+:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r2", "r3", "%cc"); /* add/adc alter the flags, so declare them clobbered as SQRADDDB does */
494
+
495
+#define SQRADDDB                  /* c2:c1:c0 += 2 * sc2:sc1:sc0, done as two identical add passes */ \
496
+asm(                              \
497
+   " add    %0,%0,%3        \n\t" /* pass 1: c0 += sc0, setting the carry flag */ \
498
+   " adc    %1,%1,%4        \n\t" /* c1 += sc1 + carry */ \
499
+   " adc    %2,%2,%5        \n\t" /* c2 += sc2 + carry */ \
500
+   " add    %0,%0,%3        \n\t" /* pass 2: same three-word add again */ \
501
+   " adc    %1,%1,%4        \n\t" \
502
+   " adc    %2,%2,%5        \n\t" \
503
+:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc");
504
+
505
+#elif defined(TFM_MIPS)
506
+
507
+/* MIPS */
508
+
509
+#define COMBA_START /* expands to nothing in this branch */
510
+
511
+#define CLEAR_CARRY /* zero all three accumulator words; the expansion carries its own ';' */ \
512
+   c0 = c1 = c2 = 0;
513
+
514
+#define COMBA_STORE(x) /* write the lowest accumulator word, c0, to x */ \
515
+   x = c0;
516
+
517
+#define COMBA_STORE2(x) /* write the middle accumulator word, c1, to x */ \
518
+   x = c1;
519
+
520
+#define CARRY_FORWARD /* shift the accumulator down one word: c0 <- c1, c1 <- c2, c2 <- 0 */ \
521
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
522
+
523
+#define COMBA_FINI /* expands to nothing in this branch */
524
+
525
+/* c2:c1:c0 += i * i; only i is handed to the asm, j is accepted but never referenced. Carries are rebuilt with sltu compares. */
526
+#define SQRADD(i, j)              \
527
+asm(                              \
528
+   " multu  %6,%6          \n\t"  /* HI:LO = i * i (unsigned) */ \
529
+   " mflo   $12            \n\t"  /* $12 = low word of the product */ \
530
+   " mfhi   $13            \n\t"  /* $13 = high word of the product */ \
531
+   " addu    %0,%0,$12     \n\t"  /* c0 += low word */ \
532
+   " sltu   $12,%0,$12     \n\t"  /* $12 = 1 if that add wrapped (sum < addend), else 0 */ \
533
+   " addu    %1,%1,$13     \n\t"  /* c1 += high word */ \
534
+   " sltu   $13,%1,$13     \n\t"  /* $13 = 1 if that add wrapped */ \
535
+   " addu    %1,%1,$12     \n\t"  /* c1 += carry out of c0 */ \
536
+   " sltu   $12,%1,$12     \n\t"  /* $12 = 1 if adding the carry wrapped */ \
537
+   " addu    %2,%2,$13     \n\t"  /* c2 += both carries out of c1 */ \
538
+   " addu    %2,%2,$12     \n\t"  \
539
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"$12","$13");
540
+
541
+/* c2:c1:c0 += 2 * i * j: the product is kept in $12/$13 and added to the accumulator twice */
542
+#define SQRADD2(i, j)            \
543
+asm(                             \
544
+   " multu  %6,%7          \n\t" /* HI:LO = i * j (unsigned) */ \
545
+   " mflo   $12            \n\t" /* $12 = low word, kept for both passes */ \
546
+   " mfhi   $13            \n\t" /* $13 = high word, kept for both passes */ \
547
+                                 \
548
+   " addu    %0,%0,$12     \n\t" /* pass 1: c0 += low word */ \
549
+   " sltu   $14,%0,$12     \n\t" /* $14 = 1 if that add wrapped (sum < addend) */ \
550
+   " addu    %1,%1,$13     \n\t" /* c1 += high word */ \
551
+   " sltu   $15,%1,$13     \n\t" /* $15 = 1 if that add wrapped */ \
552
+   " addu    %1,%1,$14     \n\t" /* c1 += carry out of c0 */ \
553
+   " sltu   $14,%1,$14     \n\t" /* $14 = 1 if adding the carry wrapped */ \
554
+   " addu    %2,%2,$15     \n\t" /* c2 += both carries out of c1 */ \
555
+   " addu    %2,%2,$14     \n\t" \
556
+                                 \
557
+   " addu    %0,%0,$12     \n\t" /* pass 2: identical sequence, adding the product again */ \
558
+   " sltu   $14,%0,$12     \n\t" \
559
+   " addu    %1,%1,$13     \n\t" \
560
+   " sltu   $15,%1,$13     \n\t" \
561
+   " addu    %1,%1,$14     \n\t" \
562
+   " sltu   $14,%1,$14     \n\t" \
563
+   " addu    %2,%2,$15     \n\t" \
564
+   " addu    %2,%2,$14     \n\t" \
565
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12", "$13", "$14", "$15");
566
+
567
+#define SQRADDSC(i, j)            /* start a fresh side sum: sc2:sc1:sc0 = i * j */ \
568
+asm(                              \
569
+   " multu  %6,%7          \n\t"  /* HI:LO = i * j (unsigned) */ \
570
+   " mflo   %0             \n\t"  /* sc0 = low word */ \
571
+   " mfhi   %1             \n\t"  /* sc1 = high word */ \
572
+   " xor    %2,%2,%2       \n\t"  /* sc2 = 0 */ \
573
+:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc");
574
+
575
+#define SQRADDAC(i, j)           /* accumulate into the side sum: sc2:sc1:sc0 += i * j; carries rebuilt with sltu */ \
576
+asm(                             \
577
+   " multu  %6,%7          \n\t" /* HI:LO = i * j (unsigned) */ \
578
+   " mflo   $12            \n\t" /* $12 = low word of the product */ \
579
+   " mfhi   $13            \n\t" /* $13 = high word of the product */ \
580
+   " addu    %0,%0,$12     \n\t" /* sc0 += low word */ \
581
+   " sltu   $12,%0,$12     \n\t" /* $12 = 1 if that add wrapped (sum < addend) */ \
582
+   " addu    %1,%1,$13     \n\t" /* sc1 += high word */ \
583
+   " sltu   $13,%1,$13     \n\t" /* $13 = 1 if that add wrapped */ \
584
+   " addu    %1,%1,$12     \n\t" /* sc1 += carry out of sc0 */ \
585
+   " sltu   $12,%1,$12     \n\t" /* $12 = 1 if adding the carry wrapped */ \
586
+   " addu    %2,%2,$13     \n\t" /* sc2 += both carries out of sc1 */ \
587
+   " addu    %2,%2,$12     \n\t" /* NOTE(review): "$14" is listed as clobbered below but no instruction here references it */ \
588
+:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"$12", "$13", "$14");
589
+
590
+#define SQRADDDB                  /* c2:c1:c0 += 2 * sc2:sc1:sc0, done as two identical add passes; carries rebuilt with sltu */ \
591
+asm(                              \
592
+   " addu    %0,%0,%3       \n\t" /* pass 1: c0 += sc0 */ \
593
+   " sltu   $10,%0,%3       \n\t" /* $10 = 1 if that add wrapped (sum < addend) */ \
594
+   " addu    %1,%1,$10      \n\t" /* c1 += carry out of c0 */ \
595
+   " sltu   $10,%1,$10      \n\t" /* $10 = 1 if adding the carry wrapped */ \
596
+   " addu    %1,%1,%4       \n\t" /* c1 += sc1 */ \
597
+   " sltu   $11,%1,%4       \n\t" /* $11 = 1 if that add wrapped */ \
598
+   " addu    %2,%2,$10      \n\t" /* c2 += both carries out of c1 */ \
599
+   " addu    %2,%2,$11      \n\t" \
600
+   " addu    %2,%2,%5       \n\t" /* c2 += sc2 */ \
601
+                                  \
602
+   " addu    %0,%0,%3       \n\t" /* pass 2: identical sequence, adding the side sum again */ \
603
+   " sltu   $10,%0,%3       \n\t" \
604
+   " addu    %1,%1,$10      \n\t" \
605
+   " sltu   $10,%1,$10      \n\t" \
606
+   " addu    %1,%1,%4       \n\t" \
607
+   " sltu   $11,%1,%4       \n\t" \
608
+   " addu    %2,%2,$10      \n\t" \
609
+   " addu    %2,%2,$11      \n\t" \
610
+   " addu    %2,%2,%5       \n\t" \
611
+:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "$10", "$11");
612
+
613
+#else
614
+
615
+#define TFM_ISO /* defined only in this #else branch, i.e. when no asm branch of this #if chain was selected */
616
+
617
+/* ISO C portable code: the same macro set as the asm branches, written in plain C */
618
+
619
+#define COMBA_START /* expands to nothing in this branch */
620
+
621
+#define CLEAR_CARRY /* zero all three accumulator words; the expansion carries its own ';' */ \
622
+   c0 = c1 = c2 = 0;
623
+
624
+#define COMBA_STORE(x) /* write the lowest accumulator word, c0, to x */ \
625
+   x = c0;
626
+
627
+#define COMBA_STORE2(x) /* write the middle accumulator word, c1, to x */ \
628
+   x = c1;
629
+
630
+#define CARRY_FORWARD /* shift the accumulator down one word: c0 <- c1, c1 <- c2, c2 <- 0 */ \
631
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
632
+
633
+#define COMBA_FINI /* expands to nothing in this branch */
634
+
635
+/* c2:c1:c0 += i * j; sums are formed in fp_word and the bits above DIGIT_BIT are carried upward. i and j are parenthesized so expression arguments are cast as a whole. */
636
+#define SQRADD(i, j)                                 \
637
+   do { fp_word t;                                   \
638
+   t = c0 + ((fp_word)(i)) * ((fp_word)(j));  c0 = t;    /* low digit kept in c0 */ \
639
+   t = c1 + (t >> DIGIT_BIT);             c1 = t; c2 += t >> DIGIT_BIT; /* carry into c1, then into c2 */ \
640
+   } while (0);
641
+  
642
+
643
+/* c2:c1:c0 += 2 * i * j: the product is formed once and added twice. Uses a variable `tt` that the expansion site must already declare; i and j are parenthesized so expression arguments are cast as a whole. */
644
+#define SQRADD2(i, j)                                                 \
645
+   do { fp_word t;                                                    \
646
+   t  = ((fp_word)(i)) * ((fp_word)(j));                              \
647
+   tt = (fp_word)c0 + t;                 c0 = tt;                              /* pass 1: low digit */ \
648
+   tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = tt; c2 += tt >> DIGIT_BIT;       /* carry into c1, then c2 */ \
649
+   tt = (fp_word)c0 + t;                 c0 = tt;                              /* pass 2: add the same product again */ \
650
+   tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = tt; c2 += tt >> DIGIT_BIT;       \
651
+   } while (0);
652
+
653
+#define SQRADDSC(i, j)                                                         /* start a fresh side sum: sc2:sc1:sc0 = i * j */ \
654
+   do { fp_word t;                                                             \
655
+      t =  ((fp_word)(i)) * ((fp_word)(j));                                    /* parenthesized so expression arguments are cast as a whole */ \
656
+      sc0 = (fp_digit)t; sc1 = (t >> DIGIT_BIT); sc2 = 0;                      /* split the product into low and high digits */ \
657
+   } while (0);
658
+
659
+#define SQRADDAC(i, j)                                                         /* accumulate into the side sum: sc2:sc1:sc0 += i * j */ \
660
+   do { fp_word t;                                                             \
661
+   t = sc0 + ((fp_word)(i)) * ((fp_word)(j));  sc0 = t;                        /* i, j parenthesized so expression arguments are cast as a whole */ \
662
+   t = sc1 + (t >> DIGIT_BIT);             sc1 = t; sc2 += t >> DIGIT_BIT;     /* carry into sc1, then into sc2 */ \
663
+   } while (0);
664
+
665
+#define SQRADDDB /* fold the doubled side sum into the main accumulator: c2:c1:c0 += 2 * sc2:sc1:sc0 */ \
666
+   do { fp_word wide; \
667
+   wide = c0 + ((fp_word)sc0) + ((fp_word)sc0); c0 = wide; /* low digit */ \
668
+   wide = c1 + ((fp_word)sc1) + ((fp_word)sc1) + (wide >> DIGIT_BIT); c1 = wide; /* middle digit plus carry */ \
669
+   c2 = ((fp_word)sc2) + ((fp_word)sc2) + c2 + (wide >> DIGIT_BIT); /* top digit plus carry */ \
670
+   } while (0);
671
+
672
+#endif
673
+
674
+/* $Source: /cvs/libtom/tomsfastmath/src/sqr/fp_sqr_comba.c,v $ */
675
+/* $Revision: 1.4 $ */
676
+/* $Date: 2007/03/14 23:47:42 $ */