aboutsummaryrefslogtreecommitdiffstats
path: root/linux/crypto/ciphers/des/asm
diff options
context:
space:
mode:
Diffstat (limited to 'linux/crypto/ciphers/des/asm')
-rw-r--r--linux/crypto/ciphers/des/asm/crypt586.pl204
-rw-r--r--linux/crypto/ciphers/des/asm/des-586.pl251
-rw-r--r--linux/crypto/ciphers/des/asm/des686.pl230
-rw-r--r--linux/crypto/ciphers/des/asm/desboth.pl79
-rw-r--r--linux/crypto/ciphers/des/asm/perlasm/cbc.pl342
-rw-r--r--linux/crypto/ciphers/des/asm/perlasm/readme124
-rw-r--r--linux/crypto/ciphers/des/asm/perlasm/x86asm.pl111
-rw-r--r--linux/crypto/ciphers/des/asm/perlasm/x86ms.pl345
-rw-r--r--linux/crypto/ciphers/des/asm/perlasm/x86unix.pl403
-rw-r--r--linux/crypto/ciphers/des/asm/readme131
10 files changed, 2220 insertions, 0 deletions
diff --git a/linux/crypto/ciphers/des/asm/crypt586.pl b/linux/crypto/ciphers/des/asm/crypt586.pl
new file mode 100644
index 000000000..297e38dec
--- /dev/null
+++ b/linux/crypto/ciphers/des/asm/crypt586.pl
@@ -0,0 +1,204 @@
+#!/usr/bin/perl
+#
+# The inner loop instruction sequence and the IP/FP modifications are from
+# Svend Olaf Mikkelsen <svolaf@inet.uni-c.dk>
+# I've added the stuff needed for crypt() but I've not worried about making
+# things perfect.
+#
+
+push(@INC,"perlasm","../../perlasm");
+require "x86asm.pl";
+
+&asm_init($ARGV[0],"crypt586.pl");
+
+$L="edi";
+$R="esi";
+
+&external_label("des_SPtrans");
+&fcrypt_body("fcrypt_body");
+&asm_finish();
+
+sub fcrypt_body
+ {
+ local($name,$do_ip)=@_;
+
+ &function_begin($name,"EXTRN _des_SPtrans:DWORD");
+
+ &comment("");
+ &comment("Load the 2 words");
+ $ks="ebp";
+
+ &xor( $L, $L);
+ &xor( $R, $R);
+ &mov($ks,&wparam(1));
+
+ &push(25); # add a variable
+
+ &set_label("start");
+ for ($i=0; $i<16; $i+=2)
+ {
+ &comment("");
+ &comment("Round $i");
+ &D_ENCRYPT($i,$L,$R,$i*2,$ks,"des_SPtrans","eax","ebx","ecx","edx");
+
+ &comment("");
+ &comment("Round ".sprintf("%d",$i+1));
+ &D_ENCRYPT($i+1,$R,$L,($i+1)*2,$ks,"des_SPtrans","eax","ebx","ecx","edx");
+ }
+ &mov("ebx", &swtmp(0));
+ &mov("eax", $L);
+ &dec("ebx");
+ &mov($L, $R);
+ &mov($R, "eax");
+ &mov(&swtmp(0), "ebx");
+ &jnz(&label("start"));
+
+ &comment("");
+ &comment("FP");
+ &mov("edx",&wparam(0));
+
+ &FP_new($R,$L,"eax",3);
+ &mov(&DWP(0,"edx","",0),"eax");
+ &mov(&DWP(4,"edx","",0),$L);
+
+ &pop("ecx"); # remove variable
+
+ &function_end($name);
+ }
+
+sub D_ENCRYPT
+ {
+ local($r,$L,$R,$S,$ks,$desSP,$u,$tmp1,$tmp2,$t)=@_;
+
+ &mov( $u, &wparam(2)); # 2
+ &mov( $t, $R);
+ &shr( $t, 16); # 1
+ &mov( $tmp2, &wparam(3)); # 2
+ &xor( $t, $R); # 1
+
+ &and( $u, $t); # 2
+ &and( $t, $tmp2); # 2
+
+ &mov( $tmp1, $u);
+ &shl( $tmp1, 16); # 1
+ &mov( $tmp2, $t);
+ &shl( $tmp2, 16); # 1
+ &xor( $u, $tmp1); # 2
+ &xor( $t, $tmp2); # 2
+ &mov( $tmp1, &DWP(&n2a($S*4),$ks,"",0)); # 2
+ &xor( $u, $tmp1);
+ &mov( $tmp2, &DWP(&n2a(($S+1)*4),$ks,"",0)); # 2
+ &xor( $u, $R);
+ &xor( $t, $R);
+ &xor( $t, $tmp2);
+
+ &and( $u, "0xfcfcfcfc" ); # 2
+ &xor( $tmp1, $tmp1); # 1
+ &and( $t, "0xcfcfcfcf" ); # 2
+ &xor( $tmp2, $tmp2);
+ &movb( &LB($tmp1), &LB($u) );
+ &movb( &LB($tmp2), &HB($u) );
+ &rotr( $t, 4 );
+ &mov( $ks, &DWP(" $desSP",$tmp1,"",0));
+ &movb( &LB($tmp1), &LB($t) );
+ &xor( $L, $ks);
+ &mov( $ks, &DWP("0x200+$desSP",$tmp2,"",0));
+ &xor( $L, $ks);
+ &movb( &LB($tmp2), &HB($t) );
+ &shr( $u, 16);
+ &mov( $ks, &DWP("0x100+$desSP",$tmp1,"",0));
+ &xor( $L, $ks);
+ &movb( &LB($tmp1), &HB($u) );
+ &shr( $t, 16);
+ &mov( $ks, &DWP("0x300+$desSP",$tmp2,"",0));
+ &xor( $L, $ks);
+ &mov( $ks, &wparam(1));
+ &movb( &LB($tmp2), &HB($t) );
+ &and( $u, "0xff" );
+ &and( $t, "0xff" );
+ &mov( $tmp1, &DWP("0x600+$desSP",$tmp1,"",0));
+ &xor( $L, $tmp1);
+ &mov( $tmp1, &DWP("0x700+$desSP",$tmp2,"",0));
+ &xor( $L, $tmp1);
+ &mov( $tmp1, &DWP("0x400+$desSP",$u,"",0));
+ &xor( $L, $tmp1);
+ &mov( $tmp1, &DWP("0x500+$desSP",$t,"",0));
+ &xor( $L, $tmp1);
+ }
+
+sub n2a
+ {
+ sprintf("%d",$_[0]);
+ }
+
+# now has a side affect of rotating $a by $shift
+sub R_PERM_OP
+ {
+ local($a,$b,$tt,$shift,$mask,$last)=@_;
+
+ &rotl( $a, $shift ) if ($shift != 0);
+ &mov( $tt, $a );
+ &xor( $a, $b );
+ &and( $a, $mask );
+ if ($notlast eq $b)
+ {
+ &xor( $b, $a );
+ &xor( $tt, $a );
+ }
+ else
+ {
+ &xor( $tt, $a );
+ &xor( $b, $a );
+ }
+ &comment("");
+ }
+
+sub IP_new
+ {
+ local($l,$r,$tt,$lr)=@_;
+
+ &R_PERM_OP($l,$r,$tt, 4,"0xf0f0f0f0",$l);
+ &R_PERM_OP($r,$tt,$l,20,"0xfff0000f",$l);
+ &R_PERM_OP($l,$tt,$r,14,"0x33333333",$r);
+ &R_PERM_OP($tt,$r,$l,22,"0x03fc03fc",$r);
+ &R_PERM_OP($l,$r,$tt, 9,"0xaaaaaaaa",$r);
+
+ if ($lr != 3)
+ {
+ if (($lr-3) < 0)
+ { &rotr($tt, 3-$lr); }
+ else { &rotl($tt, $lr-3); }
+ }
+ if ($lr != 2)
+ {
+ if (($lr-2) < 0)
+ { &rotr($r, 2-$lr); }
+ else { &rotl($r, $lr-2); }
+ }
+ }
+
+sub FP_new
+ {
+ local($l,$r,$tt,$lr)=@_;
+
+ if ($lr != 2)
+ {
+ if (($lr-2) < 0)
+ { &rotl($r, 2-$lr); }
+ else { &rotr($r, $lr-2); }
+ }
+ if ($lr != 3)
+ {
+ if (($lr-3) < 0)
+ { &rotl($l, 3-$lr); }
+ else { &rotr($l, $lr-3); }
+ }
+
+ &R_PERM_OP($l,$r,$tt, 0,"0xaaaaaaaa",$r);
+ &R_PERM_OP($tt,$r,$l,23,"0x03fc03fc",$r);
+ &R_PERM_OP($l,$r,$tt,10,"0x33333333",$l);
+ &R_PERM_OP($r,$tt,$l,18,"0xfff0000f",$l);
+ &R_PERM_OP($l,$tt,$r,12,"0xf0f0f0f0",$r);
+ &rotr($tt , 4);
+ }
+
diff --git a/linux/crypto/ciphers/des/asm/des-586.pl b/linux/crypto/ciphers/des/asm/des-586.pl
new file mode 100644
index 000000000..7f2e09fa7
--- /dev/null
+++ b/linux/crypto/ciphers/des/asm/des-586.pl
@@ -0,0 +1,251 @@
+#!/usr/bin/perl
+#
+# The inner loop instruction sequence and the IP/FP modifications are from
+# Svend Olaf Mikkelsen <svolaf@inet.uni-c.dk>
+#
+
+push(@INC,"perlasm","../../perlasm");
+require "x86asm.pl";
+require "cbc.pl";
+require "desboth.pl";
+
+# base code is in microsft
+# op dest, source
+# format.
+#
+
+&asm_init($ARGV[0],"des-586.pl");
+
+$L="edi";
+$R="esi";
+
+&external_label("des_SPtrans");
+&des_encrypt("des_encrypt",1);
+&des_encrypt("des_encrypt2",0);
+&des_encrypt3("des_encrypt3",1);
+&des_encrypt3("des_decrypt3",0);
+&cbc("des_ncbc_encrypt","des_encrypt","des_encrypt",0,4,5,3,5,-1);
+&cbc("des_ede3_cbc_encrypt","des_encrypt3","des_decrypt3",0,6,7,3,4,5);
+
+&asm_finish();
+
+sub des_encrypt
+ {
+ local($name,$do_ip)=@_;
+
+ &function_begin_B($name,"EXTRN _des_SPtrans:DWORD");
+
+ &push("esi");
+ &push("edi");
+
+ &comment("");
+ &comment("Load the 2 words");
+ $ks="ebp";
+
+ if ($do_ip)
+ {
+ &mov($R,&wparam(0));
+ &xor( "ecx", "ecx" );
+
+ &push("ebx");
+ &push("ebp");
+
+ &mov("eax",&DWP(0,$R,"",0));
+ &mov("ebx",&wparam(2)); # get encrypt flag
+ &mov($L,&DWP(4,$R,"",0));
+ &comment("");
+ &comment("IP");
+ &IP_new("eax",$L,$R,3);
+ }
+ else
+ {
+ &mov("eax",&wparam(0));
+ &xor( "ecx", "ecx" );
+
+ &push("ebx");
+ &push("ebp");
+
+ &mov($R,&DWP(0,"eax","",0));
+ &mov("ebx",&wparam(2)); # get encrypt flag
+ &rotl($R,3);
+ &mov($L,&DWP(4,"eax","",0));
+ &rotl($L,3);
+ }
+
+ &mov( $ks, &wparam(1) );
+ &cmp("ebx","0");
+ &je(&label("start_decrypt"));
+
+ for ($i=0; $i<16; $i+=2)
+ {
+ &comment("");
+ &comment("Round $i");
+ &D_ENCRYPT($i,$L,$R,$i*2,$ks,"des_SPtrans","eax","ebx","ecx","edx");
+
+ &comment("");
+ &comment("Round ".sprintf("%d",$i+1));
+ &D_ENCRYPT($i+1,$R,$L,($i+1)*2,$ks,"des_SPtrans","eax","ebx","ecx","edx");
+ }
+ &jmp(&label("end"));
+
+ &set_label("start_decrypt");
+
+ for ($i=15; $i>0; $i-=2)
+ {
+ &comment("");
+ &comment("Round $i");
+ &D_ENCRYPT(15-$i,$L,$R,$i*2,$ks,"des_SPtrans","eax","ebx","ecx","edx");
+ &comment("");
+ &comment("Round ".sprintf("%d",$i-1));
+ &D_ENCRYPT(15-$i+1,$R,$L,($i-1)*2,$ks,"des_SPtrans","eax","ebx","ecx","edx");
+ }
+
+ &set_label("end");
+
+ if ($do_ip)
+ {
+ &comment("");
+ &comment("FP");
+ &mov("edx",&wparam(0));
+ &FP_new($L,$R,"eax",3);
+
+ &mov(&DWP(0,"edx","",0),"eax");
+ &mov(&DWP(4,"edx","",0),$R);
+ }
+ else
+ {
+ &comment("");
+ &comment("Fixup");
+ &rotr($L,3); # r
+ &mov("eax",&wparam(0));
+ &rotr($R,3); # l
+ &mov(&DWP(0,"eax","",0),$L);
+ &mov(&DWP(4,"eax","",0),$R);
+ }
+
+ &pop("ebp");
+ &pop("ebx");
+ &pop("edi");
+ &pop("esi");
+ &ret();
+
+ &function_end_B($name);
+ }
+
+sub D_ENCRYPT
+ {
+ local($r,$L,$R,$S,$ks,$desSP,$u,$tmp1,$tmp2,$t)=@_;
+
+ &mov( $u, &DWP(&n2a($S*4),$ks,"",0));
+ &xor( $tmp1, $tmp1);
+ &mov( $t, &DWP(&n2a(($S+1)*4),$ks,"",0));
+ &xor( $u, $R);
+ &xor( $t, $R);
+ &and( $u, "0xfcfcfcfc" );
+ &and( $t, "0xcfcfcfcf" );
+ &movb( &LB($tmp1), &LB($u) );
+ &movb( &LB($tmp2), &HB($u) );
+ &rotr( $t, 4 );
+ &mov( $ks, &DWP(" $desSP",$tmp1,"",0));
+ &movb( &LB($tmp1), &LB($t) );
+ &xor( $L, $ks);
+ &mov( $ks, &DWP("0x200+$desSP",$tmp2,"",0));
+ &xor( $L, $ks); ######
+ &movb( &LB($tmp2), &HB($t) );
+ &shr( $u, 16);
+ &mov( $ks, &DWP("0x100+$desSP",$tmp1,"",0));
+ &xor( $L, $ks); ######
+ &movb( &LB($tmp1), &HB($u) );
+ &shr( $t, 16);
+ &mov( $ks, &DWP("0x300+$desSP",$tmp2,"",0));
+ &xor( $L, $ks);
+ &mov( $ks, &wparam(1) );
+ &movb( &LB($tmp2), &HB($t) );
+ &and( $u, "0xff" );
+ &and( $t, "0xff" );
+ &mov( $tmp1, &DWP("0x600+$desSP",$tmp1,"",0));
+ &xor( $L, $tmp1);
+ &mov( $tmp1, &DWP("0x700+$desSP",$tmp2,"",0));
+ &xor( $L, $tmp1);
+ &mov( $tmp1, &DWP("0x400+$desSP",$u,"",0));
+ &xor( $L, $tmp1);
+ &mov( $tmp1, &DWP("0x500+$desSP",$t,"",0));
+ &xor( $L, $tmp1);
+ }
+
+sub n2a
+ {
+ sprintf("%d",$_[0]);
+ }
+
+# now has a side affect of rotating $a by $shift
+sub R_PERM_OP
+ {
+ local($a,$b,$tt,$shift,$mask,$last)=@_;
+
+ &rotl( $a, $shift ) if ($shift != 0);
+ &mov( $tt, $a );
+ &xor( $a, $b );
+ &and( $a, $mask );
+ if (!$last eq $b)
+ {
+ &xor( $b, $a );
+ &xor( $tt, $a );
+ }
+ else
+ {
+ &xor( $tt, $a );
+ &xor( $b, $a );
+ }
+ &comment("");
+ }
+
+sub IP_new
+ {
+ local($l,$r,$tt,$lr)=@_;
+
+ &R_PERM_OP($l,$r,$tt, 4,"0xf0f0f0f0",$l);
+ &R_PERM_OP($r,$tt,$l,20,"0xfff0000f",$l);
+ &R_PERM_OP($l,$tt,$r,14,"0x33333333",$r);
+ &R_PERM_OP($tt,$r,$l,22,"0x03fc03fc",$r);
+ &R_PERM_OP($l,$r,$tt, 9,"0xaaaaaaaa",$r);
+
+ if ($lr != 3)
+ {
+ if (($lr-3) < 0)
+ { &rotr($tt, 3-$lr); }
+ else { &rotl($tt, $lr-3); }
+ }
+ if ($lr != 2)
+ {
+ if (($lr-2) < 0)
+ { &rotr($r, 2-$lr); }
+ else { &rotl($r, $lr-2); }
+ }
+ }
+
+sub FP_new
+ {
+ local($l,$r,$tt,$lr)=@_;
+
+ if ($lr != 2)
+ {
+ if (($lr-2) < 0)
+ { &rotl($r, 2-$lr); }
+ else { &rotr($r, $lr-2); }
+ }
+ if ($lr != 3)
+ {
+ if (($lr-3) < 0)
+ { &rotl($l, 3-$lr); }
+ else { &rotr($l, $lr-3); }
+ }
+
+ &R_PERM_OP($l,$r,$tt, 0,"0xaaaaaaaa",$r);
+ &R_PERM_OP($tt,$r,$l,23,"0x03fc03fc",$r);
+ &R_PERM_OP($l,$r,$tt,10,"0x33333333",$l);
+ &R_PERM_OP($r,$tt,$l,18,"0xfff0000f",$l);
+ &R_PERM_OP($l,$tt,$r,12,"0xf0f0f0f0",$r);
+ &rotr($tt , 4);
+ }
+
diff --git a/linux/crypto/ciphers/des/asm/des686.pl b/linux/crypto/ciphers/des/asm/des686.pl
new file mode 100644
index 000000000..cf1a82fb5
--- /dev/null
+++ b/linux/crypto/ciphers/des/asm/des686.pl
@@ -0,0 +1,230 @@
+#!/usr/bin/perl
+
+$prog="des686.pl";
+
+# base code is in microsft
+# op dest, source
+# format.
+#
+
+# WILL NOT WORK ANYMORE WITH desboth.pl
+require "desboth.pl";
+
+if ( ($ARGV[0] eq "elf"))
+ { require "x86unix.pl"; }
+elsif ( ($ARGV[0] eq "a.out"))
+ { $aout=1; require "x86unix.pl"; }
+elsif ( ($ARGV[0] eq "sol"))
+ { $sol=1; require "x86unix.pl"; }
+elsif ( ($ARGV[0] eq "cpp"))
+ { $cpp=1; require "x86unix.pl"; }
+elsif ( ($ARGV[0] eq "win32"))
+ { require "x86ms.pl"; }
+else
+ {
+ print STDERR <<"EOF";
+Pick one target type from
+ elf - linux, FreeBSD etc
+ a.out - old linux
+ sol - x86 solaris
+ cpp - format so x86unix.cpp can be used
+ win32 - Windows 95/Windows NT
+EOF
+ exit(1);
+ }
+
+&comment("Don't even think of reading this code");
+&comment("It was automatically generated by $prog");
+&comment("Which is a perl program used to generate the x86 assember for");
+&comment("any of elf, a.out, Win32, or Solaris");
+&comment("It can be found in SSLeay 0.6.5+ or in libdes 3.26+");
+&comment("eric <eay\@cryptsoft.com>");
+&comment("");
+
+&file("dx86xxxx");
+
+$L="edi";
+$R="esi";
+
+&des_encrypt("des_encrypt",1);
+&des_encrypt("des_encrypt2",0);
+
+&des_encrypt3("des_encrypt3",1);
+&des_encrypt3("des_decrypt3",0);
+
+&file_end();
+
+sub des_encrypt
+ {
+ local($name,$do_ip)=@_;
+
+ &function_begin($name,"EXTRN _des_SPtrans:DWORD");
+
+ &comment("");
+ &comment("Load the 2 words");
+ &mov("eax",&wparam(0));
+ &mov($L,&DWP(0,"eax","",0));
+ &mov($R,&DWP(4,"eax","",0));
+
+ $ksp=&wparam(1);
+
+ if ($do_ip)
+ {
+ &comment("");
+ &comment("IP");
+ &IP_new($L,$R,"eax");
+ }
+
+ &comment("");
+ &comment("fixup rotate");
+ &rotl($R,3);
+ &rotl($L,3);
+ &exch($L,$R);
+
+ &comment("");
+ &comment("load counter, key_schedule and enc flag");
+ &mov("eax",&wparam(2)); # get encrypt flag
+ &mov("ebp",&wparam(1)); # get ks
+ &cmp("eax","0");
+ &je(&label("start_decrypt"));
+
+ # encrypting part
+
+ for ($i=0; $i<16; $i+=2)
+ {
+ &comment("");
+ &comment("Round $i");
+ &D_ENCRYPT($L,$R,$i*2,"ebp","des_SPtrans","ecx","edx","eax","ebx");
+
+ &comment("");
+ &comment("Round ".sprintf("%d",$i+1));
+ &D_ENCRYPT($R,$L,($i+1)*2,"ebp","des_SPtrans","ecx","edx","eax","ebx");
+ }
+ &jmp(&label("end"));
+
+ &set_label("start_decrypt");
+
+ for ($i=15; $i>0; $i-=2)
+ {
+ &comment("");
+ &comment("Round $i");
+ &D_ENCRYPT($L,$R,$i*2,"ebp","des_SPtrans","ecx","edx","eax","ebx");
+ &comment("");
+ &comment("Round ".sprintf("%d",$i-1));
+ &D_ENCRYPT($R,$L,($i-1)*2,"ebp","des_SPtrans","ecx","edx","eax","ebx");
+ }
+
+ &set_label("end");
+
+ &comment("");
+ &comment("Fixup");
+ &rotr($L,3); # r
+ &rotr($R,3); # l
+
+ if ($do_ip)
+ {
+ &comment("");
+ &comment("FP");
+ &FP_new($R,$L,"eax");
+ }
+
+ &mov("eax",&wparam(0));
+ &mov(&DWP(0,"eax","",0),$L);
+ &mov(&DWP(4,"eax","",0),$R);
+
+ &function_end($name);
+ }
+
+
+# The logic is to load R into 2 registers and operate on both at the same time.
+# We also load the 2 R's into 2 more registers so we can do the 'move word down a byte'
+# while also masking the other copy and doing a lookup. We then also accumulate the
+# L value in 2 registers then combine them at the end.
+sub D_ENCRYPT
+ {
+ local($L,$R,$S,$ks,$desSP,$u,$t,$tmp1,$tmp2,$tmp3)=@_;
+
+ &mov( $u, &DWP(&n2a($S*4),$ks,"",0));
+ &mov( $t, &DWP(&n2a(($S+1)*4),$ks,"",0));
+ &xor( $u, $R );
+ &xor( $t, $R );
+ &rotr( $t, 4 );
+
+ # the numbers at the end of the line are origional instruction order
+ &mov( $tmp2, $u ); # 1 2
+ &mov( $tmp1, $t ); # 1 1
+ &and( $tmp2, "0xfc" ); # 1 4
+ &and( $tmp1, "0xfc" ); # 1 3
+ &shr( $t, 8 ); # 1 5
+ &xor( $L, &DWP("0x100+$desSP",$tmp1,"",0)); # 1 7
+ &shr( $u, 8 ); # 1 6
+ &mov( $tmp1, &DWP(" $desSP",$tmp2,"",0)); # 1 8
+
+ &mov( $tmp2, $u ); # 2 2
+ &xor( $L, $tmp1 ); # 1 9
+ &and( $tmp2, "0xfc" ); # 2 4
+ &mov( $tmp1, $t ); # 2 1
+ &and( $tmp1, "0xfc" ); # 2 3
+ &shr( $t, 8 ); # 2 5
+ &xor( $L, &DWP("0x300+$desSP",$tmp1,"",0)); # 2 7
+ &shr( $u, 8 ); # 2 6
+ &mov( $tmp1, &DWP("0x200+$desSP",$tmp2,"",0)); # 2 8
+ &mov( $tmp2, $u ); # 3 2
+
+ &xor( $L, $tmp1 ); # 2 9
+ &and( $tmp2, "0xfc" ); # 3 4
+
+ &mov( $tmp1, $t ); # 3 1
+ &shr( $u, 8 ); # 3 6
+ &and( $tmp1, "0xfc" ); # 3 3
+ &shr( $t, 8 ); # 3 5
+ &xor( $L, &DWP("0x500+$desSP",$tmp1,"",0)); # 3 7
+ &mov( $tmp1, &DWP("0x400+$desSP",$tmp2,"",0)); # 3 8
+
+ &and( $t, "0xfc" ); # 4 1
+ &xor( $L, $tmp1 ); # 3 9
+
+ &and( $u, "0xfc" ); # 4 2
+ &xor( $L, &DWP("0x700+$desSP",$t,"",0)); # 4 3
+ &xor( $L, &DWP("0x600+$desSP",$u,"",0)); # 4 4
+ }
+
+sub PERM_OP
+ {
+ local($a,$b,$tt,$shift,$mask)=@_;
+
+ &mov( $tt, $a );
+ &shr( $tt, $shift );
+ &xor( $tt, $b );
+ &and( $tt, $mask );
+ &xor( $b, $tt );
+ &shl( $tt, $shift );
+ &xor( $a, $tt );
+ }
+
+sub IP_new
+ {
+ local($l,$r,$tt)=@_;
+
+ &PERM_OP($r,$l,$tt, 4,"0x0f0f0f0f");
+ &PERM_OP($l,$r,$tt,16,"0x0000ffff");
+ &PERM_OP($r,$l,$tt, 2,"0x33333333");
+ &PERM_OP($l,$r,$tt, 8,"0x00ff00ff");
+ &PERM_OP($r,$l,$tt, 1,"0x55555555");
+ }
+
+sub FP_new
+ {
+ local($l,$r,$tt)=@_;
+
+ &PERM_OP($l,$r,$tt, 1,"0x55555555");
+ &PERM_OP($r,$l,$tt, 8,"0x00ff00ff");
+ &PERM_OP($l,$r,$tt, 2,"0x33333333");
+ &PERM_OP($r,$l,$tt,16,"0x0000ffff");
+ &PERM_OP($l,$r,$tt, 4,"0x0f0f0f0f");
+ }
+
+sub n2a
+ {
+ sprintf("%d",$_[0]);
+ }
diff --git a/linux/crypto/ciphers/des/asm/desboth.pl b/linux/crypto/ciphers/des/asm/desboth.pl
new file mode 100644
index 000000000..8f939953a
--- /dev/null
+++ b/linux/crypto/ciphers/des/asm/desboth.pl
@@ -0,0 +1,79 @@
+#!/usr/bin/perl
+
+$L="edi";
+$R="esi";
+
+sub des_encrypt3
+ {
+ local($name,$enc)=@_;
+
+ &function_begin_B($name,"");
+ &push("ebx");
+ &mov("ebx",&wparam(0));
+
+ &push("ebp");
+ &push("esi");
+
+ &push("edi");
+
+ &comment("");
+ &comment("Load the data words");
+ &mov($L,&DWP(0,"ebx","",0));
+ &mov($R,&DWP(4,"ebx","",0));
+ &stack_push(3);
+
+ &comment("");
+ &comment("IP");
+ &IP_new($L,$R,"edx",0);
+
+ # put them back
+
+ if ($enc)
+ {
+ &mov(&DWP(4,"ebx","",0),$R);
+ &mov("eax",&wparam(1));
+ &mov(&DWP(0,"ebx","",0),"edx");
+ &mov("edi",&wparam(2));
+ &mov("esi",&wparam(3));
+ }
+ else
+ {
+ &mov(&DWP(4,"ebx","",0),$R);
+ &mov("esi",&wparam(1));
+ &mov(&DWP(0,"ebx","",0),"edx");
+ &mov("edi",&wparam(2));
+ &mov("eax",&wparam(3));
+ }
+ &mov(&swtmp(2), (($enc)?"1":"0"));
+ &mov(&swtmp(1), "eax");
+ &mov(&swtmp(0), "ebx");
+ &call("des_encrypt2");
+ &mov(&swtmp(2), (($enc)?"0":"1"));
+ &mov(&swtmp(1), "edi");
+ &mov(&swtmp(0), "ebx");
+ &call("des_encrypt2");
+ &mov(&swtmp(2), (($enc)?"1":"0"));
+ &mov(&swtmp(1), "esi");
+ &mov(&swtmp(0), "ebx");
+ &call("des_encrypt2");
+
+ &stack_pop(3);
+ &mov($L,&DWP(0,"ebx","",0));
+ &mov($R,&DWP(4,"ebx","",0));
+
+ &comment("");
+ &comment("FP");
+ &FP_new($L,$R,"eax",0);
+
+ &mov(&DWP(0,"ebx","",0),"eax");
+ &mov(&DWP(4,"ebx","",0),$R);
+
+ &pop("edi");
+ &pop("esi");
+ &pop("ebp");
+ &pop("ebx");
+ &ret();
+ &function_end_B($name);
+ }
+
+
diff --git a/linux/crypto/ciphers/des/asm/perlasm/cbc.pl b/linux/crypto/ciphers/des/asm/perlasm/cbc.pl
new file mode 100644
index 000000000..278930579
--- /dev/null
+++ b/linux/crypto/ciphers/des/asm/perlasm/cbc.pl
@@ -0,0 +1,342 @@
+#!/usr/bin/perl
+
+# void des_ncbc_encrypt(input, output, length, schedule, ivec, enc)
+# des_cblock (*input);
+# des_cblock (*output);
+# long length;
+# des_key_schedule schedule;
+# des_cblock (*ivec);
+# int enc;
+#
+# calls
+# des_encrypt((DES_LONG *)tin,schedule,DES_ENCRYPT);
+#
+
+#&cbc("des_ncbc_encrypt","des_encrypt",0);
+#&cbc("BF_cbc_encrypt","BF_encrypt","BF_encrypt",
+# 1,4,5,3,5,-1);
+#&cbc("des_ncbc_encrypt","des_encrypt","des_encrypt",
+# 0,4,5,3,5,-1);
+#&cbc("des_ede3_cbc_encrypt","des_encrypt3","des_decrypt3",
+# 0,6,7,3,4,5);
+#
+# When doing a cipher that needs bigendian order,
+# for encrypt, the iv is kept in bigendian form,
+# while for decrypt, it is kept in little endian.
+sub cbc
+ {
+ local($name,$enc_func,$dec_func,$swap,$iv_off,$enc_off,$p1,$p2,$p3)=@_;
+ # name is the function name
+ # enc_func and dec_func and the functions to call for encrypt/decrypt
+ # swap is true if byte order needs to be reversed
+ # iv_off is parameter number for the iv
+ # enc_off is parameter number for the encrypt/decrypt flag
+ # p1,p2,p3 are the offsets for parameters to be passed to the
+ # underlying calls.
+
+ &function_begin_B($name,"");
+ &comment("");
+
+ $in="esi";
+ $out="edi";
+ $count="ebp";
+
+ &push("ebp");
+ &push("ebx");
+ &push("esi");
+ &push("edi");
+
+ $data_off=4;
+ $data_off+=4 if ($p1 > 0);
+ $data_off+=4 if ($p2 > 0);
+ $data_off+=4 if ($p3 > 0);
+
+ &mov($count, &wparam(2)); # length
+
+ &comment("getting iv ptr from parameter $iv_off");
+ &mov("ebx", &wparam($iv_off)); # Get iv ptr
+
+ &mov($in, &DWP(0,"ebx","",0));# iv[0]
+ &mov($out, &DWP(4,"ebx","",0));# iv[1]
+
+ &push($out);
+ &push($in);
+ &push($out); # used in decrypt for iv[1]
+ &push($in); # used in decrypt for iv[0]
+
+ &mov("ebx", "esp"); # This is the address of tin[2]
+
+ &mov($in, &wparam(0)); # in
+ &mov($out, &wparam(1)); # out
+
+ # We have loaded them all, how lets push things
+ &comment("getting encrypt flag from parameter $enc_off");
+ &mov("ecx", &wparam($enc_off)); # Get enc flag
+ if ($p3 > 0)
+ {
+ &comment("get and push parameter $p3");
+ if ($enc_off != $p3)
+ { &mov("eax", &wparam($p3)); &push("eax"); }
+ else { &push("ecx"); }
+ }
+ if ($p2 > 0)
+ {
+ &comment("get and push parameter $p2");
+ if ($enc_off != $p2)
+ { &mov("eax", &wparam($p2)); &push("eax"); }
+ else { &push("ecx"); }
+ }
+ if ($p1 > 0)
+ {
+ &comment("get and push parameter $p1");
+ if ($enc_off != $p1)
+ { &mov("eax", &wparam($p1)); &push("eax"); }
+ else { &push("ecx"); }
+ }
+ &push("ebx"); # push data/iv
+
+ &cmp("ecx",0);
+ &jz(&label("decrypt"));
+
+ &and($count,0xfffffff8);
+ &mov("eax", &DWP($data_off,"esp","",0)); # load iv[0]
+ &mov("ebx", &DWP($data_off+4,"esp","",0)); # load iv[1]
+
+ &jz(&label("encrypt_finish"));
+
+ #############################################################
+
+ &set_label("encrypt_loop");
+ # encrypt start
+ # "eax" and "ebx" hold iv (or the last cipher text)
+
+ &mov("ecx", &DWP(0,$in,"",0)); # load first 4 bytes
+ &mov("edx", &DWP(4,$in,"",0)); # second 4 bytes
+
+ &xor("eax", "ecx");
+ &xor("ebx", "edx");
+
+ &bswap("eax") if $swap;
+ &bswap("ebx") if $swap;
+
+ &mov(&DWP($data_off,"esp","",0), "eax"); # put in array for call
+ &mov(&DWP($data_off+4,"esp","",0), "ebx"); #
+
+ &call($enc_func);
+
+ &mov("eax", &DWP($data_off,"esp","",0));
+ &mov("ebx", &DWP($data_off+4,"esp","",0));
+
+ &bswap("eax") if $swap;
+ &bswap("ebx") if $swap;
+
+ &mov(&DWP(0,$out,"",0),"eax");
+ &mov(&DWP(4,$out,"",0),"ebx");
+
+ # eax and ebx are the next iv.
+
+ &add($in, 8);
+ &add($out, 8);
+
+ &sub($count, 8);
+ &jnz(&label("encrypt_loop"));
+
+###################################################################3
+ &set_label("encrypt_finish");
+ &mov($count, &wparam(2)); # length
+ &and($count, 7);
+ &jz(&label("finish"));
+ &xor("ecx","ecx");
+ &xor("edx","edx");
+ &mov($count,&DWP(&label("cbc_enc_jmp_table"),"",$count,4));
+ &jmp_ptr($count);
+
+&set_label("ej7");
+ &xor("edx", "edx") if $ppro; # ppro friendly
+ &movb(&HB("edx"), &BP(6,$in,"",0));
+ &shl("edx",8);
+&set_label("ej6");
+ &movb(&HB("edx"), &BP(5,$in,"",0));
+&set_label("ej5");
+ &movb(&LB("edx"), &BP(4,$in,"",0));
+&set_label("ej4");
+ &mov("ecx", &DWP(0,$in,"",0));
+ &jmp(&label("ejend"));
+&set_label("ej3");
+ &movb(&HB("ecx"), &BP(2,$in,"",0));
+ &xor("ecx", "ecx") if $ppro; # ppro friendly
+ &shl("ecx",8);
+&set_label("ej2");
+ &movb(&HB("ecx"), &BP(1,$in,"",0));
+&set_label("ej1");
+ &movb(&LB("ecx"), &BP(0,$in,"",0));
+&set_label("ejend");
+
+ &xor("eax", "ecx");
+ &xor("ebx", "edx");
+
+ &bswap("eax") if $swap;
+ &bswap("ebx") if $swap;
+
+ &mov(&DWP($data_off,"esp","",0), "eax"); # put in array for call
+ &mov(&DWP($data_off+4,"esp","",0), "ebx"); #
+
+ &call($enc_func);
+
+ &mov("eax", &DWP($data_off,"esp","",0));
+ &mov("ebx", &DWP($data_off+4,"esp","",0));
+
+ &bswap("eax") if $swap;
+ &bswap("ebx") if $swap;
+
+ &mov(&DWP(0,$out,"",0),"eax");
+ &mov(&DWP(4,$out,"",0),"ebx");
+
+ &jmp(&label("finish"));
+
+ #############################################################
+ #############################################################
+ &set_label("decrypt",1);
+ # decrypt start
+ &and($count,0xfffffff8);
+ # The next 2 instructions are only for if the jz is taken
+ &mov("eax", &DWP($data_off+8,"esp","",0)); # get iv[0]
+ &mov("ebx", &DWP($data_off+12,"esp","",0)); # get iv[1]
+ &jz(&label("decrypt_finish"));
+
+ &set_label("decrypt_loop");
+ &mov("eax", &DWP(0,$in,"",0)); # load first 4 bytes
+ &mov("ebx", &DWP(4,$in,"",0)); # second 4 bytes
+
+ &bswap("eax") if $swap;
+ &bswap("ebx") if $swap;
+
+ &mov(&DWP($data_off,"esp","",0), "eax"); # put back
+ &mov(&DWP($data_off+4,"esp","",0), "ebx"); #
+
+ &call($dec_func);
+
+ &mov("eax", &DWP($data_off,"esp","",0)); # get return
+ &mov("ebx", &DWP($data_off+4,"esp","",0)); #
+
+ &bswap("eax") if $swap;
+ &bswap("ebx") if $swap;
+
+ &mov("ecx", &DWP($data_off+8,"esp","",0)); # get iv[0]
+ &mov("edx", &DWP($data_off+12,"esp","",0)); # get iv[1]
+
+ &xor("ecx", "eax");
+ &xor("edx", "ebx");
+
+ &mov("eax", &DWP(0,$in,"",0)); # get old cipher text,
+ &mov("ebx", &DWP(4,$in,"",0)); # next iv actually
+
+ &mov(&DWP(0,$out,"",0),"ecx");
+ &mov(&DWP(4,$out,"",0),"edx");
+
+ &mov(&DWP($data_off+8,"esp","",0), "eax"); # save iv
+ &mov(&DWP($data_off+12,"esp","",0), "ebx"); #
+
+ &add($in, 8);
+ &add($out, 8);
+
+ &sub($count, 8);
+ &jnz(&label("decrypt_loop"));
+############################ ENDIT #######################3
+ &set_label("decrypt_finish");
+ &mov($count, &wparam(2)); # length
+ &and($count, 7);
+ &jz(&label("finish"));
+
+ &mov("eax", &DWP(0,$in,"",0)); # load first 4 bytes
+ &mov("ebx", &DWP(4,$in,"",0)); # second 4 bytes
+
+ &bswap("eax") if $swap;
+ &bswap("ebx") if $swap;
+
+ &mov(&DWP($data_off,"esp","",0), "eax"); # put back
+ &mov(&DWP($data_off+4,"esp","",0), "ebx"); #
+
+ &call($dec_func);
+
+ &mov("eax", &DWP($data_off,"esp","",0)); # get return
+ &mov("ebx", &DWP($data_off+4,"esp","",0)); #
+
+ &bswap("eax") if $swap;
+ &bswap("ebx") if $swap;
+
+ &mov("ecx", &DWP($data_off+8,"esp","",0)); # get iv[0]
+ &mov("edx", &DWP($data_off+12,"esp","",0)); # get iv[1]
+
+ &xor("ecx", "eax");
+ &xor("edx", "ebx");
+
+ # this is for when we exit
+ &mov("eax", &DWP(0,$in,"",0)); # get old cipher text,
+ &mov("ebx", &DWP(4,$in,"",0)); # next iv actually
+
+&set_label("dj7");
+ &rotr("edx", 16);
+ &movb(&BP(6,$out,"",0), &LB("edx"));
+ &shr("edx",16);
+&set_label("dj6");
+ &movb(&BP(5,$out,"",0), &HB("edx"));
+&set_label("dj5");
+ &movb(&BP(4,$out,"",0), &LB("edx"));
+&set_label("dj4");
+ &mov(&DWP(0,$out,"",0), "ecx");
+ &jmp(&label("djend"));
+&set_label("dj3");
+ &rotr("ecx", 16);
+ &movb(&BP(2,$out,"",0), &LB("ecx"));
+ &shl("ecx",16);
+&set_label("dj2");
+ &movb(&BP(1,$in,"",0), &HB("ecx"));
+&set_label("dj1");
+ &movb(&BP(0,$in,"",0), &LB("ecx"));
+&set_label("djend");
+
+ # final iv is still in eax:ebx
+ &jmp(&label("finish"));
+
+
+############################ FINISH #######################3
+ &set_label("finish",1);
+ &mov("ecx", &wparam($iv_off)); # Get iv ptr
+
+ #################################################
+ $total=16+4;
+ $total+=4 if ($p1 > 0);
+ $total+=4 if ($p2 > 0);
+ $total+=4 if ($p3 > 0);
+ &add("esp",$total);
+
+ &mov(&DWP(0,"ecx","",0), "eax"); # save iv
+ &mov(&DWP(4,"ecx","",0), "ebx"); # save iv
+
+ &function_end_A($name);
+
+ &set_label("cbc_enc_jmp_table",1);
+ &data_word("0");
+ &data_word(&label("ej1"));
+ &data_word(&label("ej2"));
+ &data_word(&label("ej3"));
+ &data_word(&label("ej4"));
+ &data_word(&label("ej5"));
+ &data_word(&label("ej6"));
+ &data_word(&label("ej7"));
+ &set_label("cbc_dec_jmp_table",1);
+ &data_word("0");
+ &data_word(&label("dj1"));
+ &data_word(&label("dj2"));
+ &data_word(&label("dj3"));
+ &data_word(&label("dj4"));
+ &data_word(&label("dj5"));
+ &data_word(&label("dj6"));
+ &data_word(&label("dj7"));
+
+ &function_end_B($name);
+
+ }
+
+1;
diff --git a/linux/crypto/ciphers/des/asm/perlasm/readme b/linux/crypto/ciphers/des/asm/perlasm/readme
new file mode 100644
index 000000000..f02bbee75
--- /dev/null
+++ b/linux/crypto/ciphers/des/asm/perlasm/readme
@@ -0,0 +1,124 @@
+The perl scripts in this directory are my 'hack' to generate
+multiple different assembler formats via the one origional script.
+
+The way to use this library is to start with adding the path to this directory
+and then include it.
+
+push(@INC,"perlasm","../../perlasm");
+require "x86asm.pl";
+
+The first thing we do is setup the file and type of assember
+
+&asm_init($ARGV[0],$0);
+
+The first argument is the 'type'. Currently
+'cpp', 'sol', 'a.out', 'elf' or 'win32'.
+Argument 2 is the file name.
+
+The reciprocal function is
+&asm_finish() which should be called at the end.
+
+There are 2 main 'packages'. x86ms.pl, which is the microsoft assembler,
+and x86unix.pl which is the unix (gas) version.
+
+Functions of interest are:
+&external_label("des_SPtrans"); declare and external variable
+&LB(reg); Low byte for a register
+&HB(reg); High byte for a register
+&BP(off,base,index,scale) Byte pointer addressing
+&DWP(off,base,index,scale) Word pointer addressing
+&stack_push(num) Basically a 'sub esp, num*4' with extra
+&stack_pop(num) inverse of stack_push
+&function_begin(name,extra) Start a function with pushing of
+ edi, esi, ebx and ebp. extra is extra win32
+ external info that may be required.
+&function_begin_B(name,extra) Same as norma function_begin but no pushing.
+&function_end(name) Call at end of function.
+&function_end_A(name) Standard pop and ret, for use inside functions
+&function_end_B(name) Call at end but with poping or 'ret'.
+&swtmp(num) Address on stack temp word.
+&wparam(num) Parameter number num, that was push
+ in C convention. This all works over pushes
+ and pops.
+&comment("hello there") Put in a comment.
+&label("loop") Refer to a label, normally a jmp target.
+&set_label("loop") Set a label at this point.
+&data_word(word) Put in a word of data.
+
+So how does this all hold together? Given
+
+int calc(int len, int *data)
+ {
+ int i,j=0;
+
+ for (i=0; i<len; i++)
+ {
+ j+=other(data[i]);
+ }
+ }
+
+So a very simple version of this function could be coded as
+
+ push(@INC,"perlasm","../../perlasm");
+ require "x86asm.pl";
+
+ &asm_init($ARGV[0],"cacl.pl");
+
+ &external_label("other");
+
+ $tmp1= "eax";
+ $j= "edi";
+ $data= "esi";
+ $i= "ebp";
+
+ &comment("a simple function");
+ &function_begin("calc");
+ &mov( $data, &wparam(1)); # data
+ &xor( $j, $j);
+ &xor( $i, $i);
+
+ &set_label("loop");
+ &cmp( $i, &wparam(0));
+ &jge( &label("end"));
+
+ &mov( $tmp1, &DWP(0,$data,$i,4));
+ &push( $tmp1);
+ &call( "other");
+ &add( $j, "eax");
+ &pop( $tmp1);
+ &inc( $i);
+ &jmp( &label("loop"));
+
+ &set_label("end");
+ &mov( "eax", $j);
+
+ &function_end("calc");
+
+ &asm_finish();
+
+The above example is very very unoptimised but gives an idea of how
+things work.
+
+There is also a cbc mode function generator in cbc.pl
+
+&cbc( $name,
+ $encrypt_function_name,
+ $decrypt_function_name,
+ $true_if_byte_swap_needed,
+ $parameter_number_for_iv,
+ $parameter_number_for_encrypt_flag,
+ $first_parameter_to_pass,
+ $second_parameter_to_pass,
+ $third_parameter_to_pass);
+
+So for example, given
+void BF_encrypt(BF_LONG *data,BF_KEY *key);
+void BF_decrypt(BF_LONG *data,BF_KEY *key);
+void BF_cbc_encrypt(unsigned char *in, unsigned char *out, long length,
+ BF_KEY *ks, unsigned char *iv, int enc);
+
+&cbc("BF_cbc_encrypt","BF_encrypt","BF_encrypt",1,4,5,3,-1,-1);
+
+&cbc("des_ncbc_encrypt","des_encrypt","des_encrypt",0,4,5,3,5,-1);
+&cbc("des_ede3_cbc_encrypt","des_encrypt3","des_decrypt3",0,6,7,3,4,5);
+
diff --git a/linux/crypto/ciphers/des/asm/perlasm/x86asm.pl b/linux/crypto/ciphers/des/asm/perlasm/x86asm.pl
new file mode 100644
index 000000000..164a942c5
--- /dev/null
+++ b/linux/crypto/ciphers/des/asm/perlasm/x86asm.pl
@@ -0,0 +1,111 @@
+#!/usr/bin/perl
+
+# require 'x86asm.pl';
+# &asm_init("cpp","des-586.pl");
+# XXX
+# XXX
+# main'asm_finish
+
+sub main'asm_finish
+ {
+ &file_end();
+ &asm_finish_cpp() if $cpp;
+ print &asm_get_output();
+ }
+
+sub main'asm_init
+ {
+ ($type,$fn)=@_;
+ $filename=$fn;
+
+ $cpp=$sol=$aout=$win32=0;
+ if ( ($type eq "elf"))
+ { require "x86unix.pl"; }
+ elsif ( ($type eq "a.out"))
+ { $aout=1; require "x86unix.pl"; }
+ elsif ( ($type eq "sol"))
+ { $sol=1; require "x86unix.pl"; }
+ elsif ( ($type eq "cpp"))
+ { $cpp=1; require "x86unix.pl"; }
+ elsif ( ($type eq "win32"))
+ { $win32=1; require "x86ms.pl"; }
+ else
+ {
+ print STDERR <<"EOF";
+Pick one target type from
+ elf - linux, FreeBSD etc
+ a.out - old linux
+ sol - x86 solaris
+ cpp - format so x86unix.cpp can be used
+ win32 - Windows 95/Windows NT
+EOF
+ exit(1);
+ }
+
+ &asm_init_output();
+
+&comment("Don't even think of reading this code");
+&comment("It was automatically generated by $filename");
+&comment("Which is a perl program used to generate the x86 assember for");
+&comment("any of elf, a.out, BSDI,Win32, or Solaris");
+&comment("eric <eay\@cryptsoft.com>");
+&comment("");
+
+ $filename =~ s/\.pl$//;
+ &file($filename);
+ }
+
+sub asm_finish_cpp
+ {
+ return unless $cpp;
+
+ local($tmp,$i);
+ foreach $i (&get_labels())
+ {
+ $tmp.="#define $i _$i\n";
+ }
+ print <<"EOF";
+/* Run the C pre-processor over this file with one of the following defined
+ * ELF - elf object files,
+ * OUT - a.out object files,
+ * BSDI - BSDI style a.out object files
+ * SOL - Solaris style elf
+ */
+
+#define TYPE(a,b) .type a,b
+#define SIZE(a,b) .size a,b
+
+#if defined(OUT) || defined(BSDI)
+$tmp
+#endif
+
+#ifdef OUT
+#define OK 1
+#define ALIGN 4
+#endif
+
+#ifdef BSDI
+#define OK 1
+#define ALIGN 4
+#undef SIZE
+#undef TYPE
+#endif
+
+#if defined(ELF) || defined(SOL)
+#define OK 1
+#define ALIGN 16
+#endif
+
+#ifndef OK
+You need to define one of
+ELF - elf systems - linux-elf, NetBSD and DG-UX
+OUT - a.out systems - linux-a.out and FreeBSD
+SOL - solaris systems, which are elf with strange comment lines
+BSDI - a.out with a very primative version of as.
+#endif
+
+/* Let the Assembler begin :-) */
+EOF
+ }
+
+1;
diff --git a/linux/crypto/ciphers/des/asm/perlasm/x86ms.pl b/linux/crypto/ciphers/des/asm/perlasm/x86ms.pl
new file mode 100644
index 000000000..0681ea18c
--- /dev/null
+++ b/linux/crypto/ciphers/des/asm/perlasm/x86ms.pl
@@ -0,0 +1,345 @@
+#!/usr/bin/perl
+
+package x86ms;
+
+$label="L000";
+
+%lb=( 'eax', 'al',
+ 'ebx', 'bl',
+ 'ecx', 'cl',
+ 'edx', 'dl',
+ 'ax', 'al',
+ 'bx', 'bl',
+ 'cx', 'cl',
+ 'dx', 'dl',
+ );
+
+%hb=( 'eax', 'ah',
+ 'ebx', 'bh',
+ 'ecx', 'ch',
+ 'edx', 'dh',
+ 'ax', 'ah',
+ 'bx', 'bh',
+ 'cx', 'ch',
+ 'dx', 'dh',
+ );
+
+sub main'asm_init_output { @out=(); }
+sub main'asm_get_output { return(@out); }
+sub main'get_labels { return(@labels); }
+sub main'external_label { push(@labels,@_); }
+
+sub main'LB
+ {
+ (defined($lb{$_[0]})) || die "$_[0] does not have a 'low byte'\n";
+ return($lb{$_[0]});
+ }
+
+sub main'HB
+ {
+ (defined($hb{$_[0]})) || die "$_[0] does not have a 'high byte'\n";
+ return($hb{$_[0]});
+ }
+
+sub main'BP
+ {
+ &get_mem("BYTE",@_);
+ }
+
+sub main'DWP
+ {
+ &get_mem("DWORD",@_);
+ }
+
+sub main'stack_push
+ {
+ local($num)=@_;
+ $stack+=$num*4;
+ &main'sub("esp",$num*4);
+ }
+
+sub main'stack_pop
+ {
+ local($num)=@_;
+ $stack-=$num*4;
+ &main'add("esp",$num*4);
+ }
+
+sub get_mem
+ {
+ local($size,$addr,$reg1,$reg2,$idx)=@_;
+ local($t,$post);
+ local($ret)="$size PTR ";
+
+ $addr =~ s/^\s+//;
+ if ($addr =~ /^(.+)\+(.+)$/)
+ {
+ $reg2=&conv($1);
+ $addr="_$2";
+ }
+ elsif ($addr =~ /^[_a-zA-Z]/)
+ {
+ $addr="_$addr";
+ }
+
+ $reg1="$regs{$reg1}" if defined($regs{$reg1});
+ $reg2="$regs{$reg2}" if defined($regs{$reg2});
+ if (($addr ne "") && ($addr ne 0))
+ {
+ if ($addr !~ /^-/)
+ { $ret.=$addr; }
+ else { $post=$addr; }
+ }
+ if ($reg2 ne "")
+ {
+ $t="";
+ $t="*$idx" if ($idx != 0);
+ $reg1="+".$reg1 if ("$reg1$post" ne "");
+ $ret.="[$reg2$t$reg1$post]";
+ }
+ else
+ {
+ $ret.="[$reg1$post]"
+ }
+ return($ret);
+ }
+
+sub main'mov { &out2("mov",@_); }
+sub main'movb { &out2("mov",@_); }
+sub main'and { &out2("and",@_); }
+sub main'or { &out2("or",@_); }
+sub main'shl { &out2("shl",@_); }
+sub main'shr { &out2("shr",@_); }
+sub main'xor { &out2("xor",@_); }
+sub main'xorb { &out2("xor",@_); }
+sub main'add { &out2("add",@_); }
+sub main'adc { &out2("adc",@_); }
+sub main'sub { &out2("sub",@_); }
+sub main'rotl { &out2("rol",@_); }
+sub main'rotr { &out2("ror",@_); }
+sub main'exch { &out2("xchg",@_); }
+sub main'cmp { &out2("cmp",@_); }
+sub main'lea { &out2("lea",@_); }
+sub main'mul { &out1("mul",@_); }
+sub main'div { &out1("div",@_); }
+sub main'dec { &out1("dec",@_); }
+sub main'inc { &out1("inc",@_); }
+sub main'jmp { &out1("jmp",@_); }
+sub main'jmp_ptr { &out1p("jmp",@_); }
+sub main'je { &out1("je",@_); }
+sub main'jle { &out1("jle",@_); }
+sub main'jz { &out1("jz",@_); }
+sub main'jge { &out1("jge",@_); }
+sub main'jl { &out1("jl",@_); }
+sub main'jb { &out1("jb",@_); }
+sub main'jnz { &out1("jnz",@_); }
+sub main'jne { &out1("jne",@_); }
+sub main'push { &out1("push",@_); $stack+=4; }
+sub main'pop { &out1("pop",@_); $stack-=4; }
+sub main'bswap { &out1("bswap",@_); &using486(); }
+sub main'not { &out1("not",@_); }
+sub main'call { &out1("call",'_'.$_[0]); }
+sub main'ret { &out0("ret"); }
+sub main'nop { &out0("nop"); }
+
+sub out2
+ {
+ local($name,$p1,$p2)=@_;
+ local($l,$t);
+
+ push(@out,"\t$name\t");
+ $t=&conv($p1).",";
+ $l=length($t);
+ push(@out,$t);
+ $l=4-($l+9)/8;
+ push(@out,"\t" x $l);
+ push(@out,&conv($p2));
+ push(@out,"\n");
+ }
+
+sub out0
+ {
+ local($name)=@_;
+
+ push(@out,"\t$name\n");
+ }
+
+sub out1
+ {
+ local($name,$p1)=@_;
+ local($l,$t);
+
+ push(@out,"\t$name\t".&conv($p1)."\n");
+ }
+
+sub conv
+ {
+ local($p)=@_;
+
+ $p =~ s/0x([0-9A-Fa-f]+)/0$1h/;
+ return $p;
+ }
+
+sub using486
+ {
+ return if $using486;
+ $using486++;
+ grep(s/\.386/\.486/,@out);
+ }
+
+sub main'file
+ {
+ local($file)=@_;
+
+ local($tmp)=<<"EOF";
+ TITLE $file.asm
+ .386
+.model FLAT
+EOF
+ push(@out,$tmp);
+ }
+
+sub main'function_begin
+ {
+ local($func,$extra)=@_;
+
+ push(@labels,$func);
+
+ local($tmp)=<<"EOF";
+_TEXT SEGMENT
+PUBLIC _$func
+$extra
+_$func PROC NEAR
+ push ebp
+ push ebx
+ push esi
+ push edi
+EOF
+ push(@out,$tmp);
+ $stack=20;
+ }
+
+sub main'function_begin_B
+ {
+ local($func,$extra)=@_;
+
+ local($tmp)=<<"EOF";
+_TEXT SEGMENT
+PUBLIC _$func
+$extra
+_$func PROC NEAR
+EOF
+ push(@out,$tmp);
+ $stack=4;
+ }
+
+sub main'function_end
+ {
+ local($func)=@_;
+
+ local($tmp)=<<"EOF";
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+_$func ENDP
+_TEXT ENDS
+EOF
+ push(@out,$tmp);
+ $stack=0;
+ %label=();
+ }
+
+sub main'function_end_B
+ {
+ local($func)=@_;
+
+ local($tmp)=<<"EOF";
+_$func ENDP
+_TEXT ENDS
+EOF
+ push(@out,$tmp);
+ $stack=0;
+ %label=();
+ }
+
+sub main'function_end_A
+ {
+ local($func)=@_;
+
+ local($tmp)=<<"EOF";
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+EOF
+ push(@out,$tmp);
+ }
+
+sub main'file_end
+ {
+ push(@out,"END\n");
+ }
+
+sub main'wparam
+ {
+ local($num)=@_;
+
+ return(&main'DWP($stack+$num*4,"esp","",0));
+ }
+
+sub main'swtmp
+ {
+ return(&main'DWP($_[0]*4,"esp","",0));
+ }
+
+# Should use swtmp, which is above esp. Linix can trash the stack above esp
+#sub main'wtmp
+# {
+# local($num)=@_;
+#
+# return(&main'DWP(-(($num+1)*4),"esp","",0));
+# }
+
+sub main'comment
+ {
+ foreach (@_)
+ {
+ push(@out,"\t; $_\n");
+ }
+ }
+
+sub main'label
+ {
+ if (!defined($label{$_[0]}))
+ {
+ $label{$_[0]}="\$${label}${_[0]}";
+ $label++;
+ }
+ return($label{$_[0]});
+ }
+
+sub main'set_label
+ {
+ if (!defined($label{$_[0]}))
+ {
+ $label{$_[0]}="${label}${_[0]}";
+ $label++;
+ }
+ push(@out,"$label{$_[0]}:\n");
+ }
+
+sub main'data_word
+ {
+ push(@out,"\tDD\t$_[0]\n");
+ }
+
+sub out1p
+ {
+ local($name,$p1)=@_;
+ local($l,$t);
+
+ push(@out,"\t$name\t ".&conv($p1)."\n");
+ }
diff --git a/linux/crypto/ciphers/des/asm/perlasm/x86unix.pl b/linux/crypto/ciphers/des/asm/perlasm/x86unix.pl
new file mode 100644
index 000000000..1d661221c
--- /dev/null
+++ b/linux/crypto/ciphers/des/asm/perlasm/x86unix.pl
@@ -0,0 +1,403 @@
+#!/usr/bin/perl
+
+package x86unix;
+
+$label="L000";
+
+$align=($main'aout)?"4":"16";
+$under=($main'aout)?"_":"";
+$com_start=($main'sol)?"/":"#";
+
+sub main'asm_init_output { @out=(); }
+sub main'asm_get_output { return(@out); }
+sub main'get_labels { return(@labels); }
+sub main'external_label { push(@labels,@_); }
+
+if ($main'cpp)
+ {
+ $align="ALIGN";
+ $under="";
+ $com_start='/*';
+ $com_end='*/';
+ }
+
+%lb=( 'eax', '%al',
+ 'ebx', '%bl',
+ 'ecx', '%cl',
+ 'edx', '%dl',
+ 'ax', '%al',
+ 'bx', '%bl',
+ 'cx', '%cl',
+ 'dx', '%dl',
+ );
+
+%hb=( 'eax', '%ah',
+ 'ebx', '%bh',
+ 'ecx', '%ch',
+ 'edx', '%dh',
+ 'ax', '%ah',
+ 'bx', '%bh',
+ 'cx', '%ch',
+ 'dx', '%dh',
+ );
+
+%regs=( 'eax', '%eax',
+ 'ebx', '%ebx',
+ 'ecx', '%ecx',
+ 'edx', '%edx',
+ 'esi', '%esi',
+ 'edi', '%edi',
+ 'ebp', '%ebp',
+ 'esp', '%esp',
+ );
+
+%reg_val=(
+ 'eax', 0x00,
+ 'ebx', 0x03,
+ 'ecx', 0x01,
+ 'edx', 0x02,
+ 'esi', 0x06,
+ 'edi', 0x07,
+ 'ebp', 0x05,
+ 'esp', 0x04,
+ );
+
+sub main'LB
+ {
+ (defined($lb{$_[0]})) || die "$_[0] does not have a 'low byte'\n";
+ return($lb{$_[0]});
+ }
+
+sub main'HB
+ {
+ (defined($hb{$_[0]})) || die "$_[0] does not have a 'high byte'\n";
+ return($hb{$_[0]});
+ }
+
+sub main'DWP
+ {
+ local($addr,$reg1,$reg2,$idx)=@_;
+
+ $ret="";
+ $addr =~ s/(^|[+ \t])([A-Za-z_]+)($|[+ \t])/$1$under$2$3/;
+ $reg1="$regs{$reg1}" if defined($regs{$reg1});
+ $reg2="$regs{$reg2}" if defined($regs{$reg2});
+ $ret.=$addr if ($addr ne "") && ($addr ne 0);
+ if ($reg2 ne "")
+ { $ret.="($reg1,$reg2,$idx)"; }
+ else
+ { $ret.="($reg1)" }
+ return($ret);
+ }
+
+sub main'BP
+ {
+ return(&main'DWP(@_));
+ }
+
+#sub main'BP
+# {
+# local($addr,$reg1,$reg2,$idx)=@_;
+#
+# $ret="";
+#
+# $addr =~ s/(^|[+ \t])([A-Za-z_]+)($|[+ \t])/$1$under$2$3/;
+# $reg1="$regs{$reg1}" if defined($regs{$reg1});
+# $reg2="$regs{$reg2}" if defined($regs{$reg2});
+# $ret.=$addr if ($addr ne "") && ($addr ne 0);
+# if ($reg2 ne "")
+# { $ret.="($reg1,$reg2,$idx)"; }
+# else
+# { $ret.="($reg1)" }
+# return($ret);
+# }
+
+sub main'mov { &out2("movl",@_); }
+sub main'movb { &out2("movb",@_); }
+sub main'and { &out2("andl",@_); }
+sub main'or { &out2("orl",@_); }
+sub main'shl { &out2("sall",@_); }
+sub main'shr { &out2("shrl",@_); }
+sub main'xor { &out2("xorl",@_); }
+sub main'xorb { &out2("xorb",@_); }
+sub main'add { &out2("addl",@_); }
+sub main'adc { &out2("adcl",@_); }
+sub main'sub { &out2("subl",@_); }
+sub main'rotl { &out2("roll",@_); }
+sub main'rotr { &out2("rorl",@_); }
+sub main'exch { &out2("xchg",@_); }
+sub main'cmp { &out2("cmpl",@_); }
+sub main'lea { &out2("leal",@_); }
+sub main'mul { &out1("mull",@_); }
+sub main'div { &out1("divl",@_); }
+sub main'jmp { &out1("jmp",@_); }
+sub main'jmp_ptr { &out1p("jmp",@_); }
+sub main'je { &out1("je",@_); }
+sub main'jle { &out1("jle",@_); }
+sub main'jne { &out1("jne",@_); }
+sub main'jnz { &out1("jnz",@_); }
+sub main'jz { &out1("jz",@_); }
+sub main'jge { &out1("jge",@_); }
+sub main'jl { &out1("jl",@_); }
+sub main'jb { &out1("jb",@_); }
+sub main'dec { &out1("decl",@_); }
+sub main'inc { &out1("incl",@_); }
+sub main'push { &out1("pushl",@_); $stack+=4; }
+sub main'pop { &out1("popl",@_); $stack-=4; }
+sub main'bswap { &out1("bswapl",@_); }
+sub main'not { &out1("notl",@_); }
+sub main'call { &out1("call",$under.$_[0]); }
+sub main'ret { &out0("ret"); }
+sub main'nop { &out0("nop"); }
+
+sub out2
+ {
+ local($name,$p1,$p2)=@_;
+ local($l,$ll,$t);
+ local(%special)=( "roll",0xD1C0,"rorl",0xD1C8,
+ "rcll",0xD1D0,"rcrl",0xD1D8,
+ "shll",0xD1E0,"shrl",0xD1E8,
+ "sarl",0xD1F8);
+
+ if ((defined($special{$name})) && defined($regs{$p1}) && ($p2 == 1))
+ {
+ $op=$special{$name}|$reg_val{$p1};
+ $tmp1=sprintf ".byte %d\n",($op>>8)&0xff;
+ $tmp2=sprintf ".byte %d\t",$op &0xff;
+ push(@out,$tmp1);
+ push(@out,$tmp2);
+
+ $p2=&conv($p2);
+ $p1=&conv($p1);
+ &main'comment("$name $p2 $p1");
+ return;
+ }
+
+ push(@out,"\t$name\t");
+ $t=&conv($p2).",";
+ $l=length($t);
+ push(@out,$t);
+ $ll=4-($l+9)/8;
+ $tmp1=sprintf "\t" x $ll;
+ push(@out,$tmp1);
+ push(@out,&conv($p1)."\n");
+ }
+
+sub out1
+ {
+ local($name,$p1)=@_;
+ local($l,$t);
+
+ push(@out,"\t$name\t".&conv($p1)."\n");
+ }
+
+sub out1p
+ {
+ local($name,$p1)=@_;
+ local($l,$t);
+
+ push(@out,"\t$name\t*".&conv($p1)."\n");
+ }
+
+sub out0
+ {
+ push(@out,"\t$_[0]\n");
+ }
+
+sub conv
+ {
+ local($p)=@_;
+
+# $p =~ s/0x([0-9A-Fa-f]+)/0$1h/;
+
+ $p=$regs{$p} if (defined($regs{$p}));
+
+ $p =~ s/^(-{0,1}[0-9A-Fa-f]+)$/\$$1/;
+ $p =~ s/^(0x[0-9A-Fa-f]+)$/\$$1/;
+ return $p;
+ }
+
+sub main'file
+ {
+ local($file)=@_;
+
+ local($tmp)=<<"EOF";
+ .file "$file.s"
+ .version "01.01"
+gcc2_compiled.:
+EOF
+ push(@out,$tmp);
+ }
+
+sub main'function_begin
+ {
+ local($func)=@_;
+
+ $func=$under.$func;
+
+ local($tmp)=<<"EOF";
+.text
+ .align $align
+.globl $func
+EOF
+ push(@out,$tmp);
+ if ($main'cpp)
+ { $tmp=push(@out,"\tTYPE($func,\@function)\n"); }
+ else { $tmp=push(@out,"\t.type\t$func,\@function\n"); }
+ push(@out,"$func:\n");
+ $tmp=<<"EOF";
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+
+EOF
+ push(@out,$tmp);
+ $stack=20;
+ }
+
+sub main'function_begin_B
+ {
+ local($func,$extra)=@_;
+
+ $func=$under.$func;
+
+ local($tmp)=<<"EOF";
+.text
+ .align $align
+.globl $func
+EOF
+ push(@out,$tmp);
+ if ($main'cpp)
+ { push(@out,"\tTYPE($func,\@function)\n"); }
+ else { push(@out,"\t.type $func,\@function\n"); }
+ push(@out,"$func:\n");
+ $stack=4;
+ }
+
+sub main'function_end
+ {
+ local($func)=@_;
+
+ $func=$under.$func;
+
+ local($tmp)=<<"EOF";
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.${func}_end:
+EOF
+ push(@out,$tmp);
+ if ($main'cpp)
+ { push(@out,"\tSIZE($func,.${func}_end-$func)\n"); }
+ else { push(@out,"\t.size\t$func,.${func}_end-$func\n"); }
+ push(@out,".ident \"$func\"\n");
+ $stack=0;
+ %label=();
+ }
+
+sub main'function_end_A
+ {
+ local($func)=@_;
+
+ local($tmp)=<<"EOF";
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+EOF
+ push(@out,$tmp);
+ }
+
+sub main'function_end_B
+ {
+ local($func)=@_;
+
+ $func=$under.$func;
+
+ push(@out,".${func}_end:\n");
+ if ($main'cpp)
+ { push(@out,"\tSIZE($func,.${func}_end-$func)\n"); }
+ else { push(@out,"\t.size\t$func,.${func}_end-$func\n"); }
+ push(@out,".ident \"desasm.pl\"\n");
+ $stack=0;
+ %label=();
+ }
+
+sub main'wparam
+ {
+ local($num)=@_;
+
+ return(&main'DWP($stack+$num*4,"esp","",0));
+ }
+
+sub main'stack_push
+ {
+ local($num)=@_;
+ $stack+=$num*4;
+ &main'sub("esp",$num*4);
+ }
+
+sub main'stack_pop
+ {
+ local($num)=@_;
+ $stack-=$num*4;
+ &main'add("esp",$num*4);
+ }
+
+sub main'swtmp
+ {
+ return(&main'DWP($_[0]*4,"esp","",0));
+ }
+
+# Should use swtmp, which is above esp. Linix can trash the stack above esp
+#sub main'wtmp
+# {
+# local($num)=@_;
+#
+# return(&main'DWP(-($num+1)*4,"esp","",0));
+# }
+
+sub main'comment
+ {
+ foreach (@_)
+ {
+ if (/^\s*$/)
+ { push(@out,"\n"); }
+ else
+ { push(@out,"\t$com_start $_ $com_end\n"); }
+ }
+ }
+
+sub main'label
+ {
+ if (!defined($label{$_[0]}))
+ {
+ $label{$_[0]}=".${label}${_[0]}";
+ $label++;
+ }
+ return($label{$_[0]});
+ }
+
+sub main'set_label
+ {
+ if (!defined($label{$_[0]}))
+ {
+ $label{$_[0]}=".${label}${_[0]}";
+ $label++;
+ }
+ push(@out,".align $align\n") if ($_[1] != 0);
+ push(@out,"$label{$_[0]}:\n");
+ }
+
+sub main'file_end
+ {
+ }
+
+sub main'data_word
+ {
+ push(@out,"\t.long $_[0]\n");
+ }
diff --git a/linux/crypto/ciphers/des/asm/readme b/linux/crypto/ciphers/des/asm/readme
new file mode 100644
index 000000000..f8529d930
--- /dev/null
+++ b/linux/crypto/ciphers/des/asm/readme
@@ -0,0 +1,131 @@
+First up, let me say I don't like writing in assembler. It is not portable,
+dependant on the particular CPU architecture release and is generally a pig
+to debug and get right. Having said that, the x86 architecture is probably
+the most important for speed due to number of boxes and since
+it appears to be the worst architecture to to get
+good C compilers for. So due to this, I have lowered myself to do
+assembler for the inner DES routines in libdes :-).
+
+The file to implement in assembler is des_enc.c. Replace the following
+4 functions
+des_encrypt(DES_LONG data[2],des_key_schedule ks, int encrypt);
+des_encrypt2(DES_LONG data[2],des_key_schedule ks, int encrypt);
+des_encrypt3(DES_LONG data[2],des_key_schedule ks1,ks2,ks3);
+des_decrypt3(DES_LONG data[2],des_key_schedule ks1,ks2,ks3);
+
+They encrypt/decrypt the 64 bits held in 'data' using
+the 'ks' key schedules. The only difference between the 4 functions is that
+des_encrypt2() does not perform IP() or FP() on the data (this is an
+optimization for when doing triple DES and des_encrypt3() and des_decrypt3()
+perform triple des. The triple DES routines are in here because it does
+make a big difference to have them located near the des_encrypt2 function
+at link time..
+
+Now as we all know, there are lots of different operating systems running on
+x86 boxes, and unfortunately they normally try to make sure their assembler
+formating is not the same as the other peoples.
+The 4 main formats I know of are
+Microsoft Windows 95/Windows NT
+Elf Includes Linux and FreeBSD(?).
+a.out The older Linux.
+Solaris Same as Elf but different comments :-(.
+
+Now I was not overly keen to write 4 different copies of the same code,
+so I wrote a few perl routines to output the correct assembler, given
+a target assembler type. This code is ugly and is just a hack.
+The libraries are x86unix.pl and x86ms.pl.
+des586.pl, des686.pl and des-som[23].pl are the programs to actually
+generate the assembler.
+
+So to generate elf assembler
+perl des-som3.pl elf >dx86-elf.s
+For Windows 95/NT
+perl des-som2.pl win32 >win32.asm
+
+[ update 4 Jan 1996 ]
+I have added another way to do things.
+perl des-som3.pl cpp >dx86-cpp.s
+generates a file that will be included by dx86unix.cpp when it is compiled.
+To build for elf, a.out, solaris, bsdi etc,
+cc -E -DELF asm/dx86unix.cpp | as -o asm/dx86-elf.o
+cc -E -DSOL asm/dx86unix.cpp | as -o asm/dx86-sol.o
+cc -E -DOUT asm/dx86unix.cpp | as -o asm/dx86-out.o
+cc -E -DBSDI asm/dx86unix.cpp | as -o asm/dx86bsdi.o
+This was done to cut down the number of files in the distribution.
+
+Now the ugly part. I acquired my copy of Intels
+"Optimization's For Intel's 32-Bit Processors" and found a few interesting
+things. First, the aim of the exersize is to 'extract' one byte at a time
+from a word and do an array lookup. This involves getting the byte from
+the 4 locations in the word and moving it to a new word and doing the lookup.
+The most obvious way to do this is
+xor eax, eax # clear word
+movb al, cl # get low byte
+xor edi DWORD PTR 0x100+des_SP[eax] # xor in word
+movb al, ch # get next byte
+xor edi DWORD PTR 0x300+des_SP[eax] # xor in word
+shr ecx 16
+which seems ok. For the pentium, this system appears to be the best.
+One has to do instruction interleaving to keep both functional units
+operating, but it is basically very efficient.
+
+Now the crunch. When a full register is used after a partial write, eg.
+mov al, cl
+xor edi, DWORD PTR 0x100+des_SP[eax]
+386 - 1 cycle stall
+486 - 1 cycle stall
+586 - 0 cycle stall
+686 - at least 7 cycle stall (page 22 of the above mentioned document).
+
+So the technique that produces the best results on a pentium, according to
+the documentation, will produce hideous results on a pentium pro.
+
+To get around this, des686.pl will generate code that is not as fast on
+a pentium, should be very good on a pentium pro.
+mov eax, ecx # copy word
+shr ecx, 8 # line up next byte
+and eax, 0fch # mask byte
+xor edi DWORD PTR 0x100+des_SP[eax] # xor in array lookup
+mov eax, ecx # get word
+shr ecx 8 # line up next byte
+and eax, 0fch # mask byte
+xor edi DWORD PTR 0x300+des_SP[eax] # xor in array lookup
+
+Due to the execution units in the pentium, this actually works quite well.
+For a pentium pro it should be very good. This is the type of output
+Visual C++ generates.
+
+There is a third option. instead of using
+mov al, ch
+which is bad on the pentium pro, one may be able to use
+movzx eax, ch
+which may not incur the partial write penalty. On the pentium,
+this instruction takes 4 cycles so is not worth using but on the
+pentium pro it appears it may be worth while. I need access to one to
+experiment :-).
+
+eric (20 Oct 1996)
+
+22 Nov 1996 - I have asked people to run the 2 different version on pentium
+pros and it appears that the intel documentation is wrong. The
+mov al,bh is still faster on a pentium pro, so just use the des586.pl
+install des686.pl
+
+3 Dec 1996 - I added des_encrypt3/des_decrypt3 because I have moved these
+functions into des_enc.c because it does make a massive performance
+difference on some boxes to have the functions code located close to
+the des_encrypt2() function.
+
+9 Jan 1997 - des-som2.pl is now the correct perl script to use for
+pentiums. It contains an inner loop from
+Svend Olaf Mikkelsen <svolaf@inet.uni-c.dk> which does raw ecb DES calls at
+273,000 per second. He had a previous version at 250,000 and the best
+I was able to get was 203,000. The content has not changed, this is all
+due to instruction sequencing (and actual instructions choice) which is able
+to keep both functional units of the pentium going.
+We may have lost the ugly register usage restrictions when x86 went 32 bit
+but for the pentium it has been replaced by evil instruction ordering tricks.
+
+13 Jan 1997 - des-som3.pl, more optimizations from Svend Olaf.
+raw DES at 281,000 per second on a pentium 100.
+