diff options
Diffstat (limited to 'linux/crypto/ciphers/des/asm')
-rw-r--r-- | linux/crypto/ciphers/des/asm/crypt586.pl | 204 | ||||
-rw-r--r-- | linux/crypto/ciphers/des/asm/des-586.pl | 251 | ||||
-rw-r--r-- | linux/crypto/ciphers/des/asm/des686.pl | 230 | ||||
-rw-r--r-- | linux/crypto/ciphers/des/asm/desboth.pl | 79 | ||||
-rw-r--r-- | linux/crypto/ciphers/des/asm/perlasm/cbc.pl | 342 | ||||
-rw-r--r-- | linux/crypto/ciphers/des/asm/perlasm/readme | 124 | ||||
-rw-r--r-- | linux/crypto/ciphers/des/asm/perlasm/x86asm.pl | 111 | ||||
-rw-r--r-- | linux/crypto/ciphers/des/asm/perlasm/x86ms.pl | 345 | ||||
-rw-r--r-- | linux/crypto/ciphers/des/asm/perlasm/x86unix.pl | 403 | ||||
-rw-r--r-- | linux/crypto/ciphers/des/asm/readme | 131 |
10 files changed, 2220 insertions, 0 deletions
diff --git a/linux/crypto/ciphers/des/asm/crypt586.pl b/linux/crypto/ciphers/des/asm/crypt586.pl new file mode 100644 index 000000000..297e38dec --- /dev/null +++ b/linux/crypto/ciphers/des/asm/crypt586.pl @@ -0,0 +1,204 @@ +#!/usr/bin/perl +# +# The inner loop instruction sequence and the IP/FP modifications are from +# Svend Olaf Mikkelsen <svolaf@inet.uni-c.dk> +# I've added the stuff needed for crypt() but I've not worried about making +# things perfect. +# + +push(@INC,"perlasm","../../perlasm"); +require "x86asm.pl"; + +&asm_init($ARGV[0],"crypt586.pl"); + +$L="edi"; +$R="esi"; + +&external_label("des_SPtrans"); +&fcrypt_body("fcrypt_body"); +&asm_finish(); + +sub fcrypt_body + { + local($name,$do_ip)=@_; + + &function_begin($name,"EXTRN _des_SPtrans:DWORD"); + + &comment(""); + &comment("Load the 2 words"); + $ks="ebp"; + + &xor( $L, $L); + &xor( $R, $R); + &mov($ks,&wparam(1)); + + &push(25); # add a variable + + &set_label("start"); + for ($i=0; $i<16; $i+=2) + { + &comment(""); + &comment("Round $i"); + &D_ENCRYPT($i,$L,$R,$i*2,$ks,"des_SPtrans","eax","ebx","ecx","edx"); + + &comment(""); + &comment("Round ".sprintf("%d",$i+1)); + &D_ENCRYPT($i+1,$R,$L,($i+1)*2,$ks,"des_SPtrans","eax","ebx","ecx","edx"); + } + &mov("ebx", &swtmp(0)); + &mov("eax", $L); + &dec("ebx"); + &mov($L, $R); + &mov($R, "eax"); + &mov(&swtmp(0), "ebx"); + &jnz(&label("start")); + + &comment(""); + &comment("FP"); + &mov("edx",&wparam(0)); + + &FP_new($R,$L,"eax",3); + &mov(&DWP(0,"edx","",0),"eax"); + &mov(&DWP(4,"edx","",0),$L); + + &pop("ecx"); # remove variable + + &function_end($name); + } + +sub D_ENCRYPT + { + local($r,$L,$R,$S,$ks,$desSP,$u,$tmp1,$tmp2,$t)=@_; + + &mov( $u, &wparam(2)); # 2 + &mov( $t, $R); + &shr( $t, 16); # 1 + &mov( $tmp2, &wparam(3)); # 2 + &xor( $t, $R); # 1 + + &and( $u, $t); # 2 + &and( $t, $tmp2); # 2 + + &mov( $tmp1, $u); + &shl( $tmp1, 16); # 1 + &mov( $tmp2, $t); + &shl( $tmp2, 16); # 1 + &xor( $u, $tmp1); # 2 + &xor( $t, $tmp2); # 2 + &mov( $tmp1, &DWP(&n2a($S*4),$ks,"",0)); # 2 + &xor( $u, $tmp1); + &mov( $tmp2, &DWP(&n2a(($S+1)*4),$ks,"",0)); # 2 + &xor( $u, $R); + &xor( $t, $R); + &xor( $t, $tmp2); + + &and( $u, "0xfcfcfcfc" ); # 2 + &xor( $tmp1, $tmp1); # 1 + &and( $t, "0xcfcfcfcf" ); # 2 + &xor( $tmp2, $tmp2); + &movb( &LB($tmp1), &LB($u) ); + &movb( &LB($tmp2), &HB($u) ); + &rotr( $t, 4 ); + &mov( $ks, &DWP(" $desSP",$tmp1,"",0)); + &movb( &LB($tmp1), &LB($t) ); + &xor( $L, $ks); + &mov( $ks, &DWP("0x200+$desSP",$tmp2,"",0)); + &xor( $L, $ks); + &movb( &LB($tmp2), &HB($t) ); + &shr( $u, 16); + &mov( $ks, &DWP("0x100+$desSP",$tmp1,"",0)); + &xor( $L, $ks); + &movb( &LB($tmp1), &HB($u) ); + &shr( $t, 16); + &mov( $ks, &DWP("0x300+$desSP",$tmp2,"",0)); + &xor( $L, $ks); + &mov( $ks, &wparam(1)); + &movb( &LB($tmp2), &HB($t) ); + &and( $u, "0xff" ); + &and( $t, "0xff" ); + &mov( $tmp1, &DWP("0x600+$desSP",$tmp1,"",0)); + &xor( $L, $tmp1); + &mov( $tmp1, &DWP("0x700+$desSP",$tmp2,"",0)); + &xor( $L, $tmp1); + &mov( $tmp1, &DWP("0x400+$desSP",$u,"",0)); + &xor( $L, $tmp1); + &mov( $tmp1, &DWP("0x500+$desSP",$t,"",0)); + &xor( $L, $tmp1); + } + +sub n2a + { + sprintf("%d",$_[0]); + } + +# now has a side affect of rotating $a by $shift +sub R_PERM_OP + { + local($a,$b,$tt,$shift,$mask,$last)=@_; + + &rotl( $a, $shift ) if ($shift != 0); + &mov( $tt, $a ); + &xor( $a, $b ); + &and( $a, $mask ); + if ($notlast eq $b) + { + &xor( $b, $a ); + &xor( $tt, $a ); + } + else + { + &xor( $tt, $a ); + &xor( $b, $a ); + } + &comment(""); + } + +sub IP_new + { + local($l,$r,$tt,$lr)=@_; + + &R_PERM_OP($l,$r,$tt, 4,"0xf0f0f0f0",$l); + &R_PERM_OP($r,$tt,$l,20,"0xfff0000f",$l); + &R_PERM_OP($l,$tt,$r,14,"0x33333333",$r); + &R_PERM_OP($tt,$r,$l,22,"0x03fc03fc",$r); + &R_PERM_OP($l,$r,$tt, 9,"0xaaaaaaaa",$r); + + if ($lr != 3) + { + if (($lr-3) < 0) + { &rotr($tt, 3-$lr); } + else { &rotl($tt, $lr-3); } + } + if ($lr != 2) + { + if (($lr-2) < 0) + { &rotr($r, 2-$lr); } + else { &rotl($r, $lr-2); } + } + } + +sub FP_new + { + local($l,$r,$tt,$lr)=@_; + + if ($lr != 2) + { + if (($lr-2) < 0) + { &rotl($r, 2-$lr); } + else { &rotr($r, $lr-2); } + } + if ($lr != 3) + { + if (($lr-3) < 0) + { &rotl($l, 3-$lr); } + else { &rotr($l, $lr-3); } + } + + &R_PERM_OP($l,$r,$tt, 0,"0xaaaaaaaa",$r); + &R_PERM_OP($tt,$r,$l,23,"0x03fc03fc",$r); + &R_PERM_OP($l,$r,$tt,10,"0x33333333",$l); + &R_PERM_OP($r,$tt,$l,18,"0xfff0000f",$l); + &R_PERM_OP($l,$tt,$r,12,"0xf0f0f0f0",$r); + &rotr($tt , 4); + } + diff --git a/linux/crypto/ciphers/des/asm/des-586.pl b/linux/crypto/ciphers/des/asm/des-586.pl new file mode 100644 index 000000000..7f2e09fa7 --- /dev/null +++ b/linux/crypto/ciphers/des/asm/des-586.pl @@ -0,0 +1,251 @@ +#!/usr/bin/perl +# +# The inner loop instruction sequence and the IP/FP modifications are from +# Svend Olaf Mikkelsen <svolaf@inet.uni-c.dk> +# + +push(@INC,"perlasm","../../perlasm"); +require "x86asm.pl"; +require "cbc.pl"; +require "desboth.pl"; + +# base code is in microsft +# op dest, source +# format. +# + +&asm_init($ARGV[0],"des-586.pl"); + +$L="edi"; +$R="esi"; + +&external_label("des_SPtrans"); +&des_encrypt("des_encrypt",1); +&des_encrypt("des_encrypt2",0); +&des_encrypt3("des_encrypt3",1); +&des_encrypt3("des_decrypt3",0); +&cbc("des_ncbc_encrypt","des_encrypt","des_encrypt",0,4,5,3,5,-1); +&cbc("des_ede3_cbc_encrypt","des_encrypt3","des_decrypt3",0,6,7,3,4,5); + +&asm_finish(); + +sub des_encrypt + { + local($name,$do_ip)=@_; + + &function_begin_B($name,"EXTRN _des_SPtrans:DWORD"); + + &push("esi"); + &push("edi"); + + &comment(""); + &comment("Load the 2 words"); + $ks="ebp"; + + if ($do_ip) + { + &mov($R,&wparam(0)); + &xor( "ecx", "ecx" ); + + &push("ebx"); + &push("ebp"); + + &mov("eax",&DWP(0,$R,"",0)); + &mov("ebx",&wparam(2)); # get encrypt flag + &mov($L,&DWP(4,$R,"",0)); + &comment(""); + &comment("IP"); + &IP_new("eax",$L,$R,3); + } + else + { + &mov("eax",&wparam(0)); + &xor( "ecx", "ecx" ); + + &push("ebx"); + &push("ebp"); + + &mov($R,&DWP(0,"eax","",0)); + &mov("ebx",&wparam(2)); # get encrypt flag + &rotl($R,3); + &mov($L,&DWP(4,"eax","",0)); + &rotl($L,3); + } + + &mov( $ks, &wparam(1) ); + &cmp("ebx","0"); + &je(&label("start_decrypt")); + + for ($i=0; $i<16; $i+=2) + { + &comment(""); + &comment("Round $i"); + &D_ENCRYPT($i,$L,$R,$i*2,$ks,"des_SPtrans","eax","ebx","ecx","edx"); + + &comment(""); + &comment("Round ".sprintf("%d",$i+1)); + &D_ENCRYPT($i+1,$R,$L,($i+1)*2,$ks,"des_SPtrans","eax","ebx","ecx","edx"); + } + &jmp(&label("end")); + + &set_label("start_decrypt"); + + for ($i=15; $i>0; $i-=2) + { + &comment(""); + &comment("Round $i"); + &D_ENCRYPT(15-$i,$L,$R,$i*2,$ks,"des_SPtrans","eax","ebx","ecx","edx"); + &comment(""); + &comment("Round ".sprintf("%d",$i-1)); + &D_ENCRYPT(15-$i+1,$R,$L,($i-1)*2,$ks,"des_SPtrans","eax","ebx","ecx","edx"); + } + + &set_label("end"); + + if ($do_ip) + { + &comment(""); + &comment("FP"); + &mov("edx",&wparam(0)); + &FP_new($L,$R,"eax",3); + + &mov(&DWP(0,"edx","",0),"eax"); + &mov(&DWP(4,"edx","",0),$R); + } + else + { + &comment(""); + &comment("Fixup"); + &rotr($L,3); # r + &mov("eax",&wparam(0)); + &rotr($R,3); # l + &mov(&DWP(0,"eax","",0),$L); + &mov(&DWP(4,"eax","",0),$R); + } + + &pop("ebp"); + &pop("ebx"); + &pop("edi"); + &pop("esi"); + &ret(); + + &function_end_B($name); + } + +sub D_ENCRYPT + { + local($r,$L,$R,$S,$ks,$desSP,$u,$tmp1,$tmp2,$t)=@_; + + &mov( $u, &DWP(&n2a($S*4),$ks,"",0)); + &xor( $tmp1, $tmp1); + &mov( $t, &DWP(&n2a(($S+1)*4),$ks,"",0)); + &xor( $u, $R); + &xor( $t, $R); + &and( $u, "0xfcfcfcfc" ); + &and( $t, "0xcfcfcfcf" ); + &movb( &LB($tmp1), &LB($u) ); + &movb( &LB($tmp2), &HB($u) ); + &rotr( $t, 4 ); + &mov( $ks, &DWP(" $desSP",$tmp1,"",0)); + &movb( &LB($tmp1), &LB($t) ); + &xor( $L, $ks); + &mov( $ks, &DWP("0x200+$desSP",$tmp2,"",0)); + &xor( $L, $ks); ###### + &movb( &LB($tmp2), &HB($t) ); + &shr( $u, 16); + &mov( $ks, &DWP("0x100+$desSP",$tmp1,"",0)); + &xor( $L, $ks); ###### + &movb( &LB($tmp1), &HB($u) ); + &shr( $t, 16); + &mov( $ks, &DWP("0x300+$desSP",$tmp2,"",0)); + &xor( $L, $ks); + &mov( $ks, &wparam(1) ); + &movb( &LB($tmp2), &HB($t) ); + &and( $u, "0xff" ); + &and( $t, "0xff" ); + &mov( $tmp1, &DWP("0x600+$desSP",$tmp1,"",0)); + &xor( $L, $tmp1); + &mov( $tmp1, &DWP("0x700+$desSP",$tmp2,"",0)); + &xor( $L, $tmp1); + &mov( $tmp1, &DWP("0x400+$desSP",$u,"",0)); + &xor( $L, $tmp1); + &mov( $tmp1, &DWP("0x500+$desSP",$t,"",0)); + &xor( $L, $tmp1); + } + +sub n2a + { + sprintf("%d",$_[0]); + } + +# now has a side affect of rotating $a by $shift +sub R_PERM_OP + { + local($a,$b,$tt,$shift,$mask,$last)=@_; + + &rotl( $a, $shift ) if ($shift != 0); + &mov( $tt, $a ); + &xor( $a, $b ); + &and( $a, $mask ); + if (!$last eq $b) + { + &xor( $b, $a ); + &xor( $tt, $a ); + } + else + { + &xor( $tt, $a ); + &xor( $b, $a ); + } + &comment(""); + } + +sub IP_new + { + local($l,$r,$tt,$lr)=@_; + + &R_PERM_OP($l,$r,$tt, 4,"0xf0f0f0f0",$l); + &R_PERM_OP($r,$tt,$l,20,"0xfff0000f",$l); + &R_PERM_OP($l,$tt,$r,14,"0x33333333",$r); + &R_PERM_OP($tt,$r,$l,22,"0x03fc03fc",$r); + &R_PERM_OP($l,$r,$tt, 9,"0xaaaaaaaa",$r); + + if ($lr != 3) + { + if (($lr-3) < 0) + { &rotr($tt, 3-$lr); } + else { &rotl($tt, $lr-3); } + } + if ($lr != 2) + { + if (($lr-2) < 0) + { &rotr($r, 2-$lr); } + else { &rotl($r, $lr-2); } + } + } + +sub FP_new + { + local($l,$r,$tt,$lr)=@_; + + if ($lr != 2) + { + if (($lr-2) < 0) + { &rotl($r, 2-$lr); } + else { &rotr($r, $lr-2); } + } + if ($lr != 3) + { + if (($lr-3) < 0) + { &rotl($l, 3-$lr); } + else { &rotr($l, $lr-3); } + } + + &R_PERM_OP($l,$r,$tt, 0,"0xaaaaaaaa",$r); + &R_PERM_OP($tt,$r,$l,23,"0x03fc03fc",$r); + &R_PERM_OP($l,$r,$tt,10,"0x33333333",$l); + &R_PERM_OP($r,$tt,$l,18,"0xfff0000f",$l); + &R_PERM_OP($l,$tt,$r,12,"0xf0f0f0f0",$r); + &rotr($tt , 4); + } + diff --git a/linux/crypto/ciphers/des/asm/des686.pl b/linux/crypto/ciphers/des/asm/des686.pl new file mode 100644 index 000000000..cf1a82fb5 --- /dev/null +++ b/linux/crypto/ciphers/des/asm/des686.pl @@ -0,0 +1,230 @@ +#!/usr/bin/perl + +$prog="des686.pl"; + +# base code is in microsft +# op dest, source +# format. +# + +# WILL NOT WORK ANYMORE WITH desboth.pl +require "desboth.pl"; + +if ( ($ARGV[0] eq "elf")) + { require "x86unix.pl"; } +elsif ( ($ARGV[0] eq "a.out")) + { $aout=1; require "x86unix.pl"; } +elsif ( ($ARGV[0] eq "sol")) + { $sol=1; require "x86unix.pl"; } +elsif ( ($ARGV[0] eq "cpp")) + { $cpp=1; require "x86unix.pl"; } +elsif ( ($ARGV[0] eq "win32")) + { require "x86ms.pl"; } +else + { + print STDERR <<"EOF"; +Pick one target type from + elf - linux, FreeBSD etc + a.out - old linux + sol - x86 solaris + cpp - format so x86unix.cpp can be used + win32 - Windows 95/Windows NT +EOF + exit(1); + } + +&comment("Don't even think of reading this code"); +&comment("It was automatically generated by $prog"); +&comment("Which is a perl program used to generate the x86 assember for"); +&comment("any of elf, a.out, Win32, or Solaris"); +&comment("It can be found in SSLeay 0.6.5+ or in libdes 3.26+"); +&comment("eric <eay\@cryptsoft.com>"); +&comment(""); + +&file("dx86xxxx"); + +$L="edi"; +$R="esi"; + +&des_encrypt("des_encrypt",1); +&des_encrypt("des_encrypt2",0); + +&des_encrypt3("des_encrypt3",1); +&des_encrypt3("des_decrypt3",0); + +&file_end(); + +sub des_encrypt + { + local($name,$do_ip)=@_; + + &function_begin($name,"EXTRN _des_SPtrans:DWORD"); + + &comment(""); + &comment("Load the 2 words"); + &mov("eax",&wparam(0)); + &mov($L,&DWP(0,"eax","",0)); + &mov($R,&DWP(4,"eax","",0)); + + $ksp=&wparam(1); + + if ($do_ip) + { + &comment(""); + &comment("IP"); + &IP_new($L,$R,"eax"); + } + + &comment(""); + &comment("fixup rotate"); + &rotl($R,3); + &rotl($L,3); + &exch($L,$R); + + &comment(""); + &comment("load counter, key_schedule and enc flag"); + &mov("eax",&wparam(2)); # get encrypt flag + &mov("ebp",&wparam(1)); # get ks + &cmp("eax","0"); + &je(&label("start_decrypt")); + + # encrypting part + + for ($i=0; $i<16; $i+=2) + { + &comment(""); + &comment("Round $i"); + &D_ENCRYPT($L,$R,$i*2,"ebp","des_SPtrans","ecx","edx","eax","ebx"); + + &comment(""); + &comment("Round ".sprintf("%d",$i+1)); + &D_ENCRYPT($R,$L,($i+1)*2,"ebp","des_SPtrans","ecx","edx","eax","ebx"); + } + &jmp(&label("end")); + + &set_label("start_decrypt"); + + for ($i=15; $i>0; $i-=2) + { + &comment(""); + &comment("Round $i"); + &D_ENCRYPT($L,$R,$i*2,"ebp","des_SPtrans","ecx","edx","eax","ebx"); + &comment(""); + &comment("Round ".sprintf("%d",$i-1)); + &D_ENCRYPT($R,$L,($i-1)*2,"ebp","des_SPtrans","ecx","edx","eax","ebx"); + } + + &set_label("end"); + + &comment(""); + &comment("Fixup"); + &rotr($L,3); # r + &rotr($R,3); # l + + if ($do_ip) + { + &comment(""); + &comment("FP"); + &FP_new($R,$L,"eax"); + } + + &mov("eax",&wparam(0)); + &mov(&DWP(0,"eax","",0),$L); + &mov(&DWP(4,"eax","",0),$R); + + &function_end($name); + } + + +# The logic is to load R into 2 registers and operate on both at the same time. +# We also load the 2 R's into 2 more registers so we can do the 'move word down a byte' +# while also masking the other copy and doing a lookup. We then also accumulate the +# L value in 2 registers then combine them at the end. +sub D_ENCRYPT + { + local($L,$R,$S,$ks,$desSP,$u,$t,$tmp1,$tmp2,$tmp3)=@_; + + &mov( $u, &DWP(&n2a($S*4),$ks,"",0)); + &mov( $t, &DWP(&n2a(($S+1)*4),$ks,"",0)); + &xor( $u, $R ); + &xor( $t, $R ); + &rotr( $t, 4 ); + + # the numbers at the end of the line are origional instruction order + &mov( $tmp2, $u ); # 1 2 + &mov( $tmp1, $t ); # 1 1 + &and( $tmp2, "0xfc" ); # 1 4 + &and( $tmp1, "0xfc" ); # 1 3 + &shr( $t, 8 ); # 1 5 + &xor( $L, &DWP("0x100+$desSP",$tmp1,"",0)); # 1 7 + &shr( $u, 8 ); # 1 6 + &mov( $tmp1, &DWP(" $desSP",$tmp2,"",0)); # 1 8 + + &mov( $tmp2, $u ); # 2 2 + &xor( $L, $tmp1 ); # 1 9 + &and( $tmp2, "0xfc" ); # 2 4 + &mov( $tmp1, $t ); # 2 1 + &and( $tmp1, "0xfc" ); # 2 3 + &shr( $t, 8 ); # 2 5 + &xor( $L, &DWP("0x300+$desSP",$tmp1,"",0)); # 2 7 + &shr( $u, 8 ); # 2 6 + &mov( $tmp1, &DWP("0x200+$desSP",$tmp2,"",0)); # 2 8 + &mov( $tmp2, $u ); # 3 2 + + &xor( $L, $tmp1 ); # 2 9 + &and( $tmp2, "0xfc" ); # 3 4 + + &mov( $tmp1, $t ); # 3 1 + &shr( $u, 8 ); # 3 6 + &and( $tmp1, "0xfc" ); # 3 3 + &shr( $t, 8 ); # 3 5 + &xor( $L, &DWP("0x500+$desSP",$tmp1,"",0)); # 3 7 + &mov( $tmp1, &DWP("0x400+$desSP",$tmp2,"",0)); # 3 8 + + &and( $t, "0xfc" ); # 4 1 + &xor( $L, $tmp1 ); # 3 9 + + &and( $u, "0xfc" ); # 4 2 + &xor( $L, &DWP("0x700+$desSP",$t,"",0)); # 4 3 + &xor( $L, &DWP("0x600+$desSP",$u,"",0)); # 4 4 + } + +sub PERM_OP + { + local($a,$b,$tt,$shift,$mask)=@_; + + &mov( $tt, $a ); + &shr( $tt, $shift ); + &xor( $tt, $b ); + &and( $tt, $mask ); + &xor( $b, $tt ); + &shl( $tt, $shift ); + &xor( $a, $tt ); + } + +sub IP_new + { + local($l,$r,$tt)=@_; + + &PERM_OP($r,$l,$tt, 4,"0x0f0f0f0f"); + &PERM_OP($l,$r,$tt,16,"0x0000ffff"); + &PERM_OP($r,$l,$tt, 2,"0x33333333"); + &PERM_OP($l,$r,$tt, 8,"0x00ff00ff"); + &PERM_OP($r,$l,$tt, 1,"0x55555555"); + } + +sub FP_new + { + local($l,$r,$tt)=@_; + + &PERM_OP($l,$r,$tt, 1,"0x55555555"); + &PERM_OP($r,$l,$tt, 8,"0x00ff00ff"); + &PERM_OP($l,$r,$tt, 2,"0x33333333"); + &PERM_OP($r,$l,$tt,16,"0x0000ffff"); + &PERM_OP($l,$r,$tt, 4,"0x0f0f0f0f"); + } + +sub n2a + { + sprintf("%d",$_[0]); + } diff --git a/linux/crypto/ciphers/des/asm/desboth.pl b/linux/crypto/ciphers/des/asm/desboth.pl new file mode 100644 index 000000000..8f939953a --- /dev/null +++ b/linux/crypto/ciphers/des/asm/desboth.pl @@ -0,0 +1,79 @@ +#!/usr/bin/perl + +$L="edi"; +$R="esi"; + +sub des_encrypt3 + { + local($name,$enc)=@_; + + &function_begin_B($name,""); + &push("ebx"); + &mov("ebx",&wparam(0)); + + &push("ebp"); + &push("esi"); + + &push("edi"); + + &comment(""); + &comment("Load the data words"); + &mov($L,&DWP(0,"ebx","",0)); + &mov($R,&DWP(4,"ebx","",0)); + &stack_push(3); + + &comment(""); + &comment("IP"); + &IP_new($L,$R,"edx",0); + + # put them back + + if ($enc) + { + &mov(&DWP(4,"ebx","",0),$R); + &mov("eax",&wparam(1)); + &mov(&DWP(0,"ebx","",0),"edx"); + &mov("edi",&wparam(2)); + &mov("esi",&wparam(3)); + } + else + { + &mov(&DWP(4,"ebx","",0),$R); + &mov("esi",&wparam(1)); + &mov(&DWP(0,"ebx","",0),"edx"); + &mov("edi",&wparam(2)); + &mov("eax",&wparam(3)); + } + &mov(&swtmp(2), (($enc)?"1":"0")); + &mov(&swtmp(1), "eax"); + &mov(&swtmp(0), "ebx"); + &call("des_encrypt2"); + &mov(&swtmp(2), (($enc)?"0":"1")); + &mov(&swtmp(1), "edi"); + &mov(&swtmp(0), "ebx"); + &call("des_encrypt2"); + &mov(&swtmp(2), (($enc)?"1":"0")); + &mov(&swtmp(1), "esi"); + &mov(&swtmp(0), "ebx"); + &call("des_encrypt2"); + + &stack_pop(3); + &mov($L,&DWP(0,"ebx","",0)); + &mov($R,&DWP(4,"ebx","",0)); + + &comment(""); + &comment("FP"); + &FP_new($L,$R,"eax",0); + + &mov(&DWP(0,"ebx","",0),"eax"); + &mov(&DWP(4,"ebx","",0),$R); + + &pop("edi"); + &pop("esi"); + &pop("ebp"); + &pop("ebx"); + &ret(); + &function_end_B($name); + } + + diff --git a/linux/crypto/ciphers/des/asm/perlasm/cbc.pl b/linux/crypto/ciphers/des/asm/perlasm/cbc.pl new file mode 100644 index 000000000..278930579 --- /dev/null +++ b/linux/crypto/ciphers/des/asm/perlasm/cbc.pl @@ -0,0 +1,342 @@ +#!/usr/bin/perl + +# void des_ncbc_encrypt(input, output, length, schedule, ivec, enc) +# des_cblock (*input); +# des_cblock (*output); +# long length; +# des_key_schedule schedule; +# des_cblock (*ivec); +# int enc; +# +# calls +# des_encrypt((DES_LONG *)tin,schedule,DES_ENCRYPT); +# + +#&cbc("des_ncbc_encrypt","des_encrypt",0); +#&cbc("BF_cbc_encrypt","BF_encrypt","BF_encrypt", +# 1,4,5,3,5,-1); +#&cbc("des_ncbc_encrypt","des_encrypt","des_encrypt", +# 0,4,5,3,5,-1); +#&cbc("des_ede3_cbc_encrypt","des_encrypt3","des_decrypt3", +# 0,6,7,3,4,5); +# +# When doing a cipher that needs bigendian order, +# for encrypt, the iv is kept in bigendian form, +# while for decrypt, it is kept in little endian. +sub cbc + { + local($name,$enc_func,$dec_func,$swap,$iv_off,$enc_off,$p1,$p2,$p3)=@_; + # name is the function name + # enc_func and dec_func and the functions to call for encrypt/decrypt + # swap is true if byte order needs to be reversed + # iv_off is parameter number for the iv + # enc_off is parameter number for the encrypt/decrypt flag + # p1,p2,p3 are the offsets for parameters to be passed to the + # underlying calls. + + &function_begin_B($name,""); + &comment(""); + + $in="esi"; + $out="edi"; + $count="ebp"; + + &push("ebp"); + &push("ebx"); + &push("esi"); + &push("edi"); + + $data_off=4; + $data_off+=4 if ($p1 > 0); + $data_off+=4 if ($p2 > 0); + $data_off+=4 if ($p3 > 0); + + &mov($count, &wparam(2)); # length + + &comment("getting iv ptr from parameter $iv_off"); + &mov("ebx", &wparam($iv_off)); # Get iv ptr + + &mov($in, &DWP(0,"ebx","",0));# iv[0] + &mov($out, &DWP(4,"ebx","",0));# iv[1] + + &push($out); + &push($in); + &push($out); # used in decrypt for iv[1] + &push($in); # used in decrypt for iv[0] + + &mov("ebx", "esp"); # This is the address of tin[2] + + &mov($in, &wparam(0)); # in + &mov($out, &wparam(1)); # out + + # We have loaded them all, how lets push things + &comment("getting encrypt flag from parameter $enc_off"); + &mov("ecx", &wparam($enc_off)); # Get enc flag + if ($p3 > 0) + { + &comment("get and push parameter $p3"); + if ($enc_off != $p3) + { &mov("eax", &wparam($p3)); &push("eax"); } + else { &push("ecx"); } + } + if ($p2 > 0) + { + &comment("get and push parameter $p2"); + if ($enc_off != $p2) + { &mov("eax", &wparam($p2)); &push("eax"); } + else { &push("ecx"); } + } + if ($p1 > 0) + { + &comment("get and push parameter $p1"); + if ($enc_off != $p1) + { &mov("eax", &wparam($p1)); &push("eax"); } + else { &push("ecx"); } + } + &push("ebx"); # push data/iv + + &cmp("ecx",0); + &jz(&label("decrypt")); + + &and($count,0xfffffff8); + &mov("eax", &DWP($data_off,"esp","",0)); # load iv[0] + &mov("ebx", &DWP($data_off+4,"esp","",0)); # load iv[1] + + &jz(&label("encrypt_finish")); + + ############################################################# + + &set_label("encrypt_loop"); + # encrypt start + # "eax" and "ebx" hold iv (or the last cipher text) + + &mov("ecx", &DWP(0,$in,"",0)); # load first 4 bytes + &mov("edx", &DWP(4,$in,"",0)); # second 4 bytes + + &xor("eax", "ecx"); + &xor("ebx", "edx"); + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov(&DWP($data_off,"esp","",0), "eax"); # put in array for call + &mov(&DWP($data_off+4,"esp","",0), "ebx"); # + + &call($enc_func); + + &mov("eax", &DWP($data_off,"esp","",0)); + &mov("ebx", &DWP($data_off+4,"esp","",0)); + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov(&DWP(0,$out,"",0),"eax"); + &mov(&DWP(4,$out,"",0),"ebx"); + + # eax and ebx are the next iv. + + &add($in, 8); + &add($out, 8); + + &sub($count, 8); + &jnz(&label("encrypt_loop")); + +###################################################################3 + &set_label("encrypt_finish"); + &mov($count, &wparam(2)); # length + &and($count, 7); + &jz(&label("finish")); + &xor("ecx","ecx"); + &xor("edx","edx"); + &mov($count,&DWP(&label("cbc_enc_jmp_table"),"",$count,4)); + &jmp_ptr($count); + +&set_label("ej7"); + &xor("edx", "edx") if $ppro; # ppro friendly + &movb(&HB("edx"), &BP(6,$in,"",0)); + &shl("edx",8); +&set_label("ej6"); + &movb(&HB("edx"), &BP(5,$in,"",0)); +&set_label("ej5"); + &movb(&LB("edx"), &BP(4,$in,"",0)); +&set_label("ej4"); + &mov("ecx", &DWP(0,$in,"",0)); + &jmp(&label("ejend")); +&set_label("ej3"); + &movb(&HB("ecx"), &BP(2,$in,"",0)); + &xor("ecx", "ecx") if $ppro; # ppro friendly + &shl("ecx",8); +&set_label("ej2"); + &movb(&HB("ecx"), &BP(1,$in,"",0)); +&set_label("ej1"); + &movb(&LB("ecx"), &BP(0,$in,"",0)); +&set_label("ejend"); + + &xor("eax", "ecx"); + &xor("ebx", "edx"); + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov(&DWP($data_off,"esp","",0), "eax"); # put in array for call + &mov(&DWP($data_off+4,"esp","",0), "ebx"); # + + &call($enc_func); + + &mov("eax", &DWP($data_off,"esp","",0)); + &mov("ebx", &DWP($data_off+4,"esp","",0)); + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov(&DWP(0,$out,"",0),"eax"); + &mov(&DWP(4,$out,"",0),"ebx"); + + &jmp(&label("finish")); + + ############################################################# + ############################################################# + &set_label("decrypt",1); + # decrypt start + &and($count,0xfffffff8); + # The next 2 instructions are only for if the jz is taken + &mov("eax", &DWP($data_off+8,"esp","",0)); # get iv[0] + &mov("ebx", &DWP($data_off+12,"esp","",0)); # get iv[1] + &jz(&label("decrypt_finish")); + + &set_label("decrypt_loop"); + &mov("eax", &DWP(0,$in,"",0)); # load first 4 bytes + &mov("ebx", &DWP(4,$in,"",0)); # second 4 bytes + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov(&DWP($data_off,"esp","",0), "eax"); # put back + &mov(&DWP($data_off+4,"esp","",0), "ebx"); # + + &call($dec_func); + + &mov("eax", &DWP($data_off,"esp","",0)); # get return + &mov("ebx", &DWP($data_off+4,"esp","",0)); # + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov("ecx", &DWP($data_off+8,"esp","",0)); # get iv[0] + &mov("edx", &DWP($data_off+12,"esp","",0)); # get iv[1] + + &xor("ecx", "eax"); + &xor("edx", "ebx"); + + &mov("eax", &DWP(0,$in,"",0)); # get old cipher text, + &mov("ebx", &DWP(4,$in,"",0)); # next iv actually + + &mov(&DWP(0,$out,"",0),"ecx"); + &mov(&DWP(4,$out,"",0),"edx"); + + &mov(&DWP($data_off+8,"esp","",0), "eax"); # save iv + &mov(&DWP($data_off+12,"esp","",0), "ebx"); # + + &add($in, 8); + &add($out, 8); + + &sub($count, 8); + &jnz(&label("decrypt_loop")); +############################ ENDIT #######################3 + &set_label("decrypt_finish"); + &mov($count, &wparam(2)); # length + &and($count, 7); + &jz(&label("finish")); + + &mov("eax", &DWP(0,$in,"",0)); # load first 4 bytes + &mov("ebx", &DWP(4,$in,"",0)); # second 4 bytes + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov(&DWP($data_off,"esp","",0), "eax"); # put back + &mov(&DWP($data_off+4,"esp","",0), "ebx"); # + + &call($dec_func); + + &mov("eax", &DWP($data_off,"esp","",0)); # get return + &mov("ebx", &DWP($data_off+4,"esp","",0)); # + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov("ecx", &DWP($data_off+8,"esp","",0)); # get iv[0] + &mov("edx", &DWP($data_off+12,"esp","",0)); # get iv[1] + + &xor("ecx", "eax"); + &xor("edx", "ebx"); + + # this is for when we exit + &mov("eax", &DWP(0,$in,"",0)); # get old cipher text, + &mov("ebx", &DWP(4,$in,"",0)); # next iv actually + +&set_label("dj7"); + &rotr("edx", 16); + &movb(&BP(6,$out,"",0), &LB("edx")); + &shr("edx",16); +&set_label("dj6"); + &movb(&BP(5,$out,"",0), &HB("edx")); +&set_label("dj5"); + &movb(&BP(4,$out,"",0), &LB("edx")); +&set_label("dj4"); + &mov(&DWP(0,$out,"",0), "ecx"); + &jmp(&label("djend")); +&set_label("dj3"); + &rotr("ecx", 16); + &movb(&BP(2,$out,"",0), &LB("ecx")); + &shl("ecx",16); +&set_label("dj2"); + &movb(&BP(1,$in,"",0), &HB("ecx")); +&set_label("dj1"); + &movb(&BP(0,$in,"",0), &LB("ecx")); +&set_label("djend"); + + # final iv is still in eax:ebx + &jmp(&label("finish")); + + +############################ FINISH #######################3 + &set_label("finish",1); + &mov("ecx", &wparam($iv_off)); # Get iv ptr + + ################################################# + $total=16+4; + $total+=4 if ($p1 > 0); + $total+=4 if ($p2 > 0); + $total+=4 if ($p3 > 0); + &add("esp",$total); + + &mov(&DWP(0,"ecx","",0), "eax"); # save iv + &mov(&DWP(4,"ecx","",0), "ebx"); # save iv + + &function_end_A($name); + + &set_label("cbc_enc_jmp_table",1); + &data_word("0"); + &data_word(&label("ej1")); + &data_word(&label("ej2")); + &data_word(&label("ej3")); + &data_word(&label("ej4")); + &data_word(&label("ej5")); + &data_word(&label("ej6")); + &data_word(&label("ej7")); + &set_label("cbc_dec_jmp_table",1); + &data_word("0"); + &data_word(&label("dj1")); + &data_word(&label("dj2")); + &data_word(&label("dj3")); + &data_word(&label("dj4")); + &data_word(&label("dj5")); + &data_word(&label("dj6")); + &data_word(&label("dj7")); + + &function_end_B($name); + + } + +1; diff --git a/linux/crypto/ciphers/des/asm/perlasm/readme b/linux/crypto/ciphers/des/asm/perlasm/readme new file mode 100644 index 000000000..f02bbee75 --- /dev/null +++ b/linux/crypto/ciphers/des/asm/perlasm/readme @@ -0,0 +1,124 @@ +The perl scripts in this directory are my 'hack' to generate +multiple different assembler formats via the one origional script. + +The way to use this library is to start with adding the path to this directory +and then include it. + +push(@INC,"perlasm","../../perlasm"); +require "x86asm.pl"; + +The first thing we do is setup the file and type of assember + +&asm_init($ARGV[0],$0); + +The first argument is the 'type'. Currently +'cpp', 'sol', 'a.out', 'elf' or 'win32'. +Argument 2 is the file name. + +The reciprocal function is +&asm_finish() which should be called at the end. + +There are 2 main 'packages'. x86ms.pl, which is the microsoft assembler, +and x86unix.pl which is the unix (gas) version. + +Functions of interest are: +&external_label("des_SPtrans"); declare and external variable +&LB(reg); Low byte for a register +&HB(reg); High byte for a register +&BP(off,base,index,scale) Byte pointer addressing +&DWP(off,base,index,scale) Word pointer addressing +&stack_push(num) Basically a 'sub esp, num*4' with extra +&stack_pop(num) inverse of stack_push +&function_begin(name,extra) Start a function with pushing of + edi, esi, ebx and ebp. extra is extra win32 + external info that may be required. +&function_begin_B(name,extra) Same as norma function_begin but no pushing. +&function_end(name) Call at end of function. +&function_end_A(name) Standard pop and ret, for use inside functions +&function_end_B(name) Call at end but with poping or 'ret'. +&swtmp(num) Address on stack temp word. +&wparam(num) Parameter number num, that was push + in C convention. This all works over pushes + and pops. +&comment("hello there") Put in a comment. +&label("loop") Refer to a label, normally a jmp target. +&set_label("loop") Set a label at this point. +&data_word(word) Put in a word of data. + +So how does this all hold together? Given + +int calc(int len, int *data) + { + int i,j=0; + + for (i=0; i<len; i++) + { + j+=other(data[i]); + } + } + +So a very simple version of this function could be coded as + + push(@INC,"perlasm","../../perlasm"); + require "x86asm.pl"; + + &asm_init($ARGV[0],"cacl.pl"); + + &external_label("other"); + + $tmp1= "eax"; + $j= "edi"; + $data= "esi"; + $i= "ebp"; + + &comment("a simple function"); + &function_begin("calc"); + &mov( $data, &wparam(1)); # data + &xor( $j, $j); + &xor( $i, $i); + + &set_label("loop"); + &cmp( $i, &wparam(0)); + &jge( &label("end")); + + &mov( $tmp1, &DWP(0,$data,$i,4)); + &push( $tmp1); + &call( "other"); + &add( $j, "eax"); + &pop( $tmp1); + &inc( $i); + &jmp( &label("loop")); + + &set_label("end"); + &mov( "eax", $j); + + &function_end("calc"); + + &asm_finish(); + +The above example is very very unoptimised but gives an idea of how +things work. + +There is also a cbc mode function generator in cbc.pl + +&cbc( $name, + $encrypt_function_name, + $decrypt_function_name, + $true_if_byte_swap_needed, + $parameter_number_for_iv, + $parameter_number_for_encrypt_flag, + $first_parameter_to_pass, + $second_parameter_to_pass, + $third_parameter_to_pass); + +So for example, given +void BF_encrypt(BF_LONG *data,BF_KEY *key); +void BF_decrypt(BF_LONG *data,BF_KEY *key); +void BF_cbc_encrypt(unsigned char *in, unsigned char *out, long length, + BF_KEY *ks, unsigned char *iv, int enc); + +&cbc("BF_cbc_encrypt","BF_encrypt","BF_encrypt",1,4,5,3,-1,-1); + +&cbc("des_ncbc_encrypt","des_encrypt","des_encrypt",0,4,5,3,5,-1); +&cbc("des_ede3_cbc_encrypt","des_encrypt3","des_decrypt3",0,6,7,3,4,5); + diff --git a/linux/crypto/ciphers/des/asm/perlasm/x86asm.pl b/linux/crypto/ciphers/des/asm/perlasm/x86asm.pl new file mode 100644 index 000000000..164a942c5 --- /dev/null +++ b/linux/crypto/ciphers/des/asm/perlasm/x86asm.pl @@ -0,0 +1,111 @@ +#!/usr/bin/perl + +# require 'x86asm.pl'; +# &asm_init("cpp","des-586.pl"); +# XXX +# XXX +# main'asm_finish + +sub main'asm_finish + { + &file_end(); + &asm_finish_cpp() if $cpp; + print &asm_get_output(); + } + +sub main'asm_init + { + ($type,$fn)=@_; + $filename=$fn; + + $cpp=$sol=$aout=$win32=0; + if ( ($type eq "elf")) + { require "x86unix.pl"; } + elsif ( ($type eq "a.out")) + { $aout=1; require "x86unix.pl"; } + elsif ( ($type eq "sol")) + { $sol=1; require "x86unix.pl"; } + elsif ( ($type eq "cpp")) + { $cpp=1; require "x86unix.pl"; } + elsif ( ($type eq "win32")) + { $win32=1; require "x86ms.pl"; } + else + { + print STDERR <<"EOF"; +Pick one target type from + elf - linux, FreeBSD etc + a.out - old linux + sol - x86 solaris + cpp - format so x86unix.cpp can be used + win32 - Windows 95/Windows NT +EOF + exit(1); + } + + &asm_init_output(); + +&comment("Don't even think of reading this code"); +&comment("It was automatically generated by $filename"); +&comment("Which is a perl program used to generate the x86 assember for"); +&comment("any of elf, a.out, BSDI,Win32, or Solaris"); +&comment("eric <eay\@cryptsoft.com>"); +&comment(""); + + $filename =~ s/\.pl$//; + &file($filename); + } + +sub asm_finish_cpp + { + return unless $cpp; + + local($tmp,$i); + foreach $i (&get_labels()) + { + $tmp.="#define $i _$i\n"; + } + print <<"EOF"; +/* Run the C pre-processor over this file with one of the following defined + * ELF - elf object files, + * OUT - a.out object files, + * BSDI - BSDI style a.out object files + * SOL - Solaris style elf + */ + +#define TYPE(a,b) .type a,b +#define SIZE(a,b) .size a,b + +#if defined(OUT) || defined(BSDI) +$tmp +#endif + +#ifdef OUT +#define OK 1 +#define ALIGN 4 +#endif + +#ifdef BSDI +#define OK 1 +#define ALIGN 4 +#undef SIZE +#undef TYPE +#endif + +#if defined(ELF) || defined(SOL) +#define OK 1 +#define ALIGN 16 +#endif + +#ifndef OK +You need to define one of +ELF - elf systems - linux-elf, NetBSD and DG-UX +OUT - a.out systems - linux-a.out and FreeBSD +SOL - solaris systems, which are elf with strange comment lines +BSDI - a.out with a very primative version of as. +#endif + +/* Let the Assembler begin :-) */ +EOF + } + +1; diff --git a/linux/crypto/ciphers/des/asm/perlasm/x86ms.pl b/linux/crypto/ciphers/des/asm/perlasm/x86ms.pl new file mode 100644 index 000000000..0681ea18c --- /dev/null +++ b/linux/crypto/ciphers/des/asm/perlasm/x86ms.pl @@ -0,0 +1,345 @@ +#!/usr/bin/perl + +package x86ms; + +$label="L000"; + +%lb=( 'eax', 'al', + 'ebx', 'bl', + 'ecx', 'cl', + 'edx', 'dl', + 'ax', 'al', + 'bx', 'bl', + 'cx', 'cl', + 'dx', 'dl', + ); + +%hb=( 'eax', 'ah', + 'ebx', 'bh', + 'ecx', 'ch', + 'edx', 'dh', + 'ax', 'ah', + 'bx', 'bh', + 'cx', 'ch', + 'dx', 'dh', + ); + +sub main'asm_init_output { @out=(); } +sub main'asm_get_output { return(@out); } +sub main'get_labels { return(@labels); } +sub main'external_label { push(@labels,@_); } + +sub main'LB + { + (defined($lb{$_[0]})) || die "$_[0] does not have a 'low byte'\n"; + return($lb{$_[0]}); + } + +sub main'HB + { + (defined($hb{$_[0]})) || die "$_[0] does not have a 'high byte'\n"; + return($hb{$_[0]}); + } + +sub main'BP + { + &get_mem("BYTE",@_); + } + +sub main'DWP + { + &get_mem("DWORD",@_); + } + +sub main'stack_push + { + local($num)=@_; + $stack+=$num*4; + &main'sub("esp",$num*4); + } + +sub main'stack_pop + { + local($num)=@_; + $stack-=$num*4; + &main'add("esp",$num*4); + } + +sub get_mem + { + local($size,$addr,$reg1,$reg2,$idx)=@_; + local($t,$post); + local($ret)="$size PTR "; + + $addr =~ s/^\s+//; + if ($addr =~ /^(.+)\+(.+)$/) + { + $reg2=&conv($1); + $addr="_$2"; + } + elsif ($addr =~ /^[_a-zA-Z]/) + { + $addr="_$addr"; + } + + $reg1="$regs{$reg1}" if defined($regs{$reg1}); + $reg2="$regs{$reg2}" if defined($regs{$reg2}); + if (($addr ne "") && ($addr ne 0)) + { + if ($addr !~ /^-/) + { $ret.=$addr; } + else { $post=$addr; } + } + if ($reg2 ne "") + { + $t=""; + $t="*$idx" if ($idx != 0); + $reg1="+".$reg1 if ("$reg1$post" ne ""); + $ret.="[$reg2$t$reg1$post]"; + } + else + { + $ret.="[$reg1$post]" + } + return($ret); + } + +sub main'mov { &out2("mov",@_); } +sub main'movb { &out2("mov",@_); } +sub main'and { &out2("and",@_); } +sub main'or { &out2("or",@_); } +sub main'shl { &out2("shl",@_); } +sub main'shr { &out2("shr",@_); } +sub main'xor { &out2("xor",@_); } +sub main'xorb { &out2("xor",@_); } +sub main'add { &out2("add",@_); } +sub main'adc { &out2("adc",@_); } +sub main'sub { &out2("sub",@_); } +sub main'rotl { &out2("rol",@_); } +sub main'rotr { &out2("ror",@_); } +sub main'exch { &out2("xchg",@_); } +sub main'cmp { &out2("cmp",@_); } +sub main'lea { &out2("lea",@_); } +sub main'mul { &out1("mul",@_); } +sub main'div { &out1("div",@_); } +sub main'dec { &out1("dec",@_); } +sub main'inc { &out1("inc",@_); } +sub main'jmp { &out1("jmp",@_); } +sub main'jmp_ptr { &out1p("jmp",@_); } +sub main'je { &out1("je",@_); } +sub main'jle { &out1("jle",@_); } +sub main'jz { &out1("jz",@_); } +sub main'jge { &out1("jge",@_); } +sub main'jl { &out1("jl",@_); } +sub main'jb { &out1("jb",@_); } +sub main'jnz { &out1("jnz",@_); } +sub main'jne { &out1("jne",@_); } +sub main'push { &out1("push",@_); $stack+=4; } +sub main'pop { &out1("pop",@_); $stack-=4; } +sub main'bswap { &out1("bswap",@_); &using486(); } +sub main'not { &out1("not",@_); } +sub main'call { &out1("call",'_'.$_[0]); } +sub main'ret { &out0("ret"); } +sub main'nop { &out0("nop"); } + +sub out2 + { + local($name,$p1,$p2)=@_; + local($l,$t); + + push(@out,"\t$name\t"); + $t=&conv($p1).","; + $l=length($t); + push(@out,$t); + $l=4-($l+9)/8; + push(@out,"\t" x $l); + push(@out,&conv($p2)); + push(@out,"\n"); + } + +sub out0 + { + local($name)=@_; + + push(@out,"\t$name\n"); + } + +sub out1 + { + local($name,$p1)=@_; + local($l,$t); + + push(@out,"\t$name\t".&conv($p1)."\n"); + } + +sub conv + { + local($p)=@_; + + $p =~ s/0x([0-9A-Fa-f]+)/0$1h/; + return $p; + } + +sub using486 + { + return if $using486; + $using486++; + grep(s/\.386/\.486/,@out); + } + +sub main'file + { + local($file)=@_; + + local($tmp)=<<"EOF"; + TITLE $file.asm + .386 +.model FLAT +EOF + push(@out,$tmp); + } + +sub main'function_begin + { + local($func,$extra)=@_; + + push(@labels,$func); + + local($tmp)=<<"EOF"; +_TEXT SEGMENT +PUBLIC _$func +$extra +_$func PROC NEAR + push ebp + push ebx + push esi + push edi +EOF + push(@out,$tmp); + $stack=20; + } + +sub main'function_begin_B + { + local($func,$extra)=@_; + + local($tmp)=<<"EOF"; +_TEXT SEGMENT +PUBLIC _$func +$extra +_$func PROC NEAR +EOF + push(@out,$tmp); + $stack=4; + } + +sub main'function_end + { + local($func)=@_; + + local($tmp)=<<"EOF"; + pop edi + pop esi + pop ebx + pop ebp + ret +_$func ENDP +_TEXT ENDS +EOF + push(@out,$tmp); + $stack=0; + %label=(); + } + +sub main'function_end_B + { + local($func)=@_; + + local($tmp)=<<"EOF"; +_$func ENDP +_TEXT ENDS +EOF + push(@out,$tmp); + $stack=0; + %label=(); + } + +sub main'function_end_A + { + local($func)=@_; + + local($tmp)=<<"EOF"; + pop edi + pop esi + pop ebx + pop ebp + ret +EOF + push(@out,$tmp); + } + +sub main'file_end + { + push(@out,"END\n"); + } + +sub main'wparam + { + local($num)=@_; + + return(&main'DWP($stack+$num*4,"esp","",0)); + } + +sub main'swtmp + { + return(&main'DWP($_[0]*4,"esp","",0)); + } + +# Should use swtmp, which is above esp. Linix can trash the stack above esp +#sub main'wtmp +# { +# local($num)=@_; +# +# return(&main'DWP(-(($num+1)*4),"esp","",0)); +# } + +sub main'comment + { + foreach (@_) + { + push(@out,"\t; $_\n"); + } + } + +sub main'label + { + if (!defined($label{$_[0]})) + { + $label{$_[0]}="\$${label}${_[0]}"; + $label++; + } + return($label{$_[0]}); + } + +sub main'set_label + { + if (!defined($label{$_[0]})) + { + $label{$_[0]}="${label}${_[0]}"; + $label++; + } + push(@out,"$label{$_[0]}:\n"); + } + +sub main'data_word + { + push(@out,"\tDD\t$_[0]\n"); + } + +sub out1p + { + local($name,$p1)=@_; + local($l,$t); + + push(@out,"\t$name\t ".&conv($p1)."\n"); + } diff --git a/linux/crypto/ciphers/des/asm/perlasm/x86unix.pl b/linux/crypto/ciphers/des/asm/perlasm/x86unix.pl new file mode 100644 index 000000000..1d661221c --- /dev/null +++ b/linux/crypto/ciphers/des/asm/perlasm/x86unix.pl @@ -0,0 +1,403 @@ +#!/usr/bin/perl + +package x86unix; + +$label="L000"; + +$align=($main'aout)?"4":"16"; +$under=($main'aout)?"_":""; +$com_start=($main'sol)?"/":"#"; + +sub main'asm_init_output { @out=(); } +sub main'asm_get_output { return(@out); } +sub main'get_labels { return(@labels); } +sub main'external_label { push(@labels,@_); } + +if ($main'cpp) + { + $align="ALIGN"; + $under=""; + $com_start='/*'; + $com_end='*/'; + } + +%lb=( 'eax', '%al', + 'ebx', '%bl', + 'ecx', '%cl', + 'edx', '%dl', + 'ax', '%al', + 'bx', '%bl', + 'cx', '%cl', + 'dx', '%dl', + ); + +%hb=( 'eax', '%ah', + 'ebx', '%bh', + 'ecx', '%ch', + 'edx', '%dh', + 'ax', '%ah', + 'bx', '%bh', + 'cx', '%ch', + 'dx', '%dh', + ); + +%regs=( 'eax', '%eax', + 'ebx', '%ebx', + 'ecx', '%ecx', + 'edx', '%edx', + 'esi', '%esi', + 'edi', '%edi', + 'ebp', '%ebp', + 'esp', '%esp', + ); + +%reg_val=( + 'eax', 0x00, + 'ebx', 0x03, + 'ecx', 0x01, + 'edx', 0x02, + 'esi', 0x06, + 'edi', 0x07, + 'ebp', 0x05, + 'esp', 0x04, + ); + +sub main'LB + { + (defined($lb{$_[0]})) || die "$_[0] does not have a 'low byte'\n"; + return($lb{$_[0]}); + } + +sub main'HB + { + (defined($hb{$_[0]})) || die "$_[0] does not have a 'high byte'\n"; + return($hb{$_[0]}); + } + +sub main'DWP + { + local($addr,$reg1,$reg2,$idx)=@_; + + $ret=""; + $addr =~ s/(^|[+ \t])([A-Za-z_]+)($|[+ \t])/$1$under$2$3/; + $reg1="$regs{$reg1}" if defined($regs{$reg1}); + $reg2="$regs{$reg2}" if defined($regs{$reg2}); + $ret.=$addr if ($addr ne "") && ($addr ne 0); + if ($reg2 ne "") + { $ret.="($reg1,$reg2,$idx)"; } + else + { $ret.="($reg1)" } + return($ret); + } + +sub main'BP + { + return(&main'DWP(@_)); + } + +#sub main'BP +# { +# local($addr,$reg1,$reg2,$idx)=@_; +# +# $ret=""; +# +# $addr =~ s/(^|[+ \t])([A-Za-z_]+)($|[+ \t])/$1$under$2$3/; +# $reg1="$regs{$reg1}" if defined($regs{$reg1}); +# $reg2="$regs{$reg2}" if defined($regs{$reg2}); +# $ret.=$addr if ($addr ne "") && ($addr ne 0); +# if ($reg2 ne "") +# { $ret.="($reg1,$reg2,$idx)"; } +# else +# { $ret.="($reg1)" } +# return($ret); +# } + +sub main'mov { &out2("movl",@_); } +sub main'movb { &out2("movb",@_); } +sub main'and { &out2("andl",@_); } +sub main'or { &out2("orl",@_); } +sub main'shl { &out2("sall",@_); } +sub main'shr { &out2("shrl",@_); } +sub main'xor { &out2("xorl",@_); } +sub main'xorb { &out2("xorb",@_); } +sub main'add { &out2("addl",@_); } +sub main'adc { &out2("adcl",@_); } +sub main'sub { &out2("subl",@_); } +sub main'rotl { &out2("roll",@_); } +sub main'rotr { &out2("rorl",@_); } +sub main'exch { &out2("xchg",@_); } +sub main'cmp { &out2("cmpl",@_); } +sub main'lea { &out2("leal",@_); } +sub main'mul { &out1("mull",@_); } +sub main'div { &out1("divl",@_); } +sub main'jmp { &out1("jmp",@_); } +sub main'jmp_ptr { &out1p("jmp",@_); } +sub main'je { &out1("je",@_); } +sub main'jle { &out1("jle",@_); } +sub main'jne { &out1("jne",@_); } +sub main'jnz { &out1("jnz",@_); } +sub main'jz { &out1("jz",@_); } +sub main'jge { &out1("jge",@_); } +sub main'jl { &out1("jl",@_); } +sub main'jb { &out1("jb",@_); } +sub main'dec { &out1("decl",@_); } +sub main'inc { &out1("incl",@_); } +sub main'push { &out1("pushl",@_); $stack+=4; } +sub main'pop { &out1("popl",@_); $stack-=4; } +sub main'bswap { &out1("bswapl",@_); } +sub main'not { &out1("notl",@_); } +sub main'call { &out1("call",$under.$_[0]); } +sub main'ret { &out0("ret"); } +sub main'nop { &out0("nop"); } + +sub out2 + { + local($name,$p1,$p2)=@_; + local($l,$ll,$t); + local(%special)=( "roll",0xD1C0,"rorl",0xD1C8, + "rcll",0xD1D0,"rcrl",0xD1D8, + "shll",0xD1E0,"shrl",0xD1E8, + "sarl",0xD1F8); + + if ((defined($special{$name})) && defined($regs{$p1}) && ($p2 == 1)) + { + $op=$special{$name}|$reg_val{$p1}; + $tmp1=sprintf ".byte %d\n",($op>>8)&0xff; + $tmp2=sprintf ".byte %d\t",$op &0xff; + push(@out,$tmp1); + push(@out,$tmp2); + + $p2=&conv($p2); + $p1=&conv($p1); + &main'comment("$name $p2 $p1"); + return; + } + + push(@out,"\t$name\t"); + $t=&conv($p2).","; + $l=length($t); + push(@out,$t); + $ll=4-($l+9)/8; + $tmp1=sprintf "\t" x $ll; + push(@out,$tmp1); + push(@out,&conv($p1)."\n"); + } + +sub out1 + { + local($name,$p1)=@_; + local($l,$t); + + push(@out,"\t$name\t".&conv($p1)."\n"); + } + +sub out1p + { + local($name,$p1)=@_; + local($l,$t); + + push(@out,"\t$name\t*".&conv($p1)."\n"); + } + +sub out0 + { + push(@out,"\t$_[0]\n"); + } + +sub conv + { + local($p)=@_; + +# $p =~ s/0x([0-9A-Fa-f]+)/0$1h/; + + $p=$regs{$p} if (defined($regs{$p})); + + $p =~ s/^(-{0,1}[0-9A-Fa-f]+)$/\$$1/; + $p =~ s/^(0x[0-9A-Fa-f]+)$/\$$1/; + return $p; + } + +sub main'file + { + local($file)=@_; + + local($tmp)=<<"EOF"; + .file "$file.s" + .version "01.01" +gcc2_compiled.: +EOF + push(@out,$tmp); + } + +sub main'function_begin + { + local($func)=@_; + + $func=$under.$func; + + local($tmp)=<<"EOF"; +.text + .align $align +.globl $func +EOF + push(@out,$tmp); + if ($main'cpp) + { $tmp=push(@out,"\tTYPE($func,\@function)\n"); } + else { $tmp=push(@out,"\t.type\t$func,\@function\n"); } + push(@out,"$func:\n"); + $tmp=<<"EOF"; + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + +EOF + push(@out,$tmp); + $stack=20; + } + +sub main'function_begin_B + { + local($func,$extra)=@_; + + $func=$under.$func; + + local($tmp)=<<"EOF"; +.text + .align $align +.globl $func +EOF + push(@out,$tmp); + if ($main'cpp) + { push(@out,"\tTYPE($func,\@function)\n"); } + else { push(@out,"\t.type $func,\@function\n"); } + push(@out,"$func:\n"); + $stack=4; + } + +sub main'function_end + { + local($func)=@_; + + $func=$under.$func; + + local($tmp)=<<"EOF"; + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.${func}_end: +EOF + push(@out,$tmp); + if ($main'cpp) + { push(@out,"\tSIZE($func,.${func}_end-$func)\n"); } + else { push(@out,"\t.size\t$func,.${func}_end-$func\n"); } + push(@out,".ident \"$func\"\n"); + $stack=0; + %label=(); + } + +sub main'function_end_A + { + local($func)=@_; + + local($tmp)=<<"EOF"; + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +EOF + push(@out,$tmp); + } + +sub main'function_end_B + { + local($func)=@_; + + $func=$under.$func; + + push(@out,".${func}_end:\n"); + if ($main'cpp) + { push(@out,"\tSIZE($func,.${func}_end-$func)\n"); } + else { push(@out,"\t.size\t$func,.${func}_end-$func\n"); } + push(@out,".ident \"desasm.pl\"\n"); + $stack=0; + %label=(); + } + +sub main'wparam + { + local($num)=@_; + + return(&main'DWP($stack+$num*4,"esp","",0)); + } + +sub main'stack_push + { + local($num)=@_; + $stack+=$num*4; + &main'sub("esp",$num*4); + } + +sub main'stack_pop + { + local($num)=@_; + $stack-=$num*4; + &main'add("esp",$num*4); + } + +sub main'swtmp + { + return(&main'DWP($_[0]*4,"esp","",0)); + } + +# Should use swtmp, which is above esp. Linix can trash the stack above esp +#sub main'wtmp +# { +# local($num)=@_; +# +# return(&main'DWP(-($num+1)*4,"esp","",0)); +# } + +sub main'comment + { + foreach (@_) + { + if (/^\s*$/) + { push(@out,"\n"); } + else + { push(@out,"\t$com_start $_ $com_end\n"); } + } + } + +sub main'label + { + if (!defined($label{$_[0]})) + { + $label{$_[0]}=".${label}${_[0]}"; + $label++; + } + return($label{$_[0]}); + } + +sub main'set_label + { + if (!defined($label{$_[0]})) + { + $label{$_[0]}=".${label}${_[0]}"; + $label++; + } + push(@out,".align $align\n") if ($_[1] != 0); + push(@out,"$label{$_[0]}:\n"); + } + +sub main'file_end + { + } + +sub main'data_word + { + push(@out,"\t.long $_[0]\n"); + } diff --git a/linux/crypto/ciphers/des/asm/readme b/linux/crypto/ciphers/des/asm/readme new file mode 100644 index 000000000..f8529d930 --- /dev/null +++ b/linux/crypto/ciphers/des/asm/readme @@ -0,0 +1,131 @@ +First up, let me say I don't like writing in assembler. It is not portable, +dependant on the particular CPU architecture release and is generally a pig +to debug and get right. Having said that, the x86 architecture is probably +the most important for speed due to number of boxes and since +it appears to be the worst architecture to to get +good C compilers for. So due to this, I have lowered myself to do +assembler for the inner DES routines in libdes :-). + +The file to implement in assembler is des_enc.c. Replace the following +4 functions +des_encrypt(DES_LONG data[2],des_key_schedule ks, int encrypt); +des_encrypt2(DES_LONG data[2],des_key_schedule ks, int encrypt); +des_encrypt3(DES_LONG data[2],des_key_schedule ks1,ks2,ks3); +des_decrypt3(DES_LONG data[2],des_key_schedule ks1,ks2,ks3); + +They encrypt/decrypt the 64 bits held in 'data' using +the 'ks' key schedules. The only difference between the 4 functions is that +des_encrypt2() does not perform IP() or FP() on the data (this is an +optimization for when doing triple DES and des_encrypt3() and des_decrypt3() +perform triple des. The triple DES routines are in here because it does +make a big difference to have them located near the des_encrypt2 function +at link time.. + +Now as we all know, there are lots of different operating systems running on +x86 boxes, and unfortunately they normally try to make sure their assembler +formating is not the same as the other peoples. +The 4 main formats I know of are +Microsoft Windows 95/Windows NT +Elf Includes Linux and FreeBSD(?). +a.out The older Linux. +Solaris Same as Elf but different comments :-(. + +Now I was not overly keen to write 4 different copies of the same code, +so I wrote a few perl routines to output the correct assembler, given +a target assembler type. This code is ugly and is just a hack. +The libraries are x86unix.pl and x86ms.pl. +des586.pl, des686.pl and des-som[23].pl are the programs to actually +generate the assembler. + +So to generate elf assembler +perl des-som3.pl elf >dx86-elf.s +For Windows 95/NT +perl des-som2.pl win32 >win32.asm + +[ update 4 Jan 1996 ] +I have added another way to do things. +perl des-som3.pl cpp >dx86-cpp.s +generates a file that will be included by dx86unix.cpp when it is compiled. +To build for elf, a.out, solaris, bsdi etc, +cc -E -DELF asm/dx86unix.cpp | as -o asm/dx86-elf.o +cc -E -DSOL asm/dx86unix.cpp | as -o asm/dx86-sol.o +cc -E -DOUT asm/dx86unix.cpp | as -o asm/dx86-out.o +cc -E -DBSDI asm/dx86unix.cpp | as -o asm/dx86bsdi.o +This was done to cut down the number of files in the distribution. + +Now the ugly part. I acquired my copy of Intels +"Optimization's For Intel's 32-Bit Processors" and found a few interesting +things. First, the aim of the exersize is to 'extract' one byte at a time +from a word and do an array lookup. This involves getting the byte from +the 4 locations in the word and moving it to a new word and doing the lookup. +The most obvious way to do this is +xor eax, eax # clear word +movb al, cl # get low byte +xor edi DWORD PTR 0x100+des_SP[eax] # xor in word +movb al, ch # get next byte +xor edi DWORD PTR 0x300+des_SP[eax] # xor in word +shr ecx 16 +which seems ok. For the pentium, this system appears to be the best. +One has to do instruction interleaving to keep both functional units +operating, but it is basically very efficient. + +Now the crunch. When a full register is used after a partial write, eg. +mov al, cl +xor edi, DWORD PTR 0x100+des_SP[eax] +386 - 1 cycle stall +486 - 1 cycle stall +586 - 0 cycle stall +686 - at least 7 cycle stall (page 22 of the above mentioned document). + +So the technique that produces the best results on a pentium, according to +the documentation, will produce hideous results on a pentium pro. + +To get around this, des686.pl will generate code that is not as fast on +a pentium, should be very good on a pentium pro. +mov eax, ecx # copy word +shr ecx, 8 # line up next byte +and eax, 0fch # mask byte +xor edi DWORD PTR 0x100+des_SP[eax] # xor in array lookup +mov eax, ecx # get word +shr ecx 8 # line up next byte +and eax, 0fch # mask byte +xor edi DWORD PTR 0x300+des_SP[eax] # xor in array lookup + +Due to the execution units in the pentium, this actually works quite well. +For a pentium pro it should be very good. This is the type of output +Visual C++ generates. + +There is a third option. instead of using +mov al, ch +which is bad on the pentium pro, one may be able to use +movzx eax, ch +which may not incur the partial write penalty. On the pentium, +this instruction takes 4 cycles so is not worth using but on the +pentium pro it appears it may be worth while. I need access to one to +experiment :-). + +eric (20 Oct 1996) + +22 Nov 1996 - I have asked people to run the 2 different version on pentium +pros and it appears that the intel documentation is wrong. The +mov al,bh is still faster on a pentium pro, so just use the des586.pl +install des686.pl + +3 Dec 1996 - I added des_encrypt3/des_decrypt3 because I have moved these +functions into des_enc.c because it does make a massive performance +difference on some boxes to have the functions code located close to +the des_encrypt2() function. + +9 Jan 1997 - des-som2.pl is now the correct perl script to use for +pentiums. It contains an inner loop from +Svend Olaf Mikkelsen <svolaf@inet.uni-c.dk> which does raw ecb DES calls at +273,000 per second. He had a previous version at 250,000 and the best +I was able to get was 203,000. The content has not changed, this is all +due to instruction sequencing (and actual instructions choice) which is able +to keep both functional units of the pentium going. +We may have lost the ugly register usage restrictions when x86 went 32 bit +but for the pentium it has been replaced by evil instruction ordering tricks. + +13 Jan 1997 - des-som3.pl, more optimizations from Svend Olaf. +raw DES at 281,000 per second on a pentium 100. + |