File: aes-encrypt-internal.c

package info (click to toggle)
nettle 1.15-5
  • links: PTS, VCS
  • area: main
  • in suites: lenny
  • size: 3,184 kB
  • ctags: 2,262
  • sloc: ansic: 18,917; sh: 3,259; asm: 1,217; makefile: 519; cpp: 77
file content (105 lines) | stat: -rw-r--r-- 3,181 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
/* aes-encrypt-internal.c
 *
 * Encryption function for the aes/rijndael block cipher.
 */

/* nettle, low-level cryptographics library
 *
 * Copyright (C) 2002 Niels Möller
 *  
 * The nettle library is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at your
 * option) any later version.
 * 
 * The nettle library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 * License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public License
 * along with the nettle library; see the file COPYING.LIB.  If not, write to
 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 * MA 02111-1307, USA.
 */

#if HAVE_CONFIG_H
# include "config.h"
#endif

#include <assert.h>

#include "aes-internal.h"
#include "macros.h"

/* Encrypt LENGTH bytes from SRC into DST, one AES_BLOCK_SIZE block per
 * FOR_BLOCKS iteration, using the subkeys and round count stored in
 * CTX and the table T handed to the AES_ROUND / AES_FINAL_ROUND
 * macros. */
void
_nettle_aes_encrypt(const struct aes_ctx *ctx,
		    const struct aes_table *T,
		    unsigned length, uint8_t *dst,
		    const uint8_t *src)
{
  FOR_BLOCKS(length, dst, src, AES_BLOCK_SIZE)
    {
      uint32_t s0, s1, s2, s3;	/* state words entering a round */
      uint32_t n0, n1, n2, n3;	/* state words leaving a round */
      unsigned r;		/* round counter */
      unsigned k;		/* index of the round's first subkey word */

      /* Read the input block as four little-endian words and XOR
       * each one with the matching word of the first subkey. */
      s0 = LE_READ_UINT32(src)      ^ ctx->keys[0];
      s1 = LE_READ_UINT32(src + 4)  ^ ctx->keys[1];
      s2 = LE_READ_UINT32(src + 8)  ^ ctx->keys[2];
      s3 = LE_READ_UINT32(src + 12) ^ ctx->keys[3];

      /* Rounds 1 .. nrounds - 1. The subkey index k is always 4*r. */
      for (r = 1, k = 4; r < ctx->nrounds; r++, k += 4)
	{
	  n0 = AES_ROUND(T, s0, s1, s2, s3, ctx->keys[k]);
	  n1 = AES_ROUND(T, s1, s2, s3, s0, ctx->keys[k + 1]);
	  n2 = AES_ROUND(T, s2, s3, s0, s1, ctx->keys[k + 2]);
	  n3 = AES_ROUND(T, s3, s0, s1, s2, ctx->keys[k + 3]);

	  /* FIXME: We could unroll the loop twice to avoid these
	     assignments. If all eight variables fit in registers,
	     that should give a speedup. */
	  s0 = n0;
	  s1 = n1;
	  s2 = n2;
	  s3 = n3;
	}

      /* Final round: k still refers to the subkey that follows the
       * last one used in the loop above. */
      n0 = AES_FINAL_ROUND(T, s0, s1, s2, s3, ctx->keys[k]);
      n1 = AES_FINAL_ROUND(T, s1, s2, s3, s0, ctx->keys[k + 1]);
      n2 = AES_FINAL_ROUND(T, s2, s3, s0, s1, ctx->keys[k + 2]);
      n3 = AES_FINAL_ROUND(T, s3, s0, s1, s2, ctx->keys[k + 3]);

      /* Store the result as four little-endian words. */
      LE_WRITE_UINT32(dst, n0);
      LE_WRITE_UINT32(dst + 4, n1);
      LE_WRITE_UINT32(dst + 8, n2);
      LE_WRITE_UINT32(dst + 12, n3);
    }
}

/* Some stats, all for AES 128:

   A. Table-driven indexing (the approach of the old unified
      _aes_crypt function).
   B. Unrolling the j-loop.

   C. Eliminated the use of IDXk(j) in the main loop.

   D. Put wtxt in four scalar variables.

   E. Also put t in four scalar variables.

       P4 2.2 GHz         AMD Duron 1.4GHz
       
       MB/s  code size
   A   35.9  0x202        17 MB/s
   B   37.3  0x334
   C   33.0  0x2a7
   D   40.7  0x3f9
   E   42.9  0x44a        26 MB/s
 */