Text file src/crypto/md5/md5block_arm.s

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  //
     5  // ARM version of md5block.go
     6  
     7  #include "textflag.h"
     8  
     9  // Register definitions: symbolic names for the ARM registers used by ·block
    10  #define Rtable	R0	// Pointer to MD5 constants table (advanced as constants are loaded)
    11  #define Rdata	R1	// Pointer to data to hash (the source block or its aligned copy in buf)
    12  #define Ra	R2	// MD5 accumulator (1st word loaded from dig)
    13  #define Rb	R3	// MD5 accumulator (2nd word loaded from dig)
    14  #define Rc	R4	// MD5 accumulator (3rd word loaded from dig)
    15  #define Rd	R5	// MD5 accumulator (4th word loaded from dig)
    16  #define Rc0	R6	// MD5 constant; also scratch for the memmove length and the dig pointer
    17  #define Rc1	R7	// MD5 constant
    18  #define Rc2	R8	// MD5 constant
    19  // R9 and R10 are forbidden (NOTE(review): presumably reserved by the Go toolchain on ARM - confirm)
    20  // R11 may be used only after checking the assembled output to confirm that no synthetic instructions use it
    21  #define Rc3	R11	// MD5 constant
    22  #define Rt0	R12	// temporary
    23  #define Rt1	R14	// temporary (holds the X[index] word inside the ROUND macros)
    24  
    25  // func block(dig *digest, p []byte)
    26  // 0(FP) is *digest
    27  // 4(FP) is p.array (struct Slice)
    28  // 8(FP) is p.len
    29  // 12(FP) is p.cap (not read by this routine)
    30  //
    31  // Stack frame (4 + 4 + 64 + 12 = 84 bytes, matching the $84 frame size on the TEXT line)
    32  #define p_end	end-4(SP)	// pointer to the end of data
    33  #define p_data	data-8(SP)	// current data pointer (start of the block being processed)
    34  #define buf	buffer-(8+4*16)(SP)	// 16-word (64-byte) temporary buffer for an aligned copy of the block
    35  		// 3 words at 4..12(R13) for called routine parameters (used for the memmove arguments)
    36  
    37  TEXT	·block(SB), NOSPLIT, $84-16	// 84-byte frame, 16 bytes of arguments (dig + 3-word slice)
    38  	MOVW	p+4(FP), Rdata	// pointer to the data
    39  	MOVW	p_len+8(FP), Rt0	// number of bytes
    40  	ADD	Rdata, Rt0	// Rt0 = data pointer + length
    41  	MOVW	Rt0, p_end	// pointer to end of data
    42  	// Hash one 64-byte block per iteration. The end-of-data test is at the bottom of the loop, so the body always runs at least once.
    43  loop:
    44  	MOVW	Rdata, p_data	// Save Rdata (it may be redirected to buf below)
    45  	AND.S	$3, Rdata, Rt0	// Set flags from Rdata&3 (TST $3, Rdata did not work, see issue 5921)
    46  	BEQ	aligned			// low two bits clear: data is 4-byte aligned - skip copy
    47  
    48  	// Copy the unaligned source data into the aligned temporary buffer
    49  	// memmove(to=4(R13), from=8(R13), n=12(R13)) - Corrupts all registers
    50  	MOVW	$buf, Rtable	// to
    51  	MOVW	$64, Rc0		// n
    52  	MOVM.IB	[Rtable,Rdata,Rc0], (R13)	// store to, from, n at 4(R13), 8(R13), 12(R13)
    53  	BL	runtime·memmove(SB)
    54  
    55  	// Point to the local aligned copy of the data
    56  	MOVW	$buf, Rdata
    57  
    58  aligned:
    59  	// Point to the table of constants
    60  	// A PC relative add would be cheaper than this
    61  	MOVW	$·table(SB), Rtable
    62  
    63  	// Load up initial MD5 accumulator
    64  	MOVW	dig+0(FP), Rc0	// Rc0 = dig (Rc0 is free until the first constants are loaded)
    65  	MOVM.IA (Rc0), [Ra,Rb,Rc,Rd]	// Ra..Rd = the four consecutive 32-bit words at dig
    66  
    67  // ROUND1 is one round-1 step: a += (((c^d)&b)^d) + X[index] + const
    68  // then a = (a<<shift | a>>(32-shift)) + b, where X[index] is the 32-bit word at (index<<2)(Rdata)
    69  #define ROUND1(Ra, Rb, Rc, Rd, index, shift, Rconst) \
    70  	EOR	Rc, Rd, Rt0		; \
    71  	AND	Rb, Rt0			; \
    72  	EOR	Rd, Rt0			; \
    73  	MOVW	(index<<2)(Rdata), Rt1	; \
    74  	ADD	Rt1, Rt0			; \
    75  	ADD	Rconst, Rt0			; \
    76  	ADD	Rt0, Ra			; \
    77  	ADD	Ra@>(32-shift), Rb, Ra	;
    78  	// Round 1, steps 0-15. The first macro argument is the register updated; call sites rotate the arguments so successive steps update a, d, c, b.
    79  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]	// load 4 constants (table+0x00..0x0c); Rtable += 16
    80  	ROUND1(Ra, Rb, Rc, Rd,  0,	7, Rc0)
    81  	ROUND1(Rd, Ra, Rb, Rc,  1, 12, Rc1)
    82  	ROUND1(Rc, Rd, Ra, Rb,  2, 17, Rc2)
    83  	ROUND1(Rb, Rc, Rd, Ra,  3, 22, Rc3)
    84  
    85  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]	// load 4 constants (table+0x10..0x1c); Rtable += 16
    86  	ROUND1(Ra, Rb, Rc, Rd,  4,	7, Rc0)
    87  	ROUND1(Rd, Ra, Rb, Rc,  5, 12, Rc1)
    88  	ROUND1(Rc, Rd, Ra, Rb,  6, 17, Rc2)
    89  	ROUND1(Rb, Rc, Rd, Ra,  7, 22, Rc3)
    90  
    91  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]	// load 4 constants (table+0x20..0x2c); Rtable += 16
    92  	ROUND1(Ra, Rb, Rc, Rd,  8,	7, Rc0)
    93  	ROUND1(Rd, Ra, Rb, Rc,  9, 12, Rc1)
    94  	ROUND1(Rc, Rd, Ra, Rb, 10, 17, Rc2)
    95  	ROUND1(Rb, Rc, Rd, Ra, 11, 22, Rc3)
    96  
    97  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]	// load 4 constants (table+0x30..0x3c); Rtable += 16
    98  	ROUND1(Ra, Rb, Rc, Rd, 12,	7, Rc0)
    99  	ROUND1(Rd, Ra, Rb, Rc, 13, 12, Rc1)
   100  	ROUND1(Rc, Rd, Ra, Rb, 14, 17, Rc2)
   101  	ROUND1(Rb, Rc, Rd, Ra, 15, 22, Rc3)
   102  
   103  // ROUND2 is one round-2 step: a += (((b^c)&d)^c) + X[index] + const
   104  // then a = (a<<shift | a>>(32-shift)) + b, where X[index] is the 32-bit word at (index<<2)(Rdata)
   105  #define ROUND2(Ra, Rb, Rc, Rd, index, shift, Rconst) \
   106  	EOR	Rb, Rc, Rt0		; \
   107  	AND	Rd, Rt0			; \
   108  	EOR	Rc, Rt0			; \
   109  	MOVW	(index<<2)(Rdata), Rt1	; \
   110  	ADD	Rt1, Rt0			; \
   111  	ADD	Rconst, Rt0			; \
   112  	ADD	Rt0, Ra			; \
   113  	ADD	Ra@>(32-shift), Rb, Ra	;
   114  	// Round 2, steps 16-31. The first macro argument is the register updated; message words are read in the order 1, 6, 11, 0, 5, ...
   115  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]	// load the next 4 constants into Rc0-Rc3; Rtable += 16
   116  	ROUND2(Ra, Rb, Rc, Rd,  1,	5, Rc0)
   117  	ROUND2(Rd, Ra, Rb, Rc,  6,	9, Rc1)
   118  	ROUND2(Rc, Rd, Ra, Rb, 11, 14, Rc2)
   119  	ROUND2(Rb, Rc, Rd, Ra,  0, 20, Rc3)
   120  
   121  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]	// load the next 4 constants into Rc0-Rc3; Rtable += 16
   122  	ROUND2(Ra, Rb, Rc, Rd,  5,	5, Rc0)
   123  	ROUND2(Rd, Ra, Rb, Rc, 10,	9, Rc1)
   124  	ROUND2(Rc, Rd, Ra, Rb, 15, 14, Rc2)
   125  	ROUND2(Rb, Rc, Rd, Ra,  4, 20, Rc3)
   126  
   127  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]	// load the next 4 constants into Rc0-Rc3; Rtable += 16
   128  	ROUND2(Ra, Rb, Rc, Rd,  9,	5, Rc0)
   129  	ROUND2(Rd, Ra, Rb, Rc, 14,	9, Rc1)
   130  	ROUND2(Rc, Rd, Ra, Rb,  3, 14, Rc2)
   131  	ROUND2(Rb, Rc, Rd, Ra,  8, 20, Rc3)
   132  
   133  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]	// load the next 4 constants into Rc0-Rc3; Rtable += 16
   134  	ROUND2(Ra, Rb, Rc, Rd, 13,	5, Rc0)
   135  	ROUND2(Rd, Ra, Rb, Rc,  2,	9, Rc1)
   136  	ROUND2(Rc, Rd, Ra, Rb,  7, 14, Rc2)
   137  	ROUND2(Rb, Rc, Rd, Ra, 12, 20, Rc3)
   138  
   139  // ROUND3 is one round-3 step: a += (b^c^d) + X[index] + const
   140  // then a = (a<<shift | a>>(32-shift)) + b, where X[index] is the 32-bit word at (index<<2)(Rdata)
   141  #define ROUND3(Ra, Rb, Rc, Rd, index, shift, Rconst) \
   142  	EOR	Rb, Rc, Rt0		; \
   143  	EOR	Rd, Rt0			; \
   144  	MOVW	(index<<2)(Rdata), Rt1	; \
   145  	ADD	Rt1, Rt0			; \
   146  	ADD	Rconst, Rt0			; \
   147  	ADD	Rt0, Ra			; \
   148  	ADD	Ra@>(32-shift), Rb, Ra	;
   149  	// Round 3, steps 32-47. The first macro argument is the register updated; message words are read in the order 5, 8, 11, 14, 1, ...
   150  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]	// load the next 4 constants into Rc0-Rc3; Rtable += 16
   151  	ROUND3(Ra, Rb, Rc, Rd,  5,	4, Rc0)
   152  	ROUND3(Rd, Ra, Rb, Rc,  8, 11, Rc1)
   153  	ROUND3(Rc, Rd, Ra, Rb, 11, 16, Rc2)
   154  	ROUND3(Rb, Rc, Rd, Ra, 14, 23, Rc3)
   155  
   156  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]	// load the next 4 constants into Rc0-Rc3; Rtable += 16
   157  	ROUND3(Ra, Rb, Rc, Rd,  1,	4, Rc0)
   158  	ROUND3(Rd, Ra, Rb, Rc,  4, 11, Rc1)
   159  	ROUND3(Rc, Rd, Ra, Rb,  7, 16, Rc2)
   160  	ROUND3(Rb, Rc, Rd, Ra, 10, 23, Rc3)
   161  
   162  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]	// load the next 4 constants into Rc0-Rc3; Rtable += 16
   163  	ROUND3(Ra, Rb, Rc, Rd, 13,	4, Rc0)
   164  	ROUND3(Rd, Ra, Rb, Rc,  0, 11, Rc1)
   165  	ROUND3(Rc, Rd, Ra, Rb,  3, 16, Rc2)
   166  	ROUND3(Rb, Rc, Rd, Ra,  6, 23, Rc3)
   167  
   168  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]	// load the next 4 constants into Rc0-Rc3; Rtable += 16
   169  	ROUND3(Ra, Rb, Rc, Rd,  9,	4, Rc0)
   170  	ROUND3(Rd, Ra, Rb, Rc, 12, 11, Rc1)
   171  	ROUND3(Rc, Rd, Ra, Rb, 15, 16, Rc2)
   172  	ROUND3(Rb, Rc, Rd, Ra,  2, 23, Rc3)
   173  
   174  // ROUND4 is one round-4 step: a += (c^(b|^d)) + X[index] + const, where ^d is the bitwise complement of d (MVN)
   175  // then a = (a<<shift | a>>(32-shift)) + b, where X[index] is the 32-bit word at (index<<2)(Rdata)
   176  #define ROUND4(Ra, Rb, Rc, Rd, index, shift, Rconst) \
   177  	MVN	Rd, Rt0			; \
   178  	ORR	Rb, Rt0			; \
   179  	EOR	Rc, Rt0			; \
   180  	MOVW	(index<<2)(Rdata), Rt1	; \
   181  	ADD	Rt1, Rt0			; \
   182  	ADD	Rconst, Rt0			; \
   183  	ADD	Rt0, Ra			; \
   184  	ADD	Ra@>(32-shift), Rb, Ra	;
   185  	// Round 4, steps 48-63. The first macro argument is the register updated; message words are read in the order 0, 7, 14, 5, 12, ...
   186  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]	// load the next 4 constants into Rc0-Rc3; Rtable += 16
   187  	ROUND4(Ra, Rb, Rc, Rd,  0,	6, Rc0)
   188  	ROUND4(Rd, Ra, Rb, Rc,  7, 10, Rc1)
   189  	ROUND4(Rc, Rd, Ra, Rb, 14, 15, Rc2)
   190  	ROUND4(Rb, Rc, Rd, Ra,  5, 21, Rc3)
   191  
   192  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]	// load the next 4 constants into Rc0-Rc3; Rtable += 16
   193  	ROUND4(Ra, Rb, Rc, Rd, 12,	6, Rc0)
   194  	ROUND4(Rd, Ra, Rb, Rc,  3, 10, Rc1)
   195  	ROUND4(Rc, Rd, Ra, Rb, 10, 15, Rc2)
   196  	ROUND4(Rb, Rc, Rd, Ra,  1, 21, Rc3)
   197  
   198  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]	// load the next 4 constants into Rc0-Rc3; Rtable += 16
   199  	ROUND4(Ra, Rb, Rc, Rd,  8,	6, Rc0)
   200  	ROUND4(Rd, Ra, Rb, Rc, 15, 10, Rc1)
   201  	ROUND4(Rc, Rd, Ra, Rb,  6, 15, Rc2)
   202  	ROUND4(Rb, Rc, Rd, Ra, 13, 21, Rc3)
   203  
   204  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]	// load the next 4 constants into Rc0-Rc3; Rtable += 16
   205  	ROUND4(Ra, Rb, Rc, Rd,  4,	6, Rc0)
   206  	ROUND4(Rd, Ra, Rb, Rc, 11, 10, Rc1)
   207  	ROUND4(Rc, Rd, Ra, Rb,  2, 15, Rc2)
   208  	ROUND4(Rb, Rc, Rd, Ra,  9, 21, Rc3)
   209  
   210  	MOVW	dig+0(FP), Rt0	// Rt0 = dig
   211  	MOVM.IA (Rt0), [Rc0,Rc1,Rc2,Rc3]	// reload the four state words from dig (constant registers reused as scratch)
   212  	// Add the state from before this block into the freshly computed accumulators
   213  	ADD	Rc0, Ra
   214  	ADD	Rc1, Rb
   215  	ADD	Rc2, Rc
   216  	ADD	Rc3, Rd
   217  
   218  	MOVM.IA [Ra,Rb,Rc,Rd], (Rt0)	// store the updated state back to dig
   219  
   220  	MOVW	p_data, Rdata	// restore the source pointer saved at the top of the loop (Rdata may point at buf)
   221  	MOVW	p_end, Rt0
   222  	ADD	$64, Rdata	// advance to the next 64-byte block
   223  	CMP	Rt0, Rdata
   224  	BLO	loop	// continue while Rdata is below p_end (unsigned)
   225  
   226  	RET
   227  
   228  // MD5 constants table: the per-step additive constants (T[1..64] in RFC 1321), 16 per round
   229  	// Stored in the order ·block consumes them: four consecutive words per MOVM.IA.W (Rtable) load.
   230  	// Round 1
   231  	DATA	·table+0x00(SB)/4, $0xd76aa478
   232  	DATA	·table+0x04(SB)/4, $0xe8c7b756
   233  	DATA	·table+0x08(SB)/4, $0x242070db
   234  	DATA	·table+0x0c(SB)/4, $0xc1bdceee
   235  	DATA	·table+0x10(SB)/4, $0xf57c0faf
   236  	DATA	·table+0x14(SB)/4, $0x4787c62a
   237  	DATA	·table+0x18(SB)/4, $0xa8304613
   238  	DATA	·table+0x1c(SB)/4, $0xfd469501
   239  	DATA	·table+0x20(SB)/4, $0x698098d8
   240  	DATA	·table+0x24(SB)/4, $0x8b44f7af
   241  	DATA	·table+0x28(SB)/4, $0xffff5bb1
   242  	DATA	·table+0x2c(SB)/4, $0x895cd7be
   243  	DATA	·table+0x30(SB)/4, $0x6b901122
   244  	DATA	·table+0x34(SB)/4, $0xfd987193
   245  	DATA	·table+0x38(SB)/4, $0xa679438e
   246  	DATA	·table+0x3c(SB)/4, $0x49b40821
   247  	// Round 2
   248  	DATA	·table+0x40(SB)/4, $0xf61e2562
   249  	DATA	·table+0x44(SB)/4, $0xc040b340
   250  	DATA	·table+0x48(SB)/4, $0x265e5a51
   251  	DATA	·table+0x4c(SB)/4, $0xe9b6c7aa
   252  	DATA	·table+0x50(SB)/4, $0xd62f105d
   253  	DATA	·table+0x54(SB)/4, $0x02441453
   254  	DATA	·table+0x58(SB)/4, $0xd8a1e681
   255  	DATA	·table+0x5c(SB)/4, $0xe7d3fbc8
   256  	DATA	·table+0x60(SB)/4, $0x21e1cde6
   257  	DATA	·table+0x64(SB)/4, $0xc33707d6
   258  	DATA	·table+0x68(SB)/4, $0xf4d50d87
   259  	DATA	·table+0x6c(SB)/4, $0x455a14ed
   260  	DATA	·table+0x70(SB)/4, $0xa9e3e905
   261  	DATA	·table+0x74(SB)/4, $0xfcefa3f8
   262  	DATA	·table+0x78(SB)/4, $0x676f02d9
   263  	DATA	·table+0x7c(SB)/4, $0x8d2a4c8a
   264  	// Round 3
   265  	DATA	·table+0x80(SB)/4, $0xfffa3942
   266  	DATA	·table+0x84(SB)/4, $0x8771f681
   267  	DATA	·table+0x88(SB)/4, $0x6d9d6122
   268  	DATA	·table+0x8c(SB)/4, $0xfde5380c
   269  	DATA	·table+0x90(SB)/4, $0xa4beea44
   270  	DATA	·table+0x94(SB)/4, $0x4bdecfa9
   271  	DATA	·table+0x98(SB)/4, $0xf6bb4b60
   272  	DATA	·table+0x9c(SB)/4, $0xbebfbc70
   273  	DATA	·table+0xa0(SB)/4, $0x289b7ec6
   274  	DATA	·table+0xa4(SB)/4, $0xeaa127fa
   275  	DATA	·table+0xa8(SB)/4, $0xd4ef3085
   276  	DATA	·table+0xac(SB)/4, $0x04881d05
   277  	DATA	·table+0xb0(SB)/4, $0xd9d4d039
   278  	DATA	·table+0xb4(SB)/4, $0xe6db99e5
   279  	DATA	·table+0xb8(SB)/4, $0x1fa27cf8
   280  	DATA	·table+0xbc(SB)/4, $0xc4ac5665
   281  	// Round 4
   282  	DATA	·table+0xc0(SB)/4, $0xf4292244
   283  	DATA	·table+0xc4(SB)/4, $0x432aff97
   284  	DATA	·table+0xc8(SB)/4, $0xab9423a7
   285  	DATA	·table+0xcc(SB)/4, $0xfc93a039
   286  	DATA	·table+0xd0(SB)/4, $0x655b59c3
   287  	DATA	·table+0xd4(SB)/4, $0x8f0ccc92
   288  	DATA	·table+0xd8(SB)/4, $0xffeff47d
   289  	DATA	·table+0xdc(SB)/4, $0x85845dd1
   290  	DATA	·table+0xe0(SB)/4, $0x6fa87e4f
   291  	DATA	·table+0xe4(SB)/4, $0xfe2ce6e0
   292  	DATA	·table+0xe8(SB)/4, $0xa3014314
   293  	DATA	·table+0xec(SB)/4, $0x4e0811a1
   294  	DATA	·table+0xf0(SB)/4, $0xf7537e82
   295  	DATA	·table+0xf4(SB)/4, $0xbd3af235
   296  	DATA	·table+0xf8(SB)/4, $0x2ad7d2bb
   297  	DATA	·table+0xfc(SB)/4, $0xeb86d391
   298  	// Global definition: 64 words * 4 bytes = 256 bytes, placed in read-only data
   299  	GLOBL	·table(SB), RODATA, $256	// RODATA is the textflag.h name for the flag value 8 used previously
   300  

View as plain text