// Compile-time guard: every intrinsic in this backend exists only on x86 /
// x86_64, so fail loudly (instead of with pages of unresolved-name errors)
// if the module is ever built for another architecture.
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
compile_error!("x86-sha backend can be used only on x86 and x86_64 target arches");
5
6#[cfg(target_arch = "x86")]
7use core::arch::x86::*;
8#[cfg(target_arch = "x86_64")]
9use core::arch::x86_64::*;
10
// Performs one group of four SHA-1 rounds. SHA1NEXTE folds the rotated `e`
// state (`$e`) into the four message words `$wk`, and SHA1RNDS4 then runs
// rounds with function/constant selector `$i` (0..=3, one per 20-round
// stage) on the packed a/b/c/d state `$abcd`.
macro_rules! rounds4 {
    ($abcd:ident, $e:ident, $wk:expr, $i:expr) => {
        _mm_sha1rnds4_epu32($abcd, _mm_sha1nexte_epu32($e, $wk), $i)
    };
}
16
// SHA-1 message-schedule expansion, four words at a time: combines four
// earlier 4-word blocks into the next block of W[t] values
// (W[t] = rotl1(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16]) per FIPS 180-4),
// using the SHA1MSG1 / XOR / SHA1MSG2 instruction split.
macro_rules! schedule {
    ($w0:expr, $w1:expr, $w2:expr, $w3:expr) => {
        _mm_sha1msg2_epu32(_mm_xor_si128(_mm_sha1msg1_epu32($w0, $w1), $w2), $w3)
    };
}
22
// Fused step: derive the next message-schedule block from `$m0..$m3` into
// `$m4`, then immediately consume it for four rounds of stage `$i`,
// updating `$e` in place. Callers alternate the roles of the two state
// registers between invocations, which is why `$abcd`/`$e` are idents.
macro_rules! schedule_rounds4 {
    (
        $abcd:ident, $e:ident,
        $m0:expr, $m1:expr, $m2:expr, $m3:expr, $m4:expr,
        $i:expr
    ) => {
        $m4 = schedule!($m0, $m1, $m2, $m3);
        $e = rounds4!($abcd, $e, $m4, $i);
    };
}
33
/// SHA-1 compression function using the x86 SHA-NI instruction set.
///
/// Folds each 64-byte `block` into the five-word chaining `state`
/// (a, b, c, d in `state[0..4]`, e in `state[4]`).
///
/// # Safety
///
/// The caller must ensure the CPU supports the `sha`, `sse2`, `ssse3`,
/// and `sse4.1` target features before calling.
#[target_feature(enable = "sha,sse2,ssse3,sse4.1")]
#[allow(unsafe_op_in_unsafe_fn)]
pub(crate) unsafe fn compress(state: &mut [u32; 5], blocks: &[[u8; 64]]) {
    // Byte-shuffle pattern for PSHUFB: reverses the bytes of each loaded
    // 16-byte lane so the big-endian message words land in the lane order
    // the SHA instructions expect.
    #[allow(non_snake_case)]
    let MASK: __m128i = _mm_set_epi64x(0x0001_0203_0405_0607, 0x0809_0A0B_0C0D_0E0F);

    // Load a,b,c,d and reverse the four 32-bit words (0b00011011 selects
    // lanes 3,2,1,0) to match the ABCD ordering SHA1RNDS4 operates on.
    let mut state_abcd = _mm_loadu_si128(state.as_ptr().cast());
    state_abcd = _mm_shuffle_epi32(state_abcd, 0b00011011);
    // `e` lives alone in the top 32-bit lane; the other lanes stay zero.
    let mut state_e = _mm_set_epi32(state[4] as i32, 0, 0, 0);

    for block in blocks {
        let block_ptr: *const __m128i = block.as_ptr().cast();

        // Load the 16 message words as four 4-word vectors, byte-swapped
        // from big-endian via MASK.
        let mut w0 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.offset(0)), MASK);
        let mut w1 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.offset(1)), MASK);
        let mut w2 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.offset(2)), MASK);
        let mut w3 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.offset(3)), MASK);
        // Assigned by the first schedule_rounds4! below; declared here so the
        // five w-registers can rotate through the schedule.
        #[allow(clippy::needless_late_init)]
        let mut w4;

        // Working copies of the chaining state for this block. h0/h1 swap
        // roles (abcd-state vs. e+rounds accumulator) after every 4-round
        // group, so the argument order alternates in the calls below.
        let mut h0 = state_abcd;
        let mut h1 = _mm_add_epi32(state_e, w0);

        // Rounds 0..20 (selector 0). The very first group adds w0 directly
        // (done in h1 above) instead of going through SHA1NEXTE.
        h1 = _mm_sha1rnds4_epu32(h0, h1, 0);
        h0 = rounds4!(h1, h0, w1, 0);
        h1 = rounds4!(h0, h1, w2, 0);
        h0 = rounds4!(h1, h0, w3, 0);
        schedule_rounds4!(h0, h1, w0, w1, w2, w3, w4, 0);

        // Rounds 20..40 (selector 1); message schedule keeps rotating
        // through w0..w4.
        schedule_rounds4!(h1, h0, w1, w2, w3, w4, w0, 1);
        schedule_rounds4!(h0, h1, w2, w3, w4, w0, w1, 1);
        schedule_rounds4!(h1, h0, w3, w4, w0, w1, w2, 1);
        schedule_rounds4!(h0, h1, w4, w0, w1, w2, w3, 1);
        schedule_rounds4!(h1, h0, w0, w1, w2, w3, w4, 1);

        // Rounds 40..60 (selector 2).
        schedule_rounds4!(h0, h1, w1, w2, w3, w4, w0, 2);
        schedule_rounds4!(h1, h0, w2, w3, w4, w0, w1, 2);
        schedule_rounds4!(h0, h1, w3, w4, w0, w1, w2, 2);
        schedule_rounds4!(h1, h0, w4, w0, w1, w2, w3, 2);
        schedule_rounds4!(h0, h1, w0, w1, w2, w3, w4, 2);

        // Rounds 60..80 (selector 3).
        schedule_rounds4!(h1, h0, w1, w2, w3, w4, w0, 3);
        schedule_rounds4!(h0, h1, w2, w3, w4, w0, w1, 3);
        schedule_rounds4!(h1, h0, w3, w4, w0, w1, w2, 3);
        schedule_rounds4!(h0, h1, w4, w0, w1, w2, w3, 3);
        schedule_rounds4!(h1, h0, w0, w1, w2, w3, w4, 3);

        // Davies–Meyer feed-forward: add this block's result back into the
        // chaining state. SHA1NEXTE performs the required rotate-and-add
        // for the `e` word.
        state_abcd = _mm_add_epi32(state_abcd, h0);
        state_e = _mm_sha1nexte_epu32(h1, state_e);
    }

    // Undo the word reversal and store a,b,c,d; `e` is extracted from the
    // top lane where it was kept throughout.
    state_abcd = _mm_shuffle_epi32(state_abcd, 0b00011011);
    _mm_storeu_si128(state.as_mut_ptr().cast(), state_abcd);
    state[4] = _mm_extract_epi32(state_e, 3) as u32;
}