1use super::Adler32Imp;
2
/// Resolves the WebAssembly SIMD128 update implementation, if one was compiled in.
///
/// Delegates to `get_imp_inner`, which is selected at compile time: the result is
/// `Some` only when the crate is built with the `simd128` target feature for a
/// supported wasm architecture, and `None` otherwise.
pub fn get_imp() -> Option<Adler32Imp> {
    get_imp_inner()
}
7
/// SIMD-enabled variant: hands out `imp::update`.
///
/// Compiled only when the `simd128` target feature is enabled and the target is
/// `wasm32` (or `wasm64` together with the crate's `nightly` feature). The gate
/// matches the one on `mod imp`, so `imp::update` exists whenever this does.
#[inline]
#[cfg(all(
    target_feature = "simd128",
    any(
        target_arch = "wasm32",
        all(feature = "nightly", target_arch = "wasm64")
    )
))]
fn get_imp_inner() -> Option<Adler32Imp> {
    Some(imp::update)
}
19
/// Fallback variant: no SIMD128 implementation is available.
///
/// Compiled for every configuration that the SIMD-enabled variant's `cfg` gate
/// rejects (this gate is its exact negation), so exactly one `get_imp_inner`
/// exists in any build.
#[inline]
#[cfg(not(all(
    target_feature = "simd128",
    any(
        target_arch = "wasm32",
        all(feature = "nightly", target_arch = "wasm64")
    )
)))]
fn get_imp_inner() -> Option<Adler32Imp> {
    None
}
31
/// Adler-32 update routine built on the WebAssembly `simd128` intrinsics.
///
/// The module is only compiled when `simd128` is statically enabled for a
/// supported wasm architecture, so every intrinsic used here is available.
#[cfg(all(
    target_feature = "simd128",
    any(
        target_arch = "wasm32",
        all(feature = "nightly", target_arch = "wasm64")
    )
))]
mod imp {
    /// Modulus applied to both Adler-32 running sums.
    const MOD: u32 = 65521;
    /// Byte budget between modular reductions (the value zlib uses so that the
    /// 32-bit accumulators cannot overflow before the next `% MOD`).
    const NMAX: usize = 5552;
    /// Bytes consumed per SIMD iteration: two 16-byte `v128` vectors.
    const BLOCK_SIZE: usize = 32;
    /// `NMAX` rounded down to a whole number of blocks.
    const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE;

    #[cfg(target_arch = "wasm32")]
    use core::arch::wasm32::*;
    #[cfg(target_arch = "wasm64")]
    use core::arch::wasm64::*;

    /// Feeds `data` into the running sums `a` and `b` and returns the new pair.
    pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
        update_imp(a, b, data)
    }

    /// Processes `data` in `CHUNK_SIZE` pieces, then the shorter tail.
    #[inline]
    #[target_feature(enable = "simd128")]
    fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
        let mut a = u32::from(a);
        let mut b = u32::from(b);

        let chunks = data.chunks_exact(CHUNK_SIZE);
        let remainder = chunks.remainder();
        for chunk in chunks {
            update_chunk_block(&mut a, &mut b, chunk);
        }

        // Always runs, even for an empty tail, so both sums end up reduced.
        update_block(&mut a, &mut b, remainder);

        // Both sums were just reduced modulo `MOD` (< 2^16); the casts are lossless.
        (a as u16, b as u16)
    }

    /// Consumes one full `CHUNK_SIZE` chunk and reduces both sums modulo `MOD`.
    fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
        debug_assert_eq!(
            chunk.len(),
            CHUNK_SIZE,
            "Unexpected chunk size (expected {}, got {})",
            CHUNK_SIZE,
            chunk.len()
        );

        // `CHUNK_SIZE` is a multiple of `BLOCK_SIZE`, so nothing is left over here.
        reduce_add_blocks(a, b, chunk);

        *a %= MOD;
        *b %= MOD;
    }

    /// Consumes a chunk of at most `CHUNK_SIZE` bytes: whole blocks go through
    /// SIMD, the remaining bytes through the scalar recurrence, and both sums
    /// are then reduced modulo `MOD`.
    fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
        debug_assert!(
            chunk.len() <= CHUNK_SIZE,
            "Unexpected chunk size (expected <= {}, got {})",
            CHUNK_SIZE,
            chunk.len()
        );

        let tail = reduce_add_blocks(a, b, chunk);
        for &byte in tail {
            *a += u32::from(byte);
            *b += *a;
        }

        *a %= MOD;
        *b %= MOD;
    }

    /// Folds every whole `BLOCK_SIZE` block of `chunk` into `a` and `b` without
    /// any modular reduction, and returns the bytes that did not fill a block.
    ///
    /// For one block `x[0..32]` the scalar recurrence amounts to
    /// `b += 32 * a + sum((32 - i) * x[i])` followed by `a += sum(x[i])`.
    /// `p_v` collects the value `a` had before each block (scaled by 32 at the
    /// end), `a_v` the byte sums, and `b_v` the weighted byte sums.
    #[inline(always)]
    fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] {
        if chunk.len() < BLOCK_SIZE {
            return chunk;
        }

        let blocks = chunk.chunks_exact(BLOCK_SIZE);
        let blocks_remainder = blocks.remainder();

        let weight_hi_v = get_weight_hi();
        let weight_lo_v = get_weight_lo();

        // The incoming `a` contributes once per block to the "a before block" sum.
        let mut p_v = u32x4(*a * blocks.len() as u32, 0, 0, 0);
        let mut a_v = u32x4(0, 0, 0, 0);
        let mut b_v = u32x4(*b, 0, 0, 0);

        for block in blocks {
            let block_ptr = block.as_ptr() as *const v128;
            // SAFETY: `chunks_exact(BLOCK_SIZE)` yields slices of exactly 32 bytes,
            // so 16 bytes are readable at `block_ptr`; `read_unaligned` imposes no
            // alignment requirement and every bit pattern is a valid `v128`.
            let v_lo = unsafe { block_ptr.read_unaligned() };
            // SAFETY: same 32-byte slice; `add(1)` advances by 16 bytes, so the
            // read covers bytes 16..32, which are still in bounds.
            let v_hi = unsafe { block_ptr.add(1).read_unaligned() };

            // Record the byte sums accumulated before this block.
            p_v = u32x4_add(p_v, a_v);

            a_v = u32x4_add(a_v, u32x4_extadd_quarters_u8x16(v_lo));
            let mad = i32x4_dot_i8x16(v_lo, weight_lo_v);
            b_v = u32x4_add(b_v, mad);

            a_v = u32x4_add(a_v, u32x4_extadd_quarters_u8x16(v_hi));
            let mad = i32x4_dot_i8x16(v_hi, weight_hi_v);
            b_v = u32x4_add(b_v, mad);
        }

        // Shift by 5 multiplies by `BLOCK_SIZE` (32).
        b_v = u32x4_add(b_v, u32x4_shl(p_v, 5));

        *a += reduce_add(a_v);
        // `b_v` was seeded with the incoming `b`, hence assignment, not `+=`.
        *b = reduce_add(b_v);

        blocks_remainder
    }

    /// Multiplies the 16 byte lanes of `a` and `b` pairwise and sums each group
    /// of four adjacent products into one 32-bit lane.
    ///
    /// Bytes are zero-extended to 16 bits before the signed 16-bit dot product.
    #[inline(always)]
    fn i32x4_dot_i8x16(a: v128, b: v128) -> v128 {
        let a_lo = u16x8_extend_low_u8x16(a);
        let a_hi = u16x8_extend_high_u8x16(a);

        let b_lo = u16x8_extend_low_u8x16(b);
        let b_hi = u16x8_extend_high_u8x16(b);

        let lo = i32x4_dot_i16x8(a_lo, b_lo);
        let hi = i32x4_dot_i16x8(a_hi, b_hi);

        i32x4_add(lo, hi)
    }

    /// Sums each group of four adjacent byte lanes into one 32-bit lane.
    #[inline(always)]
    fn u32x4_extadd_quarters_u8x16(a: v128) -> v128 {
        u32x4_extadd_pairwise_u16x8(u16x8_extadd_pairwise_u8x16(a))
    }

    /// Horizontal wrapping sum of the four 32-bit lanes of `v`.
    ///
    /// Uses the safe lane-extraction intrinsic rather than transmuting the
    /// vector to an array, so no `unsafe` is needed.
    #[inline(always)]
    fn reduce_add(v: v128) -> u32 {
        u32x4_extract_lane::<0>(v)
            .wrapping_add(u32x4_extract_lane::<1>(v))
            .wrapping_add(u32x4_extract_lane::<2>(v))
            .wrapping_add(u32x4_extract_lane::<3>(v))
    }

    /// Weights for the first 16 bytes of a block (32 down to 17).
    #[inline(always)]
    fn get_weight_lo() -> v128 {
        u8x16(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
        )
    }

    /// Weights for the last 16 bytes of a block (16 down to 1).
    #[inline(always)]
    fn get_weight_hi() -> v128 {
        u8x16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)
    }
}
184
#[cfg(test)]
mod tests {
    use rand::Rng;

    /// Size of the largest buffer fed to the implementation.
    const MAX_LEN: usize = 512 * 1024;

    /// All-zero inputs of increasing length, starting with the empty slice.
    #[test]
    fn zeroes() {
        assert_prefixes_eq(&[0; MAX_LEN], &[0, 1, 2, 100, 1024, MAX_LEN]);
    }

    /// All-one inputs of increasing length, starting with the empty slice.
    #[test]
    fn ones() {
        assert_prefixes_eq(&[1; MAX_LEN], &[0, 1, 2, 100, 1024, MAX_LEN]);
    }

    /// Random bytes, checked at several prefix lengths of one buffer.
    #[test]
    fn random() {
        let mut random = [0u8; MAX_LEN];
        rand::thread_rng().fill(&mut random[..]);

        assert_prefixes_eq(&random, &[1, 100, 1024, MAX_LEN]);
    }

    /// The short ASCII sample "Wikipedia".
    #[test]
    fn wiki() {
        assert_sum_eq(b"Wikipedia");
    }

    /// Runs `assert_sum_eq` on `data[..len]` for each requested length, in order.
    fn assert_prefixes_eq(data: &[u8], lens: &[usize]) {
        for &len in lens {
            assert_sum_eq(&data[..len]);
        }
    }

    /// Compares the implementation under test with `adler::adler32_slice`.
    ///
    /// Does nothing when `get_imp` reports that no implementation is available.
    fn assert_sum_eq(data: &[u8]) {
        let update = match super::get_imp() {
            Some(update) => update,
            None => return,
        };

        let (a, b) = update(1, 0, data);
        let left = (u32::from(b) << 16) | u32::from(a);
        let right = adler::adler32_slice(data);

        assert_eq!(left, right, "len({})", data.len());
    }
}
235}