Revision d650574e libavcodec/arm/mdct_neon.S
libavcodec/arm/mdct_neon.S  

169  169 
sub r9, r7, #16 @ in4d 
170  170 
add r2, r7, lr, lsl #1 @ in3u 
171  171 
add r8, r9, lr, lsl #1 @ in3d 
172 
add r5, r4, lr, lsl #1 

173 
sub r5, r5, #16 

174 
sub r3, r3, #4 

172  175 
mov r12, #16 
173  176  
174 
vld2.32 {d16,d18},[r9,:128],r12 @ x,x in4d1,in4d0


175 
vld2.32 {d17,d19},[r8,:128],r12 @ x,x in3d1,in3d0


176 
vld2.32 {d20,d21},[r7,:128]! @ in4u0,in4u1 x,x


177 
vld2.32 {d16,d18},[r9,:128],r12 @ in0u0,in0u1 in4d1,in4d0


178 
vld2.32 {d17,d19},[r8,:128],r12 @ in2u0,in2u1 in3d1,in3d0


179 
vld2.32 {d0, d2}, [r7,:128]! @ in4u0,in4u1 in2d1,in2d0


177  180 
vrev64.32 q9, q9 @ in4d0,in4d1 in3d0,in3d1 
178 
vld2.32 {d0, d1}, [r2,:128]! @ in3u0,in3u1 x,x 

179 
vsub.f32 d20, d18, d20 @ in4d-in4u I 

180 
vld2.32 {d2,d3}, [r4,:128]! @ c0,c1 s0,s1 

181 
vadd.f32 d0, d0, d19 @ in3u+in3d R 

181 
vld2.32 {d1, d3}, [r2,:128]! @ in3u0,in3u1 in1d1,in1d0 

182 
vsub.f32 d0, d18, d0 @ in4d-in4u I 

183 
vld2.32 {d20,d21},[r4,:128]! @ c0,c1 s0,s1 

184 
vrev64.32 q1, q1 @ in2d0,in2d1 in1d0,in1d1 

185 
vld2.32 {d30,d31},[r5,:128],r12 @ c2,c3 s2,s3 

186 
vadd.f32 d1, d1, d19 @ in3u+in3d R 

187 
vsub.f32 d16, d16, d2 @ in0u-in2d R 

188 
vadd.f32 d17, d17, d3 @ in2u+in1d I 

182  189 
1: 
183 
vmul.f32 d7, d20, d3 @ I*s 

184 
vmul.f32 d6, d0, d2 @ R*c 

185 
ldr r6, [r3], #4 

186 
vmul.f32 d4, d0, d3 @ R*s 

187 
vmul.f32 d5, d20, d2 @ I*c 

190 
vmul.f32 d7, d0, d21 @ I*s 

191 
ldr r10, [r3, lr, lsr #1] 

192 
vmul.f32 d6, d1, d20 @ R*c 

193 
ldr r6, [r3, #4]! 

194 
vmul.f32 d4, d1, d21 @ R*s 

195 
vmul.f32 d5, d0, d20 @ I*c 

196 
vmul.f32 d24, d16, d30 @ R*c 

197 
vmul.f32 d25, d17, d31 @ I*s 

198 
vmul.f32 d22, d16, d31 @ R*s 

199 
vmul.f32 d23, d17, d30 @ I*c 

188  200 
subs lr, lr, #16 
189  201 
vsub.f32 d6, d6, d7 @ R*c-I*s 
190  202 
vadd.f32 d7, d4, d5 @ R*s+I*c 
191 
uxth r10, r6, ror #16 

192 
uxth r6, r6 

193 
add r10, r1, r10, lsl #3 

194 
add r6, r1, r6, lsl #3 

203 
vsub.f32 d24, d25, d24 @ I*s-R*c 

204 
vadd.f32 d25, d22, d23 @ R*s-I*c 

195  205 
beq 1f 
196 
vld2.32 {d16,d18},[r9,:128],r12 @ x,x in4d1,in4d0 

197 
vld2.32 {d17,d19},[r8,:128],r12 @ x,x in3d1,in3d0 

206 
mov r12, #16 

207 
vld2.32 {d16,d18},[r9,:128],r12 @ in0u0,in0u1 in4d1,in4d0 

208 
vld2.32 {d17,d19},[r8,:128],r12 @ in2u0,in2u1 in3d1,in3d0 

198  209 
vneg.f32 d7, d7 @ R*s-I*c 
199 
vld2.32 {d20,d21},[r7,:128]! @ in4u0,in4u1 x,x


210 
vld2.32 {d0, d2}, [r7,:128]! @ in4u0,in4u1 in2d1,in2d0


200  211 
vrev64.32 q9, q9 @ in4d0,in4d1 in3d0,in3d1 
201 
vld2.32 {d0, d1}, [r2,:128]! @ in3u0,in3u1 x,x 

202 
vsub.f32 d20, d18, d20 @ in4d-in4u I 

203 
vld2.32 {d2,d3}, [r4,:128]! @ c0,c1 s0,s1 

204 
vadd.f32 d0, d0, d19 @ in3u+in3d R 

212 
vld2.32 {d1, d3}, [r2,:128]! @ in3u0,in3u1 in1d1,in1d0 

213 
vsub.f32 d0, d18, d0 @ in4d-in4u I 

214 
vld2.32 {d20,d21},[r4,:128]! @ c0,c1 s0,s1 

215 
vrev64.32 q1, q1 @ in2d0,in2d1 in1d0,in1d1 

216 
vld2.32 {d30,d31},[r5,:128],r12 @ c2,c3 s2,s3 

217 
vadd.f32 d1, d1, d19 @ in3u+in3d R 

218 
vsub.f32 d16, d16, d2 @ in0u-in2d R 

219 
vadd.f32 d17, d17, d3 @ in2u+in1d I 

220 
uxth r12, r6, ror #16 

221 
uxth r6, r6 

222 
add r12, r1, r12, lsl #3 

223 
add r6, r1, r6, lsl #3 

205  224 
vst2.32 {d6[0],d7[0]}, [r6,:64] 
206 
vst2.32 {d6[1],d7[1]}, [r10,:64] 

225 
vst2.32 {d6[1],d7[1]}, [r12,:64] 

226 
uxth r6, r10, ror #16 

227 
uxth r10, r10 

228 
add r6 , r1, r6, lsl #3 

229 
add r10, r1, r10, lsl #3 

230 
vst2.32 {d24[0],d25[0]},[r10,:64] 

231 
vst2.32 {d24[1],d25[1]},[r6,:64] 

207  232 
b 1b 
208  233 
1: 
209  234 
vneg.f32 d7, d7 @ R*s-I*c 
210 
vst2.32 {d6[0],d7[0]}, [r6,:64] 

211 
vst2.32 {d6[1],d7[1]}, [r10,:64] 

212  
213 
mov r12, #1 

214 
ldr lr, [r0, #28] @ mdct_bits 

215 
lsl lr, r12, lr @ n = 1 << nbits 

216 
sub r8, r2, #16 @ in1d 

217 
add r2, r9, #16 @ in0u 

218 
sub r9, r7, #16 @ in2d 

219 
mov r12, #16 

220  
221 
vld2.32 {d16,d18},[r9,:128],r12 @ x,x in2d1,in2d0 

222 
vld2.32 {d17,d19},[r8,:128],r12 @ x,x in1d1,in1d0 

223 
vld2.32 {d20,d21},[r7,:128]! @ in2u0,in2u1 x,x 

224 
vrev64.32 q9, q9 @ in2d0,in2d1 in1d0,in1d1 

225 
vld2.32 {d0, d1}, [r2,:128]! @ in0u0,in0u1 x,x 

226 
vsub.f32 d0, d0, d18 @ in0u-in2d R 

227 
vld2.32 {d2,d3}, [r4,:128]! @ c0,c1 s0,s1 

228 
vadd.f32 d20, d20, d19 @ in2u+in1d I 

229 
1: 

230 
vmul.f32 d6, d0, d2 @ R*c 

231 
vmul.f32 d7, d20, d3 @ I*s 

232 
ldr r6, [r3], #4 

233 
vmul.f32 d4, d0, d3 @ R*s 

234 
vmul.f32 d5, d20, d2 @ I*c 

235 
subs lr, lr, #16 

236 
vsub.f32 d6, d7, d6 @ I*s-R*c 

237 
vadd.f32 d7, d4, d5 @ R*s-I*c 

238 
uxth r10, r6, ror #16 

235 
uxth r12, r6, ror #16 

239  236 
uxth r6, r6 
240 
add r10, r1, r10, lsl #3


237 
add r12, r1, r12, lsl #3


241  238 
add r6, r1, r6, lsl #3 
242 
beq 1f 

243 
vld2.32 {d16,d18},[r9,:128],r12 @ x,x in2d1,in2d0 

244 
vld2.32 {d17,d19},[r8,:128],r12 @ x,x in1d1,in1d0 

245 
vld2.32 {d20,d21},[r7,:128]! @ in2u0,in2u1 x,x 

246 
vrev64.32 q9, q9 @ in2d0,in2d1 in1d0,in1d1 

247 
vld2.32 {d0, d1}, [r2,:128]! @ in0u0,in0u1 x,x 

248 
vsub.f32 d0, d0, d18 @ in0u-in2d R 

249 
vld2.32 {d2,d3}, [r4,:128]! @ c0,c1 s0,s1 

250 
vadd.f32 d20, d20, d19 @ in2u+in1d I 

251 
vst2.32 {d6[0],d7[0]}, [r6,:64] 

252 
vst2.32 {d6[1],d7[1]}, [r10,:64] 

253 
b 1b 

254 
1: 

255  239 
vst2.32 {d6[0],d7[0]}, [r6,:64] 
256 
vst2.32 {d6[1],d7[1]}, [r10,:64] 

240 
vst2.32 {d6[1],d7[1]}, [r12,:64] 

241 
uxth r6, r10, ror #16 

242 
uxth r10, r10 

243 
add r6 , r1, r6, lsl #3 

244 
add r10, r1, r10, lsl #3 

245 
vst2.32 {d24[0],d25[0]},[r10,:64] 

246 
vst2.32 {d24[1],d25[1]},[r6,:64] 

257  247  
258  248 
mov r4, r0 
259  249 
mov r6, r1 
Also available in: Unified diff