## ffmpeg / libavcodec / jrevdct.c @ d771bcae

History | View | Annotate | Download (33 KB)

1 | de6d9b64 | Fabrice Bellard | ```
/*
``` |
---|---|---|---|

2 | ```
* jrevdct.c
``` |
||

3 | ```
*
``` |
||

4 | ```
* Copyright (C) 1991, 1992, Thomas G. Lane.
``` |
||

5 | ```
* This file is part of the Independent JPEG Group's software.
``` |
||

6 | ```
* For conditions of distribution and use, see the accompanying README file.
``` |
||

7 | ```
*
``` |
||

8 | ```
* This file contains the basic inverse-DCT transformation subroutine.
``` |
||

9 | ```
*
``` |
||

10 | ```
* This implementation is based on an algorithm described in
``` |
||

11 | ```
* C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
``` |
||

12 | ```
* Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
``` |
||

13 | ```
* Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
``` |
||

14 | ```
* The primary algorithm described there uses 11 multiplies and 29 adds.
``` |
||

15 | ```
* We use their alternate method with 12 multiplies and 32 adds.
``` |
||

16 | ```
* The advantage of this method is that no data path contains more than one
``` |
||

17 | ```
* multiplication; this allows a very simple and accurate implementation in
``` |
||

18 | ```
* scaled fixed-point arithmetic, with a minimal number of shifts.
``` |
||

19 | ```
*
``` |
||

20 | ```
* I've made lots of modifications to attempt to take advantage of the
``` |
||

21 | ```
* sparse nature of the DCT matrices we're getting. Although the logic
``` |
||

22 | ```
* is cumbersome, it's straightforward and the resulting code is much
``` |
||

23 | ```
* faster.
``` |
||

24 | ```
*
``` |
||

25 | ```
* A better way to do this would be to pass in the DCT block as a sparse
``` |
||

26 | ```
* matrix, perhaps with the different cases encoded.
``` |
||

27 | ```
*/
``` |
||

28 | #include "common.h" |
||

29 | #include "dsputil.h" |
||

30 | |||

31 | ```
#define EIGHT_BIT_SAMPLES
``` |
||

32 | |||

33 | #define DCTSIZE 8 |
||

34 | #define DCTSIZE2 64 |
||

35 | |||

36 | ```
#define GLOBAL
``` |
||

37 | |||

38 | ```
#define RIGHT_SHIFT(x, n) ((x) >> (n))
``` |
||

39 | |||

40 | ```
typedef DCTELEM DCTBLOCK[DCTSIZE2];
``` |
||

41 | |||

42 | #define CONST_BITS 13 |
||

43 | |||

44 | ```
/*
``` |
||

45 | ```
* This routine is specialized to the case DCTSIZE = 8.
``` |
||

46 | ```
*/
``` |
||

47 | |||

48 | #if DCTSIZE != 8 |
||

49 | Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ |
||

50 | ```
#endif
``` |
||

51 | |||

52 | |||

53 | ```
/*
``` |
||

54 | ```
* A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT
``` |
||

55 | ```
* on each column. Direct algorithms are also available, but they are
``` |
||

56 | ```
* much more complex and seem not to be any faster when reduced to code.
``` |
||

57 | ```
*
``` |
||

58 | ```
* The details of the scaling are as follows:
``` |
||

59 | ```
*
``` |
||

60 | ```
* Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
``` |
||

61 | ```
* larger than the true IDCT outputs. The final outputs are therefore
``` |
||

62 | ```
* a factor of N larger than desired; since N=8 this can be cured by
``` |
||

63 | ```
* a simple right shift at the end of the algorithm. The advantage of
``` |
||

64 | ```
* this arrangement is that we save two multiplications per 1-D IDCT,
``` |
||

65 | ```
* because the y0 and y4 inputs need not be divided by sqrt(N).
``` |
||

66 | ```
*
``` |
||

67 | ```
* We have to do addition and subtraction of the integer inputs, which
``` |
||

68 | ```
* is no problem, and multiplication by fractional constants, which is
``` |
||

69 | ```
* a problem to do in integer arithmetic. We multiply all the constants
``` |
||

70 | ```
* by CONST_SCALE and convert them to integer constants (thus retaining
``` |
||

71 | ```
* CONST_BITS bits of precision in the constants). After doing a
``` |
||

72 | ```
* multiplication we have to divide the product by CONST_SCALE, with proper
``` |
||

73 | ```
* rounding, to produce the correct output. This division can be done
``` |
||

74 | ```
* cheaply as a right shift of CONST_BITS bits. We postpone shifting
``` |
||

75 | ```
* as long as possible so that partial sums can be added together with
``` |
||

76 | ```
* full fractional precision.
``` |
||

77 | ```
*
``` |
||

78 | ```
* The outputs of the first pass are scaled up by PASS1_BITS bits so that
``` |
||

79 | ```
* they are represented to better-than-integral precision. These outputs
``` |
||

80 | ```
* require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
``` |
||

81 | ```
* with the recommended scaling. (To scale up 12-bit sample data further, an
``` |
||

82 | ```
* intermediate int32 array would be needed.)
``` |
||

83 | ```
*
``` |
||

84 | ```
* To avoid overflow of the 32-bit intermediate results in pass 2, we must
``` |
||

85 | ```
* have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis
``` |
||

86 | ```
* shows that the values given below are the most effective.
``` |
||

87 | ```
*/
``` |
||

88 | |||

89 | ```
#ifdef EIGHT_BIT_SAMPLES
``` |
||

90 | #define PASS1_BITS 2 |
||

91 | ```
#else
``` |
||

92 | #define PASS1_BITS 1 /* lose a little precision to avoid overflow */ |
||

93 | ```
#endif
``` |
||

94 | |||

95 | #define ONE ((INT32) 1) |
||

96 | |||

97 | ```
#define CONST_SCALE (ONE << CONST_BITS)
``` |
||

98 | |||

99 | ```
/* Convert a positive real constant to an integer scaled by CONST_SCALE.
``` |
||

100 | ```
* IMPORTANT: if your compiler doesn't do this arithmetic at compile time,
``` |
||

101 | ```
* you will pay a significant penalty in run time. In that case, figure
``` |
||

102 | ```
* the correct integer constant values and insert them by hand.
``` |
||

103 | ```
*/
``` |
||

104 | |||

105 | ```
/* FIX is no longer used: all of the FIX_* constants below are precomputed. */
``` |
||

106 | #define FIX(x) ((INT32) ((x) * CONST_SCALE + 0.5)) |
||

107 | |||

108 | ```
/* Descale and correctly round an INT32 value that's scaled by N bits.
``` |
||

109 | ```
* We assume RIGHT_SHIFT rounds towards minus infinity, so adding
``` |
||

110 | ```
* the fudge factor is correct for either sign of X.
``` |
||

111 | ```
*/
``` |
||

112 | |||

113 | #define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n) |
||

114 | |||

115 | ```
/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
``` |
||

116 | ```
* For 8-bit samples with the recommended scaling, all the variable
``` |
||

117 | ```
* and constant values involved are no more than 16 bits wide, so a
``` |
||

118 | ```
* 16x16->32 bit multiply can be used instead of a full 32x32 multiply;
``` |
||

119 | ```
* this provides a useful speedup on many machines.
``` |
||

120 | ```
* There is no way to specify a 16x16->32 multiply in portable C, but
``` |
||

121 | ```
* some C compilers will do the right thing if you provide the correct
``` |
||

122 | ```
* combination of casts.
``` |
||

123 | ```
* NB: for 12-bit samples, a full 32-bit multiplication will be needed.
``` |
||

124 | ```
*/
``` |
||

125 | |||

126 | ```
#ifdef EIGHT_BIT_SAMPLES
``` |
||

127 | #ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */ |
||

128 | #define MULTIPLY(var,const) (((INT16) (var)) * ((INT16) (const))) |
||

129 | ```
#endif
``` |
||

130 | #ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */ |
||

131 | #define MULTIPLY(var,const) (((INT16) (var)) * ((INT32) (const))) |
||

132 | ```
#endif
``` |
||

133 | ```
#endif
``` |
||

134 | |||

135 | #ifndef MULTIPLY /* default definition */ |
||

136 | #define MULTIPLY(var,const) ((var) * (const)) |
||

137 | ```
#endif
``` |
||

138 | |||

139 | |||

140 | ```
/*
``` |
||

141 | ```
Unlike our decoder, where we approximate the FIXes, we need to use exact
``` |
||

142 | ```
ones here, or successive P-frames will drift too much with reference-frame coding.
``` |
||

143 | ```
*/
``` |
||

144 | #define FIX_0_211164243 1730 |
||

145 | #define FIX_0_275899380 2260 |
||

146 | #define FIX_0_298631336 2446 |
||

147 | #define FIX_0_390180644 3196 |
||

148 | #define FIX_0_509795579 4176 |
||

149 | #define FIX_0_541196100 4433 |
||

150 | #define FIX_0_601344887 4926 |
||

151 | #define FIX_0_765366865 6270 |
||

152 | #define FIX_0_785694958 6436 |
||

153 | #define FIX_0_899976223 7373 |
||

154 | #define FIX_1_061594337 8697 |
||

155 | #define FIX_1_111140466 9102 |
||

156 | #define FIX_1_175875602 9633 |
||

157 | #define FIX_1_306562965 10703 |
||

158 | #define FIX_1_387039845 11363 |
||

159 | #define FIX_1_451774981 11893 |
||

160 | #define FIX_1_501321110 12299 |
||

161 | #define FIX_1_662939225 13623 |
||

162 | #define FIX_1_847759065 15137 |
||

163 | #define FIX_1_961570560 16069 |
||

164 | #define FIX_2_053119869 16819 |
||

165 | #define FIX_2_172734803 17799 |
||

166 | #define FIX_2_562915447 20995 |
||

167 | #define FIX_3_072711026 25172 |
||

168 | |||

169 | ```
/*
``` |
||

170 | ```
* Perform the inverse DCT on one block of coefficients.
``` |
||

171 | ```
*/
``` |
||

172 | |||

173 | ```
void j_rev_dct(DCTBLOCK data)
``` |
||

174 | { |
||

175 | INT32 tmp0, tmp1, tmp2, tmp3; |
||

176 | INT32 tmp10, tmp11, tmp12, tmp13; |
||

177 | INT32 z1, z2, z3, z4, z5; |
||

178 | INT32 d0, d1, d2, d3, d4, d5, d6, d7; |
||

179 | ```
register DCTELEM *dataptr;
``` |
||

180 | ```
int rowctr;
``` |
||

181 | |||

182 | ```
/* Pass 1: process rows. */
``` |
||

183 | ```
/* Note results are scaled up by sqrt(8) compared to a true IDCT; */
``` |
||

184 | ```
/* furthermore, we scale the results by 2**PASS1_BITS. */
``` |
||

185 | |||

186 | dataptr = data; |
||

187 | |||

188 | for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { |
||

189 | ```
/* Due to quantization, we will usually find that many of the input
``` |
||

190 | ```
* coefficients are zero, especially the AC terms. We can exploit this
``` |
||

191 | ```
* by short-circuiting the IDCT calculation for any row in which all
``` |
||

192 | ```
* the AC terms are zero. In that case each output is equal to the
``` |
||

193 | ```
* DC coefficient (with scale factor as needed).
``` |
||

194 | ```
* With typical images and quantization tables, half or more of the
``` |
||

195 | ```
* row DCT calculations can be simplified this way.
``` |
||

196 | ```
*/
``` |
||

197 | |||

198 | register int *idataptr = (int*)dataptr; |
||

199 | |||

200 | ```
d0 = dataptr[0];
``` |
||

201 | ```
d1 = dataptr[1];
``` |
||

202 | ```
d2 = dataptr[2];
``` |
||

203 | ```
d3 = dataptr[3];
``` |
||

204 | ```
d4 = dataptr[4];
``` |
||

205 | ```
d5 = dataptr[5];
``` |
||

206 | ```
d6 = dataptr[6];
``` |
||

207 | ```
d7 = dataptr[7];
``` |
||

208 | |||

209 | if ((d1 == 0) && (idataptr[1] | idataptr[2] | idataptr[3]) == 0) { |
||

210 | ```
/* AC terms all zero */
``` |
||

211 | ```
if (d0) {
``` |
||

212 | ```
/* Compute a 32 bit value to assign. */
``` |
||

213 | DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS); |
||

214 | register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000); |
||

215 | |||

216 | ```
idataptr[0] = v;
``` |
||

217 | ```
idataptr[1] = v;
``` |
||

218 | ```
idataptr[2] = v;
``` |
||

219 | ```
idataptr[3] = v;
``` |
||

220 | } |
||

221 | |||

222 | ```
dataptr += DCTSIZE; /* advance pointer to next row */
``` |
||

223 | ```
continue;
``` |
||

224 | } |
||

225 | |||

226 | ```
/* Even part: reverse the even part of the forward DCT. */
``` |
||

227 | ```
/* The rotator is sqrt(2)*c(-6). */
``` |
||

228 | { |
||

229 | ```
if (d6) {
``` |
||

230 | ```
if (d4) {
``` |
||

231 | ```
if (d2) {
``` |
||

232 | ```
if (d0) {
``` |
||

233 | ```
/* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
``` |
||

234 | z1 = MULTIPLY(d2 + d6, FIX_0_541196100); |
||

235 | tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); |
||

236 | tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); |
||

237 | |||

238 | tmp0 = (d0 + d4) << CONST_BITS; |
||

239 | tmp1 = (d0 - d4) << CONST_BITS; |
||

240 | |||

241 | tmp10 = tmp0 + tmp3; |
||

242 | tmp13 = tmp0 - tmp3; |
||

243 | tmp11 = tmp1 + tmp2; |
||

244 | tmp12 = tmp1 - tmp2; |
||

245 | ```
} else {
``` |
||

246 | ```
/* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
``` |
||

247 | z1 = MULTIPLY(d2 + d6, FIX_0_541196100); |
||

248 | tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); |
||

249 | tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); |
||

250 | |||

251 | tmp0 = d4 << CONST_BITS; |
||

252 | |||

253 | tmp10 = tmp0 + tmp3; |
||

254 | tmp13 = tmp0 - tmp3; |
||

255 | tmp11 = tmp2 - tmp0; |
||

256 | tmp12 = -(tmp0 + tmp2); |
||

257 | } |
||

258 | ```
} else {
``` |
||

259 | ```
if (d0) {
``` |
||

260 | ```
/* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
``` |
||

261 | tmp2 = MULTIPLY(-d6, FIX_1_306562965); |
||

262 | tmp3 = MULTIPLY(d6, FIX_0_541196100); |
||

263 | |||

264 | tmp0 = (d0 + d4) << CONST_BITS; |
||

265 | tmp1 = (d0 - d4) << CONST_BITS; |
||

266 | |||

267 | tmp10 = tmp0 + tmp3; |
||

268 | tmp13 = tmp0 - tmp3; |
||

269 | tmp11 = tmp1 + tmp2; |
||

270 | tmp12 = tmp1 - tmp2; |
||

271 | ```
} else {
``` |
||

272 | ```
/* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
``` |
||

273 | tmp2 = MULTIPLY(-d6, FIX_1_306562965); |
||

274 | tmp3 = MULTIPLY(d6, FIX_0_541196100); |
||

275 | |||

276 | tmp0 = d4 << CONST_BITS; |
||

277 | |||

278 | tmp10 = tmp0 + tmp3; |
||

279 | tmp13 = tmp0 - tmp3; |
||

280 | tmp11 = tmp2 - tmp0; |
||

281 | tmp12 = -(tmp0 + tmp2); |
||

282 | } |
||

283 | } |
||

284 | ```
} else {
``` |
||

285 | ```
if (d2) {
``` |
||

286 | ```
if (d0) {
``` |
||

287 | ```
/* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
``` |
||

288 | z1 = MULTIPLY(d2 + d6, FIX_0_541196100); |
||

289 | tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); |
||

290 | tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); |
||

291 | |||

292 | tmp0 = d0 << CONST_BITS; |
||

293 | |||

294 | tmp10 = tmp0 + tmp3; |
||

295 | tmp13 = tmp0 - tmp3; |
||

296 | tmp11 = tmp0 + tmp2; |
||

297 | tmp12 = tmp0 - tmp2; |
||

298 | ```
} else {
``` |
||

299 | ```
/* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
``` |
||

300 | z1 = MULTIPLY(d2 + d6, FIX_0_541196100); |
||

301 | tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); |
||

302 | tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); |
||

303 | |||

304 | tmp10 = tmp3; |
||

305 | tmp13 = -tmp3; |
||

306 | tmp11 = tmp2; |
||

307 | tmp12 = -tmp2; |
||

308 | } |
||

309 | ```
} else {
``` |
||

310 | ```
if (d0) {
``` |
||

311 | ```
/* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
``` |
||

312 | tmp2 = MULTIPLY(-d6, FIX_1_306562965); |
||

313 | tmp3 = MULTIPLY(d6, FIX_0_541196100); |
||

314 | |||

315 | tmp0 = d0 << CONST_BITS; |
||

316 | |||

317 | tmp10 = tmp0 + tmp3; |
||

318 | tmp13 = tmp0 - tmp3; |
||

319 | tmp11 = tmp0 + tmp2; |
||

320 | tmp12 = tmp0 - tmp2; |
||

321 | ```
} else {
``` |
||

322 | ```
/* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
``` |
||

323 | tmp2 = MULTIPLY(-d6, FIX_1_306562965); |
||

324 | tmp3 = MULTIPLY(d6, FIX_0_541196100); |
||

325 | |||

326 | tmp10 = tmp3; |
||

327 | tmp13 = -tmp3; |
||

328 | tmp11 = tmp2; |
||

329 | tmp12 = -tmp2; |
||

330 | } |
||

331 | } |
||

332 | } |
||

333 | ```
} else {
``` |
||

334 | ```
if (d4) {
``` |
||

335 | ```
if (d2) {
``` |
||

336 | ```
if (d0) {
``` |
||

337 | ```
/* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
``` |
||

338 | tmp2 = MULTIPLY(d2, FIX_0_541196100); |
||

339 | tmp3 = MULTIPLY(d2, FIX_1_306562965); |
||

340 | |||

341 | tmp0 = (d0 + d4) << CONST_BITS; |
||

342 | tmp1 = (d0 - d4) << CONST_BITS; |
||

343 | |||

344 | tmp10 = tmp0 + tmp3; |
||

345 | tmp13 = tmp0 - tmp3; |
||

346 | tmp11 = tmp1 + tmp2; |
||

347 | tmp12 = tmp1 - tmp2; |
||

348 | ```
} else {
``` |
||

349 | ```
/* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
``` |
||

350 | tmp2 = MULTIPLY(d2, FIX_0_541196100); |
||

351 | tmp3 = MULTIPLY(d2, FIX_1_306562965); |
||

352 | |||

353 | tmp0 = d4 << CONST_BITS; |
||

354 | |||

355 | tmp10 = tmp0 + tmp3; |
||

356 | tmp13 = tmp0 - tmp3; |
||

357 | tmp11 = tmp2 - tmp0; |
||

358 | tmp12 = -(tmp0 + tmp2); |
||

359 | } |
||

360 | ```
} else {
``` |
||

361 | ```
if (d0) {
``` |
||

362 | ```
/* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
``` |
||

363 | tmp10 = tmp13 = (d0 + d4) << CONST_BITS; |
||

364 | tmp11 = tmp12 = (d0 - d4) << CONST_BITS; |
||

365 | ```
} else {
``` |
||

366 | ```
/* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
``` |
||

367 | tmp10 = tmp13 = d4 << CONST_BITS; |
||

368 | tmp11 = tmp12 = -tmp10; |
||

369 | } |
||

370 | } |
||

371 | ```
} else {
``` |
||

372 | ```
if (d2) {
``` |
||

373 | ```
if (d0) {
``` |
||

374 | ```
/* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
``` |
||

375 | tmp2 = MULTIPLY(d2, FIX_0_541196100); |
||

376 | tmp3 = MULTIPLY(d2, FIX_1_306562965); |
||

377 | |||

378 | tmp0 = d0 << CONST_BITS; |
||

379 | |||

380 | tmp10 = tmp0 + tmp3; |
||

381 | tmp13 = tmp0 - tmp3; |
||

382 | tmp11 = tmp0 + tmp2; |
||

383 | tmp12 = tmp0 - tmp2; |
||

384 | ```
} else {
``` |
||

385 | ```
/* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
``` |
||

386 | tmp2 = MULTIPLY(d2, FIX_0_541196100); |
||

387 | tmp3 = MULTIPLY(d2, FIX_1_306562965); |
||

388 | |||

389 | tmp10 = tmp3; |
||

390 | tmp13 = -tmp3; |
||

391 | tmp11 = tmp2; |
||

392 | tmp12 = -tmp2; |
||

393 | } |
||

394 | ```
} else {
``` |
||

395 | ```
if (d0) {
``` |
||

396 | ```
/* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
``` |
||

397 | tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS; |
||

398 | ```
} else {
``` |
||

399 | ```
/* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
``` |
||

400 | ```
tmp10 = tmp13 = tmp11 = tmp12 = 0;
``` |
||

401 | } |
||

402 | } |
||

403 | } |
||

404 | } |
||

405 | |||

406 | ```
/* Odd part per figure 8; the matrix is unitary and hence its
``` |
||

407 | ```
* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
``` |
||

408 | ```
*/
``` |
||

409 | |||

410 | ```
if (d7) {
``` |
||

411 | ```
if (d5) {
``` |
||

412 | ```
if (d3) {
``` |
||

413 | ```
if (d1) {
``` |
||

414 | ```
/* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
``` |
||

415 | z1 = d7 + d1; |
||

416 | z2 = d5 + d3; |
||

417 | z3 = d7 + d3; |
||

418 | z4 = d5 + d1; |
||

419 | z5 = MULTIPLY(z3 + z4, FIX_1_175875602); |
||

420 | |||

421 | tmp0 = MULTIPLY(d7, FIX_0_298631336); |
||

422 | tmp1 = MULTIPLY(d5, FIX_2_053119869); |
||

423 | tmp2 = MULTIPLY(d3, FIX_3_072711026); |
||

424 | tmp3 = MULTIPLY(d1, FIX_1_501321110); |
||

425 | z1 = MULTIPLY(-z1, FIX_0_899976223); |
||

426 | z2 = MULTIPLY(-z2, FIX_2_562915447); |
||

427 | z3 = MULTIPLY(-z3, FIX_1_961570560); |
||

428 | z4 = MULTIPLY(-z4, FIX_0_390180644); |
||

429 | |||

430 | z3 += z5; |
||

431 | z4 += z5; |
||

432 | |||

433 | tmp0 += z1 + z3; |
||

434 | tmp1 += z2 + z4; |
||

435 | tmp2 += z2 + z3; |
||

436 | tmp3 += z1 + z4; |
||

437 | ```
} else {
``` |
||

438 | ```
/* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
``` |
||

439 | z2 = d5 + d3; |
||

440 | z3 = d7 + d3; |
||

441 | z5 = MULTIPLY(z3 + d5, FIX_1_175875602); |
||

442 | |||

443 | tmp0 = MULTIPLY(d7, FIX_0_298631336); |
||

444 | tmp1 = MULTIPLY(d5, FIX_2_053119869); |
||

445 | tmp2 = MULTIPLY(d3, FIX_3_072711026); |
||

446 | z1 = MULTIPLY(-d7, FIX_0_899976223); |
||

447 | z2 = MULTIPLY(-z2, FIX_2_562915447); |
||

448 | z3 = MULTIPLY(-z3, FIX_1_961570560); |
||

449 | z4 = MULTIPLY(-d5, FIX_0_390180644); |
||

450 | |||

451 | z3 += z5; |
||

452 | z4 += z5; |
||

453 | |||

454 | tmp0 += z1 + z3; |
||

455 | tmp1 += z2 + z4; |
||

456 | tmp2 += z2 + z3; |
||

457 | tmp3 = z1 + z4; |
||

458 | } |
||

459 | ```
} else {
``` |
||

460 | ```
if (d1) {
``` |
||

461 | ```
/* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
``` |
||

462 | z1 = d7 + d1; |
||

463 | z4 = d5 + d1; |
||

464 | z5 = MULTIPLY(d7 + z4, FIX_1_175875602); |
||

465 | |||

466 | tmp0 = MULTIPLY(d7, FIX_0_298631336); |
||

467 | tmp1 = MULTIPLY(d5, FIX_2_053119869); |
||

468 | tmp3 = MULTIPLY(d1, FIX_1_501321110); |
||

469 | z1 = MULTIPLY(-z1, FIX_0_899976223); |
||

470 | z2 = MULTIPLY(-d5, FIX_2_562915447); |
||

471 | z3 = MULTIPLY(-d7, FIX_1_961570560); |
||

472 | z4 = MULTIPLY(-z4, FIX_0_390180644); |
||

473 | |||

474 | z3 += z5; |
||

475 | z4 += z5; |
||

476 | |||

477 | tmp0 += z1 + z3; |
||

478 | tmp1 += z2 + z4; |
||

479 | tmp2 = z2 + z3; |
||

480 | tmp3 += z1 + z4; |
||

481 | ```
} else {
``` |
||

482 | ```
/* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
``` |
||

483 | tmp0 = MULTIPLY(-d7, FIX_0_601344887); |
||

484 | z1 = MULTIPLY(-d7, FIX_0_899976223); |
||

485 | z3 = MULTIPLY(-d7, FIX_1_961570560); |
||

486 | tmp1 = MULTIPLY(-d5, FIX_0_509795579); |
||

487 | z2 = MULTIPLY(-d5, FIX_2_562915447); |
||

488 | z4 = MULTIPLY(-d5, FIX_0_390180644); |
||

489 | z5 = MULTIPLY(d5 + d7, FIX_1_175875602); |
||

490 | |||

491 | z3 += z5; |
||

492 | z4 += z5; |
||

493 | |||

494 | tmp0 += z3; |
||

495 | tmp1 += z4; |
||

496 | tmp2 = z2 + z3; |
||

497 | tmp3 = z1 + z4; |
||

498 | } |
||

499 | } |
||

500 | ```
} else {
``` |
||

501 | ```
if (d3) {
``` |
||

502 | ```
if (d1) {
``` |
||

503 | ```
/* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
``` |
||

504 | z1 = d7 + d1; |
||

505 | z3 = d7 + d3; |
||

506 | z5 = MULTIPLY(z3 + d1, FIX_1_175875602); |
||

507 | |||

508 | tmp0 = MULTIPLY(d7, FIX_0_298631336); |
||

509 | tmp2 = MULTIPLY(d3, FIX_3_072711026); |
||

510 | tmp3 = MULTIPLY(d1, FIX_1_501321110); |
||

511 | z1 = MULTIPLY(-z1, FIX_0_899976223); |
||

512 | z2 = MULTIPLY(-d3, FIX_2_562915447); |
||

513 | z3 = MULTIPLY(-z3, FIX_1_961570560); |
||

514 | z4 = MULTIPLY(-d1, FIX_0_390180644); |
||

515 | |||

516 | z3 += z5; |
||

517 | z4 += z5; |
||

518 | |||

519 | tmp0 += z1 + z3; |
||

520 | tmp1 = z2 + z4; |
||

521 | tmp2 += z2 + z3; |
||

522 | tmp3 += z1 + z4; |
||

523 | ```
} else {
``` |
||

524 | ```
/* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
``` |
||

525 | z3 = d7 + d3; |
||

526 | |||

527 | tmp0 = MULTIPLY(-d7, FIX_0_601344887); |
||

528 | z1 = MULTIPLY(-d7, FIX_0_899976223); |
||

529 | tmp2 = MULTIPLY(d3, FIX_0_509795579); |
||

530 | z2 = MULTIPLY(-d3, FIX_2_562915447); |
||

531 | z5 = MULTIPLY(z3, FIX_1_175875602); |
||

532 | z3 = MULTIPLY(-z3, FIX_0_785694958); |
||

533 | |||

534 | tmp0 += z3; |
||

535 | tmp1 = z2 + z5; |
||

536 | tmp2 += z3; |
||

537 | tmp3 = z1 + z5; |
||

538 | } |
||

539 | ```
} else {
``` |
||

540 | ```
if (d1) {
``` |
||

541 | ```
/* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
``` |
||

542 | z1 = d7 + d1; |
||

543 | z5 = MULTIPLY(z1, FIX_1_175875602); |
||

544 | |||

545 | z1 = MULTIPLY(z1, FIX_0_275899380); |
||

546 | z3 = MULTIPLY(-d7, FIX_1_961570560); |
||

547 | tmp0 = MULTIPLY(-d7, FIX_1_662939225); |
||

548 | z4 = MULTIPLY(-d1, FIX_0_390180644); |
||

549 | tmp3 = MULTIPLY(d1, FIX_1_111140466); |
||

550 | |||

551 | tmp0 += z1; |
||

552 | tmp1 = z4 + z5; |
||

553 | tmp2 = z3 + z5; |
||

554 | tmp3 += z1; |
||

555 | ```
} else {
``` |
||

556 | ```
/* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
``` |
||

557 | tmp0 = MULTIPLY(-d7, FIX_1_387039845); |
||

558 | tmp1 = MULTIPLY(d7, FIX_1_175875602); |
||

559 | tmp2 = MULTIPLY(-d7, FIX_0_785694958); |
||

560 | tmp3 = MULTIPLY(d7, FIX_0_275899380); |
||

561 | } |
||

562 | } |
||

563 | } |
||

564 | ```
} else {
``` |
||

565 | ```
if (d5) {
``` |
||

566 | ```
if (d3) {
``` |
||

567 | ```
if (d1) {
``` |
||

568 | ```
/* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
``` |
||

569 | z2 = d5 + d3; |
||

570 | z4 = d5 + d1; |
||

571 | z5 = MULTIPLY(d3 + z4, FIX_1_175875602); |
||

572 | |||

573 | tmp1 = MULTIPLY(d5, FIX_2_053119869); |
||

574 | tmp2 = MULTIPLY(d3, FIX_3_072711026); |
||

575 | tmp3 = MULTIPLY(d1, FIX_1_501321110); |
||

576 | z1 = MULTIPLY(-d1, FIX_0_899976223); |
||

577 | z2 = MULTIPLY(-z2, FIX_2_562915447); |
||

578 | z3 = MULTIPLY(-d3, FIX_1_961570560); |
||

579 | z4 = MULTIPLY(-z4, FIX_0_390180644); |
||

580 | |||

581 | z3 += z5; |
||

582 | z4 += z5; |
||

583 | |||

584 | tmp0 = z1 + z3; |
||

585 | tmp1 += z2 + z4; |
||

586 | tmp2 += z2 + z3; |
||

587 | tmp3 += z1 + z4; |
||

588 | ```
} else {
``` |
||

589 | ```
/* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
``` |
||

590 | z2 = d5 + d3; |
||

591 | |||

592 | z5 = MULTIPLY(z2, FIX_1_175875602); |
||

593 | tmp1 = MULTIPLY(d5, FIX_1_662939225); |
||

594 | z4 = MULTIPLY(-d5, FIX_0_390180644); |
||

595 | z2 = MULTIPLY(-z2, FIX_1_387039845); |
||

596 | tmp2 = MULTIPLY(d3, FIX_1_111140466); |
||

597 | z3 = MULTIPLY(-d3, FIX_1_961570560); |
||

598 | |||

599 | tmp0 = z3 + z5; |
||

600 | tmp1 += z2; |
||

601 | tmp2 += z2; |
||

602 | tmp3 = z4 + z5; |
||

603 | } |
||

604 | ```
} else {
``` |
||

605 | ```
if (d1) {
``` |
||

606 | ```
/* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
``` |
||

607 | z4 = d5 + d1; |
||

608 | |||

609 | z5 = MULTIPLY(z4, FIX_1_175875602); |
||

610 | z1 = MULTIPLY(-d1, FIX_0_899976223); |
||

611 | tmp3 = MULTIPLY(d1, FIX_0_601344887); |
||

612 | tmp1 = MULTIPLY(-d5, FIX_0_509795579); |
||

613 | z2 = MULTIPLY(-d5, FIX_2_562915447); |
||

614 | z4 = MULTIPLY(z4, FIX_0_785694958); |
||

615 | |||

616 | tmp0 = z1 + z5; |
||

617 | tmp1 += z4; |
||

618 | tmp2 = z2 + z5; |
||

619 | tmp3 += z4; |
||

620 | ```
} else {
``` |
||

621 | ```
/* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
``` |
||

622 | tmp0 = MULTIPLY(d5, FIX_1_175875602); |
||

623 | tmp1 = MULTIPLY(d5, FIX_0_275899380); |
||

624 | tmp2 = MULTIPLY(-d5, FIX_1_387039845); |
||

625 | tmp3 = MULTIPLY(d5, FIX_0_785694958); |
||

626 | } |
||

627 | } |
||

628 | ```
} else {
``` |
||

629 | ```
if (d3) {
``` |
||

630 | ```
if (d1) {
``` |
||

631 | ```
/* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
``` |
||

632 | z5 = d1 + d3; |
||

633 | tmp3 = MULTIPLY(d1, FIX_0_211164243); |
||

634 | tmp2 = MULTIPLY(-d3, FIX_1_451774981); |
||

635 | z1 = MULTIPLY(d1, FIX_1_061594337); |
||

636 | z2 = MULTIPLY(-d3, FIX_2_172734803); |
||

637 | z4 = MULTIPLY(z5, FIX_0_785694958); |
||

638 | z5 = MULTIPLY(z5, FIX_1_175875602); |
||

639 | |||

640 | tmp0 = z1 - z4; |
||

641 | tmp1 = z2 + z4; |
||

642 | tmp2 += z5; |
||

643 | tmp3 += z5; |
||

644 | ```
} else {
``` |
||

645 | ```
/* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
``` |
||

646 | tmp0 = MULTIPLY(-d3, FIX_0_785694958); |
||

647 | tmp1 = MULTIPLY(-d3, FIX_1_387039845); |
||

648 | tmp2 = MULTIPLY(-d3, FIX_0_275899380); |
||

649 | tmp3 = MULTIPLY(d3, FIX_1_175875602); |
||

650 | } |
||

651 | ```
} else {
``` |
||

652 | ```
if (d1) {
``` |
||

653 | ```
/* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
``` |
||

654 | tmp0 = MULTIPLY(d1, FIX_0_275899380); |
||

655 | tmp1 = MULTIPLY(d1, FIX_0_785694958); |
||

656 | tmp2 = MULTIPLY(d1, FIX_1_175875602); |
||

657 | tmp3 = MULTIPLY(d1, FIX_1_387039845); |
||

658 | ```
} else {
``` |
||

659 | ```
/* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
``` |
||

660 | ```
tmp0 = tmp1 = tmp2 = tmp3 = 0;
``` |
||

661 | } |
||

662 | } |
||

663 | } |
||

664 | } |
||

665 | } |
||

666 | ```
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
``` |
||

667 | |||

668 | ```
dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
``` |
||

669 | ```
dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
``` |
||

670 | ```
dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
``` |
||

671 | ```
dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
``` |
||

672 | ```
dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
``` |
||

673 | ```
dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
``` |
||

674 | ```
dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
``` |
||

675 | ```
dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
``` |
||

676 | |||

677 | ```
dataptr += DCTSIZE; /* advance pointer to next row */
``` |
||

678 | } |
||

679 | |||

680 | ```
/* Pass 2: process columns. */
``` |
||

681 | ```
/* Note that we must descale the results by a factor of 8 == 2**3, */
``` |
||

682 | ```
/* and also undo the PASS1_BITS scaling. */
``` |
||

683 | |||

684 | dataptr = data; |
||

685 | for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { |
||

686 | ```
/* Columns of zeroes can be exploited in the same way as we did with rows.
``` |
||

687 | ```
* However, the row calculation has created many nonzero AC terms, so the
``` |
||

688 | ```
* simplification applies less often (typically 5% to 10% of the time).
``` |
||

689 | ```
* On machines with very fast multiplication, it's possible that the
``` |
||

690 | ```
* test takes more time than it's worth. In that case this section
``` |
||

691 | ```
* may be commented out.
``` |
||

692 | ```
*/
``` |
||

693 | |||

694 | ```
d0 = dataptr[DCTSIZE*0];
``` |
||

695 | ```
d1 = dataptr[DCTSIZE*1];
``` |
||

696 | ```
d2 = dataptr[DCTSIZE*2];
``` |
||

697 | ```
d3 = dataptr[DCTSIZE*3];
``` |
||

698 | ```
d4 = dataptr[DCTSIZE*4];
``` |
||

699 | ```
d5 = dataptr[DCTSIZE*5];
``` |
||

700 | ```
d6 = dataptr[DCTSIZE*6];
``` |
||

701 | ```
d7 = dataptr[DCTSIZE*7];
``` |
||

702 | |||

703 | ```
/* Even part: reverse the even part of the forward DCT. */
``` |
||

704 | ```
/* The rotator is sqrt(2)*c(-6). */
``` |
||

705 | ```
if (d6) {
``` |
||

706 | ```
if (d4) {
``` |
||

707 | ```
if (d2) {
``` |
||

708 | ```
if (d0) {
``` |
||

709 | ```
/* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
``` |
||

710 | z1 = MULTIPLY(d2 + d6, FIX_0_541196100); |
||

711 | tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); |
||

712 | tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); |
||

713 | |||

714 | tmp0 = (d0 + d4) << CONST_BITS; |
||

715 | tmp1 = (d0 - d4) << CONST_BITS; |
||

716 | |||

717 | tmp10 = tmp0 + tmp3; |
||

718 | tmp13 = tmp0 - tmp3; |
||

719 | tmp11 = tmp1 + tmp2; |
||

720 | tmp12 = tmp1 - tmp2; |
||

721 | ```
} else {
``` |
||

722 | ```
/* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
``` |
||

723 | z1 = MULTIPLY(d2 + d6, FIX_0_541196100); |
||

724 | tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); |
||

725 | tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); |
||

726 | |||

727 | tmp0 = d4 << CONST_BITS; |
||

728 | |||

729 | tmp10 = tmp0 + tmp3; |
||

730 | tmp13 = tmp0 - tmp3; |
||

731 | tmp11 = tmp2 - tmp0; |
||

732 | tmp12 = -(tmp0 + tmp2); |
||

733 | } |
||

734 | ```
} else {
``` |
||

735 | ```
if (d0) {
``` |
||

736 | ```
/* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
``` |
||

737 | tmp2 = MULTIPLY(-d6, FIX_1_306562965); |
||

738 | tmp3 = MULTIPLY(d6, FIX_0_541196100); |
||

739 | |||

740 | tmp0 = (d0 + d4) << CONST_BITS; |
||

741 | tmp1 = (d0 - d4) << CONST_BITS; |
||

742 | |||

743 | tmp10 = tmp0 + tmp3; |
||

744 | tmp13 = tmp0 - tmp3; |
||

745 | tmp11 = tmp1 + tmp2; |
||

746 | tmp12 = tmp1 - tmp2; |
||

747 | ```
} else {
``` |
||

748 | ```
/* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
``` |
||

749 | tmp2 = MULTIPLY(-d6, FIX_1_306562965); |
||

750 | tmp3 = MULTIPLY(d6, FIX_0_541196100); |
||

751 | |||

752 | tmp0 = d4 << CONST_BITS; |
||

753 | |||

754 | tmp10 = tmp0 + tmp3; |
||

755 | tmp13 = tmp0 - tmp3; |
||

756 | tmp11 = tmp2 - tmp0; |
||

757 | tmp12 = -(tmp0 + tmp2); |
||

758 | } |
||

759 | } |
||

760 | ```
} else {
``` |
||

761 | ```
if (d2) {
``` |
||

762 | ```
if (d0) {
``` |
||

763 | ```
/* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
``` |
||

764 | z1 = MULTIPLY(d2 + d6, FIX_0_541196100); |
||

765 | tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); |
||

766 | tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); |
||

767 | |||

768 | tmp0 = d0 << CONST_BITS; |
||

769 | |||

770 | tmp10 = tmp0 + tmp3; |
||

771 | tmp13 = tmp0 - tmp3; |
||

772 | tmp11 = tmp0 + tmp2; |
||

773 | tmp12 = tmp0 - tmp2; |
||

774 | ```
} else {
``` |
||

775 | ```
/* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
``` |
||

776 | z1 = MULTIPLY(d2 + d6, FIX_0_541196100); |
||

777 | tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); |
||

778 | tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); |
||

779 | |||

780 | tmp10 = tmp3; |
||

781 | tmp13 = -tmp3; |
||

782 | tmp11 = tmp2; |
||

783 | tmp12 = -tmp2; |
||

784 | } |
||

785 | ```
} else {
``` |
||

786 | ```
if (d0) {
``` |
||

787 | ```
/* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
``` |
||

788 | tmp2 = MULTIPLY(-d6, FIX_1_306562965); |
||

789 | tmp3 = MULTIPLY(d6, FIX_0_541196100); |
||

790 | |||

791 | tmp0 = d0 << CONST_BITS; |
||

792 | |||

793 | tmp10 = tmp0 + tmp3; |
||

794 | tmp13 = tmp0 - tmp3; |
||

795 | tmp11 = tmp0 + tmp2; |
||

796 | tmp12 = tmp0 - tmp2; |
||

797 | ```
} else {
``` |
||

798 | ```
/* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
``` |
||

799 | tmp2 = MULTIPLY(-d6, FIX_1_306562965); |
||

800 | tmp3 = MULTIPLY(d6, FIX_0_541196100); |
||

801 | |||

802 | tmp10 = tmp3; |
||

803 | tmp13 = -tmp3; |
||

804 | tmp11 = tmp2; |
||

805 | tmp12 = -tmp2; |
||

806 | } |
||

807 | } |
||

808 | } |
||

809 | ```
} else {
``` |
||

810 | ```
if (d4) {
``` |
||

811 | ```
if (d2) {
``` |
||

812 | ```
if (d0) {
``` |
||

813 | ```
/* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
``` |
||

814 | tmp2 = MULTIPLY(d2, FIX_0_541196100); |
||

815 | tmp3 = MULTIPLY(d2, FIX_1_306562965); |
||

816 | |||

817 | tmp0 = (d0 + d4) << CONST_BITS; |
||

818 | tmp1 = (d0 - d4) << CONST_BITS; |
||

819 | |||

820 | tmp10 = tmp0 + tmp3; |
||

821 | tmp13 = tmp0 - tmp3; |
||

822 | tmp11 = tmp1 + tmp2; |
||

823 | tmp12 = tmp1 - tmp2; |
||

824 | ```
} else {
``` |
||

825 | ```
/* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
``` |
||

826 | tmp2 = MULTIPLY(d2, FIX_0_541196100); |
||

827 | tmp3 = MULTIPLY(d2, FIX_1_306562965); |
||

828 | |||

829 | tmp0 = d4 << CONST_BITS; |
||

830 | |||

831 | tmp10 = tmp0 + tmp3; |
||

832 | tmp13 = tmp0 - tmp3; |
||

833 | tmp11 = tmp2 - tmp0; |
||

834 | tmp12 = -(tmp0 + tmp2); |
||

835 | } |
||

836 | ```
} else {
``` |
||

837 | ```
if (d0) {
``` |
||

838 | ```
/* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
``` |
||

839 | tmp10 = tmp13 = (d0 + d4) << CONST_BITS; |
||

840 | tmp11 = tmp12 = (d0 - d4) << CONST_BITS; |
||

841 | ```
} else {
``` |
||

842 | ```
/* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
``` |
||

843 | tmp10 = tmp13 = d4 << CONST_BITS; |
||

844 | tmp11 = tmp12 = -tmp10; |
||

845 | } |
||

846 | } |
||

847 | ```
} else {
``` |
||

848 | ```
if (d2) {
``` |
||

849 | ```
if (d0) {
``` |
||

850 | ```
/* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
``` |
||

851 | tmp2 = MULTIPLY(d2, FIX_0_541196100); |
||

852 | tmp3 = MULTIPLY(d2, FIX_1_306562965); |
||

853 | |||

854 | tmp0 = d0 << CONST_BITS; |
||

855 | |||

856 | tmp10 = tmp0 + tmp3; |
||

857 | tmp13 = tmp0 - tmp3; |
||

858 | tmp11 = tmp0 + tmp2; |
||

859 | tmp12 = tmp0 - tmp2; |
||

860 | ```
} else {
``` |
||

861 | ```
/* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
``` |
||

862 | tmp2 = MULTIPLY(d2, FIX_0_541196100); |
||

863 | tmp3 = MULTIPLY(d2, FIX_1_306562965); |
||

864 | |||

865 | tmp10 = tmp3; |
||

866 | tmp13 = -tmp3; |
||

867 | tmp11 = tmp2; |
||

868 | tmp12 = -tmp2; |
||

869 | } |
||

870 | ```
} else {
``` |
||

871 | ```
if (d0) {
``` |
||

872 | ```
/* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
``` |
||

873 | tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS; |
||

874 | ```
} else {
``` |
||

875 | ```
/* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
``` |
||

876 | ```
tmp10 = tmp13 = tmp11 = tmp12 = 0;
``` |
||

877 | } |
||

878 | } |
||

879 | } |
||

880 | } |
||

881 | |||

882 | ```
/* Odd part per figure 8; the matrix is unitary and hence its
``` |
||

883 | ```
* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
``` |
||

884 | ```
*/
``` |
||

885 | ```
if (d7) {
``` |
||

886 | ```
if (d5) {
``` |
||

887 | ```
if (d3) {
``` |
||

888 | ```
if (d1) {
``` |
||

889 | ```
/* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
``` |
||

890 | z1 = d7 + d1; |
||

891 | z2 = d5 + d3; |
||

892 | z3 = d7 + d3; |
||

893 | z4 = d5 + d1; |
||

894 | z5 = MULTIPLY(z3 + z4, FIX_1_175875602); |
||

895 | |||

896 | tmp0 = MULTIPLY(d7, FIX_0_298631336); |
||

897 | tmp1 = MULTIPLY(d5, FIX_2_053119869); |
||

898 | tmp2 = MULTIPLY(d3, FIX_3_072711026); |
||

899 | tmp3 = MULTIPLY(d1, FIX_1_501321110); |
||

900 | z1 = MULTIPLY(-z1, FIX_0_899976223); |
||

901 | z2 = MULTIPLY(-z2, FIX_2_562915447); |
||

902 | z3 = MULTIPLY(-z3, FIX_1_961570560); |
||

903 | z4 = MULTIPLY(-z4, FIX_0_390180644); |
||

904 | |||

905 | z3 += z5; |
||

906 | z4 += z5; |
||

907 | |||

908 | tmp0 += z1 + z3; |
||

909 | tmp1 += z2 + z4; |
||

910 | tmp2 += z2 + z3; |
||

911 | tmp3 += z1 + z4; |
||

912 | ```
} else {
``` |
||

913 | ```
/* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
``` |
||

914 | z1 = d7; |
||

915 | z2 = d5 + d3; |
||

916 | z3 = d7 + d3; |
||

917 | z5 = MULTIPLY(z3 + d5, FIX_1_175875602); |
||

918 | |||

919 | tmp0 = MULTIPLY(d7, FIX_0_298631336); |
||

920 | tmp1 = MULTIPLY(d5, FIX_2_053119869); |
||

921 | tmp2 = MULTIPLY(d3, FIX_3_072711026); |
||

922 | z1 = MULTIPLY(-d7, FIX_0_899976223); |
||

923 | z2 = MULTIPLY(-z2, FIX_2_562915447); |
||

924 | z3 = MULTIPLY(-z3, FIX_1_961570560); |
||

925 | z4 = MULTIPLY(-d5, FIX_0_390180644); |
||

926 | |||

927 | z3 += z5; |
||

928 | z4 += z5; |
||

929 | |||

930 | tmp0 += z1 + z3; |
||

931 | tmp1 += z2 + z4; |
||

932 | tmp2 += z2 + z3; |
||

933 | tmp3 = z1 + z4; |
||

934 | } |
||

935 | ```
} else {
``` |
||

936 | ```
if (d1) {
``` |
||

937 | ```
/* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
``` |
||

938 | z1 = d7 + d1; |
||

939 | z2 = d5; |
||

940 | z3 = d7; |
||

941 | z4 = d5 + d1; |
||

942 | z5 = MULTIPLY(z3 + z4, FIX_1_175875602); |
||

943 | |||

944 | tmp0 = MULTIPLY(d7, FIX_0_298631336); |
||

945 | tmp1 = MULTIPLY(d5, FIX_2_053119869); |
||

946 | tmp3 = MULTIPLY(d1, FIX_1_501321110); |
||

947 | z1 = MULTIPLY(-z1, FIX_0_899976223); |
||

948 | z2 = MULTIPLY(-d5, FIX_2_562915447); |
||

949 | z3 = MULTIPLY(-d7, FIX_1_961570560); |
||

950 | z4 = MULTIPLY(-z4, FIX_0_390180644); |
||

951 | |||

952 | z3 += z5; |
||

953 | z4 += z5; |
||

954 | |||

955 | tmp0 += z1 + z3; |
||

956 | tmp1 += z2 + z4; |
||

957 | tmp2 = z2 + z3; |
||

958 | tmp3 += z1 + z4; |
||

959 | ```
} else {
``` |
||

960 | ```
/* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
``` |
||

961 | tmp0 = MULTIPLY(-d7, FIX_0_601344887); |
||

962 | z1 = MULTIPLY(-d7, FIX_0_899976223); |
||

963 | z3 = MULTIPLY(-d7, FIX_1_961570560); |
||

964 | tmp1 = MULTIPLY(-d5, FIX_0_509795579); |
||

965 | z2 = MULTIPLY(-d5, FIX_2_562915447); |
||

966 | z4 = MULTIPLY(-d5, FIX_0_390180644); |
||

967 | z5 = MULTIPLY(d5 + d7, FIX_1_175875602); |
||

968 | |||

969 | z3 += z5; |
||

970 | z4 += z5; |
||

971 | |||

972 | tmp0 += z3; |
||

973 | tmp1 += z4; |
||

974 | tmp2 = z2 + z3; |
||

975 | tmp3 = z1 + z4; |
||

976 | } |
||

977 | } |
||

978 | ```
} else {
``` |
||

979 | ```
if (d3) {
``` |
||

980 | ```
if (d1) {
``` |
||

981 | ```
/* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
``` |
||

982 | z1 = d7 + d1; |
||

983 | z3 = d7 + d3; |
||

984 | z5 = MULTIPLY(z3 + d1, FIX_1_175875602); |
||

985 | |||

986 | tmp0 = MULTIPLY(d7, FIX_0_298631336); |
||

987 | tmp2 = MULTIPLY(d3, FIX_3_072711026); |
||

988 | tmp3 = MULTIPLY(d1, FIX_1_501321110); |
||

989 | z1 = MULTIPLY(-z1, FIX_0_899976223); |
||

990 | z2 = MULTIPLY(-d3, FIX_2_562915447); |
||

991 | z3 = MULTIPLY(-z3, FIX_1_961570560); |
||

992 | z4 = MULTIPLY(-d1, FIX_0_390180644); |
||

993 | |||

994 | z3 += z5; |
||

995 | z4 += z5; |
||

996 | |||

997 | tmp0 += z1 + z3; |
||

998 | tmp1 = z2 + z4; |
||

999 | tmp2 += z2 + z3; |
||

1000 | tmp3 += z1 + z4; |
||

1001 | ```
} else {
``` |
||

1002 | ```
/* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
``` |
||

1003 | z3 = d7 + d3; |
||

1004 | |||

1005 | tmp0 = MULTIPLY(-d7, FIX_0_601344887); |
||

1006 | z1 = MULTIPLY(-d7, FIX_0_899976223); |
||

1007 | tmp2 = MULTIPLY(d3, FIX_0_509795579); |
||

1008 | z2 = MULTIPLY(-d3, FIX_2_562915447); |
||

1009 | z5 = MULTIPLY(z3, FIX_1_175875602); |
||

1010 | z3 = MULTIPLY(-z3, FIX_0_785694958); |
||

1011 | |||

1012 | tmp0 += z3; |
||

1013 | tmp1 = z2 + z5; |
||

1014 | tmp2 += z3; |
||

1015 | tmp3 = z1 + z5; |
||

1016 | } |
||

1017 | ```
} else {
``` |
||

1018 | ```
if (d1) {
``` |
||

1019 | ```
/* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
``` |
||

1020 | z1 = d7 + d1; |
||

1021 | z5 = MULTIPLY(z1, FIX_1_175875602); |
||

1022 | |||

1023 | z1 = MULTIPLY(z1, FIX_0_275899380); |
||

1024 | z3 = MULTIPLY(-d7, FIX_1_961570560); |
||

1025 | tmp0 = MULTIPLY(-d7, FIX_1_662939225); |
||

1026 | z4 = MULTIPLY(-d1, FIX_0_390180644); |
||

1027 | tmp3 = MULTIPLY(d1, FIX_1_111140466); |
||

1028 | |||

1029 | tmp0 += z1; |
||

1030 | tmp1 = z4 + z5; |
||

1031 | tmp2 = z3 + z5; |
||

1032 | tmp3 += z1; |
||

1033 | ```
} else {
``` |
||

1034 | ```
/* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
``` |
||

1035 | tmp0 = MULTIPLY(-d7, FIX_1_387039845); |
||

1036 | tmp1 = MULTIPLY(d7, FIX_1_175875602); |
||

1037 | tmp2 = MULTIPLY(-d7, FIX_0_785694958); |
||

1038 | tmp3 = MULTIPLY(d7, FIX_0_275899380); |
||

1039 | } |
||

1040 | } |
||

1041 | } |
||

1042 | ```
} else {
``` |
||

1043 | ```
if (d5) {
``` |
||

1044 | ```
if (d3) {
``` |
||

1045 | ```
if (d1) {
``` |
||

1046 | ```
/* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
``` |
||

1047 | z2 = d5 + d3; |
||

1048 | z4 = d5 + d1; |
||

1049 | z5 = MULTIPLY(d3 + z4, FIX_1_175875602); |
||

1050 | |||

1051 | tmp1 = MULTIPLY(d5, FIX_2_053119869); |
||

1052 | tmp2 = MULTIPLY(d3, FIX_3_072711026); |
||

1053 | tmp3 = MULTIPLY(d1, FIX_1_501321110); |
||

1054 | z1 = MULTIPLY(-d1, FIX_0_899976223); |
||

1055 | z2 = MULTIPLY(-z2, FIX_2_562915447); |
||

1056 | z3 = MULTIPLY(-d3, FIX_1_961570560); |
||

1057 | z4 = MULTIPLY(-z4, FIX_0_390180644); |
||

1058 | |||

1059 | z3 += z5; |
||

1060 | z4 += z5; |
||

1061 | |||

1062 | tmp0 = z1 + z3; |
||

1063 | tmp1 += z2 + z4; |
||

1064 | tmp2 += z2 + z3; |
||

1065 | tmp3 += z1 + z4; |
||

1066 | ```
} else {
``` |
||

1067 | ```
/* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
``` |
||

1068 | z2 = d5 + d3; |
||

1069 | |||

1070 | z5 = MULTIPLY(z2, FIX_1_175875602); |
||

1071 | tmp1 = MULTIPLY(d5, FIX_1_662939225); |
||

1072 | z4 = MULTIPLY(-d5, FIX_0_390180644); |
||

1073 | z2 = MULTIPLY(-z2, FIX_1_387039845); |
||

1074 | tmp2 = MULTIPLY(d3, FIX_1_111140466); |
||

1075 | z3 = MULTIPLY(-d3, FIX_1_961570560); |
||

1076 | |||

1077 | tmp0 = z3 + z5; |
||

1078 | tmp1 += z2; |
||

1079 | tmp2 += z2; |
||

1080 | tmp3 = z4 + z5; |
||

1081 | } |
||

1082 | ```
} else {
``` |
||

1083 | ```
if (d1) {
``` |
||

1084 | ```
/* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
``` |
||

1085 | z4 = d5 + d1; |
||

1086 | |||

1087 | z5 = MULTIPLY(z4, FIX_1_175875602); |
||

1088 | z1 = MULTIPLY(-d1, FIX_0_899976223); |
||

1089 | tmp3 = MULTIPLY(d1, FIX_0_601344887); |
||

1090 | tmp1 = MULTIPLY(-d5, FIX_0_509795579); |
||

1091 | z2 = MULTIPLY(-d5, FIX_2_562915447); |
||

1092 | z4 = MULTIPLY(z4, FIX_0_785694958); |
||

1093 | |||

1094 | tmp0 = z1 + z5; |
||

1095 | tmp1 += z4; |
||

1096 | tmp2 = z2 + z5; |
||

1097 | tmp3 += z4; |
||

1098 | ```
} else {
``` |
||

1099 | ```
/* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
``` |
||

1100 | tmp0 = MULTIPLY(d5, FIX_1_175875602); |
||

1101 | tmp1 = MULTIPLY(d5, FIX_0_275899380); |
||

1102 | tmp2 = MULTIPLY(-d5, FIX_1_387039845); |
||

1103 | tmp3 = MULTIPLY(d5, FIX_0_785694958); |
||

1104 | } |
||

1105 | } |
||

1106 | ```
} else {
``` |
||

1107 | ```
if (d3) {
``` |
||

1108 | ```
if (d1) {
``` |
||

1109 | ```
/* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
``` |
||

1110 | z5 = d1 + d3; |
||

1111 | tmp3 = MULTIPLY(d1, FIX_0_211164243); |
||

1112 | tmp2 = MULTIPLY(-d3, FIX_1_451774981); |
||

1113 | z1 = MULTIPLY(d1, FIX_1_061594337); |
||

1114 | z2 = MULTIPLY(-d3, FIX_2_172734803); |
||

1115 | z4 = MULTIPLY(z5, FIX_0_785694958); |
||

1116 | z5 = MULTIPLY(z5, FIX_1_175875602); |
||

1117 | |||

1118 | tmp0 = z1 - z4; |
||

1119 | tmp1 = z2 + z4; |
||

1120 | tmp2 += z5; |
||

1121 | tmp3 += z5; |
||

1122 | ```
} else {
``` |
||

1123 | ```
/* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
``` |
||

1124 | tmp0 = MULTIPLY(-d3, FIX_0_785694958); |
||

1125 | tmp1 = MULTIPLY(-d3, FIX_1_387039845); |
||

1126 | tmp2 = MULTIPLY(-d3, FIX_0_275899380); |
||

1127 | tmp3 = MULTIPLY(d3, FIX_1_175875602); |
||

1128 | } |
||

1129 | ```
} else {
``` |
||

1130 | ```
if (d1) {
``` |
||

1131 | ```
/* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
``` |
||

1132 | tmp0 = MULTIPLY(d1, FIX_0_275899380); |
||

1133 | tmp1 = MULTIPLY(d1, FIX_0_785694958); |
||

1134 | tmp2 = MULTIPLY(d1, FIX_1_175875602); |
||

1135 | tmp3 = MULTIPLY(d1, FIX_1_387039845); |
||

1136 | ```
} else {
``` |
||

1137 | ```
/* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
``` |
||

1138 | ```
tmp0 = tmp1 = tmp2 = tmp3 = 0;
``` |
||

1139 | } |
||

1140 | } |
||

1141 | } |
||

1142 | } |
||

1143 | |||

1144 | ```
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
``` |
||

1145 | |||

1146 | ```
dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3,
``` |
||

1147 | ```
CONST_BITS+PASS1_BITS+3);
``` |
||

1148 | ```
dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3,
``` |
||

1149 | ```
CONST_BITS+PASS1_BITS+3);
``` |
||

1150 | ```
dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2,
``` |
||

1151 | ```
CONST_BITS+PASS1_BITS+3);
``` |
||

1152 | ```
dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2,
``` |
||

1153 | ```
CONST_BITS+PASS1_BITS+3);
``` |
||

1154 | ```
dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1,
``` |
||

1155 | ```
CONST_BITS+PASS1_BITS+3);
``` |
||

1156 | ```
dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1,
``` |
||

1157 | ```
CONST_BITS+PASS1_BITS+3);
``` |
||

1158 | ```
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0,
``` |
||

1159 | ```
CONST_BITS+PASS1_BITS+3);
``` |
||

1160 | ```
dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0,
``` |
||

1161 | ```
CONST_BITS+PASS1_BITS+3);
``` |
||

1162 | |||

1163 | ```
dataptr++; /* advance pointer to next column */
``` |
||

1164 | } |
||

1165 | } |
||

1166 |