FTEQW
Documentation of the FTE engine source tree.
image_astc.h
Go to the documentation of this file.
1//Note: this code does not claim to be bit-correct.
2//It doesn't support volume textures.
3//It doesn't validate block extents (and is generally unaware of more than one block anyway)
4//It doesn't implement all validation checks, either.
5//Do NOT use this code to validate any encoders...
6
7//Based upon documentation here: https://www.khronos.org/registry/OpenGL/extensions/OES/OES_texture_compression_astc.txt
8
9#ifndef ASTC_PUBLIC
10#define ASTC_PUBLIC
11#endif
12
13#define ASTC_WITH_LDR //comment out this line to disable pure-LDR decoding (the hdr code can still be used).
14#define ASTC_WITH_HDR //comment out this line to disable HDR decoding.
15#define ASTC_WITH_HDRTEST //comment out this line to disable checking for which profile is needed.
16//#define ASTC_WITH_3D
17
18#ifdef ASTC_WITH_LDR
19 ASTC_PUBLIC void ASTC_Decode_LDR8(unsigned char *in, unsigned char *out, int pixstride/*outwidth*/, int layerstride/*outwidth*outheight*/, int bw,int bh,int bd); //generates RGBA8 data (gives error colour for hdr blocks!)
20#endif
21#ifdef ASTC_WITH_HDR
22 ASTC_PUBLIC void ASTC_Decode_HDR(unsigned char *in, unsigned short *out, int pixstride/*outwidth*/, int layerstride/*outwidth*outheight*/, int bw,int bh,int bd); //generates RGBA16F data.
23#endif
24#ifdef ASTC_WITH_HDRTEST
25 ASTC_PUBLIC int ASTC_BlocksAreHDR(unsigned char *in, size_t datasize, int bw, int bh, int bd); //returns true if n consecutive blocks require the HDR profile (ie: detects when you need to soft-decode for drivers with partial support, as opposed to just always decompressing).
26#endif
27
28
29
30#include <math.h>
31#include <stdio.h>
32#include <string.h>
33#ifndef Vector4Set
34 #define Vector4Set(r,x,y,z,w) {(r)[0] = x; (r)[1] = y;(r)[2] = z;(r)[3]=w;}
35#endif
36#ifndef countof
37 #define countof(array) (sizeof(array)/sizeof(array[0]))
38#endif
39#if defined(ASTC_WITH_LDR) || defined(ASTC_WITH_HDR)
40 #define ASTC_WITH_DECODE
41#endif
/*
 * Classification of one block, as decided by ASTC_ReadBlockMode.
 * The first three values describe blocks that carry usable data; the rest mark blocks that were rejected.
 */
enum astc_status_e
{
	//valid blocks
	ASTC_OKAY,			//we can decode at least part of this normally (hdr endpoints may still result in per-endpoint errors).
	ASTC_VOID_LDR,		//not an error - the block is a single LDR colour, with an RGBA16 colour in the last 8 bytes.
	ASTC_VOID_HDR,		//not an error - the block is a single HDR colour, with an RGBA16F colour in the last 8 bytes.

	//invalid blocks
	ASTC_ERROR,			//validation errors
	ASTC_UNSUPPORTED,	//basically just volume textures
	ASTC_RESERVED,		//reserved bits. basically an error but might not be in the future.
};

/*
 * Working state for a single block.
 * The caller fills in 'in' and 'blocksize'; ASTC_ReadBlockMode and ASTC_ReadPartitions fill in the header-derived fields.
 */
struct astc_block_info
{
	unsigned char *in;				//the 16 bytes of the block
	unsigned char blocksize[3];		//block width, height, depth(1 for 2d).

	enum astc_status_e status;		//block status/type.
	unsigned char dualplane;		//two sets of weights instead of one.
	unsigned char ccs;				//second set applies to this component

	unsigned char precision;		//defines the precision of the weights (index into astc_weightmode)

	int wcount[4];					//x,y,z,total weight counts
	int weight_bits;				//size of weights section.
	int config_bits;				//size of header before the endpoint bits
	int ep_bits;					//size available to endpoints
	unsigned char weights[64];		//official limit to the number of weights stored

	unsigned char partitions;		//number of active partitions to select from (and number of endpoints to read)
	unsigned short partindex;		//used for deciding which partition each pixel belongs in
	struct astc_part
	{
		unsigned char mode;			//endpoint modes
#ifdef ASTC_WITH_HDR
		unsigned char hdr;			//endpoint colour mode - &1=rgb, &2=alpha
#endif
		int ep[2][4];				//two endpoints, four components each
	} part[4];
};
82
/*
 * Pulls a small bit-field out of a little-endian bit stream.
 *   in     - start of the stream; bit 0 is the least significant bit of in[0].
 *   offset - index of the first bit wanted.
 *   count  - width of the field.
 * The second byte is only touched when the field crosses a byte boundary, so no more than two bytes are ever read.
 * The result is narrowed to unsigned char, so only the low 8 bits of the field survive.
 */
static unsigned char ASTC_readbits(unsigned char *in, unsigned int offset, unsigned int count)
{
	const unsigned char *src = in + (offset >> 3);
	const unsigned int bitpos = offset & 7;
	const unsigned int mask = (1u << count) - 1;
	unsigned short window = src[0];

	if (bitpos + count > 8)
		window |= (src[1] << 8);	//field spills into the next byte
	window >>= bitpos;
	return window & mask;
}
/*
 * Reads a field of up to 32 bits by stitching together 8-bit (or smaller) pieces from ASTC_readbits.
 *   in     - start of the stream; bit 0 is the least significant bit of in[0].
 *   offset - index of the first (lowest) bit of the field.
 *   count  - width of the field; the result only has room for 32 bits.
 * Returns the field with its lowest bit in bit 0.
 */
static unsigned int ASTC_readmanybits(unsigned char *in, unsigned int offset, unsigned int count)
{
	unsigned int r = 0;
	while (count > 8)
	{	//peel whole bytes off the top of the field
		count -= 8;
		//widen before shifting: the unsigned char would otherwise promote to signed int, and shifting a set bit into its sign bit is undefined.
		r |= (unsigned int)ASTC_readbits(in, offset+count, 8) << count;
	}
	r |= ASTC_readbits(in, offset, count);	//whatever is left at the bottom (8 bits or fewer)
	return r;
}
105
//Unquantisation tables for texel weights.
//Table names: _Nb = N plain bits, _Nt = one trit plus N plain bits, _Nq = one quint plus N plain bits.
//ASTC_Decode indexes these with (trit-or-quint << N) | plain bits, which is why the trit/quint tables are not in ascending order.
106//weights cover a range of 0-64 inclusive
107//>32 is +1 (otherwise it would be 0-63)
108//high bits are folded over
109static unsigned char dequant_weight_1b[1<<1] = {0x00,0x40};
110static unsigned char dequant_weight_2b[1<<2] = {0x00,0x15,0x2b,0x40};
111static unsigned char dequant_weight_3b[1<<3] = {0x00,0x09,0x12,0x1b,0x25,0x2e,0x37,0x40};
112static unsigned char dequant_weight_4b[1<<4] = {0x00,0x04,0x08,0x0c,0x11,0x15,0x19,0x1d,0x23,0x27,0x2b,0x2f,0x34,0x38,0x3c,0x40};
113static unsigned char dequant_weight_5b[1<<5] = {0x00,0x02,0x04,0x06,0x08,0x0a,0x0c,0x0e,0x10,0x12,0x14,0x16,0x18,0x1a,0x1c,0x1e,0x22,0x24,0x26,0x28,0x2a,0x2c,0x2e,0x30,0x32,0x34,0x36,0x38,0x3a,0x3c,0x3e,0x40};
114static unsigned char dequant_weight_0t[3] = {0,32,64};
115static unsigned char dequant_weight_1t[6] = {0x00,0x40,0x0c,0x34,0x19,0x27};
116static unsigned char dequant_weight_2t[12] = {0x00,0x40,0x11,0x2f,0x06,0x3a,0x17,0x29,0x0c,0x34,0x1d,0x23};
117static unsigned char dequant_weight_3t[24] = {0x00,0x40,0x08,0x38,0x10,0x30,0x18,0x28,0x02,0x3e,0x0b,0x35,0x13,0x2d,0x1b,0x25,0x05,0x3b,0x0d,0x33,0x16,0x2a,0x1e,0x22};
118static unsigned char dequant_weight_0q[5] = {0,16,32,48,64};
119static unsigned char dequant_weight_1q[10] = {0x00,0x40,0x05,0x3b,0x0b,0x35,0x11,0x2f,0x17,0x29};
120static unsigned char dequant_weight_2q[20] = {0x00,0x40,0x10,0x30,0x03,0x3d,0x13,0x2d,0x06,0x3a,0x17,0x29,0x09,0x37,0x1a,0x26,0x0d,0x33,0x1d,0x23};
//Weight encodings, indexed by astc_block_info.precision (the 4-bit value assembled in ASTC_ReadBlockMode).
// extra   - 0: plain bits only, 1: each value also carries a trit, 2: each value also carries a quint (see ASTC_DecodeSize).
// bits    - number of plain bits per value.
// dequant - lookup table mapping a decoded value to its 0-64 weight; NULL for the entries marked invalid.
//The trailing number on each row is the number of entries in that row's table.
121static const struct
122{
123 unsigned char extra, bits, *dequant;
124} astc_weightmode[] =
125{
126 {0,0, NULL}, //invalid
127 {0,0, NULL}, //invalid
128 {0,1, dequant_weight_1b}, //2
129 {1,0, dequant_weight_0t}, //3
130 {0,2, dequant_weight_2b}, //4
131 {2,0, dequant_weight_0q}, //5
132 {1,1, dequant_weight_1t}, //6
133 {0,3, dequant_weight_3b}, //8
134 {0,0, NULL}, //invalid
135 {0,0, NULL}, //invalid
136 {2,1, dequant_weight_1q}, //10
137 {1,2, dequant_weight_2t}, //12
138 {0,4, dequant_weight_4b}, //16
139 {2,2, dequant_weight_2q}, //20
140 {1,3, dequant_weight_3t}, //24
141 {0,5, dequant_weight_5b}, //32
142};
/*
 * Number of bits occupied by 'count' packed values.
 *   bits  - plain bits stored per value.
 *   extra - 0: nothing else, 1: a trit per value, 2: a quint per value.
 * Trits are charged at 8 bits per 5 values and quints at 7 bits per 3 values, both rounded up.
 */
static unsigned int ASTC_DecodeSize(unsigned int count, unsigned int bits, unsigned char extra)
{
	unsigned int total = count * bits;	//the plain binary part of every value

	if (extra == 1)
		total += (count*8 + 4) / 5;		//trits
	else if (extra == 2)
		total += (count*7 + 2) / 3;		//quints
	return total;
}
149
150
151static void ASTC_ReadBlockMode(struct astc_block_info *b)
152{
153 unsigned char *in = b->in;
154 unsigned short s = ASTC_readmanybits(in, 0, 13);//in[0] | (in[1]<<8);
155 b->config_bits = 13;
156
157 if ((s&0x1ff)==0x1fc)
158 { //void extent
159 if (s&0x200)
160 b->status = ASTC_VOID_HDR;
161 else
162 b->status = ASTC_VOID_LDR;
163 b->dualplane = b->precision = b->wcount[0] = b->wcount[1] = b->wcount[2] = b->partitions = 0;
164 return;
165 }
166 b->status = ASTC_OKAY;
167 b->dualplane = (s>>10)&1; //Dp
168 b->precision = (s>>(9-3))&(1<<3);//P
169 b->precision |= (s>>4)&1; //p0
170 if (b->blocksize[2] != 1)
171 { //3d blocks have a different header layout
172#ifdef ASTC_WITH_3D
173 if (s&3)
174 {
175 b->precision|=(s&3)<<1; //p2, p1
176 b->wcount[0] = ((s>>5)&3)+2, b->wcount[1] = ((s>>7)&3)+2, b->wcount[2] = ((s>>2)&3)+2;
177 }
178 else
179 {
180 b->precision|=(s&0xc)>>1; //p2, p1
181 if ((s&0x180)!=0x180)
182 {
183 b->dualplane = 0; //always single plane.
184 b->precision &= 7; //clear the high precision bit (reused for 'b')
185 if (!(s&0x180))
186 b->wcount[0] = 6, b->wcount[1] = ((s>>9)&3)+2, b->wcount[2] = ((s>>5)&3)+2;
187 else if (!(s&0x80))
188 b->wcount[0] = ((s>>5)&3)+2, b->wcount[1] = 6, b->wcount[2] = ((s>>9)&3)+2;
189 else
190 b->wcount[0] = ((s>>5)&3)+2, b->wcount[1] = ((s>>9)&3)+2, b->wcount[2] = 6;
191 }
192 else if ((s&0x60)!=0x60)
193 {
194 if (!(s&0x60))
195 b->wcount[0] = 6, b->wcount[1] = 2, b->wcount[2] = 2;
196 else if (!(s&0x20))
197 b->wcount[0] = 2, b->wcount[1] = 6, b->wcount[2] = 2;
198 else //40
199 b->wcount[0] = 2, b->wcount[1] = 2, b->wcount[2] = 6;
200 }
201 else
202 b->status = ASTC_RESERVED; //reserved (or void extent, but those were handled above)
203 }
204#else
205 b->status = ASTC_UNSUPPORTED;
206#endif
207 }
208 else
209 {
210 b->wcount[2] = 1;
211 if (s&3)
212 { //one of the first 5 layouts...
213 b->precision|=(s&3)<<1; //p2, p1
214 if (!(s&8))
215 { //first two layouts...
216 if (!(s&4))
217 { //layout0
218 b->wcount[0] = ((s>>7)&3)+4;
219 b->wcount[1] = ((s>>5)&3)+2;
220 }
221 else
222 { //layout1
223 b->wcount[0] = ((s>>7)&3)+8;
224 b->wcount[1] = ((s>>5)&3)+2;
225 }
226 }
227 else if (!(s&4))
228 { //layout2
229 b->wcount[0] = ((s>>5)&3)+2;
230 b->wcount[1] = ((s>>7)&3)+8;
231 }
232 else if (!(s&256))
233 { //layout3
234 b->wcount[0] = ((s>>5)&3)+2;
235 b->wcount[1] = ((s>>7)&1)+6;
236 }
237 else
238 { //layout4
239 b->wcount[0] = ((s>>7)&1)+2;
240 b->wcount[1] = ((s>>5)&3)+2;
241 }
242 }
243 else
244 { //one of the later layouts
245 b->precision|=(s&0xc)>>1; //p2, p1
246 if (!(s&384))
247 {
248 b->wcount[0] = 12;
249 b->wcount[1] = ((s>>5)&3)+2;
250 }
251 else if ((s&384)==128)
252 {
253 b->wcount[0] = ((s>>5)&3)+2;
254 b->wcount[1] = 12;
255 }
256 else if ((s&480)==384)
257 {
258 b->wcount[0] = 6;
259 b->wcount[1] = 10;
260 }
261 else if ((s&480)==416)
262 {
263 b->wcount[0] = 10;
264 b->wcount[1] = 6;
265 }
266 else if ((s&384)==256)
267 {
268 b->wcount[0] = ((s>>5)&3)+6;
269 b->wcount[1] = ((s>>9)&3)+6;
270 b->dualplane = 0; //forget the Dp bit, its reused in this layout
271 b->precision &= 7; //forget the P bit, too
272 }
273 else
274 b->status = ASTC_RESERVED; //reserved
275 }
276 }
277 b->partitions = ((s>>11)&3)+1;
278
279 if (b->partitions > 3 && b->dualplane)
280 b->status = ASTC_ERROR; //apparently.
281
282 if (b->wcount[0] > b->blocksize[0] || b->wcount[1] > b->blocksize[1] || b->wcount[2] > b->blocksize[2])
283 b->status = ASTC_ERROR; //invalid weight counts.
284
285 b->wcount[3] = b->wcount[0] * b->wcount[1] * b->wcount[2];
286 b->wcount[3]<<=b->dualplane; //dual-plane has twice the weights - interleaved.
287 if (b->wcount[3] > countof(b->weights))
288 b->status = ASTC_ERROR; //more than 64 weights are banned, for some reason
289 b->weight_bits = ASTC_DecodeSize(b->wcount[3], astc_weightmode[b->precision].bits, astc_weightmode[b->precision].extra);
290}
291
292static void ASTC_ReadPartitions(struct astc_block_info *b)
293{
294 int sel;
295 int i;
296 unsigned char *in = b->in;
297 int weight_bits = b->weight_bits;
298
299 if (b->partitions == 1)
300 { //single-partition mode, simple CEM
301 b->partindex = 0;
302 b->part[0].mode = ASTC_readbits(in, b->config_bits, 4);
303 b->config_bits += 4;
304 }
305 else
306 { //multi
307 b->partindex = ASTC_readmanybits(in, b->config_bits, 10);
308 b->config_bits += 10;
309 sel = ASTC_readbits(in, b->config_bits, 6);
310 b->config_bits += 6;
311 if (!(sel&3))
312 {
313 sel = (sel>>2)&0xf;
314 for (i = 0; i < b->partitions; i++)
315 b->part[i].mode = sel; //all the same
316 }
317 else
318 {
319 int shift = 2;
320 int highbits = b->partitions*3 - 4;
321
322 weight_bits += highbits;
323 sel |= ASTC_readbits(in, 128-weight_bits, highbits)<<6; //I don't know why this is separate. it seems like an unnecessary complication to me.
324
325 for (i = 0; i < b->partitions; i++, shift++)
326 {
327 b->part[i].mode = ((sel&3)-1)<<2; //class groups
328 b->part[i].mode += ((sel>>shift)&1)<<2;//class
329 }
330 for (i = 0; i < b->partitions; i++, shift+=2)
331 b->part[i].mode += (sel>>shift)&3; //specific mode info
332 }
333 }
334 if (b->dualplane)
335 {
336 weight_bits += 2;
337 b->ccs = ASTC_readbits(in, 128-weight_bits, 2);
338 }
339 else
340 b->ccs = 0;
341
342 b->ep_bits = 128 - weight_bits - b->config_bits;
343 //weights are at 128-weight_bits to 128
344 //epdata is at config_bits to config_bits+ep_bits
345}
346
347#ifdef ASTC_WITH_HDRTEST
348ASTC_PUBLIC int ASTC_BlocksAreHDR(unsigned char *in, size_t datasize, int bw, int bh, int bd)
349{
350 struct astc_block_info b;
351 int i;
352 size_t blocks = datasize/16;
353 b.in = in;
354 b.blocksize[0] = bw;
355 b.blocksize[1] = bh;
356 b.blocksize[2] = bd;
357 while(blocks --> 0)
358 {
359 ASTC_ReadBlockMode(&b);
360 if (b.status == ASTC_VOID_HDR)
361 return 1; //if we're getting hdr blocks then we can decode properly only with hdr
362 if (b.status == ASTC_VOID_LDR)
363 return 0; //if we're getting ldr blocks, then its unlikely that there's any hdr blocks in there.
364 if (b.status != ASTC_OKAY)
365 continue;
366 ASTC_ReadPartitions(&b);
367 for (i = 0; i < b.partitions; i++)
368 {
369 switch(b.part[i].mode)
370 {
371 case 2:
372 case 3:
373 case 7:
374 case 11:
375 case 14:
376 case 15:
377 return 1;
378 }
379 }
380 b.in += 16;
381 }
382 return 0;
383}
384#endif
385
386#ifdef ASTC_WITH_DECODE
/*
 * Cursor-style wrapper around ASTC_readbits: returns the 'count' bits found at *offset and
 * moves *offset past them. Shares ASTC_readbits' limits on field width.
 */
static unsigned char ASTC_readbits2(unsigned char *in, unsigned int *offset, unsigned int count)
{
	const unsigned int start = *offset;

	*offset = start + count;
	return ASTC_readbits(in, start, count);
}
//Unpacks 'count' values from the bit stream and writes their table-mapped results to 'out'.
// in      - start of the bit stream.
// out     - receives exactly 'count' bytes.
// offset  - bit position of the first value.
// bits    - plain bits stored per value.
// extra   - 0: plain bits only, 1: each value also has a trit, 2: each value also has a quint.
// dequant - lookup applied to every value before it is written; indexed by (trit-or-quint << bits) | plain bits.
//           it is dereferenced unconditionally, so it must not be NULL.
393static void ASTC_Decode(unsigned char *in, unsigned char *out, int count, unsigned int offset, int bits, int extra, unsigned char *dequant)
394{
395 unsigned char block[5];
396 int j;
397
398 //unfortunately these trits depend upon the values of the later bits in each block.
399 //if only it were a nice simple modulo...
400 if (extra==1)
401 {
402 //read it 5 samples at a time
 //each group interleaves the five plain-bit fields with 2+2+1+2+1 bits that together form the 8-bit packed trit value 't'.
 //a short final group only reads the fields it actually contains; the rest of 't' and the unread values stay zero.
403 while(count > 0)
404 {
405 unsigned int t, c;
406
407 block[0] = ASTC_readbits2(in, &offset, bits);
408 t = ASTC_readbits2(in, &offset, 2);
409 if (count > 1)
410 {
411 block[1] = ASTC_readbits2(in, &offset, bits);
412 t |= ASTC_readbits2(in, &offset, 2)<<2;
413 }
414 else
415 block[1] = 0;
416 if (count > 2)
417 {
418 block[2] = ASTC_readbits2(in, &offset, bits);
419 t |= ASTC_readbits2(in, &offset, 1)<<4;
420 }
421 else
422 block[2] = 0;
423 if (count > 3)
424 {
425 block[3] = ASTC_readbits2(in, &offset, bits);
426 t |= ASTC_readbits2(in, &offset, 2)<<5;
427 }
428 else
429 block[3] = 0;
430 if (count > 4)
431 {
432 block[4] = ASTC_readbits2(in, &offset, bits);
433 t |= ASTC_readbits2(in, &offset, 1)<<7;
434 }
435 else
436 block[4] = 0;
437
438 //okay, we read the block, now figure out the trits and pack them into the high part of the result
 //first the trits for values 3 and 4 (plus the 5-bit intermediate 'c'), then the trits for values 0-2 from 'c'.
439 if ((t&0x1c) == 0x1c)
440 {
441 c = ((t>>3)&0x1c) | (t&3);
442 block[4] |= 2<<bits;
443 block[3] |= 2<<bits;
444 }
445 else
446 {
447 c = t&0x1f;
448 if ((t&0x60) == 0x60)
449 {
450 block[4] |= 2<<bits;
451 block[3] |= (t>>7)<<bits;
452 }
453 else
454 {
455 block[4] |= (t>>7)<<bits;
456 block[3] |= ((t>>5)&3)<<bits;
457 }
458 }
459 if ((c&3)==3)
460 {
461 block[2] |= 2<<bits;
462 block[1] |= ((c>>4)&1)<<bits;
463 block[0] |= (((c>>2)&2) | ((c>>2)&~(c>>3)&1))<<bits;
464 }
465 else if ((c&0xc)==0xc)
466 {
467 block[2] |= 2<<bits;
468 block[1] |= 2<<bits;
469 block[0] |= (c&3)<<bits;
470 }
471 else
472 {
473 block[2] |= ((c>>4)&1)<<bits;
474 block[1] |= ((c>>2)&3)<<bits;
475 block[0] |= ((c&2)|(c&1&~(c>>1)))<<bits;
476 }
477
478 //spit out the result
 //only the values that really exist in this group are written, so 'out' never receives more than 'count' bytes.
479 for (j = 0; j < 5 && j < count; j++)
480 *out++ = dequant[block[j]];
481 count -= 5;
482 }
483 }
484 else if (extra == 2)
485 {
486 //read it 3 samples at a time
 //each group interleaves the three plain-bit fields with 3+2+2 bits that together form the 7-bit packed quint value 't'.
487 while(count > 0)
488 {
489 unsigned int t, c;
490
491 block[0] = ASTC_readbits2(in, &offset, bits);
492 t = ASTC_readbits2(in, &offset, 3);
493 if (count > 1)
494 {
495 block[1] = ASTC_readbits2(in, &offset, bits);
496 t |= ASTC_readbits2(in, &offset, 2)<<3;
497 }
498 else
499 block[1] = 0;
500 if (count > 2)
501 {
502 block[2] = ASTC_readbits2(in, &offset, bits);
503 t |= ASTC_readbits2(in, &offset, 2)<<5;
504 }
505 else
506 block[2] = 0;
507
508 //okay, we read the block, now figure out the trits and pack them into the high part of the result
 //(these are quints here: each of the three values gains a digit in the range 0-4 above its plain bits)
509 if ((t&6)==6 && !(t&0x60))
510 {
511 block[2] |= (((t&1)<<2) | (((t>>4)&~t&1)<<1) | ((t>>3)&~t&1))<<bits;
512 block[1] |= 4<<bits;
513 block[0] |= 4<<bits;
514 }
515 else
516 {
517 if ((t&6) == 6)
518 {
519 block[2] |= 4<<bits;
520 c = ((t>>3)&3)<<3;
521 c |= (~(t>>5)&3)<<1;
522 c |= t&1;
523 }
524 else
525 {
526 block[2] |= ((t>>5)&3)<<bits;
527 c = t&0x1f;
528 }
529
530 if ((c&7) == 5)
531 {
532 block[1] |= 4<<bits;
533 block[0] |= ((c>>3)&3)<<bits;
534 }
535 else
536 {
537 block[1] |= ((c>>3)&3)<<bits;
538 block[0] |= (c&7)<<bits;
539 }
540 }
541
542 //spit out the result
543 for (j = 0; j < 3 && j < count; j++)
544 *out++ = dequant[block[j]];
545 count -= 3;
546 }
547 }
548 else while(count --> 0) //pure bits, nice and simple
549 {
550 unsigned char val = ASTC_readbits2(in, &offset, bits);
551
552 *out++ = dequant[val];
553 }
554}
555
//Unquantisation tables for colour endpoint values.
//Table names: _Nb = N plain bits, _Nt = one trit plus N plain bits, _Nq = one quint plus N plain bits.
//ASTC_Decode indexes these with (trit-or-quint << N) | plain bits, which is why the trit/quint tables are not in ascending order.
556//endpoints have a logical value between 0 and 255.
557//bit replication is used to fill in missing precision
558static unsigned char dequant_ep_1b[1<<1] = {0,255};
559static unsigned char dequant_ep_2b[1<<2] = {0x00,0x55,0xaa,0xff};
560static unsigned char dequant_ep_3b[1<<3] = {0x00,0x24,0x49,0x6d,0x92,0xb6,0xdb,0xff};
561static unsigned char dequant_ep_4b[1<<4] = {
562 0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff};
563static unsigned char dequant_ep_5b[1<<5] = {
564 0x00,0x08,0x10,0x18,0x21,0x29,0x31,0x39,0x42,0x4a,0x52,0x5a,0x63,0x6b,0x73,0x7b,
565 0x84,0x8c,0x94,0x9c,0xa5,0xad,0xb5,0xbd,0xc6,0xce,0xd6,0xde,0xe7,0xef,0xf7,0xff};
566static unsigned char dequant_ep_6b[1<<6] = {
567 0x00,0x04,0x08,0x0c,0x10,0x14,0x18,0x1c,0x20,0x24,0x28,0x2c,0x30,0x34,0x38,0x3c,
568 0x41,0x45,0x49,0x4d,0x51,0x55,0x59,0x5d,0x61,0x65,0x69,0x6d,0x71,0x75,0x79,0x7d,
569 0x82,0x86,0x8a,0x8e,0x92,0x96,0x9a,0x9e,0xa2,0xa6,0xaa,0xae,0xb2,0xb6,0xba,0xbe,
570 0xc3,0xc7,0xcb,0xcf,0xd3,0xd7,0xdb,0xdf,0xe3,0xe7,0xeb,0xef,0xf3,0xf7,0xfb,0xff};
571static unsigned char dequant_ep_7b[1<<7] = {
572 0x00,0x02,0x04,0x06,0x08,0x0a,0x0c,0x0e,0x10,0x12,0x14,0x16,0x18,0x1a,0x1c,0x1e,
573 0x20,0x22,0x24,0x26,0x28,0x2a,0x2c,0x2e,0x30,0x32,0x34,0x36,0x38,0x3a,0x3c,0x3e,
574 0x40,0x42,0x44,0x46,0x48,0x4a,0x4c,0x4e,0x50,0x52,0x54,0x56,0x58,0x5a,0x5c,0x5e,
575 0x60,0x62,0x64,0x66,0x68,0x6a,0x6c,0x6e,0x70,0x72,0x74,0x76,0x78,0x7a,0x7c,0x7e,
576 0x81,0x83,0x85,0x87,0x89,0x8b,0x8d,0x8f,0x91,0x93,0x95,0x97,0x99,0x9b,0x9d,0x9f,
577 0xa1,0xa3,0xa5,0xa7,0xa9,0xab,0xad,0xaf,0xb1,0xb3,0xb5,0xb7,0xb9,0xbb,0xbd,0xbf,
578 0xc1,0xc3,0xc5,0xc7,0xc9,0xcb,0xcd,0xcf,0xd1,0xd3,0xd5,0xd7,0xd9,0xdb,0xdd,0xdf,
579 0xe1,0xe3,0xe5,0xe7,0xe9,0xeb,0xed,0xef,0xf1,0xf3,0xf5,0xf7,0xf9,0xfb,0xfd,0xff};
580static unsigned char dequant_ep_8b[1<<8] = {
581 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
582 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
583 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
584 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
585 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
586 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
587 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
588 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
589 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
590 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
591 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
592 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
593 0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
594 0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
595 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
596 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff};
597static unsigned char dequant_ep_0t[3] = {0,128,255};
598static unsigned char dequant_ep_1t[6] = {0x00,0xff,0x33,0xcc,0x66,0x99};
599static unsigned char dequant_ep_2t[12] = {0x00,0xff,0x45,0xba,0x17,0xe8,0x5c,0xa3,0x2e,0xd1,0x74,0x8b};
600static unsigned char dequant_ep_3t[24] = {0x00,0xff,0x21,0xde,0x42,0xbd,0x63,0x9c,0x0b,0xf4,0x2c,0xd3,0x4d,0xb2,0x6e,0x91,0x16,0xe9,0x37,0xc8,0x58,0xa7,0x79,0x86};
601static unsigned char dequant_ep_4t[48] = {0x00,0xff,0x10,0xef,0x20,0xdf,0x30,0xcf,0x41,0xbe,0x51,0xae,0x61,0x9e,0x71,0x8e,0x05,0xfa,0x15,0xea,0x26,0xd9,0x36,0xc9,0x46,0xb9,0x56,0xa9,0x67,0x98,0x77,0x88,0x0b,0xf4,0x1b,0xe4,0x2b,0xd4,0x3b,0xc4,0x4c,0xb3,0x5c,0xa3,0x6c,0x93,0x7c,0x83};
602static unsigned char dequant_ep_5t[96] = {0x00,0xff,0x08,0xf7,0x10,0xef,0x18,0xe7,0x20,0xdf,0x28,0xd7,0x30,0xcf,0x38,0xc7,0x40,0xbf,0x48,0xb7,0x50,0xaf,0x58,0xa7,0x60,0x9f,0x68,0x97,0x70,0x8f,0x78,0x87,0x02,0xfd,0x0a,0xf5,0x12,0xed,0x1a,0xe5,0x23,0xdc,0x2b,0xd4,0x33,0xcc,0x3b,0xc4,0x43,0xbc,0x4b,0xb4,0x53,0xac,0x5b,0xa4,0x63,0x9c,0x6b,0x94,0x73,0x8c,0x7b,0x84,0x05,0xfa,0x0d,0xf2,0x15,0xea,0x1d,0xe2,0x25,0xda,0x2d,0xd2,0x35,0xca,0x3d,0xc2,0x46,0xb9,0x4e,0xb1,0x56,0xa9,0x5e,0xa1,0x66,0x99,0x6e,0x91,0x76,0x89,0x7e,0x81};
603static unsigned char dequant_ep_6t[192]= {0x00,0xff,0x04,0xfb,0x08,0xf7,0x0c,0xf3,0x10,0xef,0x14,0xeb,0x18,0xe7,0x1c,0xe3,0x20,0xdf,0x24,0xdb,0x28,0xd7,0x2c,0xd3,0x30,0xcf,0x34,0xcb,0x38,0xc7,0x3c,0xc3,0x40,0xbf,0x44,0xbb,0x48,0xb7,0x4c,0xb3,0x50,0xaf,0x54,0xab,0x58,0xa7,0x5c,0xa3,0x60,0x9f,0x64,0x9b,0x68,0x97,0x6c,0x93,0x70,0x8f,0x74,0x8b,0x78,0x87,0x7c,0x83,0x01,0xfe,0x05,0xfa,0x09,0xf6,0x0d,0xf2,0x11,0xee,0x15,0xea,0x19,0xe6,0x1d,0xe2,0x21,0xde,0x25,0xda,0x29,0xd6,0x2d,0xd2,0x31,0xce,0x35,0xca,0x39,0xc6,0x3d,0xc2,0x41,0xbe,0x45,0xba,0x49,0xb6,0x4d,0xb2,0x51,0xae,0x55,0xaa,0x59,0xa6,0x5d,0xa2,0x61,0x9e,0x65,0x9a,0x69,0x96,0x6d,0x92,0x71,0x8e,0x75,0x8a,0x79,0x86,0x7d,0x82,0x02,0xfd,0x06,0xf9,0x0a,0xf5,0x0e,0xf1,0x12,0xed,0x16,0xe9,0x1a,0xe5,0x1e,0xe1,0x22,0xdd,0x26,0xd9,0x2a,0xd5,0x2e,0xd1,0x32,0xcd,0x36,0xc9,0x3a,0xc5,0x3e,0xc1,0x42,0xbd,0x46,0xb9,0x4a,0xb5,0x4e,0xb1,0x52,0xad,0x56,0xa9,0x5a,0xa5,0x5e,0xa1,0x62,0x9d,0x66,0x99,0x6a,0x95,0x6e,0x91,0x72,0x8d,0x76,0x89,0x7a,0x85,0x7e,0x81};
604static unsigned char dequant_ep_0q[5] = {0,64,128,192,255};
605static unsigned char dequant_ep_1q[10] = {0x00,0xff,0x1c,0xe3,0x38,0xc7,0x54,0xab,0x71,0x8e};
606static unsigned char dequant_ep_2q[20] = {0x00,0xff,0x43,0xbc,0x0d,0xf2,0x50,0xaf,0x1b,0xe4,0x5e,0xa1,0x28,0xd7,0x6b,0x94,0x36,0xc9,0x79,0x86};
607static unsigned char dequant_ep_3q[40] = {0x00,0xff,0x20,0xdf,0x41,0xbe,0x61,0x9e,0x06,0xf9,0x27,0xd8,0x47,0xb8,0x68,0x97,0x0d,0xf2,0x2d,0xd2,0x4e,0xb1,0x6e,0x91,0x13,0xec,0x34,0xcb,0x54,0xab,0x75,0x8a,0x1a,0xe5,0x3a,0xc5,0x5b,0xa4,0x7b,0x84};
608static unsigned char dequant_ep_4q[80] = {0x00,0xff,0x10,0xef,0x20,0xdf,0x30,0xcf,0x40,0xbf,0x50,0xaf,0x60,0x9f,0x70,0x8f,0x03,0xfc,0x13,0xec,0x23,0xdc,0x33,0xcc,0x43,0xbc,0x53,0xac,0x64,0x9b,0x74,0x8b,0x06,0xf9,0x16,0xe9,0x26,0xd9,0x36,0xc9,0x47,0xb8,0x57,0xa8,0x67,0x98,0x77,0x88,0x09,0xf6,0x19,0xe6,0x2a,0xd5,0x3a,0xc5,0x4a,0xb5,0x5a,0xa5,0x6a,0x95,0x7a,0x85,0x0d,0xf2,0x1d,0xe2,0x2d,0xd2,0x3d,0xc2,0x4d,0xb2,0x5d,0xa2,0x6d,0x92,0x7d,0x82};
609static unsigned char dequant_ep_5q[160]= {0x00,0xff,0x08,0xf7,0x10,0xef,0x18,0xe7,0x20,0xdf,0x28,0xd7,0x30,0xcf,0x38,0xc7,0x40,0xbf,0x48,0xb7,0x50,0xaf,0x58,0xa7,0x60,0x9f,0x68,0x97,0x70,0x8f,0x78,0x87,0x01,0xfe,0x09,0xf6,0x11,0xee,0x19,0xe6,0x21,0xde,0x29,0xd6,0x31,0xce,0x39,0xc6,0x41,0xbe,0x49,0xb6,0x51,0xae,0x59,0xa6,0x61,0x9e,0x69,0x96,0x71,0x8e,0x79,0x86,0x03,0xfc,0x0b,0xf4,0x13,0xec,0x1b,0xe4,0x23,0xdc,0x2b,0xd4,0x33,0xcc,0x3b,0xc4,0x43,0xbc,0x4b,0xb4,0x53,0xac,0x5b,0xa4,0x63,0x9c,0x6b,0x94,0x73,0x8c,0x7b,0x84,0x04,0xfb,0x0c,0xf3,0x14,0xeb,0x1c,0xe3,0x24,0xdb,0x2c,0xd3,0x34,0xcb,0x3c,0xc3,0x44,0xbb,0x4c,0xb3,0x54,0xab,0x5c,0xa3,0x64,0x9b,0x6c,0x93,0x74,0x8b,0x7c,0x83,0x06,0xf9,0x0e,0xf1,0x16,0xe9,0x1e,0xe1,0x26,0xd9,0x2e,0xd1,0x36,0xc9,0x3e,0xc1,0x46,0xb9,0x4e,0xb1,0x56,0xa9,0x5e,0xa1,0x66,0x99,0x6e,0x91,0x76,0x89,0x7e,0x81};
//Endpoint value encodings, ordered by the number of distinct values each can represent (the trailing number on each row).
// extra   - 0: plain bits only, 1: each value also carries a trit, 2: each value also carries a quint.
// bits    - number of plain bits per value.
// dequant - lookup table mapping a decoded value to its 0-255 endpoint value.
//How an entry is chosen for a block is not shown in this part of the file.
610static const struct
611{
612 unsigned char extra, bits, *dequant;
613} astc_epvmode[] =
614{
615 {0,1, dequant_ep_1b}, //2
616 {1,0, dequant_ep_0t}, //3
617 {0,2, dequant_ep_2b}, //4
618 {2,0, dequant_ep_0q}, //5
619 {1,1, dequant_ep_1t}, //6
620 {0,3, dequant_ep_3b}, //8
621 {2,1, dequant_ep_1q}, //10
622 {1,2, dequant_ep_2t}, //12
623 {0,4, dequant_ep_4b}, //16
624 {2,2, dequant_ep_2q}, //20
625 {1,3, dequant_ep_3t}, //24
626 {0,5, dequant_ep_5b}, //32
627 {2,3, dequant_ep_3q}, //40
628 {1,4, dequant_ep_4t}, //48
629 {0,6, dequant_ep_6b}, //64
630 {2,4, dequant_ep_4q}, //80
631 {1,5, dequant_ep_5t}, //96
632 {0,7, dequant_ep_7b}, //128
633 {2,5, dequant_ep_5q}, //160
634 {1,6, dequant_ep_6t}, //192
635 {0,8, dequant_ep_8b}, //256
636 //other modes don't make any sense
637};
638/*static void ASTC_CalcDequant(void)
639{
640 int i;
641
642 int extra = 0;
643 int bits = 1;
644 int isweight = 1;
645 int targbits = isweight?6:8;
646 int v;
647
648 static qboolean nospam;
649 if (nospam)
650 return;
651 nospam = true;
652
653 //binary:
654 if (!extra)
655 {
656 for (bits = 1; bits <= (isweight?5:8); bits++)
657 {
658 Con_Printf("table: %s_%ib", isweight?"weight":"ep", bits);
659 for (i = 0; i < (1<<bits); i++)
660 {
661 v = i;
662 v<<=(targbits-bits);
663 v|=v>>bits;
664 v|=v>>bits;
665 v|=v>>bits;
666 v|=v>>bits;
667 v|=v>>bits;
668 v|=v>>bits;
669 v|=v>>bits;
670 v|=v>>bits;
671
672 if (isweight && v > 32)
673 v++; //0-64 instead of 0-63
674
675 Con_Printf("0x%02x,", v);
676 }
677 Con_Printf("\n");
678 }
679 }
680 else if (extra == 1)
681 {
682 int A,B,C,D;
683
684 for (bits = 1; bits <= (isweight?3:6); bits++)
685 {
686 Con_Printf("table: %s_%it:\n", isweight?"weight":"ep", bits);
687 for (i = 0; i < ((2<<bits)|(1<<bits)); i++)
688 {
689 switch(bits)
690 {
691 case 1:
692 A = (i&1)*(isweight?0x7f:0x1ff);
693 B = 0;
694 C = isweight?50:204;
695 D = i>>bits;
696 break;
697 case 2:
698 A = (i&1)*(isweight?0x7f:0x1ff);
699 B = ((i>>1)&1) * (isweight?0b1000101:0b100010110);
700 C = isweight?25:93;
701 D = i>>bits;
702 break;
703 case 3:
704 A = (i&1)*(isweight?0x7f:0x1ff);
705 B = ((i>>1)&1) * (isweight?0b0100001:0b010000101); //b
706 B|= ((i>>2)&1) * (isweight?0b1000010:0b100001010); //c
707 C = isweight?11:44;
708 D = i>>bits;
709 break;
710 case 4:
711 A = (i&1)*0x1ff;
712 B = ((i>>1)&1) * 0b001000001; //b
713 B|= ((i>>2)&1) * 0b010000010; //c
714 B|= ((i>>3)&1) * 0b100000100; //d
715 C = 22;
716 D = i>>bits;
717 break;
718 case 5:
719 A = (i&1)*0x1ff;
720 B = ((i>>1)&1) * 0b000100000; //b
721 B|= ((i>>2)&1) * 0b001000000; //c
722 B|= ((i>>3)&1) * 0b010000001; //d
723 B|= ((i>>4)&1) * 0b100000010; //e
724 C = 11;
725 D = i>>bits;
726 break;
727 case 6:
728 A = (i&1)*0x1ff;
729 B = ((i>>1)&1) * 0b000010000; //b
730 B|= ((i>>2)&1) * 0b000100000; //c
731 B|= ((i>>3)&1) * 0b001000000; //d
732 B|= ((i>>4)&1) * 0b010000000; //e
733 B|= ((i>>5)&1) * 0b100000001; //f
734 C = 5;
735 D = i>>bits;
736 break;
737 }
738 v = D * C + B;
739 v = v ^ A;
740 v = (A & (isweight?0x20:0x80)) | (v >> 2);
741
742 if (isweight && v > 32)
743 v++; //0-64 instead of 0-63
744
745 Con_Printf("0x%02x,", v);
746 }
747 Con_Printf("\n");
748 }
749 }
750 else if (extra == 2)
751 {
752 int A,B,C,D;
753
754 for (bits = 1; bits <= (isweight?2:5); bits++)
755 {
756 Con_Printf("table: %s_%iq:\n", isweight?"weight":"ep", bits);
757 for (i = 0; i < ((4<<bits)|(1<<bits)); i++)
758 {
759 switch(bits)
760 {
761 case 1:
762 A = (i&1)*(isweight?0x7f:0x1ff);
763 B = 0;
764 C = isweight?23:113;
765 D = i>>bits;
766 break;
767 case 2:
768 A = (i&1)*(isweight?0x7f:0x1ff);
769 B = ((i>>1)&1) * (isweight?0b1000010:0b100001100);
770 C = isweight?13:54;
771 D = i>>bits;
772 break;
773 case 3:
774 A = (i&1)*0x1ff;
775 B = ((i>>1)&1) * 0b010000010; //b
776 B|= ((i>>2)&1) * 0b100000101; //c
777 C = 26;
778 D = i>>bits;
779 break;
780 case 4:
781 A = (i&1)*0x1ff;
782 B = ((i>>1)&1) * 0b001000000; //b
783 B|= ((i>>2)&1) * 0b010000001; //c
784 B|= ((i>>3)&1) * 0b100000010; //d
785 C = 13;
786 D = i>>bits;
787 break;
788 case 5:
789 A = (i&1)*0x1ff;
790 B = ((i>>1)&1) * 0b000100000; //b
791 B|= ((i>>2)&1) * 0b001000000; //c
792 B|= ((i>>3)&1) * 0b010000000; //d
793 B|= ((i>>4)&1) * 0b100000001; //e
794 C = 6;
795 D = i>>bits;
796 break;
797 }
798 v = D * C + B;
799 v = v ^ A;
800 v = (A & (isweight?0x20:0x80)) | (v >> 2);
801
802 if (isweight && v > 32)
803 v++; //0-64 instead of 0-63
804
805 Con_Printf("0x%02x,", v);
806 }
807 Con_Printf("\n");
808 }
809 }
810}*/
811
/*
 * Writes the "blue contracted" form of a colour to out[0..3]:
 * red and green are each averaged with blue, while blue and alpha pass through unchanged.
 */
static void ASTC_blue_contract(int *out, int r, int g, int b, int a)
{
	const int red = (r+b) >> 1;
	const int green = (g+b) >> 1;

	out[3] = a;
	out[2] = b;
	out[1] = green;
	out[0] = red;
}
/*
 * Moves bit 7 of 'a' into the top of *b (shifting *b down by one to make room), then
 * reinterprets bits 1-6 of 'a' as a signed 6-bit number.
 * Returns that signed value (-32..31); *b is updated in place.
 */
static int ASTC_bit_transfer_signed(int a, unsigned char *b) //returns new value for a.
{
	int rest = (a >> 1) & 0x3F;

	*b = (*b >> 1) | (a & 0x80);
	if (rest & 0x20)
		rest -= 0x40;	//sign-extend from 6 bits
	return rest;
}
/*
 * Passes each of the four components of c through bound(0, value, 255), in place.
 */
static void ASTC_clamp_unorm8(int *c)
{
	int i;

	for (i = 0; i < 4; i++)
		c[i] = bound(0, c[i], 255);
}
836
837#ifdef ASTC_WITH_HDR
838static void ASTC_HDR_Mode_2(struct astc_part *p, unsigned char *v)
839{
840 int y0,y1;
841 if(v[1] >= v[0])
842 {
843 y0 = (v[0] << 4);
844 y1 = (v[1] << 4);
845 }
846 else
847 {
848 y0 = (v[1] << 4) + 8;
849 y1 = (v[0] << 4) - 8;
850 }
851 Vector4Set(p->ep[0], y0, y0, y0, 0x780);
852 Vector4Set(p->ep[1], y1, y1, y1, 0x780);
853 p->hdr = 0xf;
854}
855static void ASTC_HDR_Mode_3(struct astc_part *p, unsigned char *v)
856{
857 int y0, y1, d;
858 if((v[0]&0x80) != 0)
859 {
860 y0 = ((v[1] & 0xE0) << 4) | ((v[0] & 0x7F) << 2);
861 d = (v[1] & 0x1F) << 2;
862 }
863 else
864 {
865 y0 = ((v[1] & 0xF0) << 4) | ((v[0] & 0x7F) << 1);
866 d = (v[1] & 0x0F) << 1;
867 }
868
869 y1 = y0 + d;
870 if(y1 > 0xFFF)
871 y1 = 0xFFF;
872
873 Vector4Set(p->ep[0], y0, y0, y0, 0x780);
874 Vector4Set(p->ep[1], y1, y1, y1, 0x780);
875 p->hdr = 0xf;
876}
877static void ASTC_HDR_Mode_7(struct astc_part *p, unsigned char *v)
878{
879 int modeval = ((v[0]&0xC0)>>6) | ((v[1]&0x80)>>5) | ((v[2]&0x80)>>4);
880 int majcomp;
881 int mode;
882 static const int shamts[6] = { 1,1,2,3,4,5 };
883 int shamt,t;
884
885 int red, green, blue, scale;
886 int x0,x1,x2,x3,x4,x5,x6,ohm;
887
888 if( (modeval & 0xC ) != 0xC )
889 {
890 majcomp = modeval >> 2;
891 mode = modeval & 3;
892 }
893 else if( modeval != 0xF )
894 {
895 majcomp = modeval & 3;
896 mode = 4;
897 }
898 else
899 {
900 majcomp = 0; mode = 5;
901 }
902
903 red = v[0] & 0x3f;
904 green = v[1] & 0x1f;
905 blue = v[2] & 0x1f;
906 scale = v[3] & 0x1f;
907
908 x0 = (v[1] >> 6) & 1; x1 = (v[1] >> 5) & 1;
909 x2 = (v[2] >> 6) & 1; x3 = (v[2] >> 5) & 1;
910 x4 = (v[3] >> 7) & 1; x5 = (v[3] >> 6) & 1;
911 x6 = (v[3] >> 5) & 1;
912
913 ohm = 1 << mode;
914 if( ohm & 0x30 ) green |= x0 << 6;
915 if( ohm & 0x3A ) green |= x1 << 5;
916 if( ohm & 0x30 ) blue |= x2 << 6;
917 if( ohm & 0x3A ) blue |= x3 << 5;
918 if( ohm & 0x3D ) scale |= x6 << 5;
919 if( ohm & 0x2D ) scale |= x5 << 6;
920 if( ohm & 0x04 ) scale |= x4 << 7;
921 if( ohm & 0x3B ) red |= x4 << 6;
922 if( ohm & 0x04 ) red |= x3 << 6;
923 if( ohm & 0x10 ) red |= x5 << 7;
924 if( ohm & 0x0F ) red |= x2 << 7;
925 if( ohm & 0x05 ) red |= x1 << 8;
926 if( ohm & 0x0A ) red |= x0 << 8;
927 if( ohm & 0x05 ) red |= x0 << 9;
928 if( ohm & 0x02 ) red |= x6 << 9;
929 if( ohm & 0x01 ) red |= x3 << 10;
930 if( ohm & 0x02 ) red |= x5 << 10;
931
932 shamt = shamts[mode];
933 red <<= shamt; green <<= shamt; blue <<= shamt; scale <<= shamt;
934
935 if( mode != 5 ) { green = red - green; blue = red - blue; }
936
937 if( majcomp == 1 )
938 {
939 t = red;
940 red = green;
941 green = t;
942 }
943 if( majcomp == 2 )
944 {
945 t = red;
946 red = blue;
947 blue = t;
948 }
949
950 p->ep[1][0] = bound( 0, red, 0xFFF );
951 p->ep[1][1] = bound( 0, green, 0xFFF );
952 p->ep[1][2] = bound( 0, blue, 0xFFF );
953
954 p->ep[0][0] = bound( 0, red - scale, 0xFFF );
955 p->ep[0][1] = bound( 0, green - scale, 0xFFF );
956 p->ep[0][2] = bound( 0, blue - scale, 0xFFF );
957
958 p->ep[1][3] = p->ep[0][3] = 0x780;
959
960 p->hdr = 0xf;
961}
962static void ASTC_HDR_Mode_11(struct astc_part *p, unsigned char *v)
963{
964 static const int dbitstab[8] = {7,6,7,6,5,6,5,6};
965 int shamt;
966 int majcomp = ((v[4] & 0x80) >> 7) | ((v[5] & 0x80) >> 6);
967 int mode,va,vb0,vb1,vc,vd0,vd1;
968 int x0,x1,x2,x3,x4,x5,ohm;
969
970 if( majcomp == 3 )
971 {
972 Vector4Set(p->ep[0], v[0] << 4, v[2] << 4, (v[4] & 0x7f) << 5, 0x780);
973 Vector4Set(p->ep[1], v[1] << 4, v[3] << 4, (v[5] & 0x7f) << 5, 0x780);
974 p->hdr = 0xf;
975 return;
976 }
977
978 mode = ((v[1]&0x80)>>7) | ((v[2]&0x80)>>6) | ((v[3]&0x80)>>5);
979 va = v[0] | ((v[1] & 0x40) << 2);
980 vb0 = v[2] & 0x3f;
981 vb1 = v[3] & 0x3f;
982 vc = v[1] & 0x3f;
983 vd0 = v[4] & 0x7f;
984 vd1 = v[5] & 0x7f;
985
986 if (vd0 & (1<<(dbitstab[mode]-1)))
987 vd0 |= -1 & ~((1u<<dbitstab[mode])-1);
988 if (vd1 & (1<<(dbitstab[mode]-1)))
989 vd1 |= -1 & ~((1u<<dbitstab[mode])-1);
990
991 x0 = (v[2] >> 6) & 1;
992 x1 = (v[3] >> 6) & 1;
993 x2 = (v[4] >> 6) & 1;
994 x3 = (v[5] >> 6) & 1;
995 x4 = (v[4] >> 5) & 1;
996 x5 = (v[5] >> 5) & 1;
997
998 ohm = 1 << mode;
999 if( ohm & 0xA4 ) va |= x0 << 9;
1000 if( ohm & 0x08 ) va |= x2 << 9;
1001 if( ohm & 0x50 ) va |= x4 << 9;
1002 if( ohm & 0x50 ) va |= x5 << 10;
1003 if( ohm & 0xA0 ) va |= x1 << 10;
1004 if( ohm & 0xC0 ) va |= x2 << 11;
1005 if( ohm & 0x04 ) vc |= x1 << 6;
1006 if( ohm & 0xE8 ) vc |= x3 << 6;
1007 if( ohm & 0x20 ) vc |= x2 << 7;
1008 if( ohm & 0x5B ) vb0 |= x0 << 6;
1009 if( ohm & 0x5B ) vb1 |= x1 << 6;
1010 if( ohm & 0x12 ) vb0 |= x2 << 7;
1011 if( ohm & 0x12 ) vb1 |= x3 << 7;
1012
1013 // Now shift up so that major component is at top of 12-bit value
1014 shamt = (mode >> 1) ^ 3;
1015 va <<= shamt; vb0 <<= shamt; vb1 <<= shamt;
1016 vc <<= shamt; vd0 <<= shamt; vd1 <<= shamt;
1017
1018 p->ep[1][0] = bound( 0, va, 0xFFF );
1019 p->ep[1][1] = bound( 0, va - vb0, 0xFFF );
1020 p->ep[1][2] = bound( 0, va - vb1, 0xFFF );
1021
1022 p->ep[0][0] = bound( 0, va - vc, 0xFFF );
1023 p->ep[0][1] = bound( 0, va - vb0 - vc - vd0, 0xFFF );
1024 p->ep[0][2] = bound( 0, va - vb1 - vc - vd1, 0xFFF );
1025
1026 if( majcomp == 1 )
1027 {
1028 p->ep[0][3] = p->ep[0][0];
1029 p->ep[0][0] = p->ep[0][1];
1030 p->ep[0][1] = p->ep[0][3];
1031 p->ep[1][3] = p->ep[1][0];
1032 p->ep[1][0] = p->ep[1][1];
1033 p->ep[1][1] = p->ep[1][3];
1034 }
1035 else if( majcomp == 2 )
1036 {
1037 p->ep[0][3] = p->ep[0][0];
1038 p->ep[0][0] = p->ep[0][2];
1039 p->ep[0][2] = p->ep[0][3];
1040 p->ep[1][3] = p->ep[1][0];
1041 p->ep[1][0] = p->ep[1][2];
1042 p->ep[1][2] = p->ep[1][3];
1043 }
1044
1045 p->ep[0][3] = p->ep[1][3] = 0x780;
1046
1047 p->hdr = 0xf;
1048}
1049static void ASTC_HDR_Mode_14(struct astc_part *p, unsigned char *v)
1050{
1051 ASTC_HDR_Mode_11(p, v);
1052
1053 p->ep[0][3] = v[6];
1054 p->ep[1][3] = v[7];
1055 p->hdr &= 0x7;
1056}
1057static void ASTC_HDR_Mode_15(struct astc_part *p, unsigned char *v)
1058{
1059 int v6=v[6], v7=v[7];
1060 int mode;
1061 ASTC_HDR_Mode_11(p,v);
1062
1063 mode = ((v6 >> 7) & 1) | ((v7 >> 6) & 2);
1064 v6 &= 0x7F;
1065 v7 &= 0x7F;
1066
1067 if(mode==3)
1068 {
1069 p->ep[0][3] = v6 << 5;
1070 p->ep[1][3] = v7 << 5;
1071 }
1072 else
1073 {
1074 v6 |= (v7 << (mode+1)) & 0x780;
1075 v7 &= (0x3F >> mode);
1076 v7 ^= 0x20 >> mode;
1077 v7 -= 0x20 >> mode;
1078 v6 <<= (4-mode);
1079 v7 <<= (4-mode);
1080
1081 v7 += v6;
1082 v7 = bound(0, v7, 0xFFF);
1083 p->ep[0][3] = v6;
1084 p->ep[1][3] = v7;
1085 }
1086}
1087#endif
1088
1089static void ASTC_DecodeEndpoints(struct astc_block_info *b, unsigned char *v)
1090{
1091 int i, t0, t1, t3, t5, t7;
1092
1093 for (i = 0; i < b->partitions; i++)
1094 {
1095#ifdef ASTC_WITH_HDR
1096 b->part[i].hdr = 0;
1097#endif
1098 switch (b->part[i].mode & 15)
1099 {
1100#ifdef ASTC_WITH_HDR
1101 case 2: //HDR Luminance, large range
1102 ASTC_HDR_Mode_2(&b->part[i], v);
1103 break;
1104 case 3: //HDR Luminance, small range
1105 ASTC_HDR_Mode_3(&b->part[i], v);
1106 break;
1107 case 7: //HDR RGB, base+scale
1108 ASTC_HDR_Mode_7(&b->part[i], v);
1109 break;
1110 case 11: //HDR RGB
1111 ASTC_HDR_Mode_11(&b->part[i], v);
1112 break;
1113 case 14: //HDR RGB + LDR Alpha
1114 ASTC_HDR_Mode_14(&b->part[i], v);
1115 break;
1116 case 15: //HDR RGB + HDR Alpha
1117 ASTC_HDR_Mode_15(&b->part[i], v);
1118 break;
1119#endif
1120 default: //the error colour - for unsupported hdr endpoints. unreachable when hdr is enabled. just fill it with the error colour.
1121 Vector4Set(b->part[i].ep[0], 0xff, 0, 0xff, 0xff);
1122 Vector4Set(b->part[i].ep[1], 0xff, 0, 0xff, 0xff);
1123 break;
1124
1125 case 0: //LDR Luminance, direct
1126 Vector4Set(b->part[i].ep[0], v[0], v[0], v[0], 0xff);
1127 Vector4Set(b->part[i].ep[1], v[1], v[1], v[1], 0xff);
1128 break;
1129 case 1: //LDR Luminance, base+offset
1130 t0 = (v[0]>>2)|(v[1]&0xc0);
1131 t1 = t0+(v[1]&0x3f);
1132 if (t1>0xff)
1133 t1=0xff;
1134 Vector4Set(b->part[i].ep[0], t0, t0, t0, 0xff);
1135 Vector4Set(b->part[i].ep[1], t1, t1, t1, 0xff);
1136 break;
1137 case 4: //LDR Luminance+Alpha,direct
1138 Vector4Set(b->part[i].ep[0], v[0], v[0], v[0], v[2]);
1139 Vector4Set(b->part[i].ep[1], v[1], v[1], v[1], v[3]);
1140 break;
1141 case 5: //LDR Luminance+Alpha, base+offset
1142 t1 = ASTC_bit_transfer_signed(v[1],&v[0]);
1143 t3 = ASTC_bit_transfer_signed(v[3],&v[2]);
1144 Vector4Set(b->part[i].ep[0],v[0],v[0],v[0],v[2]);
1145 Vector4Set(b->part[i].ep[1],v[0]+t1,v[0]+t1,v[0]+t1,v[2]+t3);
1146 ASTC_clamp_unorm8(b->part[i].ep[0]);
1147 ASTC_clamp_unorm8(b->part[i].ep[1]);
1148 break;
1149 case 6: //LDR RGB, base+scale
1150 Vector4Set(b->part[i].ep[0], ((int)v[0]*(int)v[3])>>8, ((int)v[1]*(int)v[3])>>8, ((int)v[2]*(int)v[3])>>8, 0xff);
1151 Vector4Set(b->part[i].ep[1], v[0], v[1], v[2], 0xff);
1152 break;
1153 case 8: //LDR RGB, Direct
1154 t0 = (int)v[0]+(int)v[2]+(int)v[4];
1155 t1 = (int)v[1]+(int)v[3]+(int)v[5];
1156 if (t1>=t0)
1157 {
1158 Vector4Set(b->part[i].ep[0], v[0],v[2],v[4],0xff);
1159 Vector4Set(b->part[i].ep[1], v[1],v[3],v[5],0xff);
1160 }
1161 else
1162 {
1163 ASTC_blue_contract(b->part[i].ep[0], v[1],v[3],v[5], 0xff);
1164 ASTC_blue_contract(b->part[i].ep[1], v[0],v[2],v[4], 0xff);
1165 }
1166 break;
1167 case 9: //LDR RGB, base+offset
1168 t1 = ASTC_bit_transfer_signed(v[1],&v[0]);
1169 t3 = ASTC_bit_transfer_signed(v[3],&v[2]);
1170 t5 = ASTC_bit_transfer_signed(v[5],&v[4]);
1171 if(t1+t3+t5 >= 0)
1172 {
1173 Vector4Set(b->part[i].ep[0],v[0],v[2],v[4],0xff);
1174 Vector4Set(b->part[i].ep[1],v[0]+t1,v[2]+t3,v[4]+t5,0xff);
1175 }
1176 else
1177 {
1178 ASTC_blue_contract(b->part[i].ep[0], v[0]+t1,v[2]+t3,v[4]+t5, 0xff);
1179 ASTC_blue_contract(b->part[i].ep[1], v[0],v[2],v[4], 0xff);
1180 }
1181 ASTC_clamp_unorm8(b->part[i].ep[0]);
1182 ASTC_clamp_unorm8(b->part[i].ep[1]);
1183 break;
1184 case 10: //LDR RGB, base+scale plus two A
1185 Vector4Set(b->part[i].ep[0], ((int)v[0]*v[3])>>8, ((int)v[1]*v[3])>>8, ((int)v[2]*v[3])>>8, v[4]);
1186 Vector4Set(b->part[i].ep[1], v[0], v[1], v[2], v[5]);
1187 break;
1188 case 12: //LDR RGBA, direct
1189 if (v[1]+(int)v[3]+v[5]>=v[0]+(int)v[2]+v[4])
1190 {
1191 Vector4Set(b->part[i].ep[0], v[0],v[2],v[4],v[6]);
1192 Vector4Set(b->part[i].ep[1], v[1],v[3],v[5],v[7]);
1193 }
1194 else
1195 {
1196 ASTC_blue_contract(b->part[i].ep[0], v[1],v[3],v[5],v[7]);
1197 ASTC_blue_contract(b->part[i].ep[1], v[0],v[2],v[4],v[6]);
1198 }
1199 break;
1200 case 13: //LDR RGBA, base+offset
1201 t1 = ASTC_bit_transfer_signed(v[1],&v[0]);
1202 t3 = ASTC_bit_transfer_signed(v[3],&v[2]);
1203 t5 = ASTC_bit_transfer_signed(v[5],&v[4]);
1204 t7 = ASTC_bit_transfer_signed(v[7],&v[6]);
1205 if(t1+t3+t5>=0)
1206 {
1207 Vector4Set(b->part[i].ep[0], v[0],v[2],v[4],v[6]);
1208 Vector4Set(b->part[i].ep[1], v[0]+t1,v[2]+t3,v[4]+t5,v[6]+t7);
1209 }
1210 else
1211 {
1212 ASTC_blue_contract(b->part[i].ep[0], v[0]+t1,v[2]+t3,v[4]+t5,v[6]+t7);
1213 ASTC_blue_contract(b->part[i].ep[1], v[0],v[2],v[4],v[6]);
1214 }
1215 ASTC_clamp_unorm8(b->part[i].ep[0]);
1216 ASTC_clamp_unorm8(b->part[i].ep[1]);
1217 break;
1218 }
1219 v += ((b->part[i].mode>>2)+1)<<1;
1220 }
1221}
1222static void ASTC_ReadEndpoints(struct astc_block_info *b)
1223{
1224 int i;
1225 int cembits;
1226
1227 unsigned char epv[18]; //maximum raw endpoint values,
1228 char epvalues;
1229 unsigned char gahffs[16], t;
1230
1231 //figure out how many raw values we need
1232 epvalues = 0;
1233 for (i = 0; i < b->partitions; i++)
1234 epvalues += ((b->part[i].mode>>2)+1)<<1;
1235 if (epvalues > countof(epv))
1236 {
1237 b->status = ASTC_ERROR;
1238 return;
1239 }
1240
1241 //the endpoint bits are encoded using the largest size available that'll still fit, yielding raw values between 0-255.
1242 for(i = countof(astc_epvmode)-1; i >= 0; i--)
1243 {
1244 cembits = ASTC_DecodeSize(epvalues, astc_epvmode[i].bits, astc_epvmode[i].extra);
1245 if(cembits <= b->ep_bits)
1246 {
1247 //read the values.
1248 ASTC_Decode(b->in, epv, epvalues, b->config_bits, astc_epvmode[i].bits, astc_epvmode[i].extra, astc_epvmode[i].dequant);
1249 //and decode them.
1250 ASTC_DecodeEndpoints(b, epv);
1251
1252 //weight bits are backwards (gah! ffs!)
1253 //so swap them around so our decode function doesn't need to care
1254 for (i = 0; i < countof(gahffs); i++)
1255 {
1256 t = b->in[i];
1257 t = (t>>4)|(t<<4);
1258 t = ((t&0xcc)>>2)|((t&0x33)<<2);
1259 t = ((t&0xaa)>>1)|((t&0x55)<<1);
1260 gahffs[15-i] = t;
1261 }
1262 //weights are aligned at the end... now the start. gah! ffs!
1263 ASTC_Decode(gahffs, b->weights, b->wcount[3], 0, astc_weightmode[b->precision].bits, astc_weightmode[b->precision].extra, astc_weightmode[b->precision].dequant);
1264 return;
1265 }
1266 }
1267 b->status = ASTC_ERROR;
1268}
1269
//Integer bit-mixing hash used to scramble the partition seed.
//All arithmetic is unsigned, so it wraps rather than overflows.
static unsigned int hash52(unsigned int p)
{
	p ^= p >> 15;
	p -= p << 17;
	p += p << 7;
	p += p << 4;
	p ^= p >> 5;
	p += p << 16;
	p ^= p >> 7;
	p ^= p >> 3;
	p ^= p << 6;
	p ^= p >> 17;
	return p;
}
//Returns which partition (0..partitions-1) the texel at x,y,z belongs to.
//seed: the block's partition index. smallblock: non-zero doubles the texel coordinates first.
//Single-partition blocks always yield 0.
static int ASTC_ChoosePartition(int seed, int x, int y, int z, int partitions, int smallblock)
{
	unsigned char sd[12];	//sd[0]..sd[11] correspond to seed1..seed12
	unsigned int rnum;
	int shodd, sheven, shz;
	int score[4];
	int k, best;

	if (partitions==1)
		return 0;
	if (smallblock)
	{
		x <<= 1;
		y <<= 1;
		z <<= 1;
	}
	seed += (partitions-1) * 1024;
	rnum = hash52(seed);

	//carve the hash up into 4-bit seeds. the first eight are consecutive nibbles, the last four overlap them.
	for (k = 0; k < 8; k++)
		sd[k] = (rnum >> (4*k)) & 0xF;
	sd[8]  = (rnum >> 18) & 0xF;
	sd[9]  = (rnum >> 22) & 0xF;
	sd[10] = (rnum >> 26) & 0xF;
	sd[11] = ((rnum >> 30) | (rnum << 2)) & 0xF;

	//square each seed (at most 15*15, so it still fits in a byte).
	for (k = 0; k < 12; k++)
		sd[k] *= sd[k];

	//then scale them back down by amounts that depend upon the seed and the partition count.
	if (seed & 1)
	{
		shodd  = ((seed&2) ? 4:5);
		sheven = ((partitions==3) ? 6:5);
	}
	else
	{
		shodd  = ((partitions==3) ? 6:5);
		sheven = ((seed&2) ? 4:5);
	}
	shz = (seed & 0x10) ? shodd : sheven;

	for (k = 0; k < 8; k++)
		sd[k] >>= (k&1) ? sheven : shodd;	//seed1,3,5,7 use the first shift, seed2,4,6,8 the second
	for (; k < 12; k++)
		sd[k] >>= shz;

	//one 6-bit score per partition.
	score[0] = sd[0]*x + sd[1]*y + sd[10]*z + (rnum >> 14);
	score[1] = sd[2]*x + sd[3]*y + sd[11]*z + (rnum >> 10);
	score[2] = sd[4]*x + sd[5]*y + sd[8] *z + (rnum >> 6);
	score[3] = sd[6]*x + sd[7]*y + sd[9] *z + (rnum >> 2);
	for (k = 0; k < 4; k++)
		score[k] &= 0x3F;

	//partitions that don't exist can never win.
	if (partitions < 4)
		score[3] = 0;
	if (partitions < 3)
		score[2] = 0;

	//highest score wins, with ties going to the lowest partition number.
	best = 0;
	for (k = 1; k < 4; k++)
	{
		if (score[k] > score[best])
			best = k;
	}
	return best;
}
1350#endif
1351
1352#ifdef ASTC_WITH_LDR
1353//Spits out 8-bit RGBA data for a single block. Any HDR blocks will result in the error colour.
1354//sRGB can be applied by the caller, if needed.
1355ASTC_PUBLIC void ASTC_Decode_LDR8(unsigned char *in, unsigned char *out, int pixstride, int layerstride, int bw, int bh, int bd)
1356{
1357 struct astc_block_info b;
1358 int x, y;
1359 int stride = pixstride*4;
1360#ifdef ASTC_WITH_3D
1361 int z;
1362 layerstride = layerstride*4-(stride*bh);
1363#else
1364 if (bd != 1)
1365 return; //error!
1366#endif
1367 b.in = in;
1368 b.blocksize[0] = bw;
1369 b.blocksize[1] = bh;
1370 b.blocksize[2] = bd;
1371 ASTC_ReadBlockMode(&b);
1372
1373 if (b.status == ASTC_VOID_LDR)
1374 { //void extent
1375 //Note: we don't validate the extents.
1376 for (y = 0; y < bh; y++, out += stride)
1377 for (x = 0; x < bw; x++)
1378 {
1379 out[(x<<2)+0] = in[9];
1380 out[(x<<2)+1] = in[11];
1381 out[(x<<2)+2] = in[13];
1382 out[(x<<2)+3] = in[15];
1383 }
1384 return;
1385 }
1386
1387 if (b.status == ASTC_OKAY)
1388 ASTC_ReadPartitions(&b);
1389 if (b.status == ASTC_OKAY)
1390 ASTC_ReadEndpoints(&b);
1391
1392 if (b.status == ASTC_OKAY)
1393 {
1394 #define N b.wcount[0]
1395 #define M b.wcount[1]
1396 int s1=1<<b.dualplane,s2=N<<b.dualplane; //values for 2d blocks (3d blocks will override)
1397 int s3=((bd!=1?N*M:0)+N+1)<<b.dualplane; //small variation for 3d blocks.
1398
1399 int smallblock = (b.blocksize[0]*b.blocksize[1]*b.blocksize[2])<31;
1400 int fs, s, ds = (1024+b.blocksize[0]/2)/(b.blocksize[0]-1);
1401 int ft, t, dt = (1024+b.blocksize[1]/2)/(b.blocksize[1]-1);
1402#ifdef ASTC_WITH_3D
1403 int fr, r, dr = (1024+b.blocksize[2]/2)/(b.blocksize[2]-1);
1404#endif
1405 int v0, w, w00,w01,w10,w11;
1406 struct astc_part *p;
1407
1408#ifdef ASTC_WITH_HDR
1409 for (x = 0; x < b.partitions; x++)
1410 { //the LDR profile treats HDR endpoints as the error colour. this is per-partition rather than per-block.
1411 if (b.part[x].hdr)
1412 {
1413 Vector4Set(b.part[x].ep[0], 0xff, 0, 0xff, 0xff);
1414 Vector4Set(b.part[x].ep[1], 0xff, 0, 0xff, 0xff);
1415 }
1416 //else FIXME: when spitting out 8bit, we're meant to have an extra 9th bit which is always set, in order to avoid round-to-zero biasing the result of the final 8 bits.
1417 }
1418#endif
1419
1420#ifdef ASTC_WITH_3D
1421 for (z = 0; z < bd; z++, out += layerstride-stride*bh)
1422#endif
1423 {
1424#ifdef ASTC_WITH_3D
1425 r = ((dr*z)*(b.wcount[2]-1)+32)>>6;
1426 fr=r&0xf;
1427#endif
1428 for (y = 0; y < bh; y++, out += stride)
1429 {
1430 t = ((dt*y)*(b.wcount[1]-1)+32)>>6;
1431 ft=t&0xf;
1432 for (x = 0; x < bw; x++)
1433 {
1434 p = &b.part[ASTC_ChoosePartition(b.partindex, x,y,0, b.partitions, smallblock)];
1435 s = ((ds*x)*(b.wcount[0]-1)+32)>>6;
1436 fs=s&0xf;
1437#ifdef ASTC_WITH_3D
1438 if (bd != 1)
1439 { //3d blocks use simplex interpolation instead of 8-way interpolation. its easier for hardware but more cycles for us.
1440 if (fs>fr)
1441 { //figure out which weights/factors to use.
1442 if (ft>fr)
1443 {
1444 if (fs>ft)
1445 s1=1, s2=N, w00=16-fs, w01=fs-ft, w10=ft-fr, w11=fr;
1446 else
1447 s1=N, s2=1, w00=16-ft, w01=ft-fs, w10=fs-fr, w11=fr;
1448 }
1449 else
1450 s1=1, s2=N*M, w00=16-fs, w01=fs-fr, w10=fr-ft, w11=ft;
1451 }
1452 else
1453 {
1454 if (fs>ft)
1455 s1=N*M, s2=1, w00=16-fr, w01=fr-fs, w10=fs-ft, w11=ft;
1456 else
1457 {
1458 if (ft>fr)
1459 s1=N, s2=N*M, w00=16-ft, w01=ft-fr, w10=fr-fs, w11=fs;
1460 else
1461 s1=N*M, s2=N, w00=16-fr, w01=fr-ft, w10=ft-fs, w11=fs;
1462 }
1463 }
1464
1465 s1 <<= b.dualplane;
1466 s2 <<= b.dualplane;
1467 s2+=s1;
1468 //s3 = (N*M+N+1)<<b.dualplane;
1469 v0 = ((s>>4)+(t>>4)*N+(r>>4)*N*M) << b.dualplane;
1470 }
1471 else
1472#endif
1473 {
1474 //s1 = 1<<b.dualplane;
1475 //s2 = (N)<<b.dualplane;
1476 //s3 = (N+1)<<b.dualplane;
1477 w11 = (fs*ft+8) >> 4;
1478 w10 = ft - w11;
1479 w01 = fs - w11;
1480 w00 = 16 - fs - ft + w11;
1481 v0 = ((s>>4)+(t>>4)*N) << b.dualplane;
1482 }
1483 w = ( w00*b.weights[v0] +
1484 w01*b.weights[v0+s1] +
1485 w10*b.weights[v0+s2] +
1486 w11*b.weights[v0+s3] + 8) >> 4;
1487 out[(x<<2)+0] = ((64-w)*p->ep[0][0] + w*p->ep[1][0])>>6;
1488 out[(x<<2)+1] = ((64-w)*p->ep[0][1] + w*p->ep[1][1])>>6;
1489 out[(x<<2)+2] = ((64-w)*p->ep[0][2] + w*p->ep[1][2])>>6;
1490 out[(x<<2)+3] = ((64-w)*p->ep[0][3] + w*p->ep[1][3])>>6;
1491
1492 if (b.dualplane)
1493 { //dual planes has a second set of weights that override a single channel
1494 v0++;
1495 w = ( w00*b.weights[v0] +
1496 w01*b.weights[v0+s1] +
1497 w10*b.weights[v0+s2] +
1498 w11*b.weights[v0+s3] + 8) >> 4;
1499 out[(x<<2)+b.ccs] = ((64-w)*p->ep[0][b.ccs] + w*p->ep[1][b.ccs])>>6;
1500 }
1501 }
1502 }
1503 }
1504 #undef N
1505 #undef M
1506 }
1507 else
1508 { //error colour == magenta
1509#ifdef ASTC_WITH_3D
1510 for (z = 0; z < bd; z++, out += layerstride)
1511#endif
1512 for (y = 0; y < bh; y++, out += stride)
1513 for (x = 0; x < bw; x++)
1514 {
1515 out[(x<<2)+0] = 0xff;
1516 out[(x<<2)+1] = 0;
1517 out[(x<<2)+2] = 0xff;
1518 out[(x<<2)+3] = 0xff;
1519 }
1520 }
1521}
1522#endif
1523
1524#ifdef ASTC_WITH_HDR
//Converts an interpolated 16bit colour value into the bit pattern of a half-float.
//hdr: non-zero if rawval is an hdr value (5-bit exponent on top of an 11-bit mantissa), zero if it is a unorm16 (0..65535 mapping to 0..1).
//returns the 16 half-float bits. ldr values are truncated (rounded towards zero) rather than rounded to nearest.
static unsigned short ASTC_GenHalffloat(int hdr, int rawval)
{
	if (hdr)
	{
		int fp16, m;
		fp16 = (rawval&0xF800) >> 1;	//the top 5 bits become the half's exponent
		m = rawval&0x7FF;
		//piecewise-linear remap of the 11-bit mantissa onto the half's 10 bits.
		if (m < 512)
			fp16 |= (3*m)>>3;
		else if (m >= 1536)
			fp16 |= (5*m - 2048)>>3;
		else
			fp16 |= (4*m - 512)>>3;
		return fp16;
	}
	else
	{
		union
		{
			float f;
			unsigned int u;
		} u = {rawval/65535.0};
		int e, m;

		//pull the single-precision float apart: unbiased exponent, and the top 10 bits of its mantissa.
		e = ((u.u>>23)&0xff) - 127;
		m = (u.u&((1<<23)-1))>>13;
		if (e < -24)
			return 0;	//smaller than the smallest half subnormal (includes an input of 0)
		if (e < -14)
			return (0x400|m) >> (-14-e);	//subnormal half: exponent field of 0, with the implicit leading 1 made explicit and shifted into place
		if (e > 15)
			return 31<<10;	//infinity instead of a nan
		return ((e+15)<<10) | m;
	}
}
1560
1561//Spits out half-float RGBA data for a single block.
1562ASTC_PUBLIC void ASTC_Decode_HDR(unsigned char *in, unsigned short *out, int pixstride, int layerstride, int bw, int bh, int bd)
1563{
1564 int x, y;
1565 int stride = pixstride*4;
1566 struct astc_block_info b;
1567#ifdef ASTC_WITH_3D
1568 int z;
1569 layerstride = layerstride*4-(stride*bh);
1570#else
1571 if (bd != 1)
1572 return; //error!
1573#endif
1574 b.in = in;
1575 b.blocksize[0] = bw;
1576 b.blocksize[1] = bh;
1577 b.blocksize[2] = bd;
1578
1579 ASTC_ReadBlockMode(&b);
1580
1581 if (b.status == ASTC_VOID_HDR)
1582 { //void extent
1583 //Note: we don't validate the extents.
1584 for (y = 0; y < bh; y++, out += stride)
1585 for (x = 0; x < bw; x++)
1586 { //hdr void extents already use fp16
1587 out[(x<<2)+0] = in[8] | (in[9]<<8);
1588 out[(x<<2)+1] = in[10] | (in[11]<<8);
1589 out[(x<<2)+2] = in[12] | (in[13]<<8);
1590 out[(x<<2)+3] = in[14] | (in[15]<<8);
1591 }
1592 return;
1593 }
1594 if (b.status == ASTC_VOID_LDR)
1595 { //void extent
1596 //Note: we don't validate the extents.
1597 for (y = 0; y < bh; y++, out += stride)
1598 for (x = 0; x < bw; x++)
1599 {
1600 out[(x<<2)+0] = ASTC_GenHalffloat(0, in[8] | (in[9]<<8));
1601 out[(x<<2)+1] = ASTC_GenHalffloat(0, in[10] | (in[11]<<8));
1602 out[(x<<2)+2] = ASTC_GenHalffloat(0, in[12] | (in[13]<<8));
1603 out[(x<<2)+3] = ASTC_GenHalffloat(0, in[14] | (in[15]<<8));
1604 }
1605 return;
1606 }
1607
1608 if (b.status == ASTC_OKAY)
1609 ASTC_ReadPartitions(&b);
1610 if (b.status == ASTC_OKAY)
1611 ASTC_ReadEndpoints(&b);
1612
1613 if (b.status == ASTC_OKAY)
1614 {
1615 #define N b.wcount[0]
1616 #define M b.wcount[1]
1617 int s1=1<<b.dualplane,s2=N<<b.dualplane; //values for 2d blocks (3d blocks will override)
1618 int s3=((bd!=1?N*M:0)+N+1)<<b.dualplane; //small variation for 3d blocks.
1619
1620 int smallblock = (b.blocksize[0]*b.blocksize[1]*b.blocksize[2])<31;
1621 int fs, s, ds = (1024+b.blocksize[0]/2)/(b.blocksize[0]-1);
1622 int ft, t, dt = (1024+b.blocksize[1]/2)/(b.blocksize[1]-1);
1623#ifdef ASTC_WITH_3D
1624 int fr, r, dr = (1024+b.blocksize[2]/2)/(b.blocksize[2]-1);
1625#endif
1626 int v0, w, w00,w01,w10,w11;
1627 struct astc_part *p;
1628
1629 for (x = 0; x < b.partitions; x++)
1630 { //we need to do a little extra processing here
1631 for (y = 0; y < 4; y++)
1632 {
1633 if (b.part[x].hdr&(1<<y))
1634 { //the 12bit endpoint values are shifted up to 16bit...
1635 b.part[x].ep[0][y] <<= 4;
1636 b.part[x].ep[1][y] <<= 4;
1637 }
1638 else
1639 { //convert to unorm16.
1640 b.part[x].ep[0][y] |= b.part[x].ep[0][y] << 8;
1641 b.part[x].ep[1][y] |= b.part[x].ep[1][y] << 8;
1642 }
1643 }
1644 }
1645
1646#ifdef ASTC_WITH_3D
1647 for (z = 0; z < bd; z++, out += layerstride)
1648#endif
1649 {
1650#ifdef ASTC_WITH_3D
1651 r = ((dr*z)*(b.wcount[2]-1)+32)>>6;
1652 fr=r&0xf;
1653#endif
1654 for (y = 0; y < bh; y++, out += stride)
1655 {
1656 t = ((dt*y)*(b.wcount[1]-1)+32)>>6;
1657 ft=t&0xf;
1658 for (x = 0; x < bw; x++)
1659 {
1660 p = &b.part[ASTC_ChoosePartition(b.partindex, x,y,0, b.partitions, smallblock)];
1661 s = ((ds*x)*(b.wcount[0]-1)+32)>>6;
1662 fs=s&0xf;
1663#ifdef ASTC_WITH_3D
1664 if (bd != 1)
1665 { //3d blocks use simplex interpolation instead of 8-way interpolation. its easier for hardware but more cycles for us.
1666 if (fs>fr)
1667 { //figure out which weights/factors to use.
1668 if (ft>fr)
1669 {
1670 if (fs>ft)
1671 s1=1, s2=N, w00=16-fs, w01=fs-ft, w10=ft-fr, w11=fr;
1672 else
1673 s1=N, s2=1, w00=16-ft, w01=ft-fs, w10=fs-fr, w11=fr;
1674 }
1675 else
1676 s1=1, s2=N*M, w00=16-fs, w01=fs-fr, w10=fr-ft, w11=ft;
1677 }
1678 else
1679 {
1680 if (fs>ft)
1681 s1=N*M, s2=1, w00=16-fr, w01=fr-fs, w10=fs-ft, w11=ft;
1682 else
1683 {
1684 if (ft>fr)
1685 s1=N, s2=N*M, w00=16-ft, w01=ft-fr, w10=fr-fs, w11=fs;
1686 else
1687 s1=N*M, s2=N, w00=16-fr, w01=fr-ft, w10=ft-fs, w11=fs;
1688 }
1689 }
1690
1691 s1 <<= b.dualplane;
1692 s2 <<= b.dualplane;
1693 s2+=s1;
1694 //s3 = (N*M+N+1)<<b.dualplane;
1695 v0 = (((s>>4))+((t>>4)*N)+(r>>4)*N*M) << b.dualplane;
1696 }
1697 else
1698#endif
1699 {
1700 //s1 = 1<<b.dualplane;
1701 //s2 = (N)<<b.dualplane;
1702 //s3 = (N+1)<<b.dualplane;
1703 w11 = (fs*ft+8) >> 4;
1704 w10 = ft - w11;
1705 w01 = fs - w11;
1706 w00 = 16 - fs - ft + w11;
1707
1708 v0 = (((s>>4))+(t>>4)*N) << b.dualplane;
1709 }
1710 w = ( w00*b.weights[v0] +
1711 w01*b.weights[v0+s1] +
1712 w10*b.weights[v0+s2] +
1713 w11*b.weights[v0+s3] + 8) >> 4;
1714 out[(x<<2)+0] = ASTC_GenHalffloat(p->hdr&1, ((64-w)*p->ep[0][0] + w*p->ep[1][0])>>6);
1715 out[(x<<2)+1] = ASTC_GenHalffloat(p->hdr&1, ((64-w)*p->ep[0][1] + w*p->ep[1][1])>>6);
1716 out[(x<<2)+2] = ASTC_GenHalffloat(p->hdr&1, ((64-w)*p->ep[0][2] + w*p->ep[1][2])>>6);
1717 out[(x<<2)+3] = ASTC_GenHalffloat(p->hdr&8, ((64-w)*p->ep[0][3] + w*p->ep[1][3])>>6);
1718
1719 if (b.dualplane)
1720 { //dual planes has a second set of weights that override a single channel
1721 v0++;
1722 w = ( w00*b.weights[v0] +
1723 w01*b.weights[v0+s1] +
1724 w10*b.weights[v0+s2] +
1725 w11*b.weights[v0+s3] + 8) >> 4;
1726 out[(x<<2)+b.ccs] = ASTC_GenHalffloat(p->hdr&(1<<b.ccs), ((64-w)*p->ep[0][b.ccs] + w*p->ep[1][b.ccs])>>6);
1727 }
1728 }
1729 }
1730 }
1731 #undef N
1732 #undef M
1733 }
1734 else
1735 { //error colour == magenta
1736#ifdef ASTC_WITH_3D
1737 for (z = 0; z < bd; z++, out += layerstride)
1738#endif
1739 for (y = 0; y < bh; y++, out += stride)
1740 for (x = 0; x < bw; x++)
1741 {
1742 out[(x<<2)+0] = 0xf<<10;
1743 out[(x<<2)+1] = 0;
1744 out[(x<<2)+2] = 0xf<<10;
1745 out[(x<<2)+3] = 0xf<<10;
1746 }
1747 }
1748}
1749#endif
enum @13::coninfomode_e mode
Definition: com_phys_ode.c:695
char *VARGS va(const char *format,...)
Definition: common.c:6687
s
Definition: execloop.h:53
GLclampf GLclampf blue
Definition: gl_vidcommon.c:39
GLfloat t
Definition: gl_vidcommon.c:184
GLclampf green
Definition: gl_vidcommon.c:39
static GLuint GLfloat x0
Definition: gl_vidnt.c:848
static GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat s1
Definition: gl_vidnt.c:848
static GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat t0
Definition: gl_vidnt.c:848
static CONST PIXELFORMATDESCRIPTOR *static int
Definition: gl_vidnt.c:222
static GLuint GLfloat GLfloat GLfloat GLfloat y1
Definition: gl_vidnt.c:848
static GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat t1
Definition: gl_vidnt.c:848
static GLuint GLfloat GLfloat y0
Definition: gl_vidnt.c:848
static GLuint GLfloat GLfloat GLfloat x1
Definition: gl_vidnt.c:848
GLfloat GLfloat GLfloat z
Definition: glquake.h:158
GLfloat GLfloat y
Definition: glquake.h:158
GLsizei count
Definition: glquake.h:149
GLint GLenum GLboolean GLsizei stride
Definition: glquake.h:157
GLdouble GLdouble x2
Definition: glquake.h:938
GLfloat v0
Definition: glquake.h:163
GLfloat x
Definition: glquake.h:158
GLdouble GLdouble r
Definition: glquake.h:969
GLint j
Definition: glquake.h:806
GLfloat GLfloat GLfloat GLfloat w
Definition: glquake.h:158
const GLfloat * v
Definition: glsupp.h:466
astc_status_e
Definition: image_astc.h:43
@ ASTC_VOID_HDR
Definition: image_astc.h:47
@ ASTC_UNSUPPORTED
Definition: image_astc.h:51
@ ASTC_VOID_LDR
Definition: image_astc.h:46
@ ASTC_ERROR
Definition: image_astc.h:50
@ ASTC_OKAY
Definition: image_astc.h:45
@ ASTC_RESERVED
Definition: image_astc.h:52
unsigned char bits
Definition: image_astc.h:123
unsigned char extra
Definition: image_astc.h:123
unsigned char * dequant
Definition: image_astc.h:123
ASTC_PUBLIC int ASTC_BlocksAreHDR(unsigned char *in, size_t datasize, int bw, int bh, int bd)
Definition: image_astc.h:348
ASTC_PUBLIC void ASTC_Decode_HDR(unsigned char *in, unsigned short *out, int pixstride, int layerstride, int bw, int bh, int bd)
Definition: image_astc.h:1562
ASTC_PUBLIC void ASTC_Decode_LDR8(unsigned char *in, unsigned char *out, int pixstride, int layerstride, int bw, int bh, int bd)
Definition: image_astc.h:1355
int const void * p
Definition: pr_lua.c:232
int b
Definition: pr_lua.c:242
int const char * e
Definition: pr_lua.c:259
lua_Reader void * dt
Definition: pr_lua.c:218
float scale
Definition: pr_menu.c:106
vec3_t offset
Definition: q2m_flash.c:28
if(StringFromGUID2(guid==NULL)
Definition: snd_directx.c:1080
static spx_int16_t * in
Definition: snd_dma.c:488
static SpeexBits spx_int16_t * out
Definition: snd_dma.c:492
int i
Definition: snd_ov.c:50
Definition: image_astc.h:74
int ep[2][4]
Definition: image_astc.h:79
unsigned char hdr
Definition: image_astc.h:77
unsigned char mode
Definition: image_astc.h:75
Definition: image_astc.h:55
unsigned short partindex
Definition: image_astc.h:72
int weight_bits
Definition: image_astc.h:66
unsigned char blocksize[3]
Definition: image_astc.h:57
unsigned char precision
Definition: image_astc.h:63
unsigned char ccs
Definition: image_astc.h:61
int wcount[4]
Definition: image_astc.h:65
unsigned char * in
Definition: image_astc.h:56
int config_bits
Definition: image_astc.h:67
struct astc_block_info::astc_part part[4]
unsigned char partitions
Definition: image_astc.h:71
int ep_bits
Definition: image_astc.h:68
unsigned char weights[64]
Definition: image_astc.h:69
unsigned char dualplane
Definition: image_astc.h:60
enum astc_status_e status
Definition: image_astc.h:59
netadr_t a
Definition: sv_master.c:141