Commit c816d3d0 authored by Måns Rullgård

AAC: Compress codebook tables and optimise sign bit handling

Each codebook consists of a small number of values repeated in
groups of 2 or 4.  Storing the codebooks as a packed list of 2- or
4-bit indexes into a table reduces their size substantially (from 7.5k
to 1.5k), resulting in less cache pressure.

For the band types with sign bits in the bitstream, storing the number
and position of non-zero codebook values using a few bits avoids
multiple get_bits() calls and floating-point comparisons which gcc
handles miserably.

Some float/int type punning also avoids gcc brain damage.

Overall speedup 20-35% on Cortex-A8, 20% on Core i7.

Originally committed as revision 21188 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 9d50d396
This diff is collapsed.
...@@ -899,6 +899,192 @@ const float * const ff_aac_codebook_vectors[] = { ...@@ -899,6 +899,192 @@ const float * const ff_aac_codebook_vectors[] = {
codebook_vector8, codebook_vector10, codebook_vector8, codebook_vector10,
}; };
/* Three-entry value table: index 0 -> -1, index 1 -> 0, index 2 -> +1. */
static const float codebook_vector0_vals[] = {
    [0] = -1.0000000,
    [1] =  0.0000000,
    [2] =  1.0000000,
};
/* Three-entry unsigned value table: entry k holds k^(4/3) for k = 0..2. */
static const float codebook_vector2_vals[] = {
    [0] = 0.0000000,
    [1] = 1.0000000,
    [2] = 2.5198421,
};
/*
 * Packed index words for the 4-tuple codebooks (81 = 3^4 entries).
 *
 * Entry number 27*a + 9*b + 3*c + d (each digit in 0..2) is laid out as:
 *   bits  0:1   a  \
 *   bits  2:3   b   | indexes into the _vals array
 *   bits  4:5   c   |
 *   bits  6:7   d  /
 *   bits  8:11  how many of a, b, c, d are non-zero
 *   bits 12:15  one flag bit per digit (bit 12 = a ... bit 15 = d),
 *               set when that digit is non-zero
 */
#define CB02_ENTRY(a, b, c, d)                                        \
    ((a) | ((b) << 2) | ((c) << 4) | ((d) << 6) |                     \
     ((!!(a) + !!(b) + !!(c) + !!(d)) << 8) |                         \
     (!!(a) << 12) | (!!(b) << 13) | (!!(c) << 14) | (!!(d) << 15))
#define CB02_ROW(a, b)                                                \
    CB02_ENTRY(a, b, 0, 0), CB02_ENTRY(a, b, 0, 1), CB02_ENTRY(a, b, 0, 2), \
    CB02_ENTRY(a, b, 1, 0), CB02_ENTRY(a, b, 1, 1), CB02_ENTRY(a, b, 1, 2), \
    CB02_ENTRY(a, b, 2, 0), CB02_ENTRY(a, b, 2, 1), CB02_ENTRY(a, b, 2, 2)
static const uint16_t codebook_vector02_idx[] = {
    CB02_ROW(0, 0), CB02_ROW(0, 1), CB02_ROW(0, 2),
    CB02_ROW(1, 0), CB02_ROW(1, 1), CB02_ROW(1, 2),
    CB02_ROW(2, 0), CB02_ROW(2, 1), CB02_ROW(2, 2),
};
#undef CB02_ROW
#undef CB02_ENTRY
/*
 * Nine-entry signed value table: entry k holds sign(n) * |n|^(4/3)
 * for n = k - 4, i.e. n runs from -4 to +4.
 */
static const float codebook_vector4_vals[] = {
    [0] = -6.3496042,
    [1] = -4.3267487,
    [2] = -2.5198421,
    [3] = -1.0000000,
    [4] =  0.0000000,
    [5] =  1.0000000,
    [6] =  2.5198421,
    [7] =  4.3267487,
    [8] =  6.3496042,
};
/*
 * Packed index words for the 9x9 pair codebook (81 entries).
 *
 * Entry number 9*lo + hi (lo, hi in 0..8) is laid out as:
 *   bits 0:3  lo  \ indexes into the _vals array
 *   bits 4:7  hi  /
 * The upper byte is always zero.
 */
#define CB4_ENTRY(lo, hi) ((lo) | ((hi) << 4))
#define CB4_ROW(lo)                                              \
    CB4_ENTRY(lo, 0), CB4_ENTRY(lo, 1), CB4_ENTRY(lo, 2),        \
    CB4_ENTRY(lo, 3), CB4_ENTRY(lo, 4), CB4_ENTRY(lo, 5),        \
    CB4_ENTRY(lo, 6), CB4_ENTRY(lo, 7), CB4_ENTRY(lo, 8)
static const uint16_t codebook_vector4_idx[] = {
    CB4_ROW(0), CB4_ROW(1), CB4_ROW(2),
    CB4_ROW(3), CB4_ROW(4), CB4_ROW(5),
    CB4_ROW(6), CB4_ROW(7), CB4_ROW(8),
};
#undef CB4_ROW
#undef CB4_ENTRY
/* Eight-entry unsigned value table: entry k holds k^(4/3) for k = 0..7. */
static const float codebook_vector6_vals[] = {
    [0] =  0.0000000,
    [1] =  1.0000000,
    [2] =  2.5198421,
    [3] =  4.3267487,
    [4] =  6.3496042,
    [5] =  8.5498797,
    [6] = 10.9027236,
    [7] = 13.3905183,
};
/*
 * Packed index words for the 8x8 pair codebook (64 entries).
 *
 * Entry number 8*lo + hi (lo, hi in 0..7) is laid out as:
 *   bits  0:3   lo  \ indexes into the _vals array
 *   bits  4:7   hi  /
 *   bits  8:11  how many of lo, hi are non-zero
 *   bits 12:15  1 when the bits 0:3 index is the only non-zero one,
 *               0 in every other case
 */
#define CB6_ENTRY(lo, hi)                                        \
    ((lo) | ((hi) << 4) | ((!!(lo) + !!(hi)) << 8) |             \
     (((lo) && !(hi)) << 12))
#define CB6_ROW(lo)                                              \
    CB6_ENTRY(lo, 0), CB6_ENTRY(lo, 1), CB6_ENTRY(lo, 2),        \
    CB6_ENTRY(lo, 3), CB6_ENTRY(lo, 4), CB6_ENTRY(lo, 5),        \
    CB6_ENTRY(lo, 6), CB6_ENTRY(lo, 7)
static const uint16_t codebook_vector6_idx[] = {
    CB6_ROW(0), CB6_ROW(1), CB6_ROW(2), CB6_ROW(3),
    CB6_ROW(4), CB6_ROW(5), CB6_ROW(6), CB6_ROW(7),
};
#undef CB6_ROW
#undef CB6_ENTRY
/* Thirteen-entry unsigned value table: entry k holds k^(4/3) for k = 0..12. */
static const float codebook_vector8_vals[] = {
    [0]  =  0.0000000,
    [1]  =  1.0000000,
    [2]  =  2.5198421,
    [3]  =  4.3267487,
    [4]  =  6.3496042,
    [5]  =  8.5498797,
    [6]  = 10.9027236,
    [7]  = 13.3905183,
    [8]  = 16.0000000,
    [9]  = 18.7207544,
    [10] = 21.5443469,
    [11] = 24.4637810,
    [12] = 27.4731418,
};
/*
 * Packed index words for the 13x13 pair codebook (169 entries).
 *
 * Entry number 13*lo + hi (lo, hi in 0..12) is laid out as:
 *   bits  0:3   lo  \ indexes into the _vals array
 *   bits  4:7   hi  /
 *   bits  8:11  how many of lo, hi are non-zero
 *   bits 12:15  1 when the bits 0:3 index is the only non-zero one,
 *               0 in every other case
 */
#define CB8_ENTRY(lo, hi)                                        \
    ((lo) | ((hi) << 4) | ((!!(lo) + !!(hi)) << 8) |             \
     (((lo) && !(hi)) << 12))
#define CB8_ROW(lo)                                              \
    CB8_ENTRY(lo,  0), CB8_ENTRY(lo,  1), CB8_ENTRY(lo,  2),     \
    CB8_ENTRY(lo,  3), CB8_ENTRY(lo,  4), CB8_ENTRY(lo,  5),     \
    CB8_ENTRY(lo,  6), CB8_ENTRY(lo,  7), CB8_ENTRY(lo,  8),     \
    CB8_ENTRY(lo,  9), CB8_ENTRY(lo, 10), CB8_ENTRY(lo, 11),     \
    CB8_ENTRY(lo, 12)
static const uint16_t codebook_vector8_idx[] = {
    CB8_ROW( 0), CB8_ROW( 1), CB8_ROW( 2), CB8_ROW( 3),
    CB8_ROW( 4), CB8_ROW( 5), CB8_ROW( 6), CB8_ROW( 7),
    CB8_ROW( 8), CB8_ROW( 9), CB8_ROW(10), CB8_ROW(11),
    CB8_ROW(12),
};
#undef CB8_ROW
#undef CB8_ENTRY
/* Sixteen-entry unsigned value table: entry k holds k^(4/3) for k = 0..15. */
static const float codebook_vector10_vals[] = {
    [0]  =  0.0000000,
    [1]  =  1.0000000,
    [2]  =  2.5198421,
    [3]  =  4.3267487,
    [4]  =  6.3496042,
    [5]  =  8.5498797,
    [6]  = 10.9027236,
    [7]  = 13.3905183,
    [8]  = 16.0000000,
    [9]  = 18.7207544,
    [10] = 21.5443469,
    [11] = 24.4637810,
    [12] = 27.4731418,
    [13] = 30.5673509,
    [14] = 33.7419917,
    [15] = 36.9931811,
};
/*
 * Packed index words for the 17x17 pair codebook (289 entries).
 *
 * Entry number 17*lo + hi (lo, hi in 0..16) is laid out as:
 *   bits  0:3   lo & 15  \ indexes into the _vals array
 *   bits  4:7   hi & 15  /
 *   bits  8:9   bit mask of escape-coded entries: bit 8 is set when
 *               lo == 16, bit 9 when hi == 16 (the matching index
 *               field is then 0)
 *   bits 12:15  how many of lo, hi are non-zero
 */
#define CB10_ENTRY(lo, hi)                                       \
    (((lo) & 15) | (((hi) & 15) << 4) |                          \
     (((lo) >> 4) << 8) | (((hi) >> 4) << 9) |                   \
     ((!!(lo) + !!(hi)) << 12))
#define CB10_ROW(lo)                                             \
    CB10_ENTRY(lo,  0), CB10_ENTRY(lo,  1), CB10_ENTRY(lo,  2),  \
    CB10_ENTRY(lo,  3), CB10_ENTRY(lo,  4), CB10_ENTRY(lo,  5),  \
    CB10_ENTRY(lo,  6), CB10_ENTRY(lo,  7), CB10_ENTRY(lo,  8),  \
    CB10_ENTRY(lo,  9), CB10_ENTRY(lo, 10), CB10_ENTRY(lo, 11),  \
    CB10_ENTRY(lo, 12), CB10_ENTRY(lo, 13), CB10_ENTRY(lo, 14),  \
    CB10_ENTRY(lo, 15), CB10_ENTRY(lo, 16)
static const uint16_t codebook_vector10_idx[] = {
    CB10_ROW( 0), CB10_ROW( 1), CB10_ROW( 2), CB10_ROW( 3),
    CB10_ROW( 4), CB10_ROW( 5), CB10_ROW( 6), CB10_ROW( 7),
    CB10_ROW( 8), CB10_ROW( 9), CB10_ROW(10), CB10_ROW(11),
    CB10_ROW(12), CB10_ROW(13), CB10_ROW(14), CB10_ROW(15),
    CB10_ROW(16),
};
#undef CB10_ROW
#undef CB10_ENTRY
/*
 * Value table for each of the eleven codebook slots.  Consecutive slots
 * share a table in pairs; the last slot has its own.
 * NOTE(review): presumably slot k corresponds to spectral codebook k + 1;
 * confirm against the decoder.
 */
const float *const ff_aac_codebook_vector_vals[] = {
    [0]  = codebook_vector0_vals,
    [1]  = codebook_vector0_vals,
    [2]  = codebook_vector2_vals,
    [3]  = codebook_vector2_vals,
    [4]  = codebook_vector4_vals,
    [5]  = codebook_vector4_vals,
    [6]  = codebook_vector6_vals,
    [7]  = codebook_vector6_vals,
    [8]  = codebook_vector8_vals,
    [9]  = codebook_vector8_vals,
    [10] = codebook_vector10_vals,
};
/*
 * Packed index table for each of the eleven codebook slots, parallel to
 * ff_aac_codebook_vector_vals.  The first four slots all use the shared
 * 4-tuple table; the remaining slots pair up, with the last on its own.
 */
const uint16_t *const ff_aac_codebook_vector_idx[] = {
    [0]  = codebook_vector02_idx,
    [1]  = codebook_vector02_idx,
    [2]  = codebook_vector02_idx,
    [3]  = codebook_vector02_idx,
    [4]  = codebook_vector4_idx,
    [5]  = codebook_vector4_idx,
    [6]  = codebook_vector6_idx,
    [7]  = codebook_vector6_idx,
    [8]  = codebook_vector8_idx,
    [9]  = codebook_vector8_idx,
    [10] = codebook_vector10_idx,
};
/* @name swb_offsets /* @name swb_offsets
* Sample offset into the window indicating the beginning of a scalefactor * Sample offset into the window indicating the beginning of a scalefactor
* window band * window band
......
...@@ -64,6 +64,8 @@ extern const uint8_t * const ff_aac_spectral_bits [11]; ...@@ -64,6 +64,8 @@ extern const uint8_t * const ff_aac_spectral_bits [11];
extern const uint16_t ff_aac_spectral_sizes[11]; extern const uint16_t ff_aac_spectral_sizes[11];
extern const float *ff_aac_codebook_vectors[]; extern const float *ff_aac_codebook_vectors[];
/*
 * Packed AAC codebook tables: per-codebook value arrays and the 16-bit
 * index words that select entries from them.  The pointer arrays are
 * themselves const so that these declarations have the same type as the
 * `*const ...[]` definitions; a declaration without the top-level const
 * names an incompatible type for the same object.
 */
extern const float    * const ff_aac_codebook_vector_vals[];
extern const uint16_t * const ff_aac_codebook_vector_idx [];
extern const uint16_t * const ff_swb_offset_1024[13]; extern const uint16_t * const ff_swb_offset_1024[13];
extern const uint16_t * const ff_swb_offset_128 [13]; extern const uint16_t * const ff_swb_offset_128 [13];
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment