/*
 * Simple free lossless/lossy audio codec
 * Copyright (c) 2004 Alex Beregszaszi
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#include "avcodec.h"
#include "bitstream.h"
#include "golomb.h"

#include <math.h>
#include <stdlib.h>
#include <string.h>

/**
 * @file sonic.c
 * Simple free lossless/lossy audio codec
 * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk)
 * Written and designed by Alex Beregszaszi
 *
 * TODO:
 *  - CABAC put/get_symbol
 *  - independent quantizer for channels
 *  - >2 channels support
 *  - more decorrelation types
 *  - more tap_quant tests
 *  - selectable intlist writers/readers (bonk-style, golomb, cabac)
 */

/* Upper bound on interleaved channels; sizes the per-channel pointer arrays. */
#define MAX_CHANNELS 2

/* Inter-channel decorrelation modes, written to the header as a 2-bit field. */
#define MID_SIDE 0
#define LEFT_SIDE 1
#define RIGHT_SIDE 2

44
typedef struct SonicContext {
45
    int lossless, decorrelation;
46

47 48
    int num_taps, downsampling;
    double quantization;
49

50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
    int channels, samplerate, block_align, frame_size;

    int *tap_quant;
    int *int_samples;
    int *coded_samples[MAX_CHANNELS];

    // for encoding
    int *tail;
    int tail_size;
    int *window;
    int window_size;

    // for decoding
    int *predictor_k;
    int *predictor_state[MAX_CHANNELS];
} SonicContext;

/* Fixed-point precision of the lattice coefficients and of lossy samples. */
#define LATTICE_SHIFT   10
#define SAMPLE_SHIFT    4
#define LATTICE_FACTOR  (1 << LATTICE_SHIFT)
#define SAMPLE_FACTOR   (1 << SAMPLE_SHIFT)

/* Rate-control tuning constants used by the lossy encoder. */
#define BASE_QUANT      0.6
#define RATE_VARIATION  3.0

/**
 * Divide a by b, rounding to the nearest integer with ties away from zero.
 * The negative branch mirrors the positive one so that rounding is
 * symmetric around zero. Callers in this file pass b >= 1.
 */
static inline int divide(int a, int b)
{
    if (a < 0)
        return -( (-a + b/2)/b );
    else
        return (a + b/2)/b;
}

/**
 * Right-shift a by b bits after adding half of the divisor, i.e. a rounding
 * division by 2^b. Requires b >= 1.
 */
static inline int shift(int a,int b)
{
    const int half = 1 << (b - 1);
    const int biased = a + half;

    return biased >> b;
}

/**
 * Right-shift a by b bits, then add one to the result when a is negative.
 */
static inline int shift_down(int a,int b)
{
    int result = a >> b;

    if (a < 0)
        result++;

    return result;
}

#if 1
/**
 * Write entries integers from buf to pb, one set_se_golomb() code each.
 * base_2_part is accepted for interface parity with the alternative writer
 * and is ignored here.
 *
 * @return 1 (never negative, so the callers' "< 0" failure checks pass)
 */
static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
{
    int i;

    for (i = 0; i < entries; i++)
        set_se_golomb(pb, buf[i]);

    return 1;
}

/**
 * Read entries integers from gb into buf, one get_se_golomb() code each.
 * base_2_part is accepted for interface parity with the alternative reader
 * and is ignored here.
 *
 * @return 1 (never negative)
 */
static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
{
    int i;

    for (i = 0; i < entries; i++)
        buf[i] = get_se_golomb(gb);

    return 1;
}

#else

/* Divisor for the run-length step adaptation (step +/- step / ADAPT_LEVEL). */
#define ADAPT_LEVEL 8

/**
 * Number of bits needed to represent x, i.e. the position of its highest
 * set bit plus one. Returns 0 for x == 0.
 */
static int bits_to_store(uint64_t x)
{
    int res = 0;

    while(x)
    {
        res++;
        x >>= 1;
    }
    return res;
}

/**
 * Write value, known to lie in [0, max], least significant bit first.
 * The top bit is only emitted when setting it could still give a number
 * <= max; otherwise the reader can infer that it is zero.
 * Nothing is written when max is 0.
 */
static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max)
{
    int i, bits;

    if (!max)
        return;

    bits = bits_to_store(max);

    // put_bits() is asked for one bit, so hand it exactly 0 or 1 rather
    // than the masked value still sitting at bit position i
    for (i = 0; i < bits-1; i++)
        put_bits(pb, 1, (value >> i) & 1);

    if ( (value | (1U << (bits-1))) <= max)
        put_bits(pb, 1, (value >> (bits-1)) & 1);
}

/**
 * Read a value in [0, max] written by write_uint_max(): the low bits come
 * first, and the top bit is only present in the stream when setting it could
 * still give a number <= max. Returns 0 without reading when max is 0.
 */
static unsigned int read_uint_max(GetBitContext *gb, int max)
{
    int i, bits, value = 0;

    if (!max)
        return 0;

    bits = bits_to_store(max);

    for (i = 0; i < bits-1; i++)
        if (get_bits1(gb))
            value += 1 << i;

    if ( (value | (1<<(bits-1))) <= max)
        if (get_bits1(gb))
            value += 1 << (bits-1);

    return value;
}

/**
 * Write entries signed integers using the adaptive run-length list coder.
 *
 * Layout: optional 4-bit low_bits count (when base_2_part is set), the
 * low_bits least significant bits of each magnitude, a run-length coded
 * unary representation of the remaining high parts, then one sign bit per
 * non-zero value.
 *
 * @return 0 on success, -1 if a temporary buffer cannot be allocated
 */
static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
{
    int i, j, x = 0, low_bits = 0, max = 0;
    int step = 256, pos = 0, dominant = 0;
    int *copy, *bits;

    copy = av_mallocz(sizeof(*copy) * entries);
    if (!copy)
        return -1;

    if (base_2_part)
    {
        int energy = 0;

        for (i = 0; i < entries; i++)
            energy += abs(buf[i]);

        // guard the average against an empty list
        if (entries > 0)
            low_bits = bits_to_store(energy / (entries * 2));
        if (low_bits > 15)
            low_bits = 15;

        put_bits(pb, 4, low_bits);
    }

    for (i = 0; i < entries; i++)
    {
        int magnitude = abs(buf[i]);

        // only the low part goes out verbatim; the reader likewise reads
        // nothing here when low_bits is 0
        if (low_bits)
            put_bits(pb, low_bits, magnitude & ((1 << low_bits) - 1));
        copy[i] = magnitude >> low_bits;
        if (copy[i] > max)
            max = copy[i];
    }

    // levels 0..max inclusive each contribute at most one bit per entry
    bits = av_mallocz(sizeof(*bits) * entries * (max + 1));
    if (!bits)
    {
        av_free(copy);
        return -1;
    }

    for (i = 0; i <= max; i++)
    {
        for (j = 0; j < entries; j++)
            if (copy[j] >= i)
                bits[x++] = copy[j] > i;
    }

    // store bitstream
    while (pos < x)
    {
        int steplet = step >> 8;
        int any = 0;    // must restart for every step: "this run is broken"

        if (pos + steplet > x)
            steplet = x - pos;

        for (i = 0; i < steplet; i++)
            if (bits[i+pos] != dominant)
                any = 1;

        put_bits(pb, 1, any);

        if (!any)
        {
            pos += steplet;
            step += step / ADAPT_LEVEL;
        }
        else
        {
            int interloper = 0;

            while (((pos + interloper) < x) && (bits[pos + interloper] == dominant))
                interloper++;

            // note change
            write_uint_max(pb, interloper, (step >> 8) - 1);

            pos += interloper + 1;
            step -= step / ADAPT_LEVEL;
        }

        if (step < 256)
        {
            step = 65536 / step;
            dominant = !dominant;
        }
    }

    // store signs
    for (i = 0; i < entries; i++)
        if (buf[i])
            put_bits(pb, 1, buf[i] < 0);

    av_free(bits);
    av_free(copy);

    return 0;
}

/* Sanity cap on the number of run-length bits decoded for one list, so that a
 * corrupt stream cannot make the temporary buffer grow without bound. */
enum { INTLIST_MAX_BITS = 1 << 24 };

/**
 * Make sure *bits can hold at least needed ints, growing it geometrically.
 *
 * @return 0 on success, -1 if needed exceeds INTLIST_MAX_BITS or allocation
 *         fails (the old buffer is left intact for the caller to free)
 */
static int intlist_reserve_bits(int **bits, int *capacity, int needed)
{
    int *grown;
    int grown_capacity = *capacity;

    if (needed <= grown_capacity)
        return 0;
    if (needed > INTLIST_MAX_BITS)
        return -1;

    while (grown_capacity < needed)
        grown_capacity *= 2;

    grown = av_mallocz(sizeof(*grown) * grown_capacity);
    if (!grown)
        return -1;

    memcpy(grown, *bits, sizeof(*grown) * *capacity);
    av_free(*bits);

    *bits = grown;
    *capacity = grown_capacity;
    return 0;
}

/**
 * Read entries signed integers written by the adaptive run-length list
 * coder: optional 4-bit low_bits count (when base_2_part is set), the low
 * bits of every magnitude, the run-length coded high parts, then the signs.
 *
 * @return 0 on success, -1 on allocation failure or an oversized bit run
 */
static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
{
    int i, low_bits = 0, x = 0;
    int n_zeros = 0, step = 256, dominant = 0;
    int pos = 0, level = 0;
    int capacity = entries > 0 ? entries : 1;
    int *bits = av_mallocz(sizeof(*bits) * capacity);

    if (!bits)
        return -1;

    if (base_2_part)
        low_bits = get_bits(gb, 4);

    // the reconstruction below compares buf[] against level, so every entry
    // needs a defined starting value even when no low bits are coded
    for (i = 0; i < entries; i++)
        buf[i] = low_bits ? get_bits(gb, low_bits) : 0;

    // each entry is terminated by exactly one zero bit
    while (n_zeros < entries)
    {
        int steplet = step >> 8;

        if (!get_bits1(gb))
        {
            if (intlist_reserve_bits(&bits, &capacity, x + steplet) < 0)
                goto fail;

            for (i = 0; i < steplet; i++)
                bits[x++] = dominant;

            if (!dominant)
                n_zeros += steplet;

            step += step / ADAPT_LEVEL;
        }
        else
        {
            int actual_run = read_uint_max(gb, steplet-1);

            if (intlist_reserve_bits(&bits, &capacity, x + actual_run + 1) < 0)
                goto fail;

            for (i = 0; i < actual_run; i++)
                bits[x++] = dominant;

            bits[x++] = !dominant;

            if (!dominant)
                n_zeros += actual_run;
            else
                n_zeros++;

            step -= step / ADAPT_LEVEL;
        }

        if (step < 256)
        {
            step = 65536 / step;
            dominant = !dominant;
        }
    }

    // reconstruct unsigned values
    n_zeros = 0;
    for (i = 0; n_zeros < entries; i++)
    {
        // advance to the next entry that has not yet been terminated
        while(1)
        {
            if (pos >= entries)
            {
                pos = 0;
                level += 1 << low_bits;
            }

            if (buf[pos] >= level)
                break;

            pos++;
        }

        if (bits[i])
            buf[pos] += 1 << low_bits;
        else
            n_zeros++;

        pos++;
    }
    av_free(bits);

    // read signs
    for (i = 0; i < entries; i++)
        if (buf[i] && get_bits1(gb))
            buf[i] = -buf[i];

    return 0;

fail:
    av_free(bits);
    return -1;
}
#endif

static void predictor_init_state(int *k, int *state, int order)
{
    int i;

    for (i = order-2; i >= 0; i--)
    {
368
        int j, p, x = state[i];
369

370 371 372 373 374 375
        for (j = 0, p = i+1; p < order; j++,p++)
            {
            int tmp = x + shift_down(k[j] * state[p], LATTICE_SHIFT);
            state[p] += shift_down(k[j]*x, LATTICE_SHIFT);
            x = tmp;
        }
376 377 378 379 380 381 382 383 384
    }
}

/**
 * Run one sample through the predictor: subtract the weighted state from
 * error, update state[] and return the resulting (clamped) sample.
 *
 * @param k     predictor coefficients, order entries
 * @param state predictor state, order entries, updated in place
 * @param order number of taps (>= 1)
 * @param error input value for this sample
 * @return the new sample, which is also stored in state[0]
 */
static int predictor_calc_error(int *k, int *state, int order, int error)
{
    int i, x = error - shift_down(k[order-1] * state[order-1], LATTICE_SHIFT);

    // walk from the highest remaining tap down; plain indexing avoids
    // forming a pointer before the arrays when order is 1
    for (i = order-2; i >= 0; i--)
    {
        int k_value = k[i], state_value = state[i];

        x -= shift_down(k_value * state_value, LATTICE_SHIFT);
        state[i+1] = state_value + shift_down(k_value * x, LATTICE_SHIFT);
    }

    // don't drift too far, to avoid overflows
    if (x >  (SAMPLE_FACTOR<<16)) x =  (SAMPLE_FACTOR<<16);
    if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16);

    state[0] = x;

    return x;
}

// Heavily modified Levinson-Durbin algorithm which
// copes better with quantization, and calculates the
// actual whitened result as it goes.

static void modified_levinson_durbin(int *window, int window_entries,
414
        int *out, int out_entries, int channels, int *tap_quant)
415 416 417
{
    int i;
    int *state = av_mallocz(4* window_entries);
418

419
    memcpy(state, window, 4* window_entries);
420

421 422
    for (i = 0; i < out_entries; i++)
    {
423 424
        int step = (i+1)*channels, k, j;
        double xx = 0.0, xy = 0.0;
425
#if 1
426 427 428 429 430 431 432 433
        int *x_ptr = &(window[step]), *state_ptr = &(state[0]);
        j = window_entries - step;
        for (;j>=0;j--,x_ptr++,state_ptr++)
        {
            double x_value = *x_ptr, state_value = *state_ptr;
            xx += state_value*state_value;
            xy += x_value*state_value;
        }
434
#else
435 436 437 438 439 440 441 442
        for (j = 0; j <= (window_entries - step); j++);
        {
            double stepval = window[step+j], stateval = window[j];
//            xx += (double)window[j]*(double)window[j];
//            xy += (double)window[step+j]*(double)window[j];
            xx += stateval*stateval;
            xy += stepval*stateval;
        }
443
#endif
444 445 446 447
        if (xx == 0.0)
            k = 0;
        else
            k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5));
448

449 450 451 452
        if (k > (LATTICE_FACTOR/tap_quant[i]))
            k = LATTICE_FACTOR/tap_quant[i];
        if (-k > (LATTICE_FACTOR/tap_quant[i]))
            k = -(LATTICE_FACTOR/tap_quant[i]);
453

454 455
        out[i] = k;
        k *= tap_quant[i];
456 457

#if 1
458 459 460 461 462 463 464 465 466
        x_ptr = &(window[step]);
        state_ptr = &(state[0]);
        j = window_entries - step;
        for (;j>=0;j--,x_ptr++,state_ptr++)
        {
            int x_value = *x_ptr, state_value = *state_ptr;
            *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT);
            *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT);
        }
467
#else
468 469 470 471 472 473
        for (j=0; j <= (window_entries - step); j++)
        {
            int stepval = window[step+j], stateval=state[j];
            window[step+j] += shift_down(k * stateval, LATTICE_SHIFT);
            state[j] += shift_down(k * stepval, LATTICE_SHIFT);
        }
474 475
#endif
    }
476

477 478 479 480 481 482 483 484 485 486 487 488
    av_free(state);
}

/* Sample rates selectable through the 4-bit sample-rate index of a version 1
 * header; code_samplerate() maps a rate back to its position in this table. */
static int samplerate_table[] = {
    44100, 22050, 11025,
    96000, 48000, 32000,
    24000, 16000,  8000,
};

#ifdef CONFIG_ENCODERS

/**
 * Map a sample rate in Hz to its header index.
 *
 * @return the index (0..8), or -1 if the rate has no code
 */
static inline int code_samplerate(int samplerate)
{
    switch (samplerate)
    {
        case 44100: return 0;
        case 22050: return 1;
        case 11025: return 2;
        case 96000: return 3;
        case 48000: return 4;
        case 32000: return 5;
        case 24000: return 6;
        case 16000: return 7;
        case 8000:  return 8;
        default:    return -1;
    }
}

/**
 * Initialise the Sonic encoder: pick the coding parameters from the codec
 * id, allocate the working buffers and write the stream header to
 * avctx->extradata.
 *
 * @return 0 on success, a negative value on failure
 */
static int sonic_encode_init(AVCodecContext *avctx)
{
    SonicContext *s = avctx->priv_data;
    PutBitContext pb;
    int i, version = 0;

    if (avctx->channels < 1 || avctx->channels > MAX_CHANNELS)
    {
        av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
        return -1; /* only stereo or mono for now */
    }

    // The decorrelation transforms combine the two samples of a stereo pair.
    // Mode 3 matches none of the cases in the frame coders' switch
    // statements, so mono samples are passed through unchanged.
    if (avctx->channels == 2)
        s->decorrelation = MID_SIDE;
    else
        s->decorrelation = 3;

    if (avctx->codec->id == CODEC_ID_SONIC_LS)
    {
        s->lossless = 1;
        s->num_taps = 32;
        s->downsampling = 1;
        s->quantization = 0.0;
    }
    else
    {
        s->num_taps = 128;
        s->downsampling = 2;
        s->quantization = 1.0;
    }

    // the header stores (num_taps >> 5) - 1 in 5 bits: multiples of 32, 32..1024
    if ((s->num_taps < 32) || (s->num_taps > 1024) ||
        ((s->num_taps>>5)<<5 != s->num_taps))
    {
        av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n");
        return -1;
    }

    s->channels = avctx->channels;
    s->samplerate = avctx->sample_rate;

    s->block_align = (int)(2048.0*s->samplerate/44100)/s->downsampling;
    s->frame_size = s->channels*s->block_align*s->downsampling;
    s->tail_size = s->num_taps*s->channels;

    // the frame coder saves the last tail_size samples of every frame, so a
    // frame has to be at least that long
    if (s->tail_size > s->frame_size)
    {
        av_log(avctx, AV_LOG_ERROR, "Sample rate too low for the number of taps\n");
        return -1;
    }

    // generate taps
    s->tap_quant = av_mallocz(4* s->num_taps);
    if (!s->tap_quant)
        return -1;
    for (i = 0; i < s->num_taps; i++)
        s->tap_quant[i] = (int)(sqrt(i+1));

    s->tail = av_mallocz(4* s->tail_size);
    if (!s->tail)
        return -1;

    s->predictor_k = av_mallocz(4 * s->num_taps);
    if (!s->predictor_k)
        return -1;

    for (i = 0; i < s->channels; i++)
    {
        s->coded_samples[i] = av_mallocz(4* s->block_align);
        if (!s->coded_samples[i])
            return -1;
    }

    s->int_samples = av_mallocz(4* s->frame_size);
    if (!s->int_samples)
        return -1;

    s->window_size = ((2*s->tail_size)+s->frame_size);
    s->window = av_mallocz(4* s->window_size);
    if (!s->window)
        return -1;

    avctx->extradata = av_mallocz(16);
    if (!avctx->extradata)
        return -1;
    // init_put_bits() takes the buffer size in bytes
    init_put_bits(&pb, avctx->extradata, 16);

    put_bits(&pb, 2, version); // version
    if (version == 1)
    {
        put_bits(&pb, 2, s->channels);
        put_bits(&pb, 4, code_samplerate(s->samplerate));
    }
    put_bits(&pb, 1, s->lossless);
    if (!s->lossless)
        put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision
    put_bits(&pb, 2, s->decorrelation);
    put_bits(&pb, 2, s->downsampling);
    put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024
    put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table

    flush_put_bits(&pb);
    avctx->extradata_size = put_bits_count(&pb)/8;

    av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
        version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);

    avctx->coded_frame = avcodec_alloc_frame();
    if (!avctx->coded_frame)
        return -ENOMEM;
    avctx->coded_frame->key_frame = 1;
    avctx->frame_size = s->block_align*s->downsampling;

    return 0;
}

/**
 * Release everything sonic_encode_init() allocated.
 *
 * @return 0
 */
static int sonic_encode_close(AVCodecContext *avctx)
{
    SonicContext *s = avctx->priv_data;
    int i;

    av_freep(&avctx->coded_frame);

    for (i = 0; i < s->channels; i++)
        av_free(s->coded_samples[i]);

    av_free(s->predictor_k);
    av_free(s->tail);
    av_free(s->tap_quant);
    av_free(s->window);
    av_free(s->int_samples);

    return 0;
}

/**
 * Encode one frame of s->frame_size interleaved 16-bit samples.
 *
 * Steps: widen (and, when lossy, scale) the samples, decorrelate stereo
 * pairs, compute and write the predictor taps, downsample the whitened
 * window per channel, pick a quantizer (lossy only) and write the coded
 * samples.
 *
 * @param buf      output buffer
 * @param buf_size size of buf in bytes
 * @param data     input samples (short), s->frame_size of them
 * @return number of bytes written, or -1 on failure
 */
static int sonic_encode_frame(AVCodecContext *avctx,
                            uint8_t *buf, int buf_size, void *data)
{
    SonicContext *s = avctx->priv_data;
    PutBitContext pb;
    int i, j, ch, quant = 0, x = 0;
    short *samples = data;

    // init_put_bits() takes the buffer size in bytes
    init_put_bits(&pb, buf, buf_size);

    // short -> internal
    for (i = 0; i < s->frame_size; i++)
        s->int_samples[i] = samples[i];

    // multiply instead of "<<" so that negative samples stay well defined
    if (!s->lossless)
        for (i = 0; i < s->frame_size; i++)
            s->int_samples[i] *= SAMPLE_FACTOR;

    // every transform below touches int_samples[i+1], which only exists
    // for each i when the frame holds interleaved stereo pairs
    if (s->channels == 2)
    {
        switch(s->decorrelation)
        {
            case MID_SIDE:
                for (i = 0; i < s->frame_size; i += s->channels)
                {
                    s->int_samples[i] += s->int_samples[i+1];
                    s->int_samples[i+1] -= shift(s->int_samples[i], 1);
                }
                break;
            case LEFT_SIDE:
                for (i = 0; i < s->frame_size; i += s->channels)
                    s->int_samples[i+1] -= s->int_samples[i];
                break;
            case RIGHT_SIDE:
                for (i = 0; i < s->frame_size; i += s->channels)
                    s->int_samples[i] -= s->int_samples[i+1];
                break;
        }
    }

    // window = previous tail, current frame, zero padding
    memset(s->window, 0, 4* s->window_size);

    for (i = 0; i < s->tail_size; i++)
        s->window[x++] = s->tail[i];

    for (i = 0; i < s->frame_size; i++)
        s->window[x++] = s->int_samples[i];

    for (i = 0; i < s->tail_size; i++)
        s->window[x++] = 0;

    // remember the end of this frame for the next call
    for (i = 0; i < s->tail_size; i++)
        s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i];

    // generate taps
    modified_levinson_durbin(s->window, s->window_size,
                s->predictor_k, s->num_taps, s->channels, s->tap_quant);
    if (intlist_write(&pb, s->predictor_k, s->num_taps, 0) < 0)
        return -1;

    // sum groups of "downsampling" whitened samples per channel
    for (ch = 0; ch < s->channels; ch++)
    {
        x = s->tail_size+ch;
        for (i = 0; i < s->block_align; i++)
        {
            int sum = 0;
            for (j = 0; j < s->downsampling; j++, x += s->channels)
                sum += s->window[x];
            s->coded_samples[ch][i] = sum;
        }
    }

    // simple rate control code
    if (!s->lossless)
    {
        double energy1 = 0.0, energy2 = 0.0;
        for (ch = 0; ch < s->channels; ch++)
        {
            for (i = 0; i < s->block_align; i++)
            {
                double sample = s->coded_samples[ch][i];
                energy2 += sample*sample;
                energy1 += fabs(sample);
            }
        }

        energy2 = sqrt(energy2/(s->channels*s->block_align));
        energy1 = sqrt(2.0)*energy1/(s->channels*s->block_align);

        // increase bitrate when samples are like a gaussian distribution
        // reduce bitrate when samples are like a two-tailed exponential distribution

        if (energy2 > energy1)
            energy2 += (energy2-energy1)*RATE_VARIATION;

        quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR);

        if (quant < 1)
            quant = 1;
        if (quant > 65535)
            quant = 65535;

        set_ue_golomb(&pb, quant);

        quant *= SAMPLE_FACTOR;
    }

    // write out coded samples
    for (ch = 0; ch < s->channels; ch++)
    {
        if (!s->lossless)
            for (i = 0; i < s->block_align; i++)
                s->coded_samples[ch][i] = divide(s->coded_samples[ch][i], quant);

        if (intlist_write(&pb, s->coded_samples[ch], s->block_align, 1) < 0)
            return -1;
    }

    flush_put_bits(&pb);
    return (put_bits_count(&pb)+7)/8;
}
#endif //CONFIG_ENCODERS

/**
 * Initialise the Sonic decoder from the header stored in avctx->extradata
 * and allocate the working buffers.
 *
 * Header fields that would later be used as divisors or array bounds are
 * validated here, since the header comes from the input file.
 *
 * @return 0 on success, -1 on a missing/invalid header or allocation failure
 */
static int sonic_decode_init(AVCodecContext *avctx)
{
    SonicContext *s = avctx->priv_data;
    GetBitContext gb;
    int i, version;

    s->channels = avctx->channels;
    s->samplerate = avctx->sample_rate;

    if (!avctx->extradata)
    {
        av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n");
        return -1;
    }

    // init_get_bits() takes the size in bits, as in sonic_decode_frame()
    init_get_bits(&gb, avctx->extradata, avctx->extradata_size*8);

    version = get_bits(&gb, 2);
    if (version > 1)
    {
        av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n");
        return -1;
    }

    if (version == 1)
    {
        unsigned int rate_index;

        s->channels = get_bits(&gb, 2);
        // 4 bits can address 16 entries, the table is shorter than that
        rate_index = get_bits(&gb, 4);
        if (rate_index >= sizeof(samplerate_table)/sizeof(samplerate_table[0]))
        {
            av_log(avctx, AV_LOG_ERROR, "Invalid sample rate index\n");
            return -1;
        }
        s->samplerate = samplerate_table[rate_index];
        av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n",
            s->channels, s->samplerate);
    }

    if (s->channels < 1 || s->channels > MAX_CHANNELS)
    {
        av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
        return -1;
    }

    s->lossless = get_bits1(&gb);
    if (!s->lossless)
        skip_bits(&gb, 3); // XXX FIXME
    s->decorrelation = get_bits(&gb, 2);

    s->downsampling = get_bits(&gb, 2);
    if (!s->downsampling)
    {
        // used as a divisor just below
        av_log(avctx, AV_LOG_ERROR, "Invalid downsampling factor\n");
        return -1;
    }

    s->num_taps = (get_bits(&gb, 5)+1)<<5;
    if (get_bits1(&gb)) // XXX FIXME
        av_log(avctx, AV_LOG_INFO, "Custom quant table\n");

    // same expression as the encoder; dividing the rate by 44100 in integer
    // arithmetic first would truncate it to 0 or a whole multiple
    s->block_align = (int)(2048.0*s->samplerate/44100)/s->downsampling;
    s->frame_size = s->channels*s->block_align*s->downsampling;

    // the frame decoder reloads num_taps samples per channel from the end of
    // each decoded frame, so the frame must be at least that long
    if (s->num_taps*s->channels > s->frame_size)
    {
        av_log(avctx, AV_LOG_ERROR, "Number of taps too large for the frame size\n");
        return -1;
    }

    av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
        version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);

    // generate taps
    s->tap_quant = av_mallocz(4* s->num_taps);
    if (!s->tap_quant)
        return -1;
    for (i = 0; i < s->num_taps; i++)
        s->tap_quant[i] = (int)(sqrt(i+1));

    s->predictor_k = av_mallocz(4* s->num_taps);
    if (!s->predictor_k)
        return -1;

    for (i = 0; i < s->channels; i++)
    {
        s->predictor_state[i] = av_mallocz(4* s->num_taps);
        if (!s->predictor_state[i])
            return -1;
    }

    for (i = 0; i < s->channels; i++)
    {
        s->coded_samples[i] = av_mallocz(4* s->block_align);
        if (!s->coded_samples[i])
            return -1;
    }

    s->int_samples = av_mallocz(4* s->frame_size);
    if (!s->int_samples)
        return -1;

    return 0;
}

/**
 * Release everything sonic_decode_init() allocated.
 *
 * @return 0
 */
static int sonic_decode_close(AVCodecContext *avctx)
{
    SonicContext *s = avctx->priv_data;
    int i;

    av_free(s->int_samples);
    av_free(s->tap_quant);
    av_free(s->predictor_k);

    for (i = 0; i < s->channels; i++)
    {
        av_free(s->predictor_state[i]);
        av_free(s->coded_samples[i]);
    }

    return 0;
}

/**
 * Decode one frame into s->frame_size interleaved 16-bit samples.
 *
 * Steps: read and dequantize the predictor taps, read the quantizer (lossy
 * only), run each channel's coded samples through the predictor, undo the
 * stereo decorrelation, scale back and clip to 16 bits.
 *
 * @param data      output samples (short)
 * @param data_size set to the number of output bytes
 * @param buf       input packet
 * @param buf_size  size of buf in bytes
 * @return number of input bytes consumed, 0 for an empty packet, or -1 if a
 *         list of integers cannot be read
 */
static int sonic_decode_frame(AVCodecContext *avctx,
                            void *data, int *data_size,
                            uint8_t *buf, int buf_size)
{
    SonicContext *s = avctx->priv_data;
    GetBitContext gb;
    int i, quant, ch, j;
    short *samples = data;

    if (buf_size == 0) return 0;

    init_get_bits(&gb, buf, buf_size*8);

    if (intlist_read(&gb, s->predictor_k, s->num_taps, 0) < 0)
        return -1;

    // dequantize
    for (i = 0; i < s->num_taps; i++)
        s->predictor_k[i] *= s->tap_quant[i];

    if (s->lossless)
        quant = 1;
    else
        quant = get_ue_golomb(&gb) * SAMPLE_FACTOR;

    for (ch = 0; ch < s->channels; ch++)
    {
        int x = ch;

        predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps);

        if (intlist_read(&gb, s->coded_samples[ch], s->block_align, 1) < 0)
            return -1;

        for (i = 0; i < s->block_align; i++)
        {
            // samples dropped by the encoder's downsampling are
            // regenerated with a zero error term
            for (j = 0; j < s->downsampling - 1; j++)
            {
                s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0);
                x += s->channels;
            }

            s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * quant);
            x += s->channels;
        }

        // keep the newest num_taps samples of this channel (newest first)
        // as the starting state for the next frame
        for (i = 0; i < s->num_taps; i++)
            s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels];
    }

    // every transform below touches int_samples[i+1], which only exists
    // for each i when the frame holds interleaved stereo pairs
    if (s->channels == 2)
    {
        switch(s->decorrelation)
        {
            case MID_SIDE:
                for (i = 0; i < s->frame_size; i += s->channels)
                {
                    s->int_samples[i+1] += shift(s->int_samples[i], 1);
                    s->int_samples[i] -= s->int_samples[i+1];
                }
                break;
            case LEFT_SIDE:
                for (i = 0; i < s->frame_size; i += s->channels)
                    s->int_samples[i+1] += s->int_samples[i];
                break;
            case RIGHT_SIDE:
                for (i = 0; i < s->frame_size; i += s->channels)
                    s->int_samples[i] += s->int_samples[i+1];
                break;
        }
    }

    if (!s->lossless)
        for (i = 0; i < s->frame_size; i++)
            s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT);

    // internal -> short
    for (i = 0; i < s->frame_size; i++)
    {
        if (s->int_samples[i] > 32767)
            samples[i] = 32767;
        else if (s->int_samples[i] < -32768)
            samples[i] = -32768;
        else
            samples[i] = s->int_samples[i];
    }

    align_get_bits(&gb);

    *data_size = s->frame_size * 2;

    return (get_bits_count(&gb)+7)/8;
}

#ifdef CONFIG_ENCODERS
/* Codec table entry for the lossy Sonic encoder.
 * The initialiser is positional: its meaning depends on the field order of
 * AVCodec, which is declared outside this file. Presumably name, type, id,
 * private context size, init, encode, close, decode - TODO confirm. */
AVCodec sonic_encoder = {
    "sonic",
    CODEC_TYPE_AUDIO,
    CODEC_ID_SONIC,
    sizeof(SonicContext),
    sonic_encode_init,
    sonic_encode_frame,
    sonic_encode_close,
    NULL,
};

/* Codec table entry for the lossless Sonic encoder. It shares the encoder
 * callbacks with sonic_encoder; sonic_encode_init() switches to lossless
 * settings when the codec id is CODEC_ID_SONIC_LS.
 * Positional initialiser; field order comes from the external AVCodec
 * declaration - TODO confirm. */
AVCodec sonic_ls_encoder = {
    "sonicls",
    CODEC_TYPE_AUDIO,
    CODEC_ID_SONIC_LS,
    sizeof(SonicContext),
    sonic_encode_init,
    sonic_encode_frame,
    sonic_encode_close,
    NULL,
};
#endif

#ifdef CONFIG_DECODERS
/* Codec table entry for the Sonic decoder.
 * Positional initialiser; the NULL presumably fills the encode slot, with
 * the decode callback last - TODO confirm against the AVCodec declaration. */
AVCodec sonic_decoder = {
    "sonic",
    CODEC_TYPE_AUDIO,
    CODEC_ID_SONIC,
    sizeof(SonicContext),
    sonic_decode_init,
    NULL,
    sonic_decode_close,
    sonic_decode_frame,
};
#endif