📄 sonic.c

📁 ffmpeg的完整源代码和作者自己写的文档。不但有在Linux的工程哦
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
static inline int code_samplerate(int samplerate)
{
    switch (samplerate)
    {
        case 44100: return 0;
        case 22050: return 1;
        case 11025: return 2;
        case 96000: return 3;
        case 48000: return 4;
        case 32000: return 5;
        case 24000: return 6;
        case 16000: return 7;
        case 8000: return 8;
    }
    return -1;
}

static int sonic_encode_init(AVCodecContext *avctx)
{
    SonicContext *s = avctx->priv_data;
    PutBitContext pb;
    int i, version = 0;

    if (avctx->channels > MAX_CHANNELS)
    {
        av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
        return -1; /* only stereo or mono for now */
    }

    if (avctx->channels == 2)
        s->decorrelation = MID_SIDE;

    if (avctx->codec->id == CODEC_ID_SONIC_LS)
    {
        s->lossless = 1;
        s->num_taps = 32;
        s->downsampling = 1;
        s->quantization = 0.0;
    }
    else
    {
        s->num_taps = 128;
        s->downsampling = 2;
        s->quantization = 1.0;
    }

    // max tap 2048
    if ((s->num_taps < 32) || (s->num_taps > 1024) ||
        ((s->num_taps>>5)<<5 != s->num_taps))
    {
        av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n");
        return -1;
    }

    // generate taps
    s->tap_quant = av_mallocz(4* s->num_taps);
    for (i = 0; i < s->num_taps; i++)
        s->tap_quant[i] = (int)(sqrt(i+1));

    s->channels = avctx->channels;
    s->samplerate = avctx->sample_rate;

    s->block_align = (int)(2048.0*s->samplerate/44100)/s->downsampling;
    s->frame_size = s->channels*s->block_align*s->downsampling;

    s->tail = av_mallocz(4* s->num_taps*s->channels);
    if (!s->tail)
        return -1;
    s->tail_size = s->num_taps*s->channels;

    s->predictor_k = av_mallocz(4 * s->num_taps);
    if (!s->predictor_k)
        return -1;

    for (i = 0; i < s->channels; i++)
    {
        s->coded_samples[i] = av_mallocz(4* s->block_align);
        if (!s->coded_samples[i])
            return -1;
    }

    s->int_samples = av_mallocz(4* s->frame_size);

    s->window_size = ((2*s->tail_size)+s->frame_size);
    s->window = av_mallocz(4* s->window_size);
    if (!s->window)
        return -1;

    avctx->extradata = av_mallocz(16);
    if (!avctx->extradata)
        return -1;
    init_put_bits(&pb, avctx->extradata, 16*8);

    put_bits(&pb, 2, version); // version
    if (version == 1)
    {
        put_bits(&pb, 2, s->channels);
        put_bits(&pb, 4, code_samplerate(s->samplerate));
    }
    put_bits(&pb, 1, s->lossless);
    if (!s->lossless)
        put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision
    put_bits(&pb, 2, s->decorrelation);
    put_bits(&pb, 2, s->downsampling);
    put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024
    put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table

    flush_put_bits(&pb);
    avctx->extradata_size = put_bits_count(&pb)/8;

    av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
        version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);

    avctx->coded_frame = avcodec_alloc_frame();
    if (!avctx->coded_frame)
        return AVERROR(ENOMEM);
    avctx->coded_frame->key_frame = 1;
    avctx->frame_size = s->block_align*s->downsampling;

    return 0;
}

static int sonic_encode_close(AVCodecContext *avctx)
{
    SonicContext *s = avctx->priv_data;
    int i;

    av_freep(&avctx->coded_frame);

    for (i = 0; i < s->channels; i++)
        av_free(s->coded_samples[i]);

    av_free(s->predictor_k);
    av_free(s->tail);
    av_free(s->tap_quant);
    av_free(s->window);
    av_free(s->int_samples);

    return 0;
}

static int sonic_encode_frame(AVCodecContext *avctx,
                            uint8_t *buf, int buf_size, void *data)
{
    SonicContext *s = avctx->priv_data;
    PutBitContext pb;
    int i, j, ch, quant = 0, x = 0;
    short *samples = data;

    init_put_bits(&pb, buf, buf_size*8);

    // short -> internal
    for (i = 0; i < s->frame_size; i++)
        s->int_samples[i] = samples[i];

    if (!s->lossless)
        for (i = 0; i < s->frame_size; i++)
            s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT;

    switch(s->decorrelation)
    {
        case MID_SIDE:
            for (i = 0; i < s->frame_size; i += s->channels)
            {
                s->int_samples[i] += s->int_samples[i+1];
                s->int_samples[i+1] -= shift(s->int_samples[i], 1);
            }
            break;
        case LEFT_SIDE:
            for (i = 0; i < s->frame_size; i += s->channels)
                s->int_samples[i+1] -= s->int_samples[i];
            break;
        case RIGHT_SIDE:
            for (i = 0; i < s->frame_size; i += s->channels)
                s->int_samples[i] -= s->int_samples[i+1];
            break;
    }

    memset(s->window, 0, 4* s->window_size);

    for (i = 0; i < s->tail_size; i++)
        s->window[x++] = s->tail[i];

    for (i = 0; i < s->frame_size; i++)
        s->window[x++] = s->int_samples[i];

    for (i = 0; i < s->tail_size; i++)
        s->window[x++] = 0;

    for (i = 0; i < s->tail_size; i++)
        s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i];

    // generate taps
    modified_levinson_durbin(s->window, s->window_size,
                s->predictor_k, s->num_taps, s->channels, s->tap_quant);
    if (intlist_write(&pb, s->predictor_k, s->num_taps, 0) < 0)
        return -1;

    for (ch = 0; ch < s->channels; ch++)
    {
        x = s->tail_size+ch;
        for (i = 0; i < s->block_align; i++)
        {
            int sum = 0;
            for (j = 0; j < s->downsampling; j++, x += s->channels)
                sum += s->window[x];
            s->coded_samples[ch][i] = sum;
        }
    }

    // simple rate control code
    if (!s->lossless)
    {
        double energy1 = 0.0, energy2 = 0.0;
        for (ch = 0; ch < s->channels; ch++)
        {
            for (i = 0; i < s->block_align; i++)
            {
                double sample = s->coded_samples[ch][i];
                energy2 += sample*sample;
                energy1 += fabs(sample);
            }
        }

        energy2 = sqrt(energy2/(s->channels*s->block_align));
        energy1 = sqrt(2.0)*energy1/(s->channels*s->block_align);

        // increase bitrate when samples are like a gaussian distribution
        // reduce bitrate when samples are like a two-tailed exponential distribution

        if (energy2 > energy1)
            energy2 += (energy2-energy1)*RATE_VARIATION;

        quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR);
//        av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2);

        if (quant < 1)
            quant = 1;
        if (quant > 65535)
            quant = 65535;

        set_ue_golomb(&pb, quant);

        quant *= SAMPLE_FACTOR;
    }

    // write out coded samples
    for (ch = 0; ch < s->channels; ch++)
    {
        if (!s->lossless)
            for (i = 0; i < s->block_align; i++)
                s->coded_samples[ch][i] = divide(s->coded_samples[ch][i], quant);

        if (intlist_write(&pb, s->coded_samples[ch], s->block_align, 1) < 0)
            return -1;
    }

//    av_log(avctx, AV_LOG_DEBUG, "used bytes: %d\n", (put_bits_count(&pb)+7)/8);

    flush_put_bits(&pb);
    return (put_bits_count(&pb)+7)/8;
}
#endif //CONFIG_ENCODERS

#ifdef CONFIG_DECODERS
static int sonic_decode_init(AVCodecContext *avctx)
{
    SonicContext *s = avctx->priv_data;
    GetBitContext gb;
    int i, version;

    s->channels = avctx->channels;
    s->samplerate = avctx->sample_rate;

    if (!avctx->extradata)
    {
        av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n");
        return -1;
    }

    init_get_bits(&gb, avctx->extradata, avctx->extradata_size);

    version = get_bits(&gb, 2);
    if (version > 1)
    {
        av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n");
        return -1;
    }

    if (version == 1)
    {
        s->channels = get_bits(&gb, 2);
        s->samplerate = samplerate_table[get_bits(&gb, 4)];
        av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n",
            s->channels, s->samplerate);
    }

    if (s->channels > MAX_CHANNELS)
    {
        av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
        return -1;
    }

    s->lossless = get_bits1(&gb);
    if (!s->lossless)
        skip_bits(&gb, 3); // XXX FIXME
    s->decorrelation = get_bits(&gb, 2);

    s->downsampling = get_bits(&gb, 2);
    s->num_taps = (get_bits(&gb, 5)+1)<<5;
    if (get_bits1(&gb)) // XXX FIXME
        av_log(avctx, AV_LOG_INFO, "Custom quant table\n");

    s->block_align = (int)(2048.0*(s->samplerate/44100))/s->downsampling;
    s->frame_size = s->channels*s->block_align*s->downsampling;
//    avctx->frame_size = s->block_align;

    av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
        version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);

    // generate taps
    s->tap_quant = av_mallocz(4* s->num_taps);
    for (i = 0; i < s->num_taps; i++)
        s->tap_quant[i] = (int)(sqrt(i+1));

    s->predictor_k = av_mallocz(4* s->num_taps);

    for (i = 0; i < s->channels; i++)
    {
        s->predictor_state[i] = av_mallocz(4* s->num_taps);
        if (!s->predictor_state[i])
            return -1;
    }

    for (i = 0; i < s->channels; i++)
    {
        s->coded_samples[i] = av_mallocz(4* s->block_align);
        if (!s->coded_samples[i])
            return -1;
    }
    s->int_samples = av_mallocz(4* s->frame_size);

    return 0;
}

static int sonic_decode_close(AVCodecContext *avctx)
{
    SonicContext *s = avctx->priv_data;
    int i;

    av_free(s->int_samples);
    av_free(s->tap_quant);
    av_free(s->predictor_k);

    for (i = 0; i < s->channels; i++)
    {
        av_free(s->predictor_state[i]);
        av_free(s->coded_samples[i]);
    }

    return 0;
}

static int sonic_decode_frame(AVCodecContext *avctx,
                            void *data, int *data_size,
                            uint8_t *buf, int buf_size)
{
    SonicContext *s = avctx->priv_data;
    GetBitContext gb;
    int i, quant, ch, j;
    short *samples = data;

    if (buf_size == 0) return 0;

//    av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size);

    init_get_bits(&gb, buf, buf_size*8);

    intlist_read(&gb, s->predictor_k, s->num_taps, 0);

    // dequantize
    for (i = 0; i < s->num_taps; i++)
        s->predictor_k[i] *= s->tap_quant[i];

    if (s->lossless)
        quant = 1;
    else
        quant = get_ue_golomb(&gb) * SAMPLE_FACTOR;

//    av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant);

    for (ch = 0; ch < s->channels; ch++)
    {
        int x = ch;

        predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps);

        intlist_read(&gb, s->coded_samples[ch], s->block_align, 1);

        for (i = 0; i < s->block_align; i++)
        {
            for (j = 0; j < s->downsampling - 1; j++)
            {
                s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0);
                x += s->channels;
            }

            s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * quant);
            x += s->channels;
        }

        for (i = 0; i < s->num_taps; i++)
            s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels];
    }

    switch(s->decorrelation)
    {
        case MID_SIDE:
            for (i = 0; i < s->frame_size; i += s->channels)
            {
                s->int_samples[i+1] += shift(s->int_samples[i], 1);
                s->int_samples[i] -= s->int_samples[i+1];
            }
            break;
        case LEFT_SIDE:
            for (i = 0; i < s->frame_size; i += s->channels)
                s->int_samples[i+1] += s->int_samples[i];
            break;
        case RIGHT_SIDE:
            for (i = 0; i < s->frame_size; i += s->channels)
                s->int_samples[i] += s->int_samples[i+1];
            break;
    }

    if (!s->lossless)
        for (i = 0; i < s->frame_size; i++)
            s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT);

    // internal -> short
    for (i = 0; i < s->frame_size; i++)
        samples[i] = av_clip_int16(s->int_samples[i]);

    align_get_bits(&gb);

    *data_size = s->frame_size * 2;

    return (get_bits_count(&gb)+7)/8;
}
#endif

#ifdef CONFIG_ENCODERS
AVCodec sonic_encoder = {
    "sonic",
    CODEC_TYPE_AUDIO,
    CODEC_ID_SONIC,
    sizeof(SonicContext),
    sonic_encode_init,
    sonic_encode_frame,
    sonic_encode_close,
    NULL,
};

AVCodec sonic_ls_encoder = {
    "sonicls",
    CODEC_TYPE_AUDIO,
    CODEC_ID_SONIC_LS,
    sizeof(SonicContext),
    sonic_encode_init,
    sonic_encode_frame,
    sonic_encode_close,
    NULL,
};
#endif

#ifdef CONFIG_DECODERS
AVCodec sonic_decoder = {
    "sonic",
    CODEC_TYPE_AUDIO,
    CODEC_ID_SONIC,
    sizeof(SonicContext),
    sonic_decode_init,
    NULL,
    sonic_decode_close,
    sonic_decode_frame,
};
#endif
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -