/* FAudio - XAudio Reimplementation for FNA
 *
 * Copyright (c) 2011-2020 Ethan Lee, Luigi Auriemma, and the MonoGame Team
 *
 * This software is provided 'as-is', without any express or implied warranty.
 * In no event will the authors be held liable for any damages arising from
 * the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 * claim that you wrote the original software. If you use this software in a
 * product, an acknowledgment in the product documentation would be
 * appreciated but is not required.
 *
 * 2. Altered source versions must be plainly marked as such, and must not be
 * misrepresented as being the original software.
 *
 * 3. This notice may not be removed or altered from any source distribution.
 *
 * Ethan "flibitijibibo" Lee <flibitijibibo@flibitijibibo.com>
 *
 */

#ifdef HAVE_FFMPEG

#include "FAudio_internal.h"

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
#include <libavcodec/avcodec.h>
#ifdef __cplusplus
}
#endif /* __cplusplus */

/* Per-voice FFmpeg decoder state. */
typedef struct FAudioFFmpeg
{
	AVCodecContext *av_ctx;
	AVFrame *av_frame;

	uint32_t encOffset;	/* current position in encoded stream (in bytes) */
	uint32_t decOffset;	/* current position in decoded stream (in samples) */

	/* buffer used to decode the last frame */
	size_t paddingBytes;
	uint8_t *paddingBuffer;

	/* buffer to receive an entire decoded frame */
	uint32_t convertCapacity;
	uint32_t convertSamples;
	uint32_t convertOffset;
	float *convertCache;
} FAudioFFmpeg;

/* Rewinds the encoded/decoded stream positions of the voice to zero.
 * The codec context and the convert cache bookkeeping are left untouched.
 */
void FAudio_FFMPEG_reset(FAudioSourceVoice *voice)
{
	LOG_FUNC_ENTER(voice->audio)
	voice->src.ffmpeg->encOffset = 0;
	voice->src.ffmpeg->decOffset = 0;
	LOG_FUNC_EXIT(voice->audio)
}

/* Attaches a zero-filled extradata buffer to the codec context.
 *
 * The buffer holds `size` payload bytes followed by
 * AV_INPUT_BUFFER_PADDING_SIZE zeroed bytes, which FFmpeg requires after
 * extradata. Returns 1 on success, 0 if the allocation failed (in which case
 * extradata is NULL and extradata_size is 0).
 */
static uint8_t FAudio_FFMPEG_INTERNAL_AllocExtradata(
	AVCodecContext *av_ctx,
	uint32_t size
) {
	av_ctx->extradata = (uint8_t *) av_malloc(
		size + AV_INPUT_BUFFER_PADDING_SIZE
	);
	if (av_ctx->extradata == NULL)
	{
		av_ctx->extradata_size = 0;
		return 0;
	}
	FAudio_zero(av_ctx->extradata, size + AV_INPUT_BUFFER_PADDING_SIZE);
	av_ctx->extradata_size = (int) size;
	return 1;
}

/* Releases a codec context created by FAudio_FFMPEG_init, including its
 * extradata. Pass opened != 0 when avcodec_open2 succeeded on the context.
 */
static void FAudio_FFMPEG_INTERNAL_DestroyContext(
	AVCodecContext *av_ctx,
	uint8_t opened
) {
	if (opened)
	{
		avcodec_close(av_ctx);
	}
	av_free(av_ctx->extradata);
	av_free(av_ctx);
}

/* Creates the FFmpeg decoder state for a source voice.
 *
 * pSourceVoice: voice whose src.format describes the encoded stream; on
 *               success src.ffmpeg receives the new decoder state.
 * type:         FAUDIO_FORMAT_WMAUDIO2, FAUDIO_FORMAT_WMAUDIO3 or
 *               FAUDIO_FORMAT_XMAUDIO2.
 *
 * Returns 0 on success, FAUDIO_E_UNSUPPORTED_FORMAT on any failure. On
 * failure every FFmpeg resource allocated here has been released again.
 * src.decode is set to FAudio_INTERNAL_DecodeFFMPEG in either case.
 */
uint32_t FAudio_FFMPEG_init(FAudioSourceVoice *pSourceVoice, uint32_t type)
{
	AVCodecContext *av_ctx;
	AVFrame *av_frame;
	AVCodec *codec = NULL;
	FAudioFFmpeg *ffmpeg;
	const char *typestring = "Unknown";
	uint8_t extradataOk = 1;

	LOG_FUNC_ENTER(pSourceVoice->audio)

	pSourceVoice->src.decode = FAudio_INTERNAL_DecodeFFMPEG;

	/* initialize ffmpeg state */
	if (type == FAUDIO_FORMAT_WMAUDIO2)
	{
		typestring = "WMAv2";
		codec = avcodec_find_decoder(AV_CODEC_ID_WMAV2);
	}
	else if (type == FAUDIO_FORMAT_WMAUDIO3)
	{
		typestring = "WMAv3";
		codec = avcodec_find_decoder(AV_CODEC_ID_WMAPRO);
	}
	else if (type == FAUDIO_FORMAT_XMAUDIO2)
	{
		typestring = "XMA2";
		codec = avcodec_find_decoder(AV_CODEC_ID_XMA2);
	}

	if (!codec)
	{
		LOG_ERROR(
			pSourceVoice->audio,
			"%s codec not supported!",
			typestring
		);
		FAudio_assert(0 && "FFmpeg codec not supported!");
		LOG_FUNC_EXIT(pSourceVoice->audio)
		return FAUDIO_E_UNSUPPORTED_FORMAT;
	}

	av_ctx = avcodec_alloc_context3(codec);
	if (!av_ctx)
	{
		LOG_ERROR(
			pSourceVoice->audio,
			"%s",
			"avcodec_alloc_context3 failed!"
		);
		FAudio_assert(0 && "avcodec_alloc_context3 failed!");
		LOG_FUNC_EXIT(pSourceVoice->audio)
		return FAUDIO_E_UNSUPPORTED_FORMAT;
	}

	av_ctx->bit_rate = pSourceVoice->src.format->nAvgBytesPerSec * 8;
	av_ctx->channels = pSourceVoice->src.format->nChannels;
	av_ctx->sample_rate = pSourceVoice->src.format->nSamplesPerSec;
	av_ctx->block_align = pSourceVoice->src.format->nBlockAlign;
	av_ctx->bits_per_coded_sample = pSourceVoice->src.format->wBitsPerSample;
	av_ctx->request_sample_fmt = AV_SAMPLE_FMT_FLT;

	/* pSourceVoice->src.format is actually pointing to a
	 * WAVEFORMATEXTENSIBLE struct, not just a WAVEFORMATEX struct.
	 * That means there's always at least 22 bytes following the struct, I
	 * assume the WMA data is behind that.
	 * Need to verify but haven't come across any samples data with cbSize > 22
	 * -@JohanSmet!
	 */
	FAudio_assert(pSourceVoice->src.format->cbSize <= 22);

	if (type == FAUDIO_FORMAT_WMAUDIO3)
	{
		extradataOk = FAudio_FFMPEG_INTERNAL_AllocExtradata(
			av_ctx,
			pSourceVoice->src.format->cbSize
		);
		if (extradataOk)
		{
			FAudio_memcpy(
				av_ctx->extradata,
				&((FAudioWaveFormatExtensible*) pSourceVoice->src.format)->Samples,
				pSourceVoice->src.format->cbSize
			);
		}
	}
	else if (type == FAUDIO_FORMAT_WMAUDIO2)
	{
		/* xWMA doesn't provide the extradata info that FFmpeg needs to
		 * decode WMA data, so we create some fake extradata. This is
		 * taken from FFmpeg's libavformat/xwma.c.
		 */
		extradataOk = FAudio_FFMPEG_INTERNAL_AllocExtradata(av_ctx, 6);
		if (extradataOk)
		{
			av_ctx->extradata[4] = 31;
		}
	}
	else if (type == FAUDIO_FORMAT_XMAUDIO2)
	{
		/* FFmpeg expects XMA2WAVEFORMATEX or XMA2WAVEFORMAT.
		 * For more info, check FFmpeg's libavcodec/wmaprodec.c.
		 */
		extradataOk = FAudio_FFMPEG_INTERNAL_AllocExtradata(av_ctx, 34);
		if (extradataOk)
		{
			av_ctx->extradata[1] = 1;
			av_ctx->extradata[5] = pSourceVoice->src.format->nChannels == 2 ? 3 : 0;
			av_ctx->extradata[31] = 4;
			av_ctx->extradata[33] = 1;
		}
	}

	if (!extradataOk)
	{
		FAudio_FFMPEG_INTERNAL_DestroyContext(av_ctx, 0);
		LOG_ERROR(pSourceVoice->audio, "%s", "extradata allocation failed!")
		LOG_FUNC_EXIT(pSourceVoice->audio)
		return FAUDIO_E_UNSUPPORTED_FORMAT;
	}

	if (avcodec_open2(av_ctx, codec, NULL) < 0)
	{
		FAudio_FFMPEG_INTERNAL_DestroyContext(av_ctx, 0);
		LOG_ERROR(pSourceVoice->audio, "%s", "avcodec_open2 failed!")
		LOG_FUNC_EXIT(pSourceVoice->audio)
		return FAUDIO_E_UNSUPPORTED_FORMAT;
	}

	av_frame = av_frame_alloc();
	if (!av_frame)
	{
		FAudio_FFMPEG_INTERNAL_DestroyContext(av_ctx, 1);
		LOG_ERROR(pSourceVoice->audio, "%s", "av_frame_alloc failed!")
		LOG_FUNC_EXIT(pSourceVoice->audio)
		return FAUDIO_E_UNSUPPORTED_FORMAT;
	}

	if (av_ctx->sample_fmt != AV_SAMPLE_FMT_FLT && av_ctx->sample_fmt != AV_SAMPLE_FMT_FLTP)
	{
		FAudio_assert(0 && "Got non-float format!!!");
	}

	ffmpeg = (FAudioFFmpeg *) pSourceVoice->audio->pMalloc(sizeof(FAudioFFmpeg));
	if (ffmpeg == NULL)
	{
		av_frame_free(&av_frame);
		FAudio_FFMPEG_INTERNAL_DestroyContext(av_ctx, 1);
		LOG_ERROR(pSourceVoice->audio, "%s", "decoder state allocation failed!")
		LOG_FUNC_EXIT(pSourceVoice->audio)
		return FAUDIO_E_UNSUPPORTED_FORMAT;
	}
	FAudio_zero(ffmpeg, sizeof(FAudioFFmpeg));
	ffmpeg->av_ctx = av_ctx;
	ffmpeg->av_frame = av_frame;
	pSourceVoice->src.ffmpeg = ffmpeg;

	LOG_FUNC_EXIT(pSourceVoice->audio)
	return 0;
}

/* Destroys the decoder state created by FAudio_FFMPEG_init: the frame, the
 * codec context with its extradata, both scratch buffers and the state struct
 * itself. voice->src.ffmpeg is NULL afterwards.
 */
void FAudio_FFMPEG_free(FAudioSourceVoice *voice)
{
	FAudioFFmpeg *ffmpeg = voice->src.ffmpeg;

	LOG_FUNC_ENTER(voice->audio)

	/* The frame comes from av_frame_alloc and must be released with it */
	av_frame_free(&ffmpeg->av_frame);
	FAudio_FFMPEG_INTERNAL_DestroyContext(ffmpeg->av_ctx, 1);

	voice->audio->pFree(ffmpeg->convertCache);
	voice->audio->pFree(ffmpeg->paddingBuffer);
	voice->audio->pFree(ffmpeg);
	voice->src.ffmpeg = NULL;

	LOG_FUNC_EXIT(voice->audio)
}

/* Grows the convert cache so it can hold at least `samples` floats.
 *
 * The cache never shrinks. If the reallocation fails the previous buffer and
 * capacity are kept, so callers must compare convertCapacity against what
 * they need before writing.
 */
void FAudio_INTERNAL_ResizeConvertCache(FAudioVoice *voice, uint32_t samples)
{
	FAudioFFmpeg *ffmpeg = voice->src.ffmpeg;
	float *resized;

	LOG_FUNC_ENTER(voice->audio)

	if (samples > ffmpeg->convertCapacity)
	{
		resized = (float*) voice->audio->pRealloc(
			ffmpeg->convertCache,
			sizeof(float) * samples
		);
		if (resized != NULL)
		{
			ffmpeg->convertCache = resized;
			ffmpeg->convertCapacity = samples;
		}
	}

	LOG_FUNC_EXIT(voice->audio)
}

/* Decodes the next frame of `buffer` into the convert cache.
 *
 * Packets of nBlockAlign bytes are fed to the decoder, starting at encOffset,
 * until it yields a frame or the buffer is exhausted. The frame is stored
 * interleaved in convertCache; convertSamples is set to the frame's sample
 * count per channel and convertOffset to 0. If nothing could be decoded the
 * cache is left empty (convertSamples == 0).
 */
void FAudio_INTERNAL_FillConvertCache(FAudioVoice *voice, FAudioBuffer *buffer)
{
	FAudioFFmpeg *ffmpeg = voice->src.ffmpeg;
	AVPacket avpkt = {0};
	int averr;
	uint32_t total_samples;

	LOG_FUNC_ENTER(voice->audio)

	avpkt.size = voice->src.format->nBlockAlign;
	avpkt.data = (unsigned char *) buffer->pAudioData + ffmpeg->encOffset;

	for(;;)
	{
		averr = avcodec_receive_frame(ffmpeg->av_ctx, ffmpeg->av_frame);
		if (averr == AVERROR(EAGAIN))
		{
			/* ffmpeg needs more data to decode */
			avpkt.pts = avpkt.dts = AV_NOPTS_VALUE;

			if (ffmpeg->encOffset >= buffer->AudioBytes)
			{
				/* no more data in this buffer */
				break;
			}

			if (ffmpeg->encOffset + avpkt.size + AV_INPUT_BUFFER_PADDING_SIZE > buffer->AudioBytes)
			{
				/* Unfortunately, the FFmpeg API requires that a number of
				 * extra bytes must be available past the end of the buffer.
				 * The xaudio2 client probably hasn't done this, so we have to
				 * perform a copy near the end of the buffer.
				 */
				size_t remain = buffer->AudioBytes - ffmpeg->encOffset;
				size_t needed;
				uint8_t *grown;

				/* The packet still claims avpkt.size bytes, so the copy
				 * must cover a whole packet even if the buffer ends early.
				 */
				needed = (remain > (size_t) avpkt.size) ?
					remain :
					(size_t) avpkt.size;
				needed += AV_INPUT_BUFFER_PADDING_SIZE;

				if (ffmpeg->paddingBytes < needed)
				{
					grown = (uint8_t *) voice->audio->pRealloc(
						ffmpeg->paddingBuffer,
						needed
					);
					if (grown == NULL)
					{
						/* Old buffer is still owned by ffmpeg; give up on this frame */
						ffmpeg->convertSamples = 0;
						ffmpeg->convertOffset = 0;
						LOG_ERROR(
							voice->audio,
							"%s",
							"padding buffer allocation failed!"
						)
						LOG_FUNC_EXIT(voice->audio)
						return;
					}
					ffmpeg->paddingBuffer = grown;
					ffmpeg->paddingBytes = needed;
				}
				FAudio_memcpy(
					ffmpeg->paddingBuffer,
					buffer->pAudioData + ffmpeg->encOffset,
					remain
				);
				FAudio_zero(ffmpeg->paddingBuffer + remain, needed - remain);
				avpkt.data = ffmpeg->paddingBuffer;
			}

			averr = avcodec_send_packet(ffmpeg->av_ctx, &avpkt);
			if (averr)
			{
				FAudio_assert(0 && "avcodec_send_packet failed" && averr);
				break;
			}

			ffmpeg->encOffset += avpkt.size;
			avpkt.data += avpkt.size;

			/* data sent, try receive again */
			continue;
		}

		if (averr)
		{
			LOG_ERROR(
				voice->audio,
				"avcodec_receive_frame failed: %d",
				averr
			)
			FAudio_assert(0 && "avcodec_receive_frame failed" && averr);
			/* Do not leave stale samples looking like valid data */
			ffmpeg->convertSamples = 0;
			ffmpeg->convertOffset = 0;
			LOG_FUNC_EXIT(voice->audio)
			return;
		}
		else
		{
			break;
		}
	}

	/* copy decoded samples to internal buffer, reordering if necessary */
	total_samples = ffmpeg->av_frame->nb_samples * ffmpeg->av_ctx->channels;
	FAudio_INTERNAL_ResizeConvertCache(voice, total_samples);
	if (total_samples > ffmpeg->convertCapacity)
	{
		/* The cache could not be grown, so the frame cannot be stored */
		ffmpeg->convertSamples = 0;
		ffmpeg->convertOffset = 0;
		LOG_ERROR(voice->audio, "%s", "convert cache allocation failed!")
		LOG_FUNC_EXIT(voice->audio)
		return;
	}

	/* total_samples is 0 when no frame was produced; skip the copy then */
	if (total_samples > 0)
	{
		if (av_sample_fmt_is_planar(ffmpeg->av_ctx->sample_fmt))
		{
			/* Interleave the per-channel planes, one 32-bit word per sample */
			int32_t s, c;
			uint8_t **src = ffmpeg->av_frame->data;
			uint32_t *dst = (uint32_t *) ffmpeg->convertCache;

			for(s = 0; s < ffmpeg->av_frame->nb_samples; ++s)
			{
				for(c = 0; c < ffmpeg->av_ctx->channels; ++c)
				{
					*dst++ = ((uint32_t*)(src[c]))[s];
				}
			}
		}
		else
		{
			FAudio_memcpy(
				ffmpeg->convertCache,
				ffmpeg->av_frame->data[0],
				total_samples * sizeof(float)
			);
		}
	}

	ffmpeg->convertSamples = ffmpeg->av_frame->nb_samples;
	ffmpeg->convertOffset = 0;

	LOG_FUNC_EXIT(voice->audio)
}

/* Decode callback installed by FAudio_FFMPEG_init.
 *
 * Writes `samples` interleaved float frames to decodeCache, starting at
 * voice->src.curBufferOffset within `buffer`. If the decoder position does
 * not match curBufferOffset the stream is rewound within the convert cache
 * or re-seeked using the buffer's WMA packet table. Whatever cannot be
 * decoded is filled with silence, and decOffset advances by `samples`.
 */
void FAudio_INTERNAL_DecodeFFMPEG(
	FAudioVoice *voice,
	FAudioBuffer *buffer,
	float *decodeCache,
	uint32_t samples
) {
	FAudioFFmpeg *ffmpeg = voice->src.ffmpeg;
	uint32_t decSampleSize = voice->src.format->nChannels * voice->src.format->wBitsPerSample / 8;
	uint32_t outSampleSize = voice->src.format->nChannels * sizeof(float);
	uint32_t done = 0, available, todo, cumulative;
	uint32_t reseek = 0;

	LOG_FUNC_ENTER(voice->audio)

	/* check if we need to reposition in the stream */
	if (voice->src.curBufferOffset < ffmpeg->decOffset)
	{
		/* If curBufferOffset is behind, it's because we had to do some
		 * padding, which should not affect the stream offset. To fix,
		 * we simply rewind by a couple samples. Pretty safe if it doesn't
		 * cross back into the previous decoded block.
		 */
		uint32_t delta = ffmpeg->decOffset - voice->src.curBufferOffset;
		if (ffmpeg->convertOffset >= delta)
		{
			ffmpeg->convertOffset -= delta;
			ffmpeg->decOffset = voice->src.curBufferOffset;
		}
		else
		{
			reseek = 1;
		}
	}
	else if (voice->src.curBufferOffset > ffmpeg->decOffset)
	{
		/* If we're starting in the middle, we have to seek to the
		 * starting position. AFAIK this shouldn't happen mid-stream.
		 */
		reseek = 1;
	}

	if (reseek)
	{
		FAudioBufferWMA *bufferWMA = &voice->src.bufferList->bufferWMA;
		uint32_t byteOffset = voice->src.curBufferOffset * decSampleSize;
		/* An empty packet table must not wrap the index around */
		uint32_t packetIdx = (bufferWMA->PacketCount > 0) ?
			(bufferWMA->PacketCount - 1) :
			0;

		/* figure out in which encoded packet has this position */
		while (packetIdx > 0 && bufferWMA->pDecodedPacketCumulativeBytes[packetIdx] > byteOffset)
		{
			packetIdx -= 1;
		}

		if (packetIdx == 0)
		{
			cumulative = 0;
		}
		else
		{
			cumulative = bufferWMA->pDecodedPacketCumulativeBytes[packetIdx - 1];
		}

		/* seek to the wanted position in the stream */
		ffmpeg->encOffset = packetIdx * voice->src.format->nBlockAlign;
		FAudio_INTERNAL_FillConvertCache(voice, buffer);
		/* NOTE(review): byteOffset is scaled by decSampleSize but divided
		 * by outSampleSize here - confirm this is the intended unit.
		 */
		ffmpeg->convertOffset = (byteOffset - cumulative) / outSampleSize;
		ffmpeg->decOffset = voice->src.curBufferOffset;
	}

	while (done < samples)
	{
		/* check for available data in decoded cache, refill if necessary */
		if (ffmpeg->convertOffset >= ffmpeg->convertSamples)
		{
			FAudio_INTERNAL_FillConvertCache(voice, buffer);
		}

		/* Compare before subtracting: both values are unsigned, and after
		 * a failed refill convertOffset may still exceed convertSamples.
		 */
		if (ffmpeg->convertOffset >= ffmpeg->convertSamples)
		{
			break;
		}
		available = ffmpeg->convertSamples - ffmpeg->convertOffset;

		todo = FAudio_min(available, samples - done);
		FAudio_memcpy(
			decodeCache + (done * voice->src.format->nChannels),
			ffmpeg->convertCache + (ffmpeg->convertOffset * voice->src.format->nChannels),
			todo * voice->src.format->nChannels * sizeof(float)
		);
		done += todo;
		ffmpeg->convertOffset += todo;
	}

	/* FIXME: This block should not be here! */
	if (done < samples)
	{
		FAudio_zero(
			decodeCache + (done * voice->src.format->nChannels),
			(samples - done) * voice->src.format->nChannels * sizeof(float)
		);
	}

	ffmpeg->decOffset += samples;

	LOG_FUNC_EXIT(voice->audio)
}

#else

extern int this_tu_is_empty;

#endif /* HAVE_FFMPEG */

/* vim: set noexpandtab shiftwidth=8 tabstop=8: */