|
|
/* FAudio - XAudio Reimplementation for FNA
|
|
|
*
|
|
|
* Copyright (c) 2011-2020 Ethan Lee, Luigi Auriemma, and the MonoGame Team
|
|
|
*
|
|
|
* This software is provided 'as-is', without any express or implied warranty.
|
|
|
* In no event will the authors be held liable for any damages arising from
|
|
|
* the use of this software.
|
|
|
*
|
|
|
* Permission is granted to anyone to use this software for any purpose,
|
|
|
* including commercial applications, and to alter it and redistribute it
|
|
|
* freely, subject to the following restrictions:
|
|
|
*
|
|
|
* 1. The origin of this software must not be misrepresented; you must not
|
|
|
* claim that you wrote the original software. If you use this software in a
|
|
|
* product, an acknowledgment in the product documentation would be
|
|
|
* appreciated but is not required.
|
|
|
*
|
|
|
* 2. Altered source versions must be plainly marked as such, and must not be
|
|
|
* misrepresented as being the original software.
|
|
|
*
|
|
|
* 3. This notice may not be removed or altered from any source distribution.
|
|
|
*
|
|
|
* Ethan "flibitijibibo" Lee <flibitijibibo@flibitijibibo.com>
|
|
|
*
|
|
|
*/
|
|
|
|
|
|
#ifdef HAVE_FFMPEG
|
|
|
|
|
|
#include "FAudio_internal.h"
|
|
|
#ifdef __cplusplus
|
|
|
extern "C" {
|
|
|
#endif /* __cplusplus */
|
|
|
#include <libavcodec/avcodec.h>
|
|
|
#ifdef __cplusplus
|
|
|
}
|
|
|
#endif /* __cplusplus */
|
|
|
|
|
|
typedef struct FAudioFFmpeg
|
|
|
{
|
|
|
AVCodecContext *av_ctx;
|
|
|
AVFrame *av_frame;
|
|
|
|
|
|
uint32_t encOffset; /* current position in encoded stream (in bytes) */
|
|
|
uint32_t decOffset; /* current position in decoded stream (in samples) */
|
|
|
|
|
|
/* buffer used to decode the last frame */
|
|
|
size_t paddingBytes;
|
|
|
uint8_t *paddingBuffer;
|
|
|
|
|
|
/* buffer to receive an entire decoded frame */
|
|
|
uint32_t convertCapacity;
|
|
|
uint32_t convertSamples;
|
|
|
uint32_t convertOffset;
|
|
|
float *convertCache;
|
|
|
} FAudioFFmpeg;
|
|
|
|
|
|
/* Puts both stream cursors of the voice's FFmpeg state back at zero.
 * Nothing else in the state is touched.
 */
void FAudio_FFMPEG_reset(FAudioSourceVoice *voice)
{
	FAudioFFmpeg *state;

	LOG_FUNC_ENTER(voice->audio)
	state = voice->src.ffmpeg;
	state->decOffset = 0;
	state->encOffset = 0;
	LOG_FUNC_EXIT(voice->audio)
}
|
|
|
|
|
|
/* Creates the FFmpeg decoder state for a WMA/XMA source voice.
 *
 * pSourceVoice: voice whose src.format describes the encoded stream. Its
 *               src.decode callback is always set; src.ffmpeg is only set
 *               when initialization succeeds.
 * type:         FAUDIO_FORMAT_WMAUDIO2, FAUDIO_FORMAT_WMAUDIO3 or
 *               FAUDIO_FORMAT_XMAUDIO2. Any other value is rejected.
 *
 * Returns 0 on success and FAUDIO_E_UNSUPPORTED_FORMAT on any failure
 * (unknown codec, allocation failure, codec open failure, or a decoder that
 * does not produce float samples). Everything allocated here is released
 * again before a failure is returned.
 */
uint32_t FAudio_FFMPEG_init(FAudioSourceVoice *pSourceVoice, uint32_t type)
{
	AVCodecContext *av_ctx;
	AVFrame *av_frame;
	AVCodec *codec = NULL;
	FAudioFFmpeg *ffmpeg;
	size_t extradataAlloc;
	const char *typestring = "Unknown";

	LOG_FUNC_ENTER(pSourceVoice->audio)
	pSourceVoice->src.decode = FAudio_INTERNAL_DecodeFFMPEG;

	/* initialize ffmpeg state */
	if (type == FAUDIO_FORMAT_WMAUDIO2)
	{
		typestring = "WMAv2";
		codec = avcodec_find_decoder(AV_CODEC_ID_WMAV2);
	}
	else if (type == FAUDIO_FORMAT_WMAUDIO3)
	{
		typestring = "WMAv3";
		codec = avcodec_find_decoder(AV_CODEC_ID_WMAPRO);
	}
	else if (type == FAUDIO_FORMAT_XMAUDIO2)
	{
		typestring = "XMA2";
		codec = avcodec_find_decoder(AV_CODEC_ID_XMA2);
	}
	if (!codec)
	{
		LOG_ERROR(
			pSourceVoice->audio,
			"%s codec not supported!",
			typestring
		);
		FAudio_assert(0 && "FFmpeg codec not supported!");
		LOG_FUNC_EXIT(pSourceVoice->audio)
		return FAUDIO_E_UNSUPPORTED_FORMAT;
	}

	av_ctx = avcodec_alloc_context3(codec);
	if (!av_ctx)
	{
		LOG_ERROR(
			pSourceVoice->audio,
			"%s",
			"avcodec_alloc_context3 failed!"
		);
		FAudio_assert(0 && "avcodec_alloc_context3 failed!");
		LOG_FUNC_EXIT(pSourceVoice->audio)
		return FAUDIO_E_UNSUPPORTED_FORMAT;
	}

	av_ctx->bit_rate = pSourceVoice->src.format->nAvgBytesPerSec * 8;
	av_ctx->channels = pSourceVoice->src.format->nChannels;
	av_ctx->sample_rate = pSourceVoice->src.format->nSamplesPerSec;
	av_ctx->block_align = pSourceVoice->src.format->nBlockAlign;
	av_ctx->bits_per_coded_sample = pSourceVoice->src.format->wBitsPerSample;
	av_ctx->request_sample_fmt = AV_SAMPLE_FMT_FLT;

	/* pSourceVoice->src.format is actually pointing to a
	 * WAVEFORMATEXTENSIBLE struct, not just a WAVEFORMATEX struct.
	 * That means there's always at least 22 bytes following the struct, I
	 * assume the WMA data is behind that.
	 * Need to verify but haven't come across any samples data with cbSize > 22
	 * -@JohanSmet!
	 */
	FAudio_assert(pSourceVoice->src.format->cbSize <= 22);

	/* Pick the extradata size first so that a single allocation of
	 * extradata_size + AV_INPUT_BUFFER_PADDING_SIZE bytes can be made.
	 * Codec lookup above already rejected every type other than the three
	 * handled here, so the final branch is XMA2.
	 */
	if (type == FAUDIO_FORMAT_WMAUDIO3)
	{
		av_ctx->extradata_size = pSourceVoice->src.format->cbSize;
	}
	else if (type == FAUDIO_FORMAT_WMAUDIO2)
	{
		av_ctx->extradata_size = 6;
	}
	else
	{
		av_ctx->extradata_size = 34;
	}

	extradataAlloc =
		(size_t) av_ctx->extradata_size +
		AV_INPUT_BUFFER_PADDING_SIZE;
	av_ctx->extradata = (uint8_t *) av_malloc(extradataAlloc);
	if (!av_ctx->extradata)
	{
		LOG_ERROR(pSourceVoice->audio, "%s", "extradata allocation failed!")
		goto fail_free_ctx;
	}

	/* Zero the payload and the padding bytes in one go */
	FAudio_zero(av_ctx->extradata, extradataAlloc);

	if (type == FAUDIO_FORMAT_WMAUDIO3)
	{
		FAudio_memcpy(
			av_ctx->extradata,
			&((FAudioWaveFormatExtensible*) pSourceVoice->src.format)->Samples,
			pSourceVoice->src.format->cbSize
		);
	}
	else if (type == FAUDIO_FORMAT_WMAUDIO2)
	{
		/* xWMA doesn't provide the extradata info that FFmpeg needs to
		 * decode WMA data, so we create some fake extradata. This is
		 * taken from <ffmpeg/libavformat/xwma.c>.
		 */
		av_ctx->extradata[4] = 31;
	}
	else
	{
		/* FFmpeg expects XMA2WAVEFORMATEX or XMA2WAVEFORMAT.
		 * For more info, check <ffmpeg/libavcodec/wmaprodec.c>. */
		av_ctx->extradata[1] = 1;
		av_ctx->extradata[5] = pSourceVoice->src.format->nChannels == 2 ? 3 : 0;
		av_ctx->extradata[31] = 4;
		av_ctx->extradata[33] = 1;
	}

	if (avcodec_open2(av_ctx, codec, NULL) < 0)
	{
		LOG_ERROR(pSourceVoice->audio, "%s", "avcodec_open2 failed!")
		goto fail_free_ctx;
	}

	/* The decode path copies 32-bit float samples verbatim, so any other
	 * output format cannot be handled and must not be accepted.
	 */
	if (	av_ctx->sample_fmt != AV_SAMPLE_FMT_FLT &&
		av_ctx->sample_fmt != AV_SAMPLE_FMT_FLTP	)
	{
		LOG_ERROR(pSourceVoice->audio, "%s", "Got non-float format!!!")
		FAudio_assert(0 && "Got non-float format!!!");
		goto fail_close_ctx;
	}

	av_frame = av_frame_alloc();
	if (!av_frame)
	{
		LOG_ERROR(pSourceVoice->audio, "%s", "av_frame_alloc failed!")
		goto fail_close_ctx;
	}

	ffmpeg = (FAudioFFmpeg *) pSourceVoice->audio->pMalloc(sizeof(FAudioFFmpeg));
	if (!ffmpeg)
	{
		LOG_ERROR(pSourceVoice->audio, "%s", "FAudioFFmpeg allocation failed!")
		goto fail_free_frame;
	}
	FAudio_zero(ffmpeg, sizeof(FAudioFFmpeg));

	ffmpeg->av_ctx = av_ctx;
	ffmpeg->av_frame = av_frame;
	pSourceVoice->src.ffmpeg = ffmpeg;
	LOG_FUNC_EXIT(pSourceVoice->audio)
	return 0;

	/* Failure unwinding, most recently acquired resource first */
fail_free_frame:
	av_frame_free(&av_frame);
fail_close_ctx:
	avcodec_close(av_ctx);
fail_free_ctx:
	av_free(av_ctx->extradata);
	av_free(av_ctx);
	LOG_FUNC_EXIT(pSourceVoice->audio)
	return FAUDIO_E_UNSUPPORTED_FORMAT;
}
|
|
|
|
|
|
/* Releases every resource held by the voice's FFmpeg state and clears
 * voice->src.ffmpeg. Does nothing when the voice has no FFmpeg state.
 */
void FAudio_FFMPEG_free(FAudioSourceVoice *voice)
{
	FAudioFFmpeg *ffmpeg = voice->src.ffmpeg;

	LOG_FUNC_ENTER(voice->audio)

	if (ffmpeg == NULL)
	{
		LOG_FUNC_EXIT(voice->audio)
		return;
	}

	/* The frame comes from av_frame_alloc() and has to be released with
	 * av_frame_free(); without this it leaks once per voice.
	 */
	av_frame_free(&ffmpeg->av_frame);

	avcodec_close(ffmpeg->av_ctx);
	av_free(ffmpeg->av_ctx->extradata);
	av_free(ffmpeg->av_ctx);

	voice->audio->pFree(ffmpeg->convertCache);
	voice->audio->pFree(ffmpeg->paddingBuffer);
	voice->audio->pFree(ffmpeg);
	voice->src.ffmpeg = NULL;

	LOG_FUNC_EXIT(voice->audio)
}
|
|
|
|
|
|
/* Grows the voice's convert cache so it can hold at least `samples` floats.
 * The cache never shrinks.
 *
 * On allocation failure the old cache is released and the cache state is
 * reset (convertCache == NULL, capacity/samples/offset == 0), so callers can
 * detect the failure by checking convertCache and a later call can retry.
 */
void FAudio_INTERNAL_ResizeConvertCache(FAudioVoice *voice, uint32_t samples)
{
	FAudioFFmpeg *ffmpeg = voice->src.ffmpeg;
	float *grown;

	LOG_FUNC_ENTER(voice->audio)
	if (samples > ffmpeg->convertCapacity)
	{
		grown = (float*) voice->audio->pRealloc(
			ffmpeg->convertCache,
			sizeof(float) * samples
		);
		if (grown == NULL)
		{
			/* A failed realloc leaves the old block allocated;
			 * drop it explicitly instead of losing the pointer.
			 */
			voice->audio->pFree(ffmpeg->convertCache);
			ffmpeg->convertCache = NULL;
			ffmpeg->convertCapacity = 0;
			ffmpeg->convertSamples = 0;
			ffmpeg->convertOffset = 0;
			FAudio_assert(0 && "Convert cache allocation failed!");
		}
		else
		{
			ffmpeg->convertCache = grown;
			ffmpeg->convertCapacity = samples;
		}
	}
	LOG_FUNC_EXIT(voice->audio)
}
|
|
|
|
|
|
/* Decodes the next frame of `buffer` into the voice's convert cache.
 *
 * Packets of nBlockAlign bytes are fed to the decoder, starting at
 * ffmpeg->encOffset, until it produces a frame. The frame is stored
 * interleaved in convertCache, convertSamples is set to the frame's
 * nb_samples and convertOffset is reset to 0.
 *
 * When no frame can be produced (buffer exhausted, decoder error, or
 * allocation failure) the cache is left empty: convertSamples and
 * convertOffset are both 0.
 */
void FAudio_INTERNAL_FillConvertCache(FAudioVoice *voice, FAudioBuffer *buffer)
{
	FAudioFFmpeg *ffmpeg = voice->src.ffmpeg;
	AVPacket avpkt = {0};
	int averr;
	int gotFrame = 0;
	uint32_t total_samples;

	LOG_FUNC_ENTER(voice->audio)

	avpkt.size = voice->src.format->nBlockAlign;
	avpkt.data = (unsigned char *) buffer->pAudioData + ffmpeg->encOffset;

	for(;;)
	{
		averr = avcodec_receive_frame(ffmpeg->av_ctx, ffmpeg->av_frame);
		if (averr == 0)
		{
			gotFrame = 1;
			break;
		}
		if (averr != AVERROR(EAGAIN))
		{
			LOG_ERROR(
				voice->audio,
				"avcodec_receive_frame failed: %d",
				averr
			)
			FAudio_assert(0 && "avcodec_receive_frame failed" && averr);
			break;
		}

		/* ffmpeg needs more data to decode */
		avpkt.pts = avpkt.dts = AV_NOPTS_VALUE;

		if (ffmpeg->encOffset >= buffer->AudioBytes)
		{
			/* no more data in this buffer */
			break;
		}

		if (ffmpeg->encOffset + avpkt.size + AV_INPUT_BUFFER_PADDING_SIZE > buffer->AudioBytes)
		{
			/* Unfortunately, the FFmpeg API requires that a number of
			 * extra bytes must be available past the end of the buffer.
			 * The xaudio2 client probably hasn't done this, so we have to
			 * perform a copy near the end of the buffer. */
			size_t remain = buffer->AudioBytes - ffmpeg->encOffset;

			/* The packet handed to the decoder is always avpkt.size
			 * bytes long, even when fewer bytes remain, so the
			 * scratch copy has to cover the larger of the two plus
			 * the padding.
			 */
			size_t payload = (remain > (size_t) avpkt.size) ?
				remain :
				(size_t) avpkt.size;
			size_t needed = payload + AV_INPUT_BUFFER_PADDING_SIZE;

			if (ffmpeg->paddingBytes < needed)
			{
				uint8_t *grown = (uint8_t *) voice->audio->pRealloc(
					ffmpeg->paddingBuffer,
					needed
				);
				if (grown == NULL)
				{
					/* Old buffer is still valid and stays owned */
					FAudio_assert(0 && "Padding buffer allocation failed!");
					break;
				}
				ffmpeg->paddingBuffer = grown;
				ffmpeg->paddingBytes = needed;
			}
			FAudio_memcpy(ffmpeg->paddingBuffer, buffer->pAudioData + ffmpeg->encOffset, remain);
			/* Zero everything after the real data: any missing packet
			 * bytes as well as the padding.
			 */
			FAudio_zero(ffmpeg->paddingBuffer + remain, needed - remain);
			avpkt.data = ffmpeg->paddingBuffer;
		}

		averr = avcodec_send_packet(ffmpeg->av_ctx, &avpkt);
		if (averr)
		{
			FAudio_assert(0 && "avcodec_send_packet failed" && averr);
			break;
		}

		ffmpeg->encOffset += avpkt.size;
		avpkt.data += avpkt.size;

		/* data sent, try receive again */
	}

	if (!gotFrame)
	{
		/* Nothing was decoded; make sure no stale samples are replayed */
		ffmpeg->convertSamples = 0;
		ffmpeg->convertOffset = 0;
		LOG_FUNC_EXIT(voice->audio)
		return;
	}

	/* copy decoded samples to internal buffer, reordering if necessary */
	total_samples = ffmpeg->av_frame->nb_samples * ffmpeg->av_ctx->channels;

	FAudio_INTERNAL_ResizeConvertCache(voice, total_samples);
	if (ffmpeg->convertCache == NULL)
	{
		/* The cache could not be allocated; report an empty cache */
		ffmpeg->convertCapacity = 0;
		ffmpeg->convertSamples = 0;
		ffmpeg->convertOffset = 0;
		LOG_FUNC_EXIT(voice->audio)
		return;
	}

	if (av_sample_fmt_is_planar(ffmpeg->av_ctx->sample_fmt))
	{
		int32_t s, c;
		/* extended_data holds one plane pointer per channel, while
		 * data[] has a fixed number of entries.
		 */
		uint8_t **src = ffmpeg->av_frame->extended_data;
		float *dst = ffmpeg->convertCache;

		/* Interleave: one sample from every channel plane in turn */
		for(s = 0; s < ffmpeg->av_frame->nb_samples; ++s)
		{
			for(c = 0; c < ffmpeg->av_ctx->channels; ++c)
			{
				*dst++ = ((const float *) src[c])[s];
			}
		}
	}
	else
	{
		FAudio_memcpy(
			ffmpeg->convertCache,
			ffmpeg->av_frame->data[0],
			total_samples * sizeof(float)
		);
	}

	ffmpeg->convertSamples = ffmpeg->av_frame->nb_samples;
	ffmpeg->convertOffset = 0;
	LOG_FUNC_EXIT(voice->audio)
}
|
|
|
|
|
|
void FAudio_INTERNAL_DecodeFFMPEG(
|
|
|
FAudioVoice *voice,
|
|
|
FAudioBuffer *buffer,
|
|
|
float *decodeCache,
|
|
|
uint32_t samples
|
|
|
) {
|
|
|
FAudioFFmpeg *ffmpeg = voice->src.ffmpeg;
|
|
|
uint32_t decSampleSize = voice->src.format->nChannels * voice->src.format->wBitsPerSample / 8;
|
|
|
uint32_t outSampleSize = voice->src.format->nChannels * sizeof(float);
|
|
|
uint32_t done = 0, available, todo, cumulative;
|
|
|
uint32_t reseek = 0;
|
|
|
|
|
|
LOG_FUNC_ENTER(voice->audio)
|
|
|
|
|
|
/* check if we need to reposition in the stream */
|
|
|
if (voice->src.curBufferOffset < ffmpeg->decOffset)
|
|
|
{
|
|
|
/* If curBufferOffset is behind, it's because we had to do some
|
|
|
* padding, which should not affect the stream offset. To fix,
|
|
|
* we simply rewind by a couple samples. Pretty safe if it doesn't
|
|
|
* cross back into the previous decoded block.
|
|
|
*/
|
|
|
uint32_t delta = ffmpeg->decOffset - voice->src.curBufferOffset;
|
|
|
|
|
|
if (ffmpeg->convertOffset >= delta)
|
|
|
{
|
|
|
ffmpeg->convertOffset -= delta;
|
|
|
ffmpeg->decOffset = voice->src.curBufferOffset;
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
reseek = 1;
|
|
|
}
|
|
|
}
|
|
|
else if (voice->src.curBufferOffset > ffmpeg->decOffset)
|
|
|
{
|
|
|
/* If we're starting in the middle, we have to seek to the
|
|
|
* starting position. AFAIK this shouldn't happen mid-stream.
|
|
|
*/
|
|
|
reseek = 1;
|
|
|
}
|
|
|
|
|
|
if (reseek)
|
|
|
{
|
|
|
FAudioBufferWMA *bufferWMA = &voice->src.bufferList->bufferWMA;
|
|
|
uint32_t byteOffset = voice->src.curBufferOffset * decSampleSize;
|
|
|
uint32_t packetIdx = bufferWMA->PacketCount - 1;
|
|
|
|
|
|
/* figure out in which encoded packet has this position */
|
|
|
while (packetIdx > 0 && bufferWMA->pDecodedPacketCumulativeBytes[packetIdx] > byteOffset)
|
|
|
{
|
|
|
packetIdx -= 1;
|
|
|
}
|
|
|
|
|
|
if (packetIdx == 0)
|
|
|
{
|
|
|
cumulative = 0;
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
cumulative = bufferWMA->pDecodedPacketCumulativeBytes[packetIdx - 1];
|
|
|
}
|
|
|
|
|
|
/* seek to the wanted position in the stream */
|
|
|
ffmpeg->encOffset = packetIdx * voice->src.format->nBlockAlign;
|
|
|
FAudio_INTERNAL_FillConvertCache(voice, buffer);
|
|
|
ffmpeg->convertOffset = (byteOffset - cumulative) / outSampleSize;
|
|
|
ffmpeg->decOffset = voice->src.curBufferOffset;
|
|
|
}
|
|
|
|
|
|
while (done < samples)
|
|
|
{
|
|
|
/* check for available data in decoded cache, refill if necessary */
|
|
|
if (ffmpeg->convertOffset >= ffmpeg->convertSamples)
|
|
|
{
|
|
|
FAudio_INTERNAL_FillConvertCache(voice, buffer);
|
|
|
}
|
|
|
|
|
|
available = ffmpeg->convertSamples - ffmpeg->convertOffset;
|
|
|
if (available <= 0)
|
|
|
{
|
|
|
break;
|
|
|
}
|
|
|
|
|
|
todo = FAudio_min(available, samples - done);
|
|
|
FAudio_memcpy(
|
|
|
decodeCache + (done * voice->src.format->nChannels),
|
|
|
ffmpeg->convertCache + (ffmpeg->convertOffset * voice->src.format->nChannels),
|
|
|
todo * voice->src.format->nChannels * sizeof(float)
|
|
|
);
|
|
|
|
|
|
done += todo;
|
|
|
ffmpeg->convertOffset += todo;
|
|
|
}
|
|
|
|
|
|
/* FIXME: This block should not be here! */
|
|
|
if (done < samples)
|
|
|
{
|
|
|
FAudio_zero(
|
|
|
decodeCache + (done * voice->src.format->nChannels),
|
|
|
(samples - done) * voice->src.format->nChannels * sizeof(float)
|
|
|
);
|
|
|
}
|
|
|
|
|
|
ffmpeg->decOffset += samples;
|
|
|
LOG_FUNC_EXIT(voice->audio)
|
|
|
}
|
|
|
|
|
|
#else
|
|
|
|
|
|
extern int this_tu_is_empty;
|
|
|
|
|
|
#endif /* HAVE_FFMPEG */
|
|
|
|
|
|
/* vim: set noexpandtab shiftwidth=8 tabstop=8: */
|
|
|
|