Clean up WavExtractor PCM output

- Make the extractor output samples that are uniformly distributed
  with respect to time, targeting ~10 samples per second. The old
  approach could in theory put every frame into its own sample,
  which would be very inefficient downstream because each sample
  would need to be passed individually to MediaCodec. It could
  also put data spanning a long duration into a single sample
  (e.g. if the sample rate of the content is low), which is bad
  downstream because the decodeOnly flag is decided on a
  per-sample basis. More generally, the new approach is more
  predictable :).

- Stop using the WavSeekMap to get sample timestamps, and instead
  calculate them directly from the number of frames output. This
  is more obviously correct, particularly for data formats like
  IMA ADPCM where we'll need to adjust the data prior to output
  (see the sketch below).
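
For reference, a minimal sketch of the two calculations described
above, mirroring the PcmOutputWriter changes in the diff below
(header, startTimeUs and outputFrameCount are fields of that class):

    // Each sample groups enough frames to cover ~1/TARGET_SAMPLES_PER_SECOND
    // of media time, but never less than one frame (relevant when the frame
    // rate of the content is very low).
    int bytesPerFrame = header.blockSize;
    int targetSampleSize =
        Math.max(bytesPerFrame, header.frameRateHz * bytesPerFrame / TARGET_SAMPLES_PER_SECOND);

    // Sample timestamps are derived from the number of frames already output,
    // rather than from the byte position via WavSeekMap.getTimeUs(position).
    long timeUs =
        startTimeUs
            + Util.scaleLargeTimestamp(outputFrameCount, C.MICROS_PER_SECOND, header.frameRateHz);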

PiperOrigin-RevId: 285750010
olly 2019-12-16 13:05:09 +00:00 committed by Oliver Woodman
parent 27b06e9ad9
commit 7bd912f895
8 changed files with 168 additions and 92 deletions

View File

@@ -28,6 +28,7 @@ import com.google.android.exoplayer2.extractor.PositionHolder;
import com.google.android.exoplayer2.extractor.TrackOutput;
import com.google.android.exoplayer2.util.Assertions;
import com.google.android.exoplayer2.util.MimeTypes;
import com.google.android.exoplayer2.util.Util;
import java.io.IOException;
/**
@@ -35,8 +36,12 @@ import java.io.IOException;
*/
public final class WavExtractor implements Extractor {
/** Arbitrary maximum sample size of 32KB, which is ~170ms of 16-bit stereo PCM audio at 48KHz. */
private static final int MAX_SAMPLE_SIZE = 32 * 1024;
/**
* When outputting PCM data to a {@link TrackOutput}, we can choose how many frames are grouped
* into each sample, and hence each sample's duration. This is the target number of samples to
* output for each second of media, meaning that each sample will have a duration of ~100ms.
*/
private static final int TARGET_SAMPLES_PER_SECOND = 10;
/** Factory for {@link WavExtractor} instances. */
public static final ExtractorsFactory FACTORY = () -> new Extractor[] {new WavExtractor()};
@@ -67,7 +72,7 @@ public final class WavExtractor implements Extractor {
@Override
public void seek(long position, long timeUs) {
if (outputWriter != null) {
outputWriter.reset();
outputWriter.reset(timeUs);
}
}
@@ -105,18 +110,18 @@ public final class WavExtractor implements Extractor {
Assertions.checkState(dataEndPosition != C.POSITION_UNSET);
long bytesLeft = dataEndPosition - input.getPosition();
if (bytesLeft <= 0) {
return Extractor.RESULT_END_OF_INPUT;
}
return outputWriter.sampleData(input, bytesLeft) ? RESULT_CONTINUE : RESULT_END_OF_INPUT;
return outputWriter.sampleData(input, bytesLeft) ? RESULT_END_OF_INPUT : RESULT_CONTINUE;
}
/** Writes to the extractor's output. */
private interface OutputWriter {
/** Resets the writer. */
void reset();
/**
* Resets the writer.
*
* @param timeUs The new start position in microseconds.
*/
void reset(long timeUs);
/**
* Initializes the writer.
@@ -137,7 +142,7 @@ public final class WavExtractor implements Extractor {
*
* @param input The input from which to read.
* @param bytesLeft The number of sample data bytes left to be read from the input.
* @return True if data was consumed. False if the end of the stream has been reached.
* @return Whether the end of the sample data has been reached.
* @throws IOException If an error occurs reading from the input.
* @throws InterruptedException If the thread has been interrupted.
*/
@@ -151,8 +156,10 @@ public final class WavExtractor implements Extractor {
private final TrackOutput trackOutput;
private final WavHeader header;
private final @C.PcmEncoding int pcmEncoding;
private final int targetSampleSize;
private WavSeekMap seekMap;
private long startTimeUs;
private long outputFrameCount;
private int pendingBytes;
public PcmOutputWriter(
@@ -164,26 +171,31 @@ public final class WavExtractor implements Extractor {
this.trackOutput = trackOutput;
this.header = header;
this.pcmEncoding = pcmEncoding;
// For PCM, blocks correspond to single frames. This is validated in init(int, long).
int bytesPerFrame = header.blockSize;
targetSampleSize =
Math.max(bytesPerFrame, header.frameRateHz * bytesPerFrame / TARGET_SAMPLES_PER_SECOND);
}
@Override
public void reset() {
public void reset(long timeUs) {
startTimeUs = timeUs;
outputFrameCount = 0;
pendingBytes = 0;
}
@Override
public void init(int dataStartPosition, long dataEndPosition) throws ParserException {
// Validate the header.
int expectedBytesPerFrame = header.numChannels * header.bitsPerSample / 8;
if (header.blockAlign != expectedBytesPerFrame) {
int bytesPerFrame = header.numChannels * header.bitsPerSample / 8;
if (header.blockSize != bytesPerFrame) {
throw new ParserException(
"Expected block alignment: " + expectedBytesPerFrame + "; got: " + header.blockAlign);
"Expected block size: " + bytesPerFrame + "; got: " + header.blockSize);
}
// Output the seek map.
seekMap =
new WavSeekMap(header, /* samplesPerBlock= */ 1, dataStartPosition, dataEndPosition);
extractorOutput.seekMap(seekMap);
extractorOutput.seekMap(
new WavSeekMap(header, /* framesPerBlock= */ 1, dataStartPosition, dataEndPosition));
// Output the format.
Format format =
@@ -192,9 +204,9 @@ public final class WavExtractor implements Extractor {
MimeTypes.AUDIO_RAW,
/* codecs= */ null,
/* bitrate= */ header.averageBytesPerSecond * 8,
MAX_SAMPLE_SIZE,
targetSampleSize,
header.numChannels,
header.sampleRateHz,
header.frameRateHz,
pcmEncoding,
/* initializationData= */ null,
/* drmInitData= */ null,
@@ -206,25 +218,36 @@ public final class WavExtractor implements Extractor {
@Override
public boolean sampleData(ExtractorInput input, long bytesLeft)
throws IOException, InterruptedException {
int maxBytesToRead = (int) Math.min(MAX_SAMPLE_SIZE - pendingBytes, bytesLeft);
int numBytesAppended = trackOutput.sampleData(input, maxBytesToRead, true);
boolean wereBytesAppended = numBytesAppended != RESULT_END_OF_INPUT;
if (wereBytesAppended) {
pendingBytes += numBytesAppended;
// Write sample data until we've reached the target sample size, or the end of the data.
boolean endOfSampleData = bytesLeft == 0;
while (!endOfSampleData && pendingBytes < targetSampleSize) {
int bytesToRead = (int) Math.min(targetSampleSize - pendingBytes, bytesLeft);
int bytesAppended = trackOutput.sampleData(input, bytesToRead, true);
if (bytesAppended == RESULT_END_OF_INPUT) {
endOfSampleData = true;
} else {
pendingBytes += bytesAppended;
}
}
// blockAlign is the frame size, and samples must consist of a whole number of frames.
int bytesPerFrame = header.blockAlign;
// Write the corresponding sample metadata. Samples must be a whole number of frames. It's
// possible pendingBytes is not a whole number of frames if the stream ended unexpectedly.
int bytesPerFrame = header.blockSize;
int pendingFrames = pendingBytes / bytesPerFrame;
if (pendingFrames > 0) {
long timeUs = seekMap.getTimeUs(input.getPosition() - pendingBytes);
long timeUs =
startTimeUs
+ Util.scaleLargeTimestamp(
outputFrameCount, C.MICROS_PER_SECOND, header.frameRateHz);
int size = pendingFrames * bytesPerFrame;
pendingBytes -= size;
int offset = pendingBytes - size;
trackOutput.sampleMetadata(
timeUs, C.BUFFER_FLAG_KEY_FRAME, size, pendingBytes, /* encryptionData= */ null);
timeUs, C.BUFFER_FLAG_KEY_FRAME, size, offset, /* encryptionData= */ null);
outputFrameCount += pendingFrames;
pendingBytes = offset;
}
return wereBytesAppended;
return endOfSampleData;
}
}
}

View File

@@ -26,11 +26,11 @@ package com.google.android.exoplayer2.extractor.wav;
/** The number of channels. */
public final int numChannels;
/** The sample rate in Hertz. */
public final int sampleRateHz;
public final int frameRateHz;
/** The average bytes per second for the sample data. */
public final int averageBytesPerSecond;
/** The block size in bytes. */
public final int blockAlign;
public final int blockSize;
/** Bits per sample for a single channel. */
public final int bitsPerSample;
/** Extra data appended to the format chunk of the header. */
@@ -39,16 +39,16 @@ package com.google.android.exoplayer2.extractor.wav;
public WavHeader(
int formatType,
int numChannels,
int sampleRateHz,
int frameRateHz,
int averageBytesPerSecond,
int bytesPerFrame,
int blockSize,
int bitsPerSample,
byte[] extraData) {
this.formatType = formatType;
this.numChannels = numChannels;
this.sampleRateHz = sampleRateHz;
this.frameRateHz = frameRateHz;
this.averageBytesPerSecond = averageBytesPerSecond;
this.blockAlign = bytesPerFrame;
this.blockSize = blockSize;
this.bitsPerSample = bitsPerSample;
this.extraData = extraData;
}

View File

@@ -75,9 +75,9 @@ import java.io.IOException;
scratch.setPosition(0);
int audioFormatType = scratch.readLittleEndianUnsignedShort();
int numChannels = scratch.readLittleEndianUnsignedShort();
int sampleRateHz = scratch.readLittleEndianUnsignedIntToInt();
int frameRateHz = scratch.readLittleEndianUnsignedIntToInt();
int averageBytesPerSecond = scratch.readLittleEndianUnsignedIntToInt();
int blockAlignment = scratch.readLittleEndianUnsignedShort();
int blockSize = scratch.readLittleEndianUnsignedShort();
int bitsPerSample = scratch.readLittleEndianUnsignedShort();
int bytesLeft = (int) chunkHeader.size - 16;
@@ -92,9 +92,9 @@ import java.io.IOException;
return new WavHeader(
audioFormatType,
numChannels,
sampleRateHz,
frameRateHz,
averageBytesPerSecond,
blockAlignment,
blockSize,
bitsPerSample,
extraData);
}

View File

@@ -22,21 +22,19 @@ import com.google.android.exoplayer2.util.Util;
/* package */ final class WavSeekMap implements SeekMap {
/** The WAV header for the stream. */
private final WavHeader wavHeader;
/** Number of samples in each block. */
private final int samplesPerBlock;
/** Position of the start of the sample data, in bytes. */
private final long dataStartPosition;
/** Position of the end of the sample data (exclusive), in bytes. */
private final long dataEndPosition;
private final int framesPerBlock;
private final long firstBlockPosition;
private final long blockCount;
private final long durationUs;
public WavSeekMap(
WavHeader wavHeader, int samplesPerBlock, long dataStartPosition, long dataEndPosition) {
WavHeader wavHeader, int framesPerBlock, long dataStartPosition, long dataEndPosition) {
this.wavHeader = wavHeader;
this.samplesPerBlock = samplesPerBlock;
this.dataStartPosition = dataStartPosition;
this.dataEndPosition = dataEndPosition;
this.framesPerBlock = framesPerBlock;
this.firstBlockPosition = dataStartPosition;
this.blockCount = (dataEndPosition - dataStartPosition) / wavHeader.blockSize;
durationUs = blockIndexToTimeUs(blockCount);
}
@Override
@@ -46,38 +44,33 @@ import com.google.android.exoplayer2.util.Util;
@Override
public long getDurationUs() {
long numBlocks = (dataEndPosition - dataStartPosition) / wavHeader.blockAlign;
return numBlocks * samplesPerBlock * C.MICROS_PER_SECOND / wavHeader.sampleRateHz;
return durationUs;
}
@Override
public SeekPoints getSeekPoints(long timeUs) {
long blockAlign = wavHeader.blockAlign;
long dataSize = dataEndPosition - dataStartPosition;
// Calculate the expected number of bytes of sample data corresponding to the requested time.
long positionOffset = (timeUs * wavHeader.averageBytesPerSecond) / C.MICROS_PER_SECOND;
// Constrain to nearest preceding frame offset.
positionOffset = (positionOffset / blockAlign) * blockAlign;
positionOffset = Util.constrainValue(positionOffset, 0, dataSize - blockAlign);
long seekPosition = dataStartPosition + positionOffset;
long seekTimeUs = getTimeUs(seekPosition);
// Calculate the containing block index, constraining to valid indices.
long blockSize = wavHeader.blockSize;
long blockIndex = Util.constrainValue(positionOffset / blockSize, 0, blockCount - 1);
long seekPosition = firstBlockPosition + (blockIndex * blockSize);
long seekTimeUs = blockIndexToTimeUs(blockIndex);
SeekPoint seekPoint = new SeekPoint(seekTimeUs, seekPosition);
if (seekTimeUs >= timeUs || positionOffset == dataSize - blockAlign) {
if (seekTimeUs >= timeUs || blockIndex == blockCount - 1) {
return new SeekPoints(seekPoint);
} else {
long secondSeekPosition = seekPosition + blockAlign;
long secondSeekTimeUs = getTimeUs(secondSeekPosition);
long secondBlockIndex = blockIndex + 1;
long secondSeekPosition = firstBlockPosition + (secondBlockIndex * blockSize);
long secondSeekTimeUs = blockIndexToTimeUs(secondBlockIndex);
SeekPoint secondSeekPoint = new SeekPoint(secondSeekTimeUs, secondSeekPosition);
return new SeekPoints(seekPoint, secondSeekPoint);
}
}
/**
* Returns the time in microseconds for the given position in bytes.
*
* @param position The position in bytes.
*/
public long getTimeUs(long position) {
long positionOffset = Math.max(0, position - dataStartPosition);
return (positionOffset * C.MICROS_PER_SECOND) / wavHeader.averageBytesPerSecond;
private long blockIndexToTimeUs(long blockIndex) {
return Util.scaleLargeTimestamp(
blockIndex * framesPerBlock, C.MICROS_PER_SECOND, wavHeader.frameRateHz);
}
}

View File

@@ -9,7 +9,7 @@ track 0:
id = null
containerMimeType = null
sampleMimeType = audio/raw
maxInputSize = 32768
maxInputSize = 8820
width = -1
height = -1
frameRate = -1.0
@@ -27,17 +27,45 @@ track 0:
metadata = null
initializationData:
total output bytes = 88200
sample count = 3
sample count = 10
sample 0:
time = 0
flags = 1
data = length 32768, hash 9A8CEEBA
data = length 8820, hash FAE27E28
sample 1:
time = 371519
time = 100000
flags = 1
data = length 32768, hash C1717317
data = length 8820, hash 21C3E9C3
sample 2:
time = 743038
time = 200000
flags = 1
data = length 22664, hash 819F5F62
data = length 8820, hash B51AD902
sample 3:
time = 300000
flags = 1
data = length 8820, hash 2F4B2CB4
sample 4:
time = 400000
flags = 1
data = length 8820, hash F0030CC2
sample 5:
time = 500000
flags = 1
data = length 8820, hash FF83DA46
sample 6:
time = 600000
flags = 1
data = length 8820, hash 685C1AB5
sample 7:
time = 700000
flags = 1
data = length 8820, hash BE63D51C
sample 8:
time = 800000
flags = 1
data = length 8820, hash 1E44EB8E
sample 9:
time = 900000
flags = 1
data = length 8820, hash 57C41232
tracksEnded = true
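
The new maxInputSize and the 100ms spacing of the sample times above follow
directly from the target sample size calculation; a minimal sanity check, using
only values visible in this dump (88200 output bytes covering one second of audio):

    int targetSamplesPerSecond = 10;
    int bytesPerSecond = 88200;                                      // frameRateHz * bytesPerFrame
    int targetSampleSize = bytesPerSecond / targetSamplesPerSecond;  // 8820 -> maxInputSize
    long sampleDurationUs = 1_000_000L / targetSamplesPerSecond;     // 100000 -> time between samples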

View File

@@ -9,7 +9,7 @@ track 0:
id = null
containerMimeType = null
sampleMimeType = audio/raw
maxInputSize = 32768
maxInputSize = 8820
width = -1
height = -1
frameRate = -1.0
@@ -27,13 +27,33 @@ track 0:
metadata = null
initializationData:
total output bytes = 58802
sample count = 2
sample count = 7
sample 0:
time = 333310
time = 333333
flags = 1
data = length 32768, hash 42D6E860
data = length 8820, hash 31868A21
sample 1:
time = 704829
time = 433333
flags = 1
data = length 26034, hash 62692C38
data = length 8820, hash AE3D77A2
sample 2:
time = 533333
flags = 1
data = length 8820, hash 966140CE
sample 3:
time = 633333
flags = 1
data = length 8820, hash CB405D7B
sample 4:
time = 733333
flags = 1
data = length 8820, hash 733BA3E6
sample 5:
time = 833333
flags = 1
data = length 8820, hash 7595D752
sample 6:
time = 933333
flags = 1
data = length 5882, hash C617B719
tracksEnded = true

View File

@@ -9,7 +9,7 @@ track 0:
id = null
containerMimeType = null
sampleMimeType = audio/raw
maxInputSize = 32768
maxInputSize = 8820
width = -1
height = -1
frameRate = -1.0
@@ -27,9 +27,21 @@ track 0:
metadata = null
initializationData:
total output bytes = 29402
sample count = 1
sample count = 4
sample 0:
time = 666643
time = 666666
flags = 1
data = length 29402, hash 4241604E
data = length 8820, hash D6617E20
sample 1:
time = 766666
flags = 1
data = length 8820, hash 28C74B7A
sample 2:
time = 866666
flags = 1
data = length 8820, hash 680DEFC7
sample 3:
time = 966666
flags = 1
data = length 2942, hash 1D063CF0
tracksEnded = true

View File

@@ -9,7 +9,7 @@ track 0:
id = null
containerMimeType = null
sampleMimeType = audio/raw
maxInputSize = 32768
maxInputSize = 8820
width = -1
height = -1
frameRate = -1.0
@@ -29,7 +29,7 @@ track 0:
total output bytes = 2
sample count = 1
sample 0:
time = 999977
time = 1000000
flags = 1
data = length 2, hash 116
tracksEnded = true