Fix playback of MKV audio tracks that use default channel/frequency values.

Also do some significant cleanup to the extractor.
This commit is contained in:
Oliver Woodman 2015-09-01 14:00:37 +01:00
parent 64c0e5c997
commit 530aa265ff

View File

@ -158,9 +158,9 @@ public final class WebmExtractor implements Extractor {
private long durationTimecode = C.UNKNOWN_TIME_US; private long durationTimecode = C.UNKNOWN_TIME_US;
private long durationUs = C.UNKNOWN_TIME_US; private long durationUs = C.UNKNOWN_TIME_US;
private TrackFormat trackFormat; // Used to store the last seen track. private Track currentTrack;
private TrackFormat audioTrackFormat; private Track audioTrack;
private TrackFormat videoTrackFormat; private Track videoTrack;
private boolean sentDrmInitData; private boolean sentDrmInitData;
@ -353,10 +353,10 @@ public final class WebmExtractor implements Extractor {
// TODO: check and fail if more than one content encoding is present. // TODO: check and fail if more than one content encoding is present.
return; return;
case ID_CONTENT_ENCRYPTION: case ID_CONTENT_ENCRYPTION:
trackFormat.hasContentEncryption = true; currentTrack.hasContentEncryption = true;
return; return;
case ID_TRACK_ENTRY: case ID_TRACK_ENTRY:
trackFormat = new TrackFormat(); currentTrack = new Track();
return; return;
default: default:
return; return;
@ -399,53 +399,48 @@ public final class WebmExtractor implements Extractor {
if (!sampleSeenReferenceBlock) { if (!sampleSeenReferenceBlock) {
blockFlags |= C.SAMPLE_FLAG_SYNC; blockFlags |= C.SAMPLE_FLAG_SYNC;
} }
outputSampleMetadata( outputSampleMetadata((audioTrack != null && blockTrackNumber == audioTrack.number)
(audioTrackFormat != null && blockTrackNumber == audioTrackFormat.number) ? audioTrack.output : videoTrack.output, blockTimeUs);
? audioTrackFormat.trackOutput : videoTrackFormat.trackOutput, blockTimeUs);
blockState = BLOCK_STATE_START; blockState = BLOCK_STATE_START;
return; return;
case ID_CONTENT_ENCODING: case ID_CONTENT_ENCODING:
if (trackFormat.hasContentEncryption) { if (currentTrack.hasContentEncryption) {
if (trackFormat.encryptionKeyId == null) { if (currentTrack.encryptionKeyId == null) {
throw new ParserException("Encrypted Track found but ContentEncKeyID was not found"); throw new ParserException("Encrypted Track found but ContentEncKeyID was not found");
} }
if (!sentDrmInitData) { if (!sentDrmInitData) {
extractorOutput.drmInitData( extractorOutput.drmInitData(
new DrmInitData.Universal(MimeTypes.VIDEO_WEBM, trackFormat.encryptionKeyId)); new DrmInitData.Universal(MimeTypes.VIDEO_WEBM, currentTrack.encryptionKeyId));
sentDrmInitData = true; sentDrmInitData = true;
} }
} }
return; return;
case ID_CONTENT_ENCODINGS: case ID_CONTENT_ENCODINGS:
if (trackFormat.hasContentEncryption && trackFormat.sampleStrippedBytes != null) { if (currentTrack.hasContentEncryption && currentTrack.sampleStrippedBytes != null) {
throw new ParserException("Combining encryption and compression is not supported"); throw new ParserException("Combining encryption and compression is not supported");
} }
return; return;
case ID_TRACK_ENTRY: case ID_TRACK_ENTRY:
if (trackFormat.number == UNKNOWN || trackFormat.type == UNKNOWN) { if ((currentTrack.type == TRACK_TYPE_AUDIO && audioTrack != null)
throw new ParserException("Mandatory element TrackNumber or TrackType not found"); || (currentTrack.type == TRACK_TYPE_VIDEO && videoTrack != null)) {
}
if ((trackFormat.type == TRACK_TYPE_AUDIO && audioTrackFormat != null)
|| (trackFormat.type == TRACK_TYPE_VIDEO && videoTrackFormat != null)) {
// There is more than 1 audio/video track. Ignore everything but the first. // There is more than 1 audio/video track. Ignore everything but the first.
trackFormat = null; currentTrack = null;
return; return;
} }
if (trackFormat.type == TRACK_TYPE_AUDIO && isCodecSupported(trackFormat.codecId)) { if (currentTrack.type == TRACK_TYPE_AUDIO && isCodecSupported(currentTrack.codecId)) {
audioTrackFormat = trackFormat; audioTrack = currentTrack;
audioTrackFormat.trackOutput = extractorOutput.track(audioTrackFormat.number); audioTrack.initializeOutput(extractorOutput, durationUs);
audioTrackFormat.trackOutput.format(audioTrackFormat.getMediaFormat(durationUs)); } else if (currentTrack.type == TRACK_TYPE_VIDEO
} else if (trackFormat.type == TRACK_TYPE_VIDEO && isCodecSupported(trackFormat.codecId)) { && isCodecSupported(currentTrack.codecId)) {
videoTrackFormat = trackFormat; videoTrack = currentTrack;
videoTrackFormat.trackOutput = extractorOutput.track(videoTrackFormat.number); videoTrack.initializeOutput(extractorOutput, durationUs);
videoTrackFormat.trackOutput.format(videoTrackFormat.getMediaFormat(durationUs));
} else { } else {
// Unsupported track type. Do nothing. // Unsupported track type. Do nothing.
} }
trackFormat = null; currentTrack = null;
return; return;
case ID_TRACKS: case ID_TRACKS:
if (videoTrackFormat == null && audioTrackFormat == null) { if (videoTrack == null && audioTrack == null) {
throw new ParserException("No valid tracks were found"); throw new ParserException("No valid tracks were found");
} }
extractorOutput.endTracks(); extractorOutput.endTracks();
@ -478,28 +473,28 @@ public final class WebmExtractor implements Extractor {
timecodeScale = value; timecodeScale = value;
return; return;
case ID_PIXEL_WIDTH: case ID_PIXEL_WIDTH:
trackFormat.pixelWidth = (int) value; currentTrack.width = (int) value;
return; return;
case ID_PIXEL_HEIGHT: case ID_PIXEL_HEIGHT:
trackFormat.pixelHeight = (int) value; currentTrack.height = (int) value;
return; return;
case ID_TRACK_NUMBER: case ID_TRACK_NUMBER:
trackFormat.number = (int) value; currentTrack.number = (int) value;
return; return;
case ID_TRACK_TYPE: case ID_TRACK_TYPE:
trackFormat.type = (int) value; currentTrack.type = (int) value;
return; return;
case ID_DEFAULT_DURATION: case ID_DEFAULT_DURATION:
trackFormat.defaultSampleDurationNs = (int) value; currentTrack.defaultSampleDurationNs = (int) value;
break; break;
case ID_CODEC_DELAY: case ID_CODEC_DELAY:
trackFormat.codecDelayNs = value; currentTrack.codecDelayNs = value;
return; return;
case ID_SEEK_PRE_ROLL: case ID_SEEK_PRE_ROLL:
trackFormat.seekPreRollNs = value; currentTrack.seekPreRollNs = value;
return; return;
case ID_CHANNELS: case ID_CHANNELS:
trackFormat.channelCount = (int) value; currentTrack.channelCount = (int) value;
return; return;
case ID_REFERENCE_BLOCK: case ID_REFERENCE_BLOCK:
sampleSeenReferenceBlock = true; sampleSeenReferenceBlock = true;
@ -560,7 +555,7 @@ public final class WebmExtractor implements Extractor {
durationTimecode = (long) value; durationTimecode = (long) value;
return; return;
case ID_SAMPLING_FREQUENCY: case ID_SAMPLING_FREQUENCY:
trackFormat.sampleRate = (int) value; currentTrack.sampleRate = (int) value;
return; return;
default: default:
return; return;
@ -576,7 +571,7 @@ public final class WebmExtractor implements Extractor {
} }
return; return;
case ID_CODEC_ID: case ID_CODEC_ID:
trackFormat.codecId = value; currentTrack.codecId = value;
return; return;
default: default:
return; return;
@ -593,17 +588,17 @@ public final class WebmExtractor implements Extractor {
seekEntryId = (int) seekEntryIdBytes.readUnsignedInt(); seekEntryId = (int) seekEntryIdBytes.readUnsignedInt();
return; return;
case ID_CODEC_PRIVATE: case ID_CODEC_PRIVATE:
trackFormat.codecPrivate = new byte[contentSize]; currentTrack.codecPrivate = new byte[contentSize];
input.readFully(trackFormat.codecPrivate, 0, contentSize); input.readFully(currentTrack.codecPrivate, 0, contentSize);
return; return;
case ID_CONTENT_COMPRESSION_SETTINGS: case ID_CONTENT_COMPRESSION_SETTINGS:
// This extractor only supports header stripping, so the payload is the stripped bytes. // This extractor only supports header stripping, so the payload is the stripped bytes.
trackFormat.sampleStrippedBytes = new byte[contentSize]; currentTrack.sampleStrippedBytes = new byte[contentSize];
input.readFully(trackFormat.sampleStrippedBytes, 0, contentSize); input.readFully(currentTrack.sampleStrippedBytes, 0, contentSize);
return; return;
case ID_CONTENT_ENCRYPTION_KEY_ID: case ID_CONTENT_ENCRYPTION_KEY_ID:
trackFormat.encryptionKeyId = new byte[contentSize]; currentTrack.encryptionKeyId = new byte[contentSize];
input.readFully(trackFormat.encryptionKeyId, 0, contentSize); input.readFully(currentTrack.encryptionKeyId, 0, contentSize);
return; return;
case ID_SIMPLE_BLOCK: case ID_SIMPLE_BLOCK:
case ID_BLOCK: case ID_BLOCK:
@ -620,23 +615,15 @@ public final class WebmExtractor implements Extractor {
} }
// Ignore the block if the track number equals neither the audio track nor the video track. // Ignore the block if the track number equals neither the audio track nor the video track.
if ((audioTrackFormat != null && videoTrackFormat != null if ((audioTrack == null || audioTrack.number != blockTrackNumber)
&& audioTrackFormat.number != blockTrackNumber && (videoTrack == null || videoTrack.number != blockTrackNumber)) {
&& videoTrackFormat.number != blockTrackNumber)
|| (audioTrackFormat != null && videoTrackFormat == null
&& audioTrackFormat.number != blockTrackNumber)
|| (audioTrackFormat == null && videoTrackFormat != null
&& videoTrackFormat.number != blockTrackNumber)) {
input.skipFully(contentSize - blockTrackNumberLength); input.skipFully(contentSize - blockTrackNumberLength);
blockState = BLOCK_STATE_START; blockState = BLOCK_STATE_START;
return; return;
} }
TrackFormat sampleTrackFormat = Track track = (audioTrack != null && blockTrackNumber == audioTrack.number)
(audioTrackFormat != null && blockTrackNumber == audioTrackFormat.number) ? audioTrack : videoTrack;
? audioTrackFormat : videoTrackFormat;
TrackOutput trackOutput = sampleTrackFormat.trackOutput;
if (blockState == BLOCK_STATE_HEADER) { if (blockState == BLOCK_STATE_HEADER) {
// Read the relative timecode (2 bytes) and flags (1 byte). // Read the relative timecode (2 bytes) and flags (1 byte).
readScratch(input, 3); readScratch(input, 3);
@ -721,11 +708,11 @@ public final class WebmExtractor implements Extractor {
int timecode = (scratch.data[0] << 8) | (scratch.data[1] & 0xFF); int timecode = (scratch.data[0] << 8) | (scratch.data[1] & 0xFF);
blockTimeUs = clusterTimecodeUs + scaleTimecodeToUs(timecode); blockTimeUs = clusterTimecodeUs + scaleTimecodeToUs(timecode);
boolean isInvisible = (scratch.data[2] & 0x08) == 0x08; boolean isInvisible = (scratch.data[2] & 0x08) == 0x08;
boolean isKeyframe = sampleTrackFormat.type == TRACK_TYPE_AUDIO boolean isKeyframe = track.type == TRACK_TYPE_AUDIO
|| (id == ID_SIMPLE_BLOCK && (scratch.data[2] & 0x80) == 0x80); || (id == ID_SIMPLE_BLOCK && (scratch.data[2] & 0x80) == 0x80);
blockFlags = (isKeyframe ? C.SAMPLE_FLAG_SYNC : 0) blockFlags = (isKeyframe ? C.SAMPLE_FLAG_SYNC : 0)
| (isInvisible ? C.SAMPLE_FLAG_DECODE_ONLY : 0); | (isInvisible ? C.SAMPLE_FLAG_DECODE_ONLY : 0);
blockEncryptionKeyId = sampleTrackFormat.encryptionKeyId; blockEncryptionKeyId = track.encryptionKeyId;
blockState = BLOCK_STATE_DATA; blockState = BLOCK_STATE_DATA;
blockLacingSampleIndex = 0; blockLacingSampleIndex = 0;
} }
@ -733,18 +720,17 @@ public final class WebmExtractor implements Extractor {
if (id == ID_SIMPLE_BLOCK) { if (id == ID_SIMPLE_BLOCK) {
// For SimpleBlock, we have metadata for each sample here. // For SimpleBlock, we have metadata for each sample here.
while (blockLacingSampleIndex < blockLacingSampleCount) { while (blockLacingSampleIndex < blockLacingSampleCount) {
writeSampleData(input, trackOutput, sampleTrackFormat, writeSampleData(input, track, blockLacingSampleSizes[blockLacingSampleIndex]);
blockLacingSampleSizes[blockLacingSampleIndex]);
long sampleTimeUs = this.blockTimeUs long sampleTimeUs = this.blockTimeUs
+ (blockLacingSampleIndex * sampleTrackFormat.defaultSampleDurationNs) / 1000; + (blockLacingSampleIndex * track.defaultSampleDurationNs) / 1000;
outputSampleMetadata(trackOutput, sampleTimeUs); outputSampleMetadata(track.output, sampleTimeUs);
blockLacingSampleIndex++; blockLacingSampleIndex++;
} }
blockState = BLOCK_STATE_START; blockState = BLOCK_STATE_START;
} else { } else {
// For Block, we send the metadata at the end of the BlockGroup element since we'll know // For Block, we send the metadata at the end of the BlockGroup element since we'll know
// if the sample is a keyframe or not only at that point. // if the sample is a keyframe or not only at that point.
writeSampleData(input, trackOutput, sampleTrackFormat, blockLacingSampleSizes[0]); writeSampleData(input, track, blockLacingSampleSizes[0]);
} }
return; return;
@ -784,10 +770,11 @@ public final class WebmExtractor implements Extractor {
scratch.setLimit(requiredLength); scratch.setLimit(requiredLength);
} }
private void writeSampleData(ExtractorInput input, TrackOutput output, TrackFormat format, private void writeSampleData(ExtractorInput input, Track track, int size)
int size) throws IOException, InterruptedException { throws IOException, InterruptedException {
TrackOutput output = track.output;
if (!sampleEncodingHandled) { if (!sampleEncodingHandled) {
if (format.hasContentEncryption) { if (track.hasContentEncryption) {
// If the sample is encrypted, read its encryption signal byte and set the IV size. // If the sample is encrypted, read its encryption signal byte and set the IV size.
// Clear the encrypted flag. // Clear the encrypted flag.
blockFlags &= ~C.SAMPLE_FLAG_ENCRYPTED; blockFlags &= ~C.SAMPLE_FLAG_ENCRYPTED;
@ -803,15 +790,15 @@ public final class WebmExtractor implements Extractor {
sampleBytesWritten++; sampleBytesWritten++;
blockFlags |= C.SAMPLE_FLAG_ENCRYPTED; blockFlags |= C.SAMPLE_FLAG_ENCRYPTED;
} }
} else if (format.sampleStrippedBytes != null) { } else if (track.sampleStrippedBytes != null) {
// If the sample has header stripping, prepare to read/output the stripped bytes first. // If the sample has header stripping, prepare to read/output the stripped bytes first.
sampleStrippedBytes.reset(format.sampleStrippedBytes, format.sampleStrippedBytes.length); sampleStrippedBytes.reset(track.sampleStrippedBytes, track.sampleStrippedBytes.length);
} }
sampleEncodingHandled = true; sampleEncodingHandled = true;
} }
size += sampleStrippedBytes.limit(); size += sampleStrippedBytes.limit();
if (CODEC_ID_H264.equals(format.codecId) || CODEC_ID_H265.equals(format.codecId)) { if (CODEC_ID_H264.equals(track.codecId) || CODEC_ID_H265.equals(track.codecId)) {
// TODO: Deduplicate with Mp4Extractor. // TODO: Deduplicate with Mp4Extractor.
// Zero the top three bytes of the array that we'll use to parse nal unit lengths, in case // Zero the top three bytes of the array that we'll use to parse nal unit lengths, in case
@ -820,8 +807,8 @@ public final class WebmExtractor implements Extractor {
nalLengthData[0] = 0; nalLengthData[0] = 0;
nalLengthData[1] = 0; nalLengthData[1] = 0;
nalLengthData[2] = 0; nalLengthData[2] = 0;
int nalUnitLengthFieldLength = format.nalUnitLengthFieldLength; int nalUnitLengthFieldLength = track.nalUnitLengthFieldLength;
int nalUnitLengthFieldLengthDiff = 4 - format.nalUnitLengthFieldLength; int nalUnitLengthFieldLengthDiff = 4 - track.nalUnitLengthFieldLength;
// NAL units are length delimited, but the decoder requires start code delimited units. // NAL units are length delimited, but the decoder requires start code delimited units.
// Loop until we've written the sample to the track output, replacing length delimiters with // Loop until we've written the sample to the track output, replacing length delimiters with
// start codes as we encounter them. // start codes as we encounter them.
@ -848,7 +835,7 @@ public final class WebmExtractor implements Extractor {
} }
} }
if (CODEC_ID_VORBIS.equals(format.codecId)) { if (CODEC_ID_VORBIS.equals(track.codecId)) {
// Vorbis decoder in android MediaCodec [1] expects the last 4 bytes of the sample to be the // Vorbis decoder in android MediaCodec [1] expects the last 4 bytes of the sample to be the
// number of samples in the current page. This definition holds good only for Ogg and // number of samples in the current page. This definition holds good only for Ogg and
// irrelevant for WebM. So we always set this to -1 (the decoder will ignore this value if we // irrelevant for WebM. So we always set this to -1 (the decoder will ignore this value if we
@ -1038,36 +1025,39 @@ public final class WebmExtractor implements Extractor {
} }
private static final class TrackFormat { private static final class Track {
// Common track elements. // Common elements.
public String codecId; public String codecId;
public int number = UNKNOWN; public int number;
public int type = UNKNOWN; public int type;
public int defaultSampleDurationNs = UNKNOWN; public int defaultSampleDurationNs;
public boolean hasContentEncryption; public boolean hasContentEncryption;
public byte[] sampleStrippedBytes; public byte[] sampleStrippedBytes;
public byte[] encryptionKeyId; public byte[] encryptionKeyId;
public byte[] codecPrivate; public byte[] codecPrivate;
// Video track related elements. // Video elements.
public int pixelWidth = UNKNOWN; public int width = MediaFormat.NO_VALUE;
public int pixelHeight = UNKNOWN; public int height = MediaFormat.NO_VALUE;
public int nalUnitLengthFieldLength = UNKNOWN;
// Audio track related elements. // Audio elements. Initially set to their default values.
public int channelCount = UNKNOWN; public int channelCount = 1;
public int sampleRate = UNKNOWN; public int sampleRate = 8000;
public long codecDelayNs = UNKNOWN; public long codecDelayNs = 0;
public long seekPreRollNs = UNKNOWN; public long seekPreRollNs = 0;
public TrackOutput trackOutput; // Set when the output is initialized. nalUnitLengthFieldLength is only set for H264/H265.
public TrackOutput output;
public int nalUnitLengthFieldLength;
/** Returns a {@link MediaFormat} built using the information in this instance. */ /**
public MediaFormat getMediaFormat(long durationUs) throws ParserException { * Initializes the track with an output.
*/
public void initializeOutput(ExtractorOutput output, long durationUs) throws ParserException {
String mimeType; String mimeType;
int maxInputSize = MediaFormat.NO_VALUE;
List<byte[]> initializationData = null; List<byte[]> initializationData = null;
int maxInputSize = UNKNOWN;
switch (codecId) { switch (codecId) {
case CODEC_ID_VP8: case CODEC_ID_VP8:
mimeType = MimeTypes.VIDEO_VP8; mimeType = MimeTypes.VIDEO_VP8;
@ -1116,8 +1106,8 @@ public final class WebmExtractor implements Extractor {
initializationData = Collections.singletonList(codecPrivate); initializationData = Collections.singletonList(codecPrivate);
break; break;
case CODEC_ID_MP3: case CODEC_ID_MP3:
maxInputSize = MP3_MAX_INPUT_SIZE;
mimeType = MimeTypes.AUDIO_MPEG; mimeType = MimeTypes.AUDIO_MPEG;
maxInputSize = MP3_MAX_INPUT_SIZE;
break; break;
case CODEC_ID_AC3: case CODEC_ID_AC3:
mimeType = MimeTypes.AUDIO_AC3; mimeType = MimeTypes.AUDIO_AC3;
@ -1126,15 +1116,19 @@ public final class WebmExtractor implements Extractor {
throw new ParserException("Unrecognized codec identifier."); throw new ParserException("Unrecognized codec identifier.");
} }
MediaFormat format;
if (MimeTypes.isAudio(mimeType)) { if (MimeTypes.isAudio(mimeType)) {
return MediaFormat.createAudioFormat(mimeType, MediaFormat.NO_VALUE, maxInputSize, format = MediaFormat.createAudioFormat(mimeType, MediaFormat.NO_VALUE, maxInputSize,
durationUs, channelCount, sampleRate, initializationData); durationUs, channelCount, sampleRate, initializationData);
} else if (MimeTypes.isVideo(mimeType)) { } else if (MimeTypes.isVideo(mimeType)) {
return MediaFormat.createVideoFormat(mimeType, MediaFormat.NO_VALUE, maxInputSize, format = MediaFormat.createVideoFormat(mimeType, MediaFormat.NO_VALUE, maxInputSize,
durationUs, pixelWidth, pixelHeight, 0, initializationData); durationUs, width, height, 0, initializationData);
} else { } else {
throw new ParserException("Unexpected MIME type."); throw new ParserException("Unexpected MIME type.");
} }
this.output = output.track(number);
this.output.format(format);
} }
/** /**