diff --git a/library/src/main/java/com/google/android/exoplayer/extractor/webm/WebmExtractor.java b/library/src/main/java/com/google/android/exoplayer/extractor/webm/WebmExtractor.java index de7ef2c6a6..081d6bca60 100644 --- a/library/src/main/java/com/google/android/exoplayer/extractor/webm/WebmExtractor.java +++ b/library/src/main/java/com/google/android/exoplayer/extractor/webm/WebmExtractor.java @@ -82,6 +82,7 @@ public final class WebmExtractor implements Extractor { private static final int ID_SIMPLE_BLOCK = 0xA3; private static final int ID_BLOCK_GROUP = 0xA0; private static final int ID_BLOCK = 0xA1; + private static final int ID_REFERENCE_BLOCK = 0xFB; private static final int ID_TRACKS = 0x1654AE6B; private static final int ID_TRACK_ENTRY = 0xAE; private static final int ID_TRACK_NUMBER = 0xD7; @@ -152,6 +153,7 @@ public final class WebmExtractor implements Extractor { private int sampleFlags; private long sampleTimeUs; private boolean sampleRead; + private boolean sampleSeenReferenceBlock; // Extractor outputs. private ExtractorOutput extractorOutput; @@ -236,6 +238,7 @@ public final class WebmExtractor implements Extractor { case ID_CONTENT_ENCRYPTION_AES_SETTINGS_CIPHER_MODE: case ID_CUE_TIME: case ID_CUE_CLUSTER_POSITION: + case ID_REFERENCE_BLOCK: return EbmlReader.TYPE_UNSIGNED_INT; case ID_DOC_TYPE: case ID_CODEC_ID: @@ -282,6 +285,9 @@ public final class WebmExtractor implements Extractor { seekForCues = true; } return; + case ID_BLOCK_GROUP: + sampleSeenReferenceBlock = false; + return; case ID_CONTENT_ENCODING: // TODO: check and fail if more than one content encoding is present. return; @@ -314,6 +320,19 @@ public final class WebmExtractor implements Extractor { // We have already built the cues. Ignore. } return; + case ID_BLOCK_GROUP: + if (sampleState != SAMPLE_STATE_DATA) { + // We've skipped this sample (due to incompatible track number). + return; + } + // If the ReferenceBlock element was not found for this sample, then it is a keyframe. + if (!sampleSeenReferenceBlock) { + sampleFlags |= C.SAMPLE_FLAG_SYNC; + } + outputSampleMetadata( + (audioTrackFormat != null && sampleTrackNumber == audioTrackFormat.number) + ? audioTrackFormat.trackOutput : videoTrackFormat.trackOutput); + return; case ID_CONTENT_ENCODING: if (!trackFormat.hasContentEncryption) { // We found a ContentEncoding other than Encryption. @@ -405,6 +424,9 @@ public final class WebmExtractor implements Extractor { case ID_CHANNELS: trackFormat.channelCount = (int) value; return; + case ID_REFERENCE_BLOCK: + sampleSeenReferenceBlock = true; + return; case ID_CONTENT_ENCODING_ORDER: // This extractor only supports one ContentEncoding element and hence the order has to be 0. if (value != 0) { @@ -551,16 +573,8 @@ public final class WebmExtractor implements Extractor { throw new ParserException("Lacing mode not supported: " + lacing); } boolean isInvisible = (sampleHeaderScratchData[2] & 0x08) == 0x08; - boolean isKeyframe; - if (id == ID_BLOCK) { - // Matroska Block element does not self-sufficiently say whether it is a keyframe. It - // depends on the existence of another element (ReferenceBlock) which may occur after - // the Block element. Since this extractor uses Block element only for Opus, we set the - // keyframe to be true always since all Opus frames are key frames. - isKeyframe = true; - } else { - isKeyframe = (sampleHeaderScratchData[2] & 0x80) == 0x80; - } + boolean isKeyframe = + (id == ID_SIMPLE_BLOCK && (sampleHeaderScratchData[2] & 0x80) == 0x80); boolean isEncrypted = false; // If encrypted, the fourth byte is an encryption signal byte. @@ -601,15 +615,24 @@ public final class WebmExtractor implements Extractor { sampleSize += 4; } - trackOutput.sampleMetadata(sampleTimeUs, sampleFlags, sampleSize, 0, null); - sampleState = SAMPLE_STATE_START; - sampleRead = true; + // For SimpleBlock, we send the metadata here as we have all the information. For Block, we + // send the metadata at the end of the BlockGroup element since we'll know if the frame is a + // keyframe or not only at that point. + if (id == ID_SIMPLE_BLOCK) { + outputSampleMetadata(trackOutput); + } return; default: throw new IllegalStateException("Unexpected id: " + id); } } + private void outputSampleMetadata(TrackOutput trackOutput) { + trackOutput.sampleMetadata(sampleTimeUs, sampleFlags, sampleSize, 0, null); + sampleState = SAMPLE_STATE_START; + sampleRead = true; + } + /** * Builds an video {@link MediaFormat} containing recently gathered Video information. * diff --git a/library/src/test/java/com/google/android/exoplayer/extractor/webm/WebmExtractorTest.java b/library/src/test/java/com/google/android/exoplayer/extractor/webm/WebmExtractorTest.java index 844921bce7..239dadb00e 100644 --- a/library/src/test/java/com/google/android/exoplayer/extractor/webm/WebmExtractorTest.java +++ b/library/src/test/java/com/google/android/exoplayer/extractor/webm/WebmExtractorTest.java @@ -334,6 +334,19 @@ public class WebmExtractorTest extends InstrumentationTestCase { assertSample(mediaSegment, 0, true, false, false, audioOutput); } + public void testReadBlockNonKeyframe() throws IOException, InterruptedException { + MediaSegment mediaSegment = + createMediaSegment(100, 0, 0, false, false, false, false, false, 1); + byte[] testInputData = joinByteArrays( + createInitializationSegment( + 1, mediaSegment.clusterBytes.length, true, DEFAULT_TIMECODE_SCALE, + new int[] { ID_VP9 }, null), + mediaSegment.clusterBytes); + consume(testInputData); + assertVideoFormat(); + assertSample(mediaSegment, 0, false, false, false, videoOutput); + } + public void testReadEncryptedFrame() throws IOException, InterruptedException { MediaSegment mediaSegment = createMediaSegment(100, 0, 0, true, false, true, true, true, 1); ContentEncodingSettings settings = new ContentEncodingSettings(0, 1, 1, 5, 1); @@ -466,7 +479,6 @@ public class WebmExtractorTest extends InstrumentationTestCase { assertEquals(keyframe, (output.sampleFlags & C.SAMPLE_FLAG_SYNC) != 0); assertEquals(invisible, (output.sampleFlags & C.SAMPLE_FLAG_DECODE_ONLY) != 0); assertEquals(encrypted, (output.sampleFlags & C.SAMPLE_FLAG_ENCRYPTED) != 0); - } private byte[] createInitializationSegment(int cuePoints, int mediaSegmentSize, @@ -522,7 +534,8 @@ public class WebmExtractorTest extends InstrumentationTestCase { blockBytes = createSimpleBlockElement(data.length, blockTimecode, keyframe, invisible, true, encrypted, validSignalByte, trackNumber); } else { - blockBytes = createBlockElement(data.length, blockTimecode, invisible, true, trackNumber); + blockBytes = createBlockElement(data.length, blockTimecode, + keyframe, invisible, true, trackNumber); } byte[] clusterBytes = createClusterElement(blockBytes.length + data.length, clusterTimecode); @@ -762,22 +775,29 @@ public class WebmExtractorTest extends InstrumentationTestCase { } private static byte[] createBlockElement( - int size, int timecode, boolean invisible, boolean noLacing, int trackNumber) { + int size, int timecode, boolean keyframe, boolean invisible, boolean noLacing, + int trackNumber) { int blockSize = size + 5; byte[] blockSizeBytes = getIntegerBytes(blockSize); byte[] timeBytes = getIntegerBytes(timecode); byte[] trackNumberBytes = getIntegerBytes(trackNumber); - int blockElementSize = 1 + 8 + blockSize; // id + size + length of data - byte[] sizeBytes = getIntegerBytes(blockElementSize); + // Size of blockgroup = id + size + size of reference block + length of data. + int blockGroupElementSize = 1 + 8 + (keyframe ? 0 : 3) + blockSize; + byte[] sizeBytes = getIntegerBytes(blockGroupElementSize); byte flags = (byte) ((invisible ? 0x08 : 0x00) | (noLacing ? 0x00 : 0x06)); - return createByteArray( + byte[] blockGroupHeader = createByteArray( 0xA0, // BlockGroup - 0x01, 0x00, 0x00, 0x00, sizeBytes[0], sizeBytes[1], sizeBytes[2], sizeBytes[3], + 0x01, 0x00, 0x00, 0x00, sizeBytes[0], sizeBytes[1], sizeBytes[2], sizeBytes[3]); + byte[] referenceBlock = keyframe ? new byte[0] : createByteArray( + 0xFB, // ReferenceBlock + 0x81, 0x00); // size=1 value=0 + byte[] blockData = createByteArray( 0xA1, // Block 0x01, 0x00, 0x00, 0x00, blockSizeBytes[0], blockSizeBytes[1], blockSizeBytes[2], blockSizeBytes[3], 0x40, trackNumberBytes[3], // Track number size=2 timeBytes[2], timeBytes[3], flags); // Timecode and flags + return joinByteArrays(blockGroupHeader, referenceBlock, blockData); } private static byte[] createFrameData(int size) {