webm_extractor: Add support for parsing BlockGroup element

This CL adds support for parsing BlockGroup elements for all
codecs (not just Opus). It also adds a test to verify the new
behavior.
Oliver Woodman 2015-05-01 20:20:47 +01:00
parent 99304eb44e
commit 7ad55dbf2c
2 changed files with 63 additions and 20 deletions
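
For reference, the Matroska keyframe rule that the diff below implements can be sketched as follows: a SimpleBlock carries a keyframe flag directly in its header (bit 0x80 of the flags byte), whereas a Block inside a BlockGroup is a keyframe only if the group contains no ReferenceBlock element. This is why the extractor now defers emitting sample metadata for Block until the end of the BlockGroup. The class and method names in this sketch are illustrative only and do not appear in the CL.

// Illustrative sketch only; these names are not part of WebmExtractor.
final class KeyframeRule {

  // Keyframe bit in the flags byte of a SimpleBlock header.
  private static final int SIMPLE_BLOCK_KEYFRAME_FLAG = 0x80;

  // A SimpleBlock declares its keyframe status itself.
  static boolean isSimpleBlockKeyframe(byte flagsByte) {
    return (flagsByte & SIMPLE_BLOCK_KEYFRAME_FLAG) == SIMPLE_BLOCK_KEYFRAME_FLAG;
  }

  // A Block is a keyframe only if its enclosing BlockGroup contains no ReferenceBlock,
  // since a ReferenceBlock means the frame depends on another frame.
  static boolean isBlockGroupKeyframe(boolean seenReferenceBlock) {
    return !seenReferenceBlock;
  }
}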

WebmExtractor.java

@@ -82,6 +82,7 @@ public final class WebmExtractor implements Extractor {
private static final int ID_SIMPLE_BLOCK = 0xA3;
private static final int ID_BLOCK_GROUP = 0xA0;
private static final int ID_BLOCK = 0xA1;
private static final int ID_REFERENCE_BLOCK = 0xFB;
private static final int ID_TRACKS = 0x1654AE6B;
private static final int ID_TRACK_ENTRY = 0xAE;
private static final int ID_TRACK_NUMBER = 0xD7;
@@ -152,6 +153,7 @@ public final class WebmExtractor implements Extractor {
private int sampleFlags;
private long sampleTimeUs;
private boolean sampleRead;
private boolean sampleSeenReferenceBlock;
// Extractor outputs.
private ExtractorOutput extractorOutput;
@@ -236,6 +238,7 @@ public final class WebmExtractor implements Extractor {
case ID_CONTENT_ENCRYPTION_AES_SETTINGS_CIPHER_MODE:
case ID_CUE_TIME:
case ID_CUE_CLUSTER_POSITION:
case ID_REFERENCE_BLOCK:
return EbmlReader.TYPE_UNSIGNED_INT;
case ID_DOC_TYPE:
case ID_CODEC_ID:
@@ -282,6 +285,9 @@ public final class WebmExtractor implements Extractor {
seekForCues = true;
}
return;
case ID_BLOCK_GROUP:
sampleSeenReferenceBlock = false;
return;
case ID_CONTENT_ENCODING:
// TODO: check and fail if more than one content encoding is present.
return;
@@ -314,6 +320,19 @@ public final class WebmExtractor implements Extractor {
// We have already built the cues. Ignore.
}
return;
case ID_BLOCK_GROUP:
if (sampleState != SAMPLE_STATE_DATA) {
// We've skipped this sample (due to incompatible track number).
return;
}
// If the ReferenceBlock element was not found for this sample, then it is a keyframe.
if (!sampleSeenReferenceBlock) {
sampleFlags |= C.SAMPLE_FLAG_SYNC;
}
outputSampleMetadata(
(audioTrackFormat != null && sampleTrackNumber == audioTrackFormat.number)
? audioTrackFormat.trackOutput : videoTrackFormat.trackOutput);
return;
case ID_CONTENT_ENCODING:
if (!trackFormat.hasContentEncryption) {
// We found a ContentEncoding other than Encryption.
@@ -405,6 +424,9 @@ public final class WebmExtractor implements Extractor {
case ID_CHANNELS:
trackFormat.channelCount = (int) value;
return;
case ID_REFERENCE_BLOCK:
sampleSeenReferenceBlock = true;
return;
case ID_CONTENT_ENCODING_ORDER:
// This extractor only supports one ContentEncoding element and hence the order has to be 0.
if (value != 0) {
@@ -551,16 +573,8 @@ public final class WebmExtractor implements Extractor {
throw new ParserException("Lacing mode not supported: " + lacing);
}
boolean isInvisible = (sampleHeaderScratchData[2] & 0x08) == 0x08;
boolean isKeyframe;
if (id == ID_BLOCK) {
// Matroska Block element does not self-sufficiently say whether it is a keyframe. It
// depends on the existence of another element (ReferenceBlock) which may occur after
// the Block element. Since this extractor uses Block element only for Opus, we set the
// keyframe to be true always since all Opus frames are key frames.
isKeyframe = true;
} else {
isKeyframe = (sampleHeaderScratchData[2] & 0x80) == 0x80;
}
boolean isKeyframe =
(id == ID_SIMPLE_BLOCK && (sampleHeaderScratchData[2] & 0x80) == 0x80);
boolean isEncrypted = false;
// If encrypted, the fourth byte is an encryption signal byte.
@@ -601,15 +615,24 @@
sampleSize += 4;
}
trackOutput.sampleMetadata(sampleTimeUs, sampleFlags, sampleSize, 0, null);
sampleState = SAMPLE_STATE_START;
sampleRead = true;
// For SimpleBlock, we send the metadata here as we have all the information. For Block, we
// send the metadata at the end of the BlockGroup element since we'll know if the frame is a
// keyframe or not only at that point.
if (id == ID_SIMPLE_BLOCK) {
outputSampleMetadata(trackOutput);
}
return;
default:
throw new IllegalStateException("Unexpected id: " + id);
}
}
private void outputSampleMetadata(TrackOutput trackOutput) {
trackOutput.sampleMetadata(sampleTimeUs, sampleFlags, sampleSize, 0, null);
sampleState = SAMPLE_STATE_START;
sampleRead = true;
}
/**
* Builds a video {@link MediaFormat} containing recently gathered video information.
*

WebmExtractorTest.java
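
The helper createBlockElement in the test below now wraps the Block in a BlockGroup and, for non-keyframe samples, inserts a ReferenceBlock before it. As a rough illustration of the resulting byte layout (the size placeholders and the Block body are elided, and this snippet is not part of the CL):

// Illustrative layout only; mirrors the bytes written by createBlockElement below.
final class BlockGroupLayoutExample {
  static byte[] nonKeyframeBlockGroupPrefix() {
    return new byte[] {
        (byte) 0xA0,                         // BlockGroup element ID
        0x01, 0x00, 0x00, 0x00, 0, 0, 0, 0,  // 8-byte EBML size (placeholder value)
        (byte) 0xFB, (byte) 0x81, 0x00,      // ReferenceBlock: size=1, value=0 => not a keyframe
        (byte) 0xA1,                         // Block element ID; its own size, track number,
                                             //   timecode, flags and frame data follow
    };
  }
}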

@@ -334,6 +334,19 @@ public class WebmExtractorTest extends InstrumentationTestCase {
assertSample(mediaSegment, 0, true, false, false, audioOutput);
}
public void testReadBlockNonKeyframe() throws IOException, InterruptedException {
MediaSegment mediaSegment =
createMediaSegment(100, 0, 0, false, false, false, false, false, 1);
byte[] testInputData = joinByteArrays(
createInitializationSegment(
1, mediaSegment.clusterBytes.length, true, DEFAULT_TIMECODE_SCALE,
new int[] { ID_VP9 }, null),
mediaSegment.clusterBytes);
consume(testInputData);
assertVideoFormat();
assertSample(mediaSegment, 0, false, false, false, videoOutput);
}
public void testReadEncryptedFrame() throws IOException, InterruptedException {
MediaSegment mediaSegment = createMediaSegment(100, 0, 0, true, false, true, true, true, 1);
ContentEncodingSettings settings = new ContentEncodingSettings(0, 1, 1, 5, 1);
@@ -466,7 +479,6 @@ public class WebmExtractorTest extends InstrumentationTestCase {
assertEquals(keyframe, (output.sampleFlags & C.SAMPLE_FLAG_SYNC) != 0);
assertEquals(invisible, (output.sampleFlags & C.SAMPLE_FLAG_DECODE_ONLY) != 0);
assertEquals(encrypted, (output.sampleFlags & C.SAMPLE_FLAG_ENCRYPTED) != 0);
}
private byte[] createInitializationSegment(int cuePoints, int mediaSegmentSize,
@@ -522,7 +534,8 @@ public class WebmExtractorTest extends InstrumentationTestCase {
blockBytes = createSimpleBlockElement(data.length, blockTimecode,
keyframe, invisible, true, encrypted, validSignalByte, trackNumber);
} else {
blockBytes = createBlockElement(data.length, blockTimecode, invisible, true, trackNumber);
blockBytes = createBlockElement(data.length, blockTimecode,
keyframe, invisible, true, trackNumber);
}
byte[] clusterBytes =
createClusterElement(blockBytes.length + data.length, clusterTimecode);
@@ -762,22 +775,29 @@ public class WebmExtractorTest extends InstrumentationTestCase {
}
private static byte[] createBlockElement(
int size, int timecode, boolean invisible, boolean noLacing, int trackNumber) {
int size, int timecode, boolean keyframe, boolean invisible, boolean noLacing,
int trackNumber) {
int blockSize = size + 5;
byte[] blockSizeBytes = getIntegerBytes(blockSize);
byte[] timeBytes = getIntegerBytes(timecode);
byte[] trackNumberBytes = getIntegerBytes(trackNumber);
int blockElementSize = 1 + 8 + blockSize; // id + size + length of data
byte[] sizeBytes = getIntegerBytes(blockElementSize);
// Size of blockgroup = id + size + size of reference block + length of data.
int blockGroupElementSize = 1 + 8 + (keyframe ? 0 : 3) + blockSize;
byte[] sizeBytes = getIntegerBytes(blockGroupElementSize);
byte flags = (byte) ((invisible ? 0x08 : 0x00) | (noLacing ? 0x00 : 0x06));
return createByteArray(
byte[] blockGroupHeader = createByteArray(
0xA0, // BlockGroup
0x01, 0x00, 0x00, 0x00, sizeBytes[0], sizeBytes[1], sizeBytes[2], sizeBytes[3],
0x01, 0x00, 0x00, 0x00, sizeBytes[0], sizeBytes[1], sizeBytes[2], sizeBytes[3]);
byte[] referenceBlock = keyframe ? new byte[0] : createByteArray(
0xFB, // ReferenceBlock
0x81, 0x00); // size=1 value=0
byte[] blockData = createByteArray(
0xA1, // Block
0x01, 0x00, 0x00, 0x00,
blockSizeBytes[0], blockSizeBytes[1], blockSizeBytes[2], blockSizeBytes[3],
0x40, trackNumberBytes[3], // Track number size=2
timeBytes[2], timeBytes[3], flags); // Timecode and flags
return joinByteArrays(blockGroupHeader, referenceBlock, blockData);
}
private static byte[] createFrameData(int size) {