webm_extractor: Add support for parsing BlockGroup element

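This CL adds support for parsing BlockGroup elements for all codecs (not just Opus). A Block is now flagged as a keyframe only if its enclosing BlockGroup contains no ReferenceBlock element. It also adds a test to verify the new behavior.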
2025-05-10 00:59:51 +08:00 · 2015-05-01 20:20:47 +01:00 · 2015-05-01 20:20:47 +01:00 · 7ad55dbf2c
commit 7ad55dbf2c
parent 99304eb44e
2 changed files with 63 additions and 20 deletions
--- a/library/src/main/java/com/google/android/exoplayer/extractor/webm/WebmExtractor.java
+++ b/library/src/main/java/com/google/android/exoplayer/extractor/webm/WebmExtractor.java
@ -82,6 +82,7 @@ public final class WebmExtractor implements Extractor {
  private static final int ID_SIMPLE_BLOCK = 0xA3;
  private static final int ID_BLOCK_GROUP = 0xA0;
  private static final int ID_BLOCK = 0xA1;
+  private static final int ID_REFERENCE_BLOCK = 0xFB;
  private static final int ID_TRACKS = 0x1654AE6B;
  private static final int ID_TRACK_ENTRY = 0xAE;
  private static final int ID_TRACK_NUMBER = 0xD7;
@ -152,6 +153,7 @@ public final class WebmExtractor implements Extractor {
  private int sampleFlags;
  private long sampleTimeUs;
  private boolean sampleRead;
+  private boolean sampleSeenReferenceBlock;

  // Extractor outputs.
  private ExtractorOutput extractorOutput;
@ -236,6 +238,7 @@ public final class WebmExtractor implements Extractor {
      case ID_CONTENT_ENCRYPTION_AES_SETTINGS_CIPHER_MODE:
      case ID_CUE_TIME:
      case ID_CUE_CLUSTER_POSITION:
+      case ID_REFERENCE_BLOCK:
        return EbmlReader.TYPE_UNSIGNED_INT;
      case ID_DOC_TYPE:
      case ID_CODEC_ID:
@ -282,6 +285,9 @@ public final class WebmExtractor implements Extractor {
          seekForCues = true;
        }
        return;
+      case ID_BLOCK_GROUP:
+        sampleSeenReferenceBlock = false;
+        return;
      case ID_CONTENT_ENCODING:
        // TODO: check and fail if more than one content encoding is present.
        return;
@ -314,6 +320,19 @@ public final class WebmExtractor implements Extractor {
          // We have already built the cues. Ignore.
        }
        return;
+      case ID_BLOCK_GROUP:
+        if (sampleState != SAMPLE_STATE_DATA) {
+          // We've skipped this sample (due to incompatible track number).
+          return;
+        }
+        // If the ReferenceBlock element was not found for this sample, then it is a keyframe.
+        if (!sampleSeenReferenceBlock) {
+          sampleFlags |= C.SAMPLE_FLAG_SYNC;
+        }
+        outputSampleMetadata(
+            (audioTrackFormat != null && sampleTrackNumber == audioTrackFormat.number)
+                ? audioTrackFormat.trackOutput : videoTrackFormat.trackOutput);
+        return;
      case ID_CONTENT_ENCODING:
        if (!trackFormat.hasContentEncryption) {
          // We found a ContentEncoding other than Encryption.
@ -405,6 +424,9 @@ public final class WebmExtractor implements Extractor {
      case ID_CHANNELS:
        trackFormat.channelCount = (int) value;
        return;
+      case ID_REFERENCE_BLOCK:
+        sampleSeenReferenceBlock = true;
+        return;
      case ID_CONTENT_ENCODING_ORDER:
        // This extractor only supports one ContentEncoding element and hence the order has to be 0.
        if (value != 0) {
@ -551,16 +573,8 @@ public final class WebmExtractor implements Extractor {
            throw new ParserException("Lacing mode not supported: " + lacing);
          }
          boolean isInvisible = (sampleHeaderScratchData[2] & 0x08) == 0x08;
-          boolean isKeyframe;
-          if (id == ID_BLOCK) {
-            // Matroska Block element does not self-sufficiently say whether it is a keyframe. It
-            // depends on the existence of another element (ReferenceBlock) which may occur after
-            // the Block element. Since this extractor uses Block element only for Opus, we set the
-            // keyframe to be true always since all Opus frames are key frames.
-            isKeyframe = true;
-          } else {
-            isKeyframe = (sampleHeaderScratchData[2] & 0x80) == 0x80;
-          }
+          boolean isKeyframe =
+              (id == ID_SIMPLE_BLOCK && (sampleHeaderScratchData[2] & 0x80) == 0x80);
          boolean isEncrypted = false;

          // If encrypted, the fourth byte is an encryption signal byte.
@ -601,15 +615,24 @@ public final class WebmExtractor implements Extractor {
          sampleSize += 4;
        }

-        trackOutput.sampleMetadata(sampleTimeUs, sampleFlags, sampleSize, 0, null);
-        sampleState = SAMPLE_STATE_START;
-        sampleRead = true;
+        // For SimpleBlock, we send the metadata here as we have all the information. For Block, we
+        // send the metadata at the end of the BlockGroup element since we'll know if the frame is a
+        // keyframe or not only at that point.
+        if (id == ID_SIMPLE_BLOCK) {
+          outputSampleMetadata(trackOutput);
+        }
        return;
      default:
        throw new IllegalStateException("Unexpected id: " + id);
    }
  }

+  private void outputSampleMetadata(TrackOutput trackOutput) {
+    trackOutput.sampleMetadata(sampleTimeUs, sampleFlags, sampleSize, 0, null);
+    sampleState = SAMPLE_STATE_START;
+    sampleRead = true;
+  }
+
  /**
   * Builds an video {@link MediaFormat} containing recently gathered Video information.
   *
--- a/library/src/test/java/com/google/android/exoplayer/extractor/webm/WebmExtractorTest.java
+++ b/library/src/test/java/com/google/android/exoplayer/extractor/webm/WebmExtractorTest.java
@ -334,6 +334,19 @@ public class WebmExtractorTest extends InstrumentationTestCase {
    assertSample(mediaSegment, 0, true, false, false, audioOutput);
  }

+  public void testReadBlockNonKeyframe() throws IOException, InterruptedException {
+    MediaSegment mediaSegment =
+        createMediaSegment(100, 0, 0, false, false, false, false, false, 1);
+    byte[] testInputData = joinByteArrays(
+        createInitializationSegment(
+            1, mediaSegment.clusterBytes.length, true, DEFAULT_TIMECODE_SCALE,
+            new int[] { ID_VP9 }, null),
+        mediaSegment.clusterBytes);
+    consume(testInputData);
+    assertVideoFormat();
+    assertSample(mediaSegment, 0, false, false, false, videoOutput);
+  }
+
  public void testReadEncryptedFrame() throws IOException, InterruptedException {
    MediaSegment mediaSegment = createMediaSegment(100, 0, 0, true, false, true, true, true, 1);
    ContentEncodingSettings settings = new ContentEncodingSettings(0, 1, 1, 5, 1);
@ -466,7 +479,6 @@ public class WebmExtractorTest extends InstrumentationTestCase {
    assertEquals(keyframe, (output.sampleFlags & C.SAMPLE_FLAG_SYNC) != 0);
    assertEquals(invisible, (output.sampleFlags & C.SAMPLE_FLAG_DECODE_ONLY) != 0);
    assertEquals(encrypted, (output.sampleFlags & C.SAMPLE_FLAG_ENCRYPTED) != 0);
-
  }

  private byte[] createInitializationSegment(int cuePoints, int mediaSegmentSize,
@ -522,7 +534,8 @@ public class WebmExtractorTest extends InstrumentationTestCase {
      blockBytes = createSimpleBlockElement(data.length, blockTimecode,
          keyframe, invisible, true, encrypted, validSignalByte, trackNumber);
    } else {
-      blockBytes = createBlockElement(data.length, blockTimecode, invisible, true, trackNumber);
+      blockBytes = createBlockElement(data.length, blockTimecode,
+          keyframe, invisible, true, trackNumber);
    }
    byte[] clusterBytes =
        createClusterElement(blockBytes.length + data.length, clusterTimecode);
@ -762,22 +775,29 @@ public class WebmExtractorTest extends InstrumentationTestCase {
  }

  private static byte[] createBlockElement(
-      int size, int timecode, boolean invisible, boolean noLacing, int trackNumber) {
+      int size, int timecode, boolean keyframe, boolean invisible, boolean noLacing,
+      int trackNumber) {
    int blockSize = size + 5;
    byte[] blockSizeBytes = getIntegerBytes(blockSize);
    byte[] timeBytes = getIntegerBytes(timecode);
    byte[] trackNumberBytes = getIntegerBytes(trackNumber);
-    int blockElementSize = 1 + 8 + blockSize; // id + size + length of data
-    byte[] sizeBytes = getIntegerBytes(blockElementSize);
+    // Size of blockgroup = id + size + size of reference block + length of data.
+    int blockGroupElementSize = 1 + 8 + (keyframe ? 0 : 3) +  blockSize;
+    byte[] sizeBytes = getIntegerBytes(blockGroupElementSize);
    byte flags = (byte) ((invisible ? 0x08 : 0x00) | (noLacing ? 0x00 : 0x06));
-    return createByteArray(
+    byte[] blockGroupHeader = createByteArray(
        0xA0, // BlockGroup
-        0x01, 0x00, 0x00, 0x00, sizeBytes[0], sizeBytes[1], sizeBytes[2], sizeBytes[3],
+        0x01, 0x00, 0x00, 0x00, sizeBytes[0], sizeBytes[1], sizeBytes[2], sizeBytes[3]);
+    byte[] referenceBlock = keyframe ? new byte[0] : createByteArray(
+        0xFB, // ReferenceBlock
+        0x81, 0x00); // size=1 value=0
+    byte[] blockData = createByteArray(
        0xA1, // Block
        0x01, 0x00, 0x00, 0x00,
        blockSizeBytes[0], blockSizeBytes[1], blockSizeBytes[2], blockSizeBytes[3],
        0x40, trackNumberBytes[3], // Track number size=2
        timeBytes[2], timeBytes[3], flags); // Timecode and flags
+    return joinByteArrays(blockGroupHeader, referenceBlock, blockData);
  }

  private static byte[] createFrameData(int size) {