diff --git a/library/rtsp/src/main/java/com/google/android/exoplayer2/source/rtsp/RtpPayloadFormat.java b/library/rtsp/src/main/java/com/google/android/exoplayer2/source/rtsp/RtpPayloadFormat.java
index 4b8ab71110..c0bd12e564 100644
--- a/library/rtsp/src/main/java/com/google/android/exoplayer2/source/rtsp/RtpPayloadFormat.java
+++ b/library/rtsp/src/main/java/com/google/android/exoplayer2/source/rtsp/RtpPayloadFormat.java
@@ -49,6 +49,7 @@ public final class RtpPayloadFormat {
   private static final String RTP_MEDIA_PCMA = "PCMA";
   private static final String RTP_MEDIA_PCMU = "PCMU";
   private static final String RTP_MEDIA_VP8 = "VP8";
+  private static final String RTP_MEDIA_VP9 = "VP9";
 
   /** Returns whether the format of a {@link MediaDescription} is supported. */
   public static boolean isFormatSupported(MediaDescription mediaDescription) {
@@ -65,6 +66,7 @@ public final class RtpPayloadFormat {
       case RTP_MEDIA_PCMA:
       case RTP_MEDIA_PCMU:
       case RTP_MEDIA_VP8:
+      case RTP_MEDIA_VP9:
         return true;
       default:
         return false;
@@ -103,6 +105,8 @@ public final class RtpPayloadFormat {
         return MimeTypes.VIDEO_MP4V;
       case RTP_MEDIA_VP8:
         return MimeTypes.VIDEO_VP8;
+      case RTP_MEDIA_VP9:
+        return MimeTypes.VIDEO_VP9;
       default:
         throw new IllegalArgumentException(mediaType);
     }
diff --git a/library/rtsp/src/main/java/com/google/android/exoplayer2/source/rtsp/RtspMediaTrack.java b/library/rtsp/src/main/java/com/google/android/exoplayer2/source/rtsp/RtspMediaTrack.java
index 3b1f47b423..b04dc7a052 100644
--- a/library/rtsp/src/main/java/com/google/android/exoplayer2/source/rtsp/RtspMediaTrack.java
+++ b/library/rtsp/src/main/java/com/google/android/exoplayer2/source/rtsp/RtspMediaTrack.java
@@ -99,6 +99,23 @@ import com.google.common.collect.ImmutableMap;
    */
   private static final int DEFAULT_VP8_HEIGHT = 240;
 
+  /**
+   * Default width for VP9.
+   *
+   * <p>VP9 RFC (<a href="https://datatracker.ietf.org/doc/html/draft-ietf-payload-vp9">this draft
+   * RFC</a>) never uses codec specific data (like width and height) in the fmtp attribute. These
+   * values are taken from Android's software VP9 decoder.
+   */
+  private static final int DEFAULT_VP9_WIDTH = 320;
+  /**
+   * Default height for VP9.
+   *
+   * <p>VP9 RFC (<a href="https://datatracker.ietf.org/doc/html/draft-ietf-payload-vp9">this draft
+   * RFC</a>) never uses codec specific data (like width and height) in the fmtp attribute. These
+   * values are taken from Android's software VP9 decoder.
+   */
+  private static final int DEFAULT_VP9_HEIGHT = 240;
+
   /** The track's associated {@link RtpPayloadFormat}. */
   public final RtpPayloadFormat payloadFormat;
   /** The track's URI. */
@@ -195,6 +212,10 @@ import com.google.common.collect.ImmutableMap;
         // width and height.
         formatBuilder.setWidth(DEFAULT_VP8_WIDTH).setHeight(DEFAULT_VP8_HEIGHT);
         break;
+      case MimeTypes.VIDEO_VP9:
+        // VP9 never uses fmtp width and height attributes, setting default width and height.
+        formatBuilder.setWidth(DEFAULT_VP9_WIDTH).setHeight(DEFAULT_VP9_HEIGHT);
+        break;
       case MimeTypes.AUDIO_RAW:
         formatBuilder.setPcmEncoding(RtpPayloadFormat.getRawPcmEncodingType(mediaEncoding));
         break;
diff --git a/library/rtsp/src/main/java/com/google/android/exoplayer2/source/rtsp/reader/DefaultRtpPayloadReaderFactory.java b/library/rtsp/src/main/java/com/google/android/exoplayer2/source/rtsp/reader/DefaultRtpPayloadReaderFactory.java
index 2aeaa7298b..793c0b02c3 100644
--- a/library/rtsp/src/main/java/com/google/android/exoplayer2/source/rtsp/reader/DefaultRtpPayloadReaderFactory.java
+++ b/library/rtsp/src/main/java/com/google/android/exoplayer2/source/rtsp/reader/DefaultRtpPayloadReaderFactory.java
@@ -49,6 +49,8 @@ import com.google.android.exoplayer2.util.MimeTypes;
         return new RtpMpeg4Reader(payloadFormat);
       case MimeTypes.VIDEO_VP8:
         return new RtpVp8Reader(payloadFormat);
+      case MimeTypes.VIDEO_VP9:
+        return new RtpVp9Reader(payloadFormat);
       default:
         // No supported reader, returning null.
     }
diff --git a/library/rtsp/src/main/java/com/google/android/exoplayer2/source/rtsp/reader/RtpVp9Reader.java b/library/rtsp/src/main/java/com/google/android/exoplayer2/source/rtsp/reader/RtpVp9Reader.java
new file mode 100644
index 0000000000..d7f1834fe2
--- /dev/null
+++ b/library/rtsp/src/main/java/com/google/android/exoplayer2/source/rtsp/reader/RtpVp9Reader.java
@@ -0,0 +1,305 @@
+/*
+ * Copyright 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.android.exoplayer2.source.rtsp.reader;
+
+import static com.google.android.exoplayer2.util.Assertions.checkArgument;
+import static com.google.android.exoplayer2.util.Assertions.checkStateNotNull;
+
+import com.google.android.exoplayer2.C;
+import com.google.android.exoplayer2.extractor.ExtractorOutput;
+import com.google.android.exoplayer2.extractor.TrackOutput;
+import com.google.android.exoplayer2.source.rtsp.RtpPacket;
+import com.google.android.exoplayer2.source.rtsp.RtpPayloadFormat;
+import com.google.android.exoplayer2.util.Log;
+import com.google.android.exoplayer2.util.ParsableByteArray;
+import com.google.android.exoplayer2.util.Util;
+import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
+
+/**
+ * Parses a VP9 byte stream carried on RTP packets, and extracts VP9 Access Units. Refer to <a
+ * href="https://datatracker.ietf.org/doc/html/draft-ietf-payload-vp9">this draft RFC</a> for more
+ * details.
+ */
+/* package */ final class RtpVp9Reader implements RtpPayloadReader {
+
+  private static final String TAG = "RtpVp9Reader";
+
+  private static final long MEDIA_CLOCK_FREQUENCY = 90_000;
+  /** Size in bytes of one WIDTH/HEIGHT pair in the scalability structure. */
+  private static final int SCALABILITY_STRUCTURE_SIZE = 4;
+
+  private final RtpPayloadFormat payloadFormat;
+
+  private @MonotonicNonNull TrackOutput trackOutput;
+
+  /**
+   * First received RTP timestamp. All RTP timestamps are dimension-less, the time base is defined
+   * by {@link #MEDIA_CLOCK_FREQUENCY}.
+   */
+  private long firstReceivedTimestamp;
+
+  private long startTimeOffsetUs;
+  private int previousSequenceNumber;
+  /** The combined size of a sample that is fragmented into multiple RTP packets. */
+  private int fragmentedSampleSizeBytes;
+
+  private int width;
+  private int height;
+  /**
+   * Whether the first packet of a VP9 frame has been received; it marks the start of a VP9
+   * partition. A VP9 frame can be split into multiple RTP packets.
+   */
+  private boolean gotFirstPacketOfVp9Frame;
+  /**
+   * Whether the frame being assembled is a key frame. Determined from the first fragment and kept
+   * until the sample is output, because the RTP marker may only arrive on a later packet.
+   */
+  private boolean isKeyFrame;
+
+  private boolean reportedOutputFormat;
+
+  /** Creates an instance. */
+  public RtpVp9Reader(RtpPayloadFormat payloadFormat) {
+    this.payloadFormat = payloadFormat;
+    firstReceivedTimestamp = C.TIME_UNSET;
+    // The start time offset must be 0 until the first seek.
+    startTimeOffsetUs = 0;
+    previousSequenceNumber = C.INDEX_UNSET;
+    width = C.LENGTH_UNSET;
+    height = C.LENGTH_UNSET;
+    gotFirstPacketOfVp9Frame = false;
+    isKeyFrame = false;
+    reportedOutputFormat = false;
+  }
+
+  @Override
+  public void createTracks(ExtractorOutput extractorOutput, int trackId) {
+    trackOutput = extractorOutput.track(trackId, C.TRACK_TYPE_VIDEO);
+    trackOutput.format(payloadFormat.format);
+  }
+
+  @Override
+  public void onReceivingFirstPacket(long timestamp, int sequenceNumber) {}
+
+  @Override
+  public void consume(
+      ParsableByteArray data, long timestamp, int sequenceNumber, boolean rtpMarker) {
+    checkStateNotNull(trackOutput);
+
+    if (!validateVp9Descriptor(data, sequenceNumber)) {
+      // The packet is unusable, so the frame it belongs to cannot be completed. Forget the partial
+      // frame; assembly resumes at the next packet that starts a frame.
+      resetFrameState();
+      return;
+    }
+
+    if (fragmentedSampleSizeBytes == 0) {
+      // First fragment of the frame. Bit 0x04 of the first payload octet is the frame_type of the
+      // VP9 uncompressed header (0 = key frame). Remember it, because the sample is only output
+      // once the packet carrying the RTP marker arrives.
+      isKeyFrame = data.bytesLeft() > 0 && (data.peekUnsignedByte() & 0x04) == 0;
+    }
+
+    if (!reportedOutputFormat && width != C.LENGTH_UNSET && height != C.LENGTH_UNSET) {
+      if (width != payloadFormat.format.width || height != payloadFormat.format.height) {
+        trackOutput.format(
+            payloadFormat.format.buildUpon().setWidth(width).setHeight(height).build());
+      }
+      reportedOutputFormat = true;
+    }
+
+    int currentFragmentSizeBytes = data.bytesLeft();
+    // Write the video sample.
+    trackOutput.sampleData(data, currentFragmentSizeBytes);
+    fragmentedSampleSizeBytes += currentFragmentSizeBytes;
+
+    if (rtpMarker) {
+      if (firstReceivedTimestamp == C.TIME_UNSET) {
+        firstReceivedTimestamp = timestamp;
+      }
+      long timeUs = toSampleUs(startTimeOffsetUs, timestamp, firstReceivedTimestamp);
+      @C.BufferFlags int bufferFlags = isKeyFrame ? C.BUFFER_FLAG_KEY_FRAME : 0;
+      trackOutput.sampleMetadata(
+          timeUs,
+          bufferFlags,
+          fragmentedSampleSizeBytes,
+          /* offset= */ 0,
+          /* cryptoData= */ null);
+      resetFrameState();
+    }
+    previousSequenceNumber = sequenceNumber;
+  }
+
+  @Override
+  public void seek(long nextRtpTimestamp, long timeUs) {
+    firstReceivedTimestamp = nextRtpTimestamp;
+    startTimeOffsetUs = timeUs;
+    // A partially assembled frame belongs to the position before the seek.
+    resetFrameState();
+  }
+
+  // Internal methods.
+
+  /**
+   * Returns {@code true} and sets the {@link ParsableByteArray#getPosition() payload.position} to
+   * the end of the descriptor, if a valid VP9 descriptor is present.
+   *
+   * <p>Returns {@code false} when the packet must be dropped: the descriptor is truncated, or the
+   * packet does not start a frame while no frame is being assembled.
+   *
+   * <p>Flexible mode (F bit set) is not supported and is rejected through {@code checkArgument}.
+   */
+  private boolean validateVp9Descriptor(ParsableByteArray payload, int packetSequenceNumber) {
+    // VP9 Payload Descriptor, Section 4.2
+    //         0 1 2 3 4 5 6 7
+    //        +-+-+-+-+-+-+-+-+
+    //        |I|P|L|F|B|E|V|Z| (REQUIRED)
+    //        +-+-+-+-+-+-+-+-+
+    //   I:   |M| PICTURE ID  | (RECOMMENDED)
+    //        +-+-+-+-+-+-+-+-+
+    //   M:   | EXTENDED PID  | (RECOMMENDED)
+    //        +-+-+-+-+-+-+-+-+
+    //   L:   | TID |U| SID |D| (Conditionally RECOMMENDED)
+    //        +-+-+-+-+-+-+-+-+
+    //        |   TL0PICIDX   | (Conditionally REQUIRED)
+    //        +-+-+-+-+-+-+-+-+
+    //   V:   | SS            |
+    //        | ..            |
+    //        +-+-+-+-+-+-+-+-+
+
+    if (payload.bytesLeft() < 1) {
+      return false;
+    }
+    int header = payload.readUnsignedByte();
+
+    if (gotFirstPacketOfVp9Frame) {
+      // Check that this packet is in the sequence of the previous packet.
+      int expectedSequenceNumber = RtpPacket.getNextSequenceNumber(previousSequenceNumber);
+      if (packetSequenceNumber != expectedSequenceNumber) {
+        Log.w(
+            TAG,
+            Util.formatInvariant(
+                "Received RTP packet with unexpected sequence number. Expected: %d; received: %d."
+                    + " Dropping the partially received frame.",
+                expectedSequenceNumber, packetSequenceNumber));
+        // The frame in progress can no longer be completed. Abandon it so that this packet, or a
+        // later one, can start a new frame instead of every following packet being rejected.
+        resetFrameState();
+      }
+    }
+
+    if (!gotFirstPacketOfVp9Frame) {
+      // No frame is being assembled, so the B bit (start of a frame) must be set.
+      if ((header & 0x08) == 0) {
+        Log.w(
+            TAG,
+            "First payload octet of the RTP packet is not the beginning of a new VP9 partition,"
+                + " Dropping current packet.");
+        return false;
+      }
+      gotFirstPacketOfVp9Frame = true;
+    }
+
+    // Check if optional I header is present.
+    if ((header & 0x80) != 0) {
+      if (payload.bytesLeft() < 1) {
+        return false;
+      }
+      int optionalHeader = payload.readUnsignedByte();
+      // Check M for 15 bits PictureID. When set, a second PictureID octet follows and must be
+      // skipped so that the remaining fields are read from the right offset.
+      if ((optionalHeader & 0x80) != 0) {
+        if (payload.bytesLeft() < 1) {
+          return false;
+        }
+        payload.skipBytes(1);
+      }
+    }
+
+    // Flexible-mode is not implemented.
+    checkArgument((header & 0x10) == 0, "VP9 flexible mode is not supported.");
+
+    // Check if the optional L header is present. Flexible mode was rejected above, so the layer
+    // indices octet is always followed by TL0PICIDX.
+    if ((header & 0x20) != 0) {
+      if (payload.bytesLeft() < 2) {
+        return false;
+      }
+      payload.skipBytes(2);
+    }
+
+    // Check if the optional V header is present, Refer to RFC Section 4.2.1.
+    if ((header & 0x02) != 0) {
+      if (payload.bytesLeft() < 1) {
+        return false;
+      }
+      int scalabilityStructure = payload.readUnsignedByte();
+      int spatialLayersCount = (scalabilityStructure >> 5) & 0x7;
+
+      // Check Y bit.
+      if ((scalabilityStructure & 0x10) != 0) {
+        int scalabilityStructureCount = spatialLayersCount + 1;
+        if (payload.bytesLeft() < scalabilityStructureCount * SCALABILITY_STRUCTURE_SIZE) {
+          return false;
+        }
+        // Each spatial layer has its own size; the last one read is kept.
+        for (int index = 0; index < scalabilityStructureCount; index++) {
+          width = payload.readUnsignedShort();
+          height = payload.readUnsignedShort();
+        }
+      }
+
+      // Checks G bit, skips all additional temporal layers.
+      if ((scalabilityStructure & 0x08) != 0) {
+        if (payload.bytesLeft() < 1) {
+          return false;
+        }
+        // Reads N_G.
+        int numOfPicInPictureGroup = payload.readUnsignedByte();
+        for (int picIndex = 0; picIndex < numOfPicInPictureGroup; picIndex++) {
+          if (payload.bytesLeft() < 1) {
+            return false;
+          }
+          // A picture entry is one octet: | TID |U| R |-|-|. R is the number of P_DIFF octets.
+          int picture = payload.readUnsignedByte();
+          int referenceIndices = (picture & 0x0C) >> 2;
+          if (payload.bytesLeft() < referenceIndices) {
+            return false;
+          }
+          // Ignore Reference indices.
+          payload.skipBytes(referenceIndices);
+        }
+      }
+    }
+    return true;
+  }
+
+  /** Forgets the frame currently being assembled, so the next frame start begins a new sample. */
+  private void resetFrameState() {
+    gotFirstPacketOfVp9Frame = false;
+    fragmentedSampleSizeBytes = 0;
+    isKeyFrame = false;
+  }
+
+  private static long toSampleUs(
+      long startTimeOffsetUs, long rtpTimestamp, long firstReceivedRtpTimestamp) {
+    return startTimeOffsetUs
+        + Util.scaleLargeTimestamp(
+            (rtpTimestamp - firstReceivedRtpTimestamp),
+            /* multiplier= */ C.MICROS_PER_SECOND,
+            /* divisor= */ MEDIA_CLOCK_FREQUENCY);
+  }
+}