Add support for RTSP H265

Added H265 RTP packet reader and added support for H265
playback through RTSP

Change-Id: Ic0e135768bb92cd3343212c2edd84bae6947320e
This commit is contained in:
Rakesh Kumar 2022-01-13 15:16:06 +05:30
parent b208d6d26e
commit 4b99db938a
4 changed files with 376 additions and 3 deletions

View File

@ -39,12 +39,14 @@ public final class RtpPayloadFormat {
private static final String RTP_MEDIA_AC3 = "AC3";
private static final String RTP_MEDIA_MPEG4_GENERIC = "MPEG4-GENERIC";
private static final String RTP_MEDIA_H264 = "H264";
private static final String RTP_MEDIA_H265 = "H265";
/** Returns whether the format of a {@link MediaDescription} is supported. */
public static boolean isFormatSupported(MediaDescription mediaDescription) {
switch (Ascii.toUpperCase(mediaDescription.rtpMapAttribute.mediaEncoding)) {
case RTP_MEDIA_AC3:
case RTP_MEDIA_H264:
case RTP_MEDIA_H265:
case RTP_MEDIA_MPEG4_GENERIC:
return true;
default:
@ -65,6 +67,8 @@ public final class RtpPayloadFormat {
return MimeTypes.AUDIO_AC3;
case RTP_MEDIA_H264:
return MimeTypes.VIDEO_H264;
case RTP_MEDIA_H265:
return MimeTypes.VIDEO_H265;
case RTP_MEDIA_MPEG4_GENERIC:
return MimeTypes.AUDIO_AAC;
default:

View File

@ -44,6 +44,11 @@ import com.google.common.collect.ImmutableMap;
// Format specific parameter names.
private static final String PARAMETER_PROFILE_LEVEL_ID = "profile-level-id";
private static final String PARAMETER_SPROP_PARAMS = "sprop-parameter-sets";
private static final String H265_SPS = "sprop-sps";
private static final String H265_PPS = "sprop-pps";
private static final String H265_VPS = "sprop-vps";
private static final String H265_MAX_DON_DIFF = "sprop-max-don-diff";
/** Prefix for the RFC6381 codecs string for AAC formats. */
private static final String AAC_CODECS_PREFIX = "mp4a.40.";
/** Prefix for the RFC6381 codecs string for AVC formats. */
@ -120,6 +125,10 @@ import com.google.common.collect.ImmutableMap;
checkArgument(!fmtpParameters.isEmpty());
processH264FmtpAttribute(formatBuilder, fmtpParameters);
break;
case MimeTypes.VIDEO_H265:
checkArgument(!fmtpParameters.isEmpty());
processH265FmtpAttribute(formatBuilder, fmtpParameters);
break;
case MimeTypes.AUDIO_AC3:
// AC3 does not require a FMTP attribute. Fall through.
default:
@ -168,8 +177,8 @@ import com.google.common.collect.ImmutableMap;
checkArgument(parameterSets.length == 2);
ImmutableList<byte[]> initializationData =
ImmutableList.of(
getH264InitializationDataFromParameterSet(parameterSets[0]),
getH264InitializationDataFromParameterSet(parameterSets[1]));
getInitializationDataFromParameterSet(parameterSets[0]),
getInitializationDataFromParameterSet(parameterSets[1]));
formatBuilder.setInitializationData(initializationData);
// Process SPS (Sequence Parameter Set).
@ -191,7 +200,7 @@ import com.google.common.collect.ImmutableMap;
}
}
private static byte[] getH264InitializationDataFromParameterSet(String parameterSet) {
private static byte[] getInitializationDataFromParameterSet(String parameterSet) {
byte[] decodedParameterNalData = Base64.decode(parameterSet, Base64.DEFAULT);
byte[] decodedParameterNalUnit =
new byte[decodedParameterNalData.length + NAL_START_CODE.length];
@ -210,6 +219,47 @@ import com.google.common.collect.ImmutableMap;
return decodedParameterNalUnit;
}
private static void processH265FmtpAttribute(
Format.Builder formatBuilder, ImmutableMap<String, String> fmtpAttributes) {
if (fmtpAttributes.containsKey(H265_MAX_DON_DIFF)) {
checkArgument(Integer.parseInt(checkNotNull(fmtpAttributes.get(H265_MAX_DON_DIFF))) == 0);
}
checkArgument(fmtpAttributes.containsKey(H265_SPS));
String spropSPS = checkNotNull(fmtpAttributes.get(H265_SPS));
checkArgument(fmtpAttributes.containsKey(H265_PPS));
String spropPPS = checkNotNull(fmtpAttributes.get(H265_PPS));
checkArgument(fmtpAttributes.containsKey(H265_VPS));
String spropVPS = checkNotNull(fmtpAttributes.get(H265_VPS));
String[] parameterSets = new String[] {spropSPS, spropPPS, spropVPS};
checkArgument(parameterSets.length == 3);
ImmutableList<byte[]> initializationData =
ImmutableList.of(
getInitializationDataFromParameterSet(parameterSets[0]),
getInitializationDataFromParameterSet(parameterSets[1]),
getInitializationDataFromParameterSet(parameterSets[2]));
formatBuilder.setInitializationData(initializationData);
// Process SPS (Sequence Parameter Set).
byte[] spsNalDataWithStartCode = initializationData.get(0);
NalUnitUtil.H265SpsData spsData =
NalUnitUtil.parseH265SpsNalUnit(
spsNalDataWithStartCode, NAL_START_CODE.length, spsNalDataWithStartCode.length);
formatBuilder.setPixelWidthHeightRatio(spsData.pixelWidthHeightRatio);
formatBuilder.setHeight(spsData.height);
formatBuilder.setWidth(spsData.width);
formatBuilder.setCodecs(
CodecSpecificDataUtil.buildHevcCodecString(
spsData.generalProfileSpace,
spsData.generalTierFlag,
spsData.generalProfileIdc,
spsData.generalProfileCompatibilityFlags,
spsData.constraintBytes,
spsData.generalLevelIdc));
}
/**
* Extracts the track URI.
*

View File

@ -38,6 +38,8 @@ import androidx.media3.exoplayer.rtsp.RtpPayloadFormat;
return new RtpAacReader(payloadFormat);
case MimeTypes.VIDEO_H264:
return new RtpH264Reader(payloadFormat);
case MimeTypes.VIDEO_H265:
return new RtpH265Reader(payloadFormat);
default:
// No supported reader, returning null.
}

View File

@ -0,0 +1,317 @@
/*
* Copyright 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package androidx.media3.exoplayer.rtsp.reader;
import static androidx.media3.common.util.Assertions.checkNotNull;
import static androidx.media3.common.util.Assertions.checkStateNotNull;
import static androidx.media3.common.util.Util.castNonNull;
import androidx.media3.common.C;
import androidx.media3.common.ParserException;
import androidx.media3.common.util.Log;
import androidx.media3.common.util.ParsableByteArray;
import androidx.media3.common.util.Util;
import androidx.media3.exoplayer.rtsp.RtpPacket;
import androidx.media3.exoplayer.rtsp.RtpPayloadFormat;
import androidx.media3.extractor.ExtractorOutput;
import androidx.media3.extractor.NalUnitUtil;
import androidx.media3.extractor.TrackOutput;
import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
import org.checkerframework.checker.nullness.qual.RequiresNonNull;
/**
* Parses an H265 byte stream carried on RTP packets, and extracts H265 Access Units. Refer to
* RFC7798 for more details.
*/
/* package */ final class RtpH265Reader implements RtpPayloadReader {
private static final String TAG = "RtpH265Reader";
private static final long MEDIA_CLOCK_FREQUENCY = 90_000;
/** Offset of payload data within a FU type A payload. */
private static final int FU_PAYLOAD_OFFSET = 3;
/** Single Time Aggregation Packet type A. */
private static final int RTP_PACKET_TYPE_STAP_A = 48; // RFC7798 Section 4.4.2
/** Fragmentation Unit type A. */
private static final int RTP_PACKET_TYPE_FU_A = 49;
/** IDR NAL unit type. */
private static final int NAL_IDR_W_LP = 19;
private static final int NAL_IDR_N_LP = 20;
/** Scratch for Fragmentation Unit RTP packets. */
private final ParsableByteArray fuScratchBuffer;
private final ParsableByteArray nalStartCodeArray =
new ParsableByteArray(NalUnitUtil.NAL_START_CODE);
private final RtpPayloadFormat payloadFormat;
private @MonotonicNonNull TrackOutput trackOutput;
@C.BufferFlags private int bufferFlags;
private long firstReceivedTimestamp;
private int previousSequenceNumber;
/** The combined size of a sample that is fragmented into multiple RTP packets. */
private int fragmentedSampleSizeBytes;
private long startTimeOffsetUs;
/** Creates an instance. */
public RtpH265Reader(RtpPayloadFormat payloadFormat) {
this.payloadFormat = payloadFormat;
fuScratchBuffer = new ParsableByteArray();
firstReceivedTimestamp = C.TIME_UNSET;
previousSequenceNumber = C.INDEX_UNSET;
}
@Override
public void createTracks(ExtractorOutput extractorOutput, int trackId) {
trackOutput = extractorOutput.track(trackId, C.TRACK_TYPE_VIDEO);
castNonNull(trackOutput).format(payloadFormat.format);
}
@Override
public void onReceivingFirstPacket(long timestamp, int sequenceNumber) {}
@Override
public void consume(ParsableByteArray data, long timestamp, int sequenceNumber, boolean rtpMarker)
throws ParserException {
int payloadType;
try {
// RFC7798 Section 1.1.4. NAL Unit Header
payloadType = (data.getData()[0] >> 1) & 0x3F; // Type - Bits 1 to 6, inclusive.
} catch (IndexOutOfBoundsException e) {
throw ParserException.createForMalformedManifest(/* message= */ null, e);
}
checkStateNotNull(trackOutput);
if (payloadType >= 0 && payloadType < RTP_PACKET_TYPE_STAP_A) {
processSingleNalUnitPacket(data);
} else if (payloadType == RTP_PACKET_TYPE_STAP_A) {
processSingleTimeAggregationPacket(data);
} else if (payloadType == RTP_PACKET_TYPE_FU_A) {
processFragmentationUnitPacket(data, sequenceNumber);
} else {
throw ParserException.createForMalformedManifest(
String.format("RTP H265 payload type [%d] not supported.", payloadType),
/* cause= */ null);
}
if (rtpMarker) {
if (firstReceivedTimestamp == C.TIME_UNSET) {
firstReceivedTimestamp = timestamp;
}
long timeUs = toSampleUs(startTimeOffsetUs, timestamp, firstReceivedTimestamp);
trackOutput.sampleMetadata(
timeUs,
bufferFlags,
fragmentedSampleSizeBytes,
/* offset= */ 0,
/* encryptionData= */ null);
fragmentedSampleSizeBytes = 0;
}
previousSequenceNumber = sequenceNumber;
}
@Override
public void seek(long nextRtpTimestamp, long timeUs) {
firstReceivedTimestamp = nextRtpTimestamp;
fragmentedSampleSizeBytes = 0;
startTimeOffsetUs = timeUs;
}
// Internal methods.
/**
* Processes Single NAL Unit packet (RFC7798 Section 4.4.1).
*
* <p>Outputs the single NAL Unit (with start code prepended) to {@link #trackOutput}. Sets {@link
* #bufferFlags} and {@link #fragmentedSampleSizeBytes} accordingly.
*/
@RequiresNonNull("trackOutput")
private void processSingleNalUnitPacket(ParsableByteArray data) {
// Example of a Single Nal Unit packet
// 0 1 2 3
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | PayloadHdr | DONL (conditional) |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | |
// | NAL unit payload data |
// | |
// | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | :...OPTIONAL RTP padding |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
int numBytesInData = data.bytesLeft();
fragmentedSampleSizeBytes += writeStartCode();
trackOutput.sampleData(data, numBytesInData);
fragmentedSampleSizeBytes += numBytesInData;
int nalHeaderType = (data.getData()[0] >> 1) & 0x3F;
bufferFlags = getBufferFlagsFromNalType(nalHeaderType);
}
/**
* Processes STAP Type A packet (RFC7798 Section 4.4.2).
*
* <p>Outputs the received aggregation units (with start code prepended) to {@link #trackOutput}.
* Sets {@link #bufferFlags} and {@link #fragmentedSampleSizeBytes} accordingly.
*/
@RequiresNonNull("trackOutput")
private void processSingleTimeAggregationPacket(ParsableByteArray data) throws ParserException {
// An Example of an AP Packet Containing Two Aggregation
// Units without the DONL and DOND Fields.
// 0 1 2 3
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | RTP Header |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | PayloadHdr (Type=48) | NALU 1 Size |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | NALU 1 HDR | |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- NALU 1 Data |
// | |
// | |
// + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | | NALU 2 Size | NALU 2 HDR |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | NALU 2 HDR | |
// +-+-+-+-+-+-+-+- NALU 2 Data |
// | |
// | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | :...OPTIONAL RTP padding |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
throw ParserException.createForMalformedManifest(
"need to implement processSingleTimeAggregationPacket",
/* cause= */ null);
}
/**
* Processes Fragmentation Unit Type A packet (RFC7798 Section 4.4.3).
*
* <p>This method will be invoked multiple times to receive a single frame that is broken down
* into a series of fragmentation units in multiple RTP packets.
*
* <p>Outputs the received fragmentation units (with start code prepended) to {@link
* #trackOutput}. Sets {@link #bufferFlags} and {@link #fragmentedSampleSizeBytes} accordingly.
*/
@RequiresNonNull("trackOutput")
private void processFragmentationUnitPacket(ParsableByteArray data, int packetSequenceNumber) {
// Example of a FU packet
// 0 1 2 3
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | PayloadHdr (Type=49) | FU header | DONL (cond) |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-|
// | DONL (cond) | |
// |-+-+-+-+-+-+-+-+ |
// | FU payload |
// | |
// | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | :...OPTIONAL RTP padding |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// FU header
// +---------------+
// |0|1|2|3|4|5|6|7|
// +-+-+-+-+-+-+-+-+
// |S|E| FuType |
// +---------------+
int tid = (data.getData()[1] & 0x7); // last 3 bits in byte 1 of payload header section 1.1.4
int fuHeader = data.getData()[2];
int nalUnitType = fuHeader & 0x3F;
byte nalHeader[] = new byte[2];
nalHeader[0] = (byte) (nalUnitType << 1); // Section: 1.1.4
// According to section 1.1.4 in rfc7798, layerId is required to be zero so keeping its value
// zero and copying only tid.
nalHeader[1] = (byte) tid;
boolean isFirstFuPacket = (fuHeader & 0x80) > 0;
boolean isLastFuPacket = (fuHeader & 0x40) > 0;
if (isFirstFuPacket) {
// Prepends starter code.
fragmentedSampleSizeBytes += writeStartCode();
// The bytes needed is 2 (NALU header) + payload size. The original data array has size 3
// (2 payload + 1 FU header) + payload size. Thus setting the correct header and set position
// to 1.
// Overwrite byte 1 of payload header with byte 0 of HEVC nal header
data.getData()[1] = (byte) nalHeader[0];
// Overwrite byte FU Header with byte 1 of HEVC nal header
data.getData()[2] = (byte) nalHeader[1];
fuScratchBuffer.reset(data.getData());
fuScratchBuffer.setPosition(1);
} else {
// Check that this packet is in the sequence of the previous packet.
int expectedSequenceNumber = (previousSequenceNumber + 1) % RtpPacket.MAX_SEQUENCE_NUMBER;
if (packetSequenceNumber != expectedSequenceNumber) {
Log.w(
TAG,
Util.formatInvariant(
"Received RTP packet with unexpected sequence number. Expected: %d; received: %d."
+ " Dropping packet.",
expectedSequenceNumber, packetSequenceNumber));
return;
}
// Setting position to ignore payload and FU header.
fuScratchBuffer.reset(data.getData());
fuScratchBuffer.setPosition(FU_PAYLOAD_OFFSET);
}
int fragmentSize = fuScratchBuffer.bytesLeft();
trackOutput.sampleData(fuScratchBuffer, fragmentSize);
fragmentedSampleSizeBytes += fragmentSize;
if (isLastFuPacket) {
bufferFlags = getBufferFlagsFromNalType(nalUnitType);
}
}
private int writeStartCode() {
nalStartCodeArray.setPosition(/* position= */ 0);
int bytesWritten = nalStartCodeArray.bytesLeft();
checkNotNull(trackOutput).sampleData(nalStartCodeArray, bytesWritten);
return bytesWritten;
}
private static long toSampleUs(
long startTimeOffsetUs, long rtpTimestamp, long firstReceivedRtpTimestamp) {
return startTimeOffsetUs
+ Util.scaleLargeTimestamp(
(rtpTimestamp - firstReceivedRtpTimestamp),
/* multiplier= */ C.MICROS_PER_SECOND,
/* divisor= */ MEDIA_CLOCK_FREQUENCY);
}
@C.BufferFlags
private static int getBufferFlagsFromNalType(int nalType) {
return (nalType == NAL_IDR_W_LP || nalType == NAL_IDR_N_LP) ? C.BUFFER_FLAG_KEY_FRAME : 0;
}
}