Transformer GL: Implement auto-scaling to preserve input frame.

PiperOrigin-RevId: 427982223
This commit is contained in:
huangdarwin 2022-02-11 13:47:20 +00:00 committed by Ian Baker
parent 8b180eb040
commit b5ed01d479
4 changed files with 82 additions and 64 deletions

View File

@ -152,7 +152,7 @@ public final class ConfigurationActivity extends AppCompatActivity {
rotateAdapter.setDropDownViewResource(android.R.layout.simple_spinner_dropdown_item);
rotateSpinner = findViewById(R.id.rotate_spinner);
rotateSpinner.setAdapter(rotateAdapter);
rotateAdapter.addAll(SAME_AS_INPUT_OPTION, "0", "10", "45", "90", "180");
rotateAdapter.addAll(SAME_AS_INPUT_OPTION, "0", "10", "45", "60", "90", "180");
enableHdrEditingCheckBox = findViewById(R.id.hdr_editing_checkbox);
}

View File

@ -107,7 +107,7 @@ public final class DefaultEncoderFactory implements Codec.EncoderFactory {
checkArgument(format.width != Format.NO_VALUE);
checkArgument(format.height != Format.NO_VALUE);
// According to interface Javadoc, format.rotationDegrees should be 0. The video should always
// be in landscape orientation.
// be encoded in landscape orientation.
checkArgument(format.height <= format.width);
checkArgument(format.rotationDegrees == 0);
checkNotNull(format.sampleMimeType);

View File

@ -26,7 +26,6 @@ import androidx.media3.common.util.UnstableApi;
import androidx.media3.common.util.Util;
import androidx.media3.exoplayer.source.MediaSource;
import androidx.media3.extractor.mp4.Mp4Extractor;
import com.google.common.collect.ImmutableSet;
/** A media transformation request. */
@UnstableApi
@ -35,9 +34,6 @@ public final class TransformationRequest {
/** A builder for {@link TransformationRequest} instances. */
public static final class Builder {
private static final ImmutableSet<Integer> SUPPORTED_OUTPUT_HEIGHTS =
ImmutableSet.of(144, 240, 360, 480, 720, 1080, 1440, 2160);
private Matrix transformationMatrix;
private boolean flattenForSlowMotion;
private int outputHeight;
@ -124,27 +120,17 @@ public final class TransformationRequest {
/**
* Sets the output resolution using the output height. The default value {@link C#LENGTH_UNSET}
* corresponds to using the same height as the input. Output width will scale to preserve the
* input video's aspect ratio.
*
* <p>For now, only "popular" heights like 144, 240, 360, 480, 720, 1080, 1440, or 2160 are
* supported, to ensure compatibility on different devices.
* corresponds to using the same height as the input. Output width of the displayed video will
* scale to preserve the video's aspect ratio after other transformations.
*
* <p>For example, a 1920x1440 video can be scaled to 640x480 by calling setResolution(480).
*
* @param outputHeight The output height in pixels.
* @param outputHeight The output height of the displayed video, in pixels.
* @return This builder.
* @throws IllegalArgumentException If the {@code outputHeight} is not supported.
*/
public Builder setResolution(int outputHeight) {
// TODO(b/209781577): Define outputHeight in the javadoc as height can be ambiguous for videos
// where rotationDegrees is set in the Format.
// TODO(b/201293185): Restructure to input a Presentation class.
// TODO(b/201293185): Check encoder codec capabilities in order to allow arbitrary
// resolutions and reasonable fallbacks.
checkArgument(
outputHeight == C.LENGTH_UNSET || SUPPORTED_OUTPUT_HEIGHTS.contains(outputHeight),
"Unsupported outputHeight: " + outputHeight);
this.outputHeight = outputHeight;
return this;
}

View File

@ -18,6 +18,8 @@ package androidx.media3.transformer;
import static androidx.media3.common.util.Assertions.checkNotNull;
import static androidx.media3.common.util.Util.SDK_INT;
import static java.lang.Math.max;
import static java.lang.Math.min;
import android.content.Context;
import android.graphics.Matrix;
@ -63,76 +65,106 @@ import org.checkerframework.dataflow.qual.Pure;
encoderOutputBuffer =
new DecoderInputBuffer(DecoderInputBuffer.BUFFER_REPLACEMENT_MODE_DISABLED);
// Scale width and height to desired transformationRequest.outputHeight, preserving aspect
// ratio.
// TODO(b/209781577): Think about which edge length should be set for portrait videos.
float inputFormatAspectRatio = (float) inputFormat.width / inputFormat.height;
int outputWidth = inputFormat.width;
int outputHeight = inputFormat.height;
// The decoder rotates encoded frames for display by inputFormat.rotationDegrees.
int decodedWidth =
(inputFormat.rotationDegrees % 180 == 0) ? inputFormat.width : inputFormat.height;
int decodedHeight =
(inputFormat.rotationDegrees % 180 == 0) ? inputFormat.height : inputFormat.width;
float decodedAspectRatio = (float) decodedWidth / decodedHeight;
Matrix transformationMatrix = new Matrix(transformationRequest.transformationMatrix);
int outputWidth = decodedWidth;
int outputHeight = decodedHeight;
if (!transformationMatrix.isIdentity()) {
  // Scale frames by decodedAspectRatio, to account for FrameEditor's normalized device
  // coordinates (NDC) (a square from -1 to 1 for both x and y) and preserve rectangular display
  // of input pixels during transformations (ex. rotations). With scaling, transformationMatrix
  // operations operate on a rectangle for x from -decodedAspectRatio to decodedAspectRatio, and
  // y from -1 to 1.
  transformationMatrix.preScale(/* sx= */ decodedAspectRatio, /* sy= */ 1f);
  transformationMatrix.postScale(/* sx= */ 1f / decodedAspectRatio, /* sy= */ 1f);

  // Map the four corners of the NDC square through the matrix and track the axis-aligned
  // bounding box of the results.
  // NOTE(review): android.graphics.Matrix#mapPoints treats the array as (x, y) pairs, so only
  // indices 0 and 1 of each entry are read below -- confirm the trailing {0, 1} is intentional.
  float[][] transformOnNdcPoints = {{-1, -1, 0, 1}, {-1, 1, 0, 1}, {1, -1, 0, 1}, {1, 1, 0, 1}};
  // Seed the bounds with infinities. Float.MIN_VALUE is the smallest *positive* float, not the
  // most negative one, so using it as the initial maximum yields a wrong bound whenever every
  // mapped corner has a negative coordinate (for example after a translation).
  float xMin = Float.POSITIVE_INFINITY;
  float xMax = Float.NEGATIVE_INFINITY;
  float yMin = Float.POSITIVE_INFINITY;
  float yMax = Float.NEGATIVE_INFINITY;
  for (float[] transformOnNdcPoint : transformOnNdcPoints) {
    transformationMatrix.mapPoints(transformOnNdcPoint);
    xMin = min(xMin, transformOnNdcPoint[0]);
    xMax = max(xMax, transformOnNdcPoint[0]);
    yMin = min(yMin, transformOnNdcPoint[1]);
    yMax = max(yMax, transformOnNdcPoint[1]);
  }

  // Move the center of the bounding box to the NDC origin.
  float xCenter = (xMax + xMin) / 2f;
  float yCenter = (yMax + yMin) / 2f;
  transformationMatrix.postTranslate(-xCenter, -yCenter);

  // Shrink or grow the bounding box so that it spans exactly -1 to 1 on both axes, and apply
  // the inverse factor to the output size so the scaling is carried by the resolution instead.
  float ndcWidthAndHeight = 2f; // Length from -1 to 1.
  float xScale = (xMax - xMin) / ndcWidthAndHeight;
  float yScale = (yMax - yMin) / ndcWidthAndHeight;
  transformationMatrix.postScale(1f / xScale, 1f / yScale);
  outputWidth = Math.round(decodedWidth * xScale);
  outputHeight = Math.round(decodedHeight * yScale);
}
// Scale width and height to desired transformationRequest.outputHeight, preserving
// aspect ratio.
if (transformationRequest.outputHeight != C.LENGTH_UNSET
&& transformationRequest.outputHeight != inputFormat.height) {
outputWidth = Math.round(inputFormatAspectRatio * transformationRequest.outputHeight);
&& transformationRequest.outputHeight != outputHeight) {
outputWidth =
Math.round((float) transformationRequest.outputHeight * outputWidth / outputHeight);
outputHeight = transformationRequest.outputHeight;
}
// The encoder may not support encoding in portrait orientation, so the decoded video is
// rotated to landscape orientation and a rotation is added back later to the output format.
boolean swapEncodingDimensions = inputFormat.height > inputFormat.width;
// Encoders commonly support higher maximum widths than maximum heights. Rotate the decoded
// video before encoding, so the encoded video's width >= height, and set outputRotationDegrees
// to ensure the video is displayed in the correct orientation.
int requestedEncoderWidth;
int requestedEncoderHeight;
boolean swapEncodingDimensions = outputHeight > outputWidth;
if (swapEncodingDimensions) {
outputRotationDegrees = (inputFormat.rotationDegrees + 90) % 360;
int temp = outputWidth;
outputWidth = outputHeight;
outputHeight = temp;
outputRotationDegrees = 90;
requestedEncoderWidth = outputHeight;
requestedEncoderHeight = outputWidth;
// TODO(b/201293185): After fragment shader transformations are implemented, put
// postRotate in a later vertex shader.
transformationMatrix.postRotate(outputRotationDegrees);
} else {
outputRotationDegrees = inputFormat.rotationDegrees;
outputRotationDegrees = 0;
requestedEncoderWidth = outputWidth;
requestedEncoderHeight = outputHeight;
}
float displayAspectRatio =
(inputFormat.rotationDegrees % 180) == 0
? inputFormatAspectRatio
: 1.0f / inputFormatAspectRatio;
Matrix transformationMatrix = new Matrix(transformationRequest.transformationMatrix);
// Scale frames by input aspect ratio, to account for FrameEditor's square normalized device
// coordinates (-1 to 1) and preserve frame relative dimensions during transformations
// (ex. rotations). After this scaling, transformationMatrix operations operate on a rectangle
// for x from -displayAspectRatio to displayAspectRatio, and y from -1 to 1
transformationMatrix.preScale(displayAspectRatio, 1);
transformationMatrix.postScale(1.0f / displayAspectRatio, 1);
// The decoder rotates videos to their intended display orientation. The frameEditor rotates
// them back for improved encoder compatibility.
// TODO(b/201293185): After fragment shader transformations are implemented, put
// postRotate in a later vertex shader.
transformationMatrix.postRotate(outputRotationDegrees);
Format requestedOutputFormat =
Format requestedEncoderFormat =
new Format.Builder()
.setWidth(outputWidth)
.setHeight(outputHeight)
.setWidth(requestedEncoderWidth)
.setHeight(requestedEncoderHeight)
.setRotationDegrees(0)
.setSampleMimeType(
transformationRequest.videoMimeType != null
? transformationRequest.videoMimeType
: inputFormat.sampleMimeType)
.build();
encoder = encoderFactory.createForVideoEncoding(requestedOutputFormat, allowedOutputMimeTypes);
Format actualOutputFormat = encoder.getConfigurationFormat();
encoder = encoderFactory.createForVideoEncoding(requestedEncoderFormat, allowedOutputMimeTypes);
Format encoderSupportedFormat = encoder.getConfigurationFormat();
fallbackListener.onTransformationRequestFinalized(
createFallbackTransformationRequest(
transformationRequest,
!swapEncodingDimensions,
requestedOutputFormat,
actualOutputFormat));
requestedEncoderFormat,
encoderSupportedFormat));
if (transformationRequest.enableHdrEditing
|| inputFormat.height != actualOutputFormat.height
|| inputFormat.width != actualOutputFormat.width
|| inputFormat.height != encoderSupportedFormat.height
|| inputFormat.width != encoderSupportedFormat.width
|| !transformationMatrix.isIdentity()) {
frameEditor =
FrameEditor.create(
context,
actualOutputFormat.width,
actualOutputFormat.height,
encoderSupportedFormat.width,
encoderSupportedFormat.height,
inputFormat.pixelWidthHeightRatio,
transformationMatrix,
/* outputSurface= */ encoder.getInputSurface(),