Add LanczosResample effect to scale videos in Transformer

Add SeparableConvolution.configure(inputSize) to allow effect configuration
depending on input dimensions.
Add LanczosResample.scaleToFit method to scale input images to fit inside
given dimensions.
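
For example (a minimal usage sketch, not taken from this change; the URI and the
1280x720 target size are placeholders), the new effect can be combined with
Presentation when building an EditedMediaItem for export:

  ImmutableList<Effect> videoEffects =
      ImmutableList.of(
          LanczosResample.scaleToFit(/* width= */ 1280, /* height= */ 720),
          Presentation.createForHeight(720));
  EditedMediaItem editedMediaItem =
      new EditedMediaItem.Builder(MediaItem.fromUri(mediaItemUri))
          .setEffects(
              new Effects(/* audioProcessors= */ ImmutableList.of(), videoEffects))
          .build();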

PiperOrigin-RevId: 640498008
dancho 2024-06-05 05:57:54 -07:00 committed by Copybara-Service
parent 3334f0afee
commit 9c4a532d5c
7 changed files with 217 additions and 35 deletions

View File

@@ -67,6 +67,7 @@ import androidx.media3.effect.DrawableOverlay;
import androidx.media3.effect.GlEffect;
import androidx.media3.effect.GlShaderProgram;
import androidx.media3.effect.HslAdjustment;
import androidx.media3.effect.LanczosResample;
import androidx.media3.effect.OverlayEffect;
import androidx.media3.effect.OverlaySettings;
import androidx.media3.effect.Presentation;
@@ -616,6 +617,7 @@ public final class TransformerActivity extends AppCompatActivity {
int resolutionHeight =
bundle.getInt(ConfigurationActivity.RESOLUTION_HEIGHT, /* defaultValue= */ C.LENGTH_UNSET);
if (resolutionHeight != C.LENGTH_UNSET) {
effects.add(LanczosResample.scaleToFit(10000, resolutionHeight));
effects.add(Presentation.createForHeight(resolutionHeight));
}
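In this demo snippet, the very large width passed to scaleToFit (10000) means the height constraint wins for any realistic input, so the frame is scaled to resolutionHeight with its aspect ratio preserved before Presentation.createForHeight fixes the output height.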

View File

@@ -95,9 +95,10 @@ public class LanczosResampleTest {
@Test
public void queueInputFrame_with6xDownscale_matchesGoldenFile() throws Exception {
float scale = 1f / 6;
lanczosShaderProgram =
new LanczosResample(/* radius= */ 3, scale).toGlShaderProgram(context, /* useHdr= */ false);
Size outputSize = new Size((int) (inputWidth * scale), (int) (inputHeight * scale));
lanczosShaderProgram =
LanczosResample.scaleToFit(outputSize.getWidth(), outputSize.getHeight())
.toGlShaderProgram(context, /* useHdr= */ false);
setupOutputTexture(outputSize.getWidth(), outputSize.getHeight());
Bitmap expectedBitmap = readBitmap(DOWNSCALED_6X_PNG_ASSET_PATH);
GlTextureInfo inputTextureInfo =

View File

@@ -15,6 +15,7 @@
*/
package androidx.media3.effect;
import androidx.media3.common.util.Size;
import androidx.media3.common.util.UnstableApi;
/**
@@ -25,6 +26,28 @@ import androidx.media3.common.util.UnstableApi;
@UnstableApi
public interface ConvolutionFunction1D {
/** A configurable provider for {@link ConvolutionFunction1D} instances. */
interface Provider {
/**
* Configures the provider to return {@linkplain ConvolutionFunction1D 1D convolution functions}
* based on the input frame dimensions.
*
* <p>This method must be called before {@link #getConvolution(long)}.
*
* @param inputSize The input frame size on which to apply a convolution.
* @return The output frame size after applying the convolution.
*/
Size configure(Size inputSize);
/**
* Returns a {@linkplain ConvolutionFunction1D 1D convolution function}.
*
* @param presentationTimeUs The presentation timestamp of the input frame, in microseconds.
*/
ConvolutionFunction1D getConvolution(long presentationTimeUs);
}
/** Returns the start of the domain. */
float domainStart();
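As an illustration of the new Provider contract (a hypothetical sketch, not part of this change): a provider that always applies one fixed function and leaves the frame dimensions untouched could look like this.

  final class FixedFunctionProvider implements ConvolutionFunction1D.Provider {
    private final ConvolutionFunction1D function;

    FixedFunctionProvider(ConvolutionFunction1D function) {
      this.function = function;
    }

    @Override
    public Size configure(Size inputSize) {
      // No resizing: output frames keep the input dimensions.
      return inputSize;
    }

    @Override
    public ConvolutionFunction1D getConvolution(long presentationTimeUs) {
      return function;
    }
  }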

View File

@@ -15,31 +15,98 @@
*/
package androidx.media3.effect;
import static androidx.media3.common.util.Assertions.checkArgument;
import static java.lang.Math.round;
import android.content.Context;
import androidx.annotation.FloatRange;
import androidx.annotation.IntRange;
import androidx.media3.common.VideoFrameProcessingException;
import androidx.media3.common.util.Size;
import androidx.media3.common.util.UnstableApi;
/**
* A {@link SeparableConvolution} that applies a Lanczos-windowed sinc function when resampling an
* image. See Filters for Common Resampling Tasks, Ken Turkowski.
* A {@link GlEffect} that applies a Lanczos-windowed sinc function when resampling an image. See
* Filters for Common Resampling Tasks, Ken Turkowski.
*
* <p>The filter rescales images in both dimensions with the same scaling factor.
*/
/* package */ final class LanczosResample extends SeparableConvolution {
@UnstableApi
public final class LanczosResample implements GlEffect {
// Default value for the radius, or alpha parameter, used by the Lanczos filter. A value of 3 is
// used by ffmpeg (https://ffmpeg.org/ffmpeg-scaler.html), libplacebo, and Apple's vImage library.
private static final float DEFAULT_RADIUS = 3f;
private final float radius;
private final float scale;
private final int width;
private final int height;
/**
* Creates an instance.
*
* @param radius The non-zero radius of the Lanczos reconstruction kernel.
* @param scale The scaling factor to be applied when scaling the input image.
* @param width The width inside which the output contents will fit.
* @param height The height inside which the output contents will fit.
*/
public LanczosResample(float radius, float scale) {
super(scale, scale);
public static LanczosResample scaleToFit(
@IntRange(from = 1) int width, @IntRange(from = 1) int height) {
checkArgument(width > 0);
checkArgument(height > 0);
return new LanczosResample(DEFAULT_RADIUS, width, height);
}
private LanczosResample(float radius, int width, int height) {
this.radius = radius;
this.scale = scale;
this.width = width;
this.height = height;
}
@Override
public ConvolutionFunction1D getConvolution(long presentationTimeUs) {
return new ScaledLanczosFunction(radius, scale);
public GlShaderProgram toGlShaderProgram(Context context, boolean useHdr)
throws VideoFrameProcessingException {
return new SeparableConvolutionShaderProgram(
context, useHdr, new LanczosResampleScaledFunctionProvider(radius, width, height));
}
private static class LanczosResampleScaledFunctionProvider
implements ConvolutionFunction1D.Provider {
// Note: We deliberately don't use Float.MIN_VALUE because it's positive & very close to zero.
private static final float SCALE_UNSET = -Float.MAX_VALUE;
private final float radius;
private final int width;
private final int height;
private float scale;
private LanczosResampleScaledFunctionProvider(
@FloatRange(from = 0, fromInclusive = false) float radius,
@IntRange(from = 1) int width,
@IntRange(from = 1) int height) {
checkArgument(radius > 0);
checkArgument(width > 0);
checkArgument(height > 0);
this.radius = radius;
this.width = width;
this.height = height;
scale = SCALE_UNSET;
}
@Override
public ConvolutionFunction1D getConvolution(long presentationTimeUs) {
return new ScaledLanczosFunction(radius, scale);
}
@Override
public Size configure(Size inputSize) {
checkArgument(inputSize.getWidth() > 0);
checkArgument(inputSize.getHeight() > 0);
// Scale to fit, preserving aspect ratio.
if (inputSize.getHeight() * width <= height * inputSize.getWidth()) {
scale = (float) width / inputSize.getWidth();
return new Size(width, round(inputSize.getHeight() * scale));
} else {
scale = (float) height / inputSize.getHeight();
return new Size(round(inputSize.getWidth() * scale), height);
}
}
}
}
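A worked example of the configure logic above (illustrative numbers only): for a 1920x1080 input and scaleToFit(640, 240), 1080 * 640 = 691200 is greater than 240 * 1920 = 460800, so the height branch is taken; scale = 240 / 1080 ≈ 0.222 and the output size is round(1920 * 0.222) x 240 = 427x240, which fits inside 640x240 with the aspect ratio preserved.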

View File

@@ -53,9 +53,7 @@ public class SeparableConvolutionShaderProgram implements GlShaderProgram {
private final GlProgram glProgram;
private final boolean useHdr;
private final SeparableConvolution convolution;
private final float scaleWidth;
private final float scaleHeight;
private final ConvolutionFunction1D.Provider convolutionFunction1DProvider;
private GlShaderProgram.InputListener inputListener;
private GlShaderProgram.OutputListener outputListener;
@@ -94,10 +92,24 @@ public class SeparableConvolutionShaderProgram implements GlShaderProgram {
float scaleWidth,
float scaleHeight)
throws VideoFrameProcessingException {
this(context, useHdr, new SeparableConvolutionWrapper(convolution, scaleWidth, scaleHeight));
}
/**
* Creates an instance.
*
* @param context The {@link Context}.
* @param useHdr Whether input textures come from an HDR source. If {@code true}, colors will be
* in linear RGB BT.2020. If {@code false}, colors will be in linear RGB BT.709.
* @param convolutionFunction1DProvider The {@link ConvolutionFunction1D.Provider} which will
* provide the 1D convolution function to apply in each direction.
* @throws VideoFrameProcessingException If a problem occurs while reading shader files.
*/
public SeparableConvolutionShaderProgram(
Context context, boolean useHdr, ConvolutionFunction1D.Provider convolutionFunction1DProvider)
throws VideoFrameProcessingException {
this.useHdr = useHdr;
this.convolution = convolution;
this.scaleWidth = scaleWidth;
this.scaleHeight = scaleHeight;
this.convolutionFunction1DProvider = convolutionFunction1DProvider;
inputListener = new InputListener() {};
outputListener = new OutputListener() {};
errorListener = (frameProcessingException) -> {};
@@ -226,20 +238,6 @@ public class SeparableConvolutionShaderProgram implements GlShaderProgram {
GlUtil.checkGlError();
}
private Size configure(Size inputSize) {
// Draw the frame on the entire normalized device coordinate space, from -1 to 1, for x and y.
glProgram.setBufferAttribute(
"aFramePosition",
GlUtil.getNormalizedCoordinateBounds(),
GlUtil.HOMOGENEOUS_COORDINATE_VECTOR_SIZE);
float[] identityMatrix = GlUtil.create4x4IdentityMatrix();
glProgram.setFloatsUniform("uTransformationMatrix", identityMatrix);
glProgram.setFloatsUniform("uTexTransformationMatrix", identityMatrix);
return new Size(
(int) (inputSize.getWidth() * scaleWidth), (int) (inputSize.getHeight() * scaleHeight));
}
private void renderHorizontal(GlTextureInfo inputTexture) throws GlUtil.GlException {
// The horizontal pass reads from the input texture and renders to the intermediate texture.
GlUtil.focusFramebufferUsingCurrentContext(
@@ -259,8 +257,9 @@ public class SeparableConvolutionShaderProgram implements GlShaderProgram {
private void ensureTexturesAreConfigured(
GlObjectsProvider glObjectsProvider, Size inputSize, long presentationTimeUs)
throws GlUtil.GlException {
outputSize = convolutionFunction1DProvider.configure(inputSize);
ConvolutionFunction1D currentConvolutionFunction =
convolution.getConvolution(presentationTimeUs);
convolutionFunction1DProvider.getConvolution(presentationTimeUs);
if (!currentConvolutionFunction.equals(lastConvolutionFunction)) {
updateFunctionTexture(currentConvolutionFunction);
lastConvolutionFunction = currentConvolutionFunction;
@@ -271,7 +270,15 @@ public class SeparableConvolutionShaderProgram implements GlShaderProgram {
return;
}
outputSize = configure(inputSize);
// Draw the frame on the entire normalized device coordinate space, from -1 to 1, for x and y.
glProgram.setBufferAttribute(
"aFramePosition",
GlUtil.getNormalizedCoordinateBounds(),
GlUtil.HOMOGENEOUS_COORDINATE_VECTOR_SIZE);
float[] identityMatrix = GlUtil.create4x4IdentityMatrix();
glProgram.setFloatsUniform("uTransformationMatrix", identityMatrix);
glProgram.setFloatsUniform("uTexTransformationMatrix", identityMatrix);
// If there is a size change with the filtering (for example, a scaling operation), the first
// pass is applied horizontally. As a result, the width of the intermediate texture will match
// the output size, while the height will be unchanged from the input.
@@ -371,4 +378,28 @@ public class SeparableConvolutionShaderProgram implements GlShaderProgram {
return glObjectsProvider.createBuffersForTexture(texId, size.getWidth(), size.getHeight());
}
private static final class SeparableConvolutionWrapper implements ConvolutionFunction1D.Provider {
private final SeparableConvolution separableConvolution;
private final float scaleWidth;
private final float scaleHeight;
public SeparableConvolutionWrapper(
SeparableConvolution separableConvolution, float scaleWidth, float scaleHeight) {
this.separableConvolution = separableConvolution;
this.scaleWidth = scaleWidth;
this.scaleHeight = scaleHeight;
}
@Override
public ConvolutionFunction1D getConvolution(long presentationTimeUs) {
return separableConvolution.getConvolution(presentationTimeUs);
}
@Override
public Size configure(Size inputSize) {
return new Size(
(int) (inputSize.getWidth() * scaleWidth), (int) (inputSize.getHeight() * scaleHeight));
}
}
}
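SeparableConvolutionWrapper keeps the pre-existing constructor working: it adapts a SeparableConvolution plus fixed width/height scale factors to the new ConvolutionFunction1D.Provider interface, so the shader program now depends only on the provider while existing SeparableConvolution-based effects behave exactly as before.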

View File

@@ -56,11 +56,13 @@ import android.content.Context;
import androidx.annotation.Nullable;
import androidx.media3.common.C;
import androidx.media3.common.Effect;
import androidx.media3.common.Format;
import androidx.media3.common.MediaItem;
import androidx.media3.common.util.Util;
import androidx.media3.effect.BitmapOverlay;
import androidx.media3.effect.DebugTraceUtil;
import androidx.media3.effect.DefaultVideoFrameProcessor;
import androidx.media3.effect.LanczosResample;
import androidx.media3.effect.OverlayEffect;
import androidx.media3.effect.Presentation;
import androidx.media3.effect.RgbFilter;
@@ -401,6 +403,62 @@ public final class TransformerSequenceEffectTest {
/* frameCount= */ 2);
}
@Test
public void export_imageWithLanczosResample_completesWithHighPsnr() throws Exception {
int exportWidth = 640;
int exportHeight = 240;
Format outputFormat =
MP4_ASSET_WITH_INCREASING_TIMESTAMPS_FORMAT
.buildUpon()
.setWidth(exportWidth)
.setHeight(exportHeight)
.build();
assumeFormatsSupported(
context,
testId,
/* inputFormat= */ MP4_ASSET_WITH_INCREASING_TIMESTAMPS_FORMAT,
outputFormat);
Composition composition =
createComposition(
/* presentation= */ null,
new EditedMediaItem.Builder(MediaItem.fromUri(PNG_ASSET_LINES_1080P_URI_STRING))
.setFrameRate(30)
.setDurationUs(C.MICROS_PER_SECOND / 4)
.setEffects(
new Effects(
ImmutableList.of(),
ImmutableList.of(LanczosResample.scaleToFit(exportWidth, exportHeight))))
.build());
// Some devices need a high bitrate to avoid encoding artifacts.
int bitrate = 2_000_000;
Codec.EncoderFactory encoderFactory =
new DefaultEncoderFactory.Builder(context)
.setRequestedVideoEncoderSettings(
new VideoEncoderSettings.Builder().setBitrate(bitrate).build())
.build();
Transformer transformer =
new Transformer.Builder(context)
.setEncoderFactory(new AndroidTestUtil.ForceEncodeEncoderFactory(encoderFactory))
.setVideoMimeType("video/avc")
.build();
ExportTestResult result =
new TransformerAndroidTestRunner.Builder(context, transformer)
.build()
.run(testId, composition);
assertThat(new File(result.filePath).length()).isGreaterThan(0);
// The PSNR threshold was chosen based on:
// Moto G20 with Lanczos: 30.1
// Moto G20 with bilinear: 16.3
assertFramesMatchExpectedPsnrAndSave(
context,
testId,
checkNotNull(result.filePath),
/* psnrThreshold= */ 24,
/* frameCount= */ 1);
}
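The gap between the two reference PSNR values in the comment above is expected: for a large downscale, the Lanczos kernel acts as a windowed-sinc low-pass filter over many input samples, whereas bilinear sampling reads only the four nearest texels and aliases heavily, which is what the threshold is guarding against.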
@Test
public void export_withCompositionPresentationAndWithPerMediaItemEffects() throws Exception {
// Reference: b/296225823#comment5