2023-06-29 nightly release (a8ce4a8)

pytorch · Jun 29, 2023 · 186290e · 186290e
1 parent fb963d7
commit 186290e
Show file tree

Hide file tree

Showing 2 changed files with 21 additions and 18 deletions.
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -85,6 +85,7 @@ model implementations and application components.
    Emformer RNN-T ASR <https://github.com/pytorch/audio/tree/main/examples/asr/emformer_rnnt>
    Conv-TasNet Source Separation <https://github.com/pytorch/audio/tree/main/examples/source_separation>
    HuBERT Pre-training and Fine-tuning (ASR) <https://github.com/pytorch/audio/tree/main/examples/hubert>
+   Conformer/Emformer RNN-T ASR/VSR/AV-ASR <https://github.com/pytorch/audio/tree/main/examples/asr/avsr_rnnt>
 
 .. toctree::
    :maxdepth: 1

diff --git a/examples/tutorials/audio_feature_extractions_tutorial.py b/examples/tutorials/audio_feature_extractions_tutorial.py
@@ -28,6 +28,20 @@
 import librosa
 import matplotlib.pyplot as plt
 
+######################################################################
+# Overview of audio features
+# --------------------------
+#
+# The following diagram shows the relationship between common audio features
+# and torchaudio APIs to generate them.
+#
+# .. image:: https://download.pytorch.org/torchaudio/tutorial-assets/torchaudio_feature_extractions.png
+#
+# For the complete list of available features, please refer to the
+# documentation.
+#
+
+
 ######################################################################
 # Preparation
 # -----------
@@ -83,20 +97,6 @@ def plot_fbank(fbank, title=None):
     plt.show(block=False)
 
 
-######################################################################
-# Overview of audio features
-# --------------------------
-#
-# The following diagram shows the relationship between common audio features
-# and torchaudio APIs to generate them.
-#
-# .. image:: https://download.pytorch.org/torchaudio/tutorial-assets/torchaudio_feature_extractions.png
-#
-# For the complete list of available features, please refer to the
-# documentation.
-#
-
-
 ######################################################################
 # Spectrogram
 # -----------
@@ -156,7 +156,8 @@ def plot_fbank(fbank, title=None):
 #    By default, (i.e. ``hop_length=None`` and ``win_length=None``),
 #    the value of ``n_fft // 4`` is used.
 #    Here we use the same ``hop_length`` value across different ``n_fft``
-#    so that the visualization.
+#    so that they have the same number of elements in the time axis.
+#
 
 n_ffts = [32, 128, 512, 2048]
 hop_length = 64
@@ -181,11 +182,12 @@ def plot_fbank(fbank, title=None):
 # When comparing signals, it is desirable to use the same sampling rate,
 # however, if you must use different sampling rates, care must be
 # taken when interpreting the meaning of ``n_fft``.
-# ``n_fft`` determines the resolution of the frequency, and what
-# each frequency bin represents is subject to the sampling rate.
+# Recall that ``n_fft`` determines the resolution of the frequency
+# axis for a given sampling rate. In other words, what each bin on
+# the frequency axis represents is subject to the sampling rate.
 #
 # As we have seen above, changing the value of ``n_fft`` does not change
-# the coverage of frequency range.
+# the coverage of the frequency range for the same input signal.
 
 ######################################################################
 #