Apr-05-2024, 07:30 AM
We can close the topic: I won't be able to go further due to hardware constraints.
Here is my code; CUDA is running out of memory before it can transcribe the file:
"""Streamlit front-end for French speech-to-text with NVIDIA NeMo.

Lets the user upload an audio file (wav/m4a/mp3/wma), transcribes it with
the `nvidia/stt_fr_conformer_ctc_large` CTC model, displays the result and
offers it as a downloadable text file.
"""

## Imports ##
import torch
import streamlit as st
from pathlib import Path
from tempfile import NamedTemporaryFile
from transformers import AutoModelForCTC, Wav2Vec2ProcessorWithLM  # NOTE(review): unused in this chunk — confirm before removing
import nemo.collections.asr as nemo_asr
import torchaudio  # NOTE(review): unused in this chunk — confirm before removing

## Initialisation ##
# Prefer the first GPU when available. NOTE(review): `device` is never used
# below — NeMo's from_pretrained() already places the model on GPU by itself
# when CUDA is available, which is why CUDA OOM can occur during transcribe().
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
asr_model = nemo_asr.models.EncDecCTCModelBPE.from_pretrained(
    "nvidia/stt_fr_conformer_ctc_large"
)

## Affichage ##
st.title("Facilitateur de compte-rendus")
col1, col2 = st.columns(2)
audio_source = st.sidebar.file_uploader(
    label="Choisir votre fichier", type=["wav", "m4a", "mp3", "wma"]
)

## Traitement ##
# col1.subheader("Modèle utilisé : nvidia/stt_fr_conformer_ctc_large")
if audio_source is not None:
    suffix = Path(audio_source.name).suffix
    col1.write("Démarrage de la transcription")
    # Persist the upload to a real file on disk: transcribe() takes file paths,
    # not in-memory buffers. The temp file keeps the original extension so the
    # audio decoder can identify the format.
    with NamedTemporaryFile(suffix=suffix) as temp_file:
        temp_file.write(audio_source.getvalue())
        temp_file.seek(0)
        col2.write(temp_file.name)
        predicted_text = asr_model.transcribe([temp_file.name])
        col1.write("Fichier transcrit :point_right:")
        col2.write(predicted_text)
        # BUG FIX: the original called `col1.sidebar.download_button(...)`, but
        # Streamlit column objects have no `.sidebar` attribute (AttributeError).
        # The button belongs on `st.sidebar`. Also, transcribe() returns a LIST
        # of hypotheses, while download_button expects str/bytes — join it.
        st.sidebar.download_button(
            label="Télécharger la transcription",
            data="\n".join(str(t) for t in predicted_text),
            file_name="transcript.txt",
            mime="text/plain",
        )
# If anyone has a 6+GB GPU or a good CPU with enough RAM and a long time to
# spend, you can feel free to test it.