Modificar main.py

Usar Whisper
2024-11-26 07:31:50 -08:00
parent 76669c0ed3
commit d23aadcd0e
1 changed files with 301 additions and 482 deletions
--- a/main.py
+++ b/main.py
@ -1,483 +1,302 @@
-import tkinter as tk
+import tkinter as tk
-from tkinter import ttk, filedialog, scrolledtext
+from tkinter import ttk, scrolledtext, filedialog, messagebox
-from tkinter import messagebox
+import threading
-import torch
+import whisper
-from transformers import AutoProcessor, WhisperForConditionalGeneration
+import cv2
-import cv2
+from datetime import timedelta
-from datetime import timedelta
+import os
-import os
+import subprocess
-import threading
+import numpy as np
-import subprocess
+
-import time
+class VideoSubtitleApp:
-import re
+    def __init__(self, root):
-import numpy as np
+        self.root = root
-
+        self.root.title("Extrator de Legendas")
-class VideoSubtitleApp:
+        self.root.geometry("900x700")
-    def __init__(self, root):
+        
-        self.root = root
+        # Variáveis
-        self.root.title("Extrator de Legendas")
+        self.video_path = tk.StringVar()
-        self.root.geometry("900x700")
+        self.video_info = tk.StringVar()
-        
+        self.selected_language = tk.StringVar(value='English')
-        # Variáveis
+        self.status_var = tk.StringVar(value="Pronto")
-        self.video_path = tk.StringVar()
+        self.subtitles_list = []
-        self.video_info = tk.StringVar()
+        
-        self.selected_language = tk.StringVar(value='pt-BR')
+        # Dicionário de línguas
-        self.subtitles_list = []
+        self.languages = {
-        
+            'Português (Brasil)': 'pt',
-        # Inicializar modelo Whisper e processador
+            'Português (Portugal)': 'pt',
-        self.initialize_whisper()
+            'English': 'en',
-        
+            'Español': 'es',
-        # Dicionário de línguas disponíveis
+            'Français': 'fr',
-        self.languages = {
+            'Deutsch': 'de',
-            'Português (Brasil)': 'pt',
+            'Italiano': 'it'
-            'Português (Portugal)': 'pt',
+        }
-            'English': 'en',
+        
-            'Español': 'es',
+        # Criar interface
-            'Français': 'fr',
+        self.create_widgets()
-            'Deutsch': 'de',
+        
-            'Italiano': 'it'
+        # Inicializar modelo em thread separada
-        }
+        self.model_ready = False
-        
+        threading.Thread(target=self.initialize_whisper, daemon=True).start()
-        # Criar interface
+
-        self.create_widgets()
+    def initialize_whisper(self):
-        
+        """Inicializa o modelo Whisper"""
-        # Variável para armazenar o vídeo
+        try:
-        self.video = None
+            self.status_var.set("Carregando modelo...")
-        
+            # Usar modelo base para melhor equilíbrio
-    def create_widgets(self):
+            self.model = whisper.load_model("base")
-        # Frame principal
+            self.model_ready = True
-        main_frame = ttk.Frame(self.root, padding="10")
+            self.status_var.set("Modelo carregado com sucesso")
-        main_frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))
+            self.generate_button.config(state='normal')
-        
+        except Exception as e:
-        # Configurar expansão da grade
+            self.status_var.set("Erro ao carregar modelo")
-        self.root.grid_rowconfigure(0, weight=1)
+            messagebox.showerror("Erro", f"Erro ao carregar modelo Whisper:\n{str(e)}")
-        self.root.grid_columnconfigure(0, weight=1)
+
-        main_frame.grid_columnconfigure(1, weight=1)
+    def create_widgets(self):
-        
+        """Cria a interface gráfica"""
-        # Frame para seleção de arquivo e idioma
+        # Frame principal
-        file_frame = ttk.Frame(main_frame)
+        main_frame = ttk.Frame(self.root, padding=(10, 10, 10, 10))
-        file_frame.grid(row=0, column=0, columnspan=2, sticky=(tk.W, tk.E), pady=5)
+        main_frame.grid(row=0, column=0, sticky="nsew")
-        
+        
-        # Botão para selecionar arquivo
+        # Configurar grid
-        ttk.Button(file_frame, text="Selecionar Vídeo", command=self.select_file).pack(side=tk.LEFT, padx=5)
+        self.root.grid_rowconfigure(0, weight=1)
-        
+        self.root.grid_columnconfigure(0, weight=1)
-        # Seleção de idioma
+        main_frame.grid_columnconfigure(0, weight=1)
-        ttk.Label(file_frame, text="Idioma:").pack(side=tk.LEFT, padx=5)
+        
-        language_combo = ttk.Combobox(file_frame, 
+        # Frame superior
-                                    values=list(self.languages.keys()),
+        top_frame = ttk.Frame(main_frame)
-                                    textvariable=self.selected_language,
+        top_frame.grid(row=0, column=0, sticky="ew", pady=(0, 10))
-                                    state='readonly',
+        
-                                    width=20)
+        # Botões e controles
-        language_combo.pack(side=tk.LEFT, padx=5)
+        ttk.Button(top_frame, text="📂 Selecionar Vídeo", 
-        language_combo.set('Português (Brasil)')
+                  command=self.select_file).pack(side=tk.LEFT, padx=5)
-        
+        
-        # Label para mostrar caminho do arquivo
+        ttk.Label(top_frame, text="🌐 Idioma:").pack(side=tk.LEFT, padx=5)
-        ttk.Label(main_frame, textvariable=self.video_path, wraplength=500).grid(row=1, column=0, columnspan=2, pady=5)
+        
-        
+        language_combo = ttk.Combobox(top_frame, 
-        # Frame para informações do vídeo
+                                    values=list(self.languages.keys()),
-        info_frame = ttk.LabelFrame(main_frame, text="Informações do Vídeo", padding="5")
+                                    textvariable=self.selected_language,
-        info_frame.grid(row=2, column=0, columnspan=2, sticky=(tk.W, tk.E), pady=5)
+                                    state='readonly',
-        
+                                    width=20)
-        ttk.Label(info_frame, textvariable=self.video_info).grid(row=0, column=0, sticky=tk.W)
+        language_combo.pack(side=tk.LEFT, padx=5)
-        
+        
-        # Frame para botões de ação
+        # Caminho do arquivo
-        button_frame = ttk.Frame(main_frame)
+        path_frame = ttk.LabelFrame(main_frame, text="Arquivo Selecionado")
-        button_frame.grid(row=3, column=0, columnspan=2, pady=5)
+        path_frame.grid(row=1, column=0, sticky="ew", pady=(0, 10))
-        
+        ttk.Label(path_frame, textvariable=self.video_path, 
-        ttk.Button(button_frame, text="Gerar Legendas", command=self.generate_subtitles).pack(side=tk.LEFT, padx=5)
+                 wraplength=800).grid(row=0, column=0, padx=5, pady=5)
-        ttk.Button(button_frame, text="Salvar Alterações", command=self.save_subtitles).pack(side=tk.LEFT, padx=5)
+        
-        
+        # Informações do vídeo
-        # Progress bar
+        info_frame = ttk.LabelFrame(main_frame, text="Informações do Vídeo")
-        self.progress = ttk.Progressbar(main_frame, mode='indeterminate')
+        info_frame.grid(row=2, column=0, sticky="ew", pady=(0, 10))
-        self.progress.grid(row=4, column=0, columnspan=2, sticky=(tk.W, tk.E), pady=5)
+        ttk.Label(info_frame, textvariable=self.video_info).grid(row=0, column=0, padx=5, pady=5)
-        
+        
-        # Frame para edição de legendas
+        # Botões de ação
-        subtitle_frame = ttk.LabelFrame(main_frame, text="Editor de Legendas", padding="5")
+        button_frame = ttk.Frame(main_frame)
-        subtitle_frame.grid(row=5, column=0, columnspan=2, sticky=(tk.W, tk.E, tk.N, tk.S), pady=5)
+        button_frame.grid(row=3, column=0, pady=(0, 10))
-        subtitle_frame.grid_rowconfigure(0, weight=1)
+        
-        subtitle_frame.grid_columnconfigure(0, weight=1)
+        self.generate_button = ttk.Button(button_frame, text="🎬 Gerar Legendas",
-        
+                                        command=self.generate_subtitles,
-        # Área de texto editável para legendas
+                                        state='disabled')
-        self.subtitle_text = scrolledtext.ScrolledText(subtitle_frame, height=20, width=80, wrap=tk.WORD)
+        self.generate_button.pack(side=tk.LEFT, padx=5)
-        self.subtitle_text.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S), padx=5, pady=5)
+        
-        
+        self.save_button = ttk.Button(button_frame, text="💾 Salvar",
-        # Instruções de uso
+                                    command=self.save_subtitles,
-        instructions = """Instruções:
+                                    state='disabled')
-        1. Selecione o idioma do áudio do vídeo
+        self.save_button.pack(side=tk.LEFT, padx=5)
-        2. Clique em 'Selecionar Vídeo' e escolha o arquivo
+        
-        3. Aguarde o processamento do modelo Whisper
+        # Barra de progresso
-        4. Edite as legendas se necessário
+        self.progress = ttk.Progressbar(main_frame, mode='indeterminate')
-        5. Clique em 'Salvar Alterações' para gerar o arquivo .srt"""
+        self.progress.grid(row=4, column=0, sticky="ew", pady=(0, 10))
-        
+        
-        ttk.Label(main_frame, text=instructions, justify=tk.LEFT, wraplength=600).grid(
+        # Editor de legendas
-            row=6, column=0, columnspan=2, pady=5, sticky=tk.W)
+        editor_frame = ttk.LabelFrame(main_frame, text="Editor de Legendas")
-
+        editor_frame.grid(row=5, column=0, sticky="nsew", pady=(0, 10))
-    def select_file(self):
+        editor_frame.grid_columnconfigure(0, weight=1)
-        filetypes = (
+        editor_frame.grid_rowconfigure(0, weight=1)
-            ('Arquivos de vídeo', '*.mp4 *.avi *.mkv'),
+        
-            ('Todos os arquivos', '*.*')
+        self.subtitle_text = scrolledtext.ScrolledText(
-        )
+            editor_frame,
-        
+            wrap=tk.WORD,
-        filename = filedialog.askopenfilename(
+            font=('Consolas', 10)
-            title='Selecione um vídeo',
+        )
-            filetypes=filetypes
+        self.subtitle_text.grid(row=0, column=0, sticky="nsew", padx=5, pady=5)
-        )
+        
-        
+        # Barra de status
-        if filename:
+        status_frame = ttk.Frame(main_frame)
-            self.video_path.set(filename)
+        status_frame.grid(row=6, column=0, sticky="ew")
-            self.load_video_info(filename)
+        
-
+        ttk.Label(status_frame, textvariable=self.status_var, relief=tk.SUNKEN).grid(
-    def load_video_info(self, filename):
+            row=0, column=0, sticky="ew")
-        try:
+        status_frame.grid_columnconfigure(0, weight=1)
-            self.video = cv2.VideoCapture(filename)
+
-            
+    def select_file(self):
-            # Obter informações do vídeo
+        """Seleciona arquivo de vídeo"""
-            fps = self.video.get(cv2.CAP_PROP_FPS)
+        filename = filedialog.askopenfilename(
-            frame_count = int(self.video.get(cv2.CAP_PROP_FRAME_COUNT))
+            title="Selecionar Vídeo",
-            duration = frame_count / fps
+            filetypes=[
-            width = int(self.video.get(cv2.CAP_PROP_FRAME_WIDTH))
+                ("Arquivos de Vídeo", "*.mp4 *.mkv *.avi"),
-            height = int(self.video.get(cv2.CAP_PROP_FRAME_HEIGHT))
+                ("Todos os Arquivos", "*.*")
-            
+            ]
-            info = f"""
+        )
-            Duração: {str(timedelta(seconds=int(duration)))}
+        
-            Resolução: {width}x{height}
+        if filename:
-            FPS: {fps:.2f}
+            self.video_path.set(filename)
-            Formato: {os.path.splitext(filename)[1]}
+            self.load_video_info(filename)
-            """
+
-            self.video_info.set(info)
+    def load_video_info(self, filename):
-            
+        """Carrega informações do vídeo"""
-        except Exception as e:
+        try:
-            messagebox.showerror("Erro", f"Erro ao carregar o vídeo: {str(e)}")
+            cap = cv2.VideoCapture(filename)
-
+            fps = cap.get(cv2.CAP_PROP_FPS)
-    def generate_subtitles(self):
+            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-        if not self.video_path.get():
+            duration = frame_count / fps
-            messagebox.showwarning("Aviso", "Por favor, selecione um vídeo primeiro.")
+            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-            return
+            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-        
+            
-        # Iniciar processamento em thread separada
+            info = f"Duração: {str(timedelta(seconds=int(duration)))}\n"
-        self.progress.start()
+            info += f"Resolução: {width}x{height}\n"
-        thread = threading.Thread(target=self.process_video)
+            info += f"FPS: {fps:.2f}\n"
-        thread.start()
+            info += f"Formato: {os.path.splitext(filename)[1]}"
-    
+            
-    def initialize_whisper(self):
+            self.video_info.set(info)
-        """Inicializa o modelo Whisper e o processador com configurações otimizadas"""
+            cap.release()
-        try:
+            
-            # Usar o modelo maior para melhor qualidade
+        except Exception as e:
-            model_name = "openai/whisper-large-v3"
+            messagebox.showerror("Erro", f"Erro ao carregar vídeo: {str(e)}")
-            self.processor = AutoProcessor.from_pretrained(model_name)
+
-            self.model = WhisperForConditionalGeneration.from_pretrained(
+    def extract_audio(self, video_path, audio_path):
-                model_name,
+        """Extrai o áudio do vídeo"""
-                device_map="auto",  # Usar a melhor dispositivo disponível
+        try:
-                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+            command = [
-                low_cpu_mem_usage=True
+                'ffmpeg',
-            )
+                '-i', video_path,
-            
+                '-vn',
-            if torch.cuda.is_available():
+                '-acodec', 'pcm_s16le',
-                print("Usando GPU para processamento")
+                '-ar', '16000',
-            else:
+                '-ac', '1',
-                print("Usando CPU para processamento")
+                '-y',
-                
+                audio_path
-        except Exception as e:
+            ]
-            messagebox.showerror("Erro", f"Erro ao carregar modelo Whisper: {str(e)}")
+            
-
+            process = subprocess.run(
-    def extract_audio(self, video_path, audio_path):
+                command,
-        """Extrai o áudio do vídeo com configurações otimizadas"""
+                capture_output=True,
-        try:
+                text=True
-            print(f"Extraindo áudio de {video_path}")
+            )
-            
+            
-            # Primeiro comando - qualidade máxima
+            return os.path.exists(audio_path) and os.path.getsize(audio_path) > 0
-            command = [
+            
-                'ffmpeg',
+        except Exception as e:
-                '-i', video_path,
+            print(f"Erro na extração de áudio: {str(e)}")
-                '-vn',  # Não processar vídeo
+            return False
-                '-acodec', 'pcm_s16le',  # Codec PCM 16-bit
+
-                '-ac', '1',  # Mono
+    def process_audio_with_whisper(self, audio_path, language_code):
-                '-ar', '16000',  # Taxa de amostragem para Whisper
+        """Processa o áudio usando Whisper"""
-                '-af', 'volume=2.0,highpass=f=200,lowpass=f=3000,areverse,silenceremove=start_periods=1:start_duration=1:start_threshold=-60dB,areverse',  # Filtros de áudio
+        try:
-                '-y',  # Sobrescrever arquivo
+            # Configurar opções do Whisper
-                audio_path
+            options = {
-            ]
+                "language": language_code,
-            
+                "task": "transcribe",
-            print("Tentando primeira extração de áudio...")
+                "verbose": False
-            process = subprocess.run(
+            }
-                command,
+            
-                capture_output=True,
+            # Realizar transcrição
-                text=True,
+            result = self.model.transcribe(audio_path, **options)
-                encoding='utf-8'
+            
-            )
+            # Processar segmentos
-            
+            segments = []
-            if process.returncode != 0:
+            for i, segment in enumerate(result["segments"], 1):
-                print("Primeira tentativa falhou, tentando método alternativo...")
+                start_time = segment["start"]
-                # Comando alternativo - mais simples
+                end_time = segment["end"]
-                alt_command = [
+                text = segment["text"].strip()
-                    'ffmpeg',
+                
-                    '-i', video_path,
+                if text:
-                    '-vn',
+                    segment_str = f"{i}\n"
-                    '-acodec', 'pcm_s16le',
+                    segment_str += f"{self.format_timestamp(start_time)} --> {self.format_timestamp(end_time)}\n"
-                    '-ac', '1',
+                    segment_str += f"{text}\n\n"
-                    '-ar', '16000',
+                    segments.append(segment_str)
-                    '-y',
+            
-                    audio_path
+            return segments
-                ]
+            
-                process = subprocess.run(
+        except Exception as e:
-                    alt_command,
+            print(f"Erro detalhado no processamento do áudio: {str(e)}")
-                    capture_output=True,
+            raise
-                    text=True,
+
-                    encoding='utf-8'
+    def format_timestamp(self, seconds):
-                )
+        """Converte segundos para formato SRT"""
-            
+        hours = int(seconds // 3600)
-            if os.path.exists(audio_path) and os.path.getsize(audio_path) > 0:
+        minutes = int((seconds % 3600) // 60)
-                print(f"Áudio extraído com sucesso: {os.path.getsize(audio_path)} bytes")
+        secs = int(seconds % 60)
-                return True
+        millisecs = int((seconds * 1000) % 1000)
-            else:
+        
-                raise Exception("Arquivo de áudio não foi criado ou está vazio")
+        return f"{hours:02d}:{minutes:02d}:{secs:02d},{millisecs:03d}"
-                
+
-        except Exception as e:
+    def generate_subtitles(self):
-            print(f"Erro detalhado na extração de áudio: {str(e)}")
+        """Inicia processo de geração de legendas"""
-            if process and process.stderr:
+        if not self.video_path.get():
-                print(f"Erro FFmpeg: {process.stderr}")
+            messagebox.showwarning("Aviso", "Selecione um vídeo primeiro.")
-            return False
+            return
-
+            
-    def process_audio_with_whisper(self, audio_path, language_code):
+        if not self.model_ready:
-        try:
+            messagebox.showwarning("Aviso", "Aguarde o modelo ser carregado.")
-            import soundfile as sf
+            return
-            print(f"Processando áudio em {language_code}...")
+        
-            
+        self.progress.start()
-            # Carregar áudio
+        self.generate_button.config(state='disabled')
-            audio, sample_rate = sf.read(audio_path)
+        self.save_button.config(state='disabled')
-            print(f"Áudio carregado: {len(audio)} amostras, taxa de amostragem: {sample_rate}Hz")
+        threading.Thread(target=self.process_video, daemon=True).start()
-            
+
-            # Normalizar áudio
+    def process_video(self):
-            if audio.dtype == np.int16:
+        """Processa o vídeo e gera legendas"""
-                audio = audio.astype(np.float32) / 32768.0
+        audio_path = "temp_audio.wav"
-            elif audio.dtype == np.int32:
+        try:
-                audio = audio.astype(np.float32) / 2147483648.0
+            self.status_var.set("Extraindo áudio...")
-            
+            
-            # Garantir que o áudio esteja entre -1 e 1
+            if not self.extract_audio(self.video_path.get(), audio_path):
-            max_abs = np.max(np.abs(audio))
+                raise Exception("Falha na extração do áudio")
-            if max_abs > 1.0:
+            
-                audio = audio / max_abs
+            self.status_var.set("Processando áudio...")
-            
+            language = self.languages[self.selected_language.get()]
-            # Preparar input features com configurações explícitas
+            
-            inputs = self.processor(
+            self.subtitles_list = self.process_audio_with_whisper(audio_path, language)
-                audio, 
+            
-                sampling_rate=sample_rate,
+            if not self.subtitles_list:
-                return_tensors="pt",
+                raise Exception("Nenhuma legenda gerada")
-                padding=True,
+            
-                do_normalize=True,
+            self.status_var.set("Legendas geradas com sucesso!")
-                return_attention_mask=True
+            self.root.after(0, self.update_subtitle_text)
-            )
+            
-            
+        except Exception as e:
-            print("Features de entrada processadas")
+            self.status_var.set("Erro no processamento")
-            
+            messagebox.showerror("Erro", str(e))
-            # Mover para GPU se disponível
+            
-            if torch.cuda.is_available():
+        finally:
-                inputs = {k: v.to("cuda") for k, v in inputs.items()}
+            self.progress.stop()
-                print("Dados movidos para GPU")
+            self.generate_button.config(state='normal')
-            
+            try:
-            # Configurar parâmetros de geração corrigidos
+                if os.path.exists(audio_path):
-            generate_kwargs = {
+                    os.remove(audio_path)
-                "temperature": 0.0,  # Determinístico
+            except:
-                "no_speech_threshold": 0.6,
+                pass
-                "logprob_threshold": -1.0,
+
-                "compression_ratio_threshold": 2.4,
+    def update_subtitle_text(self):
-                "condition_on_previous_text": True,
+        """Atualiza o texto das legendas na interface"""
-                "max_initial_timestamp": 1.0,
+        self.subtitle_text.delete('1.0', tk.END)
-                "return_timestamps": True
+        self.subtitle_text.insert('1.0', ''.join(self.subtitles_list))
-            }
+        self.save_button.config(state='normal')
-            
+
-            if language_code:
+    def save_subtitles(self):
-                generate_kwargs["language"] = language_code
+        """Salva as legendas em arquivo"""
-            
+        try:
-            print("Iniciando geração da transcrição...")
+            output_path = os.path.splitext(self.video_path.get())[0] + ".srt"
-            
+            with open(output_path, 'w', encoding='utf-8') as f:
-            # Gerar transcrição com timestamps
+                f.write(self.subtitle_text.get('1.0', tk.END))
-            with torch.no_grad():
+            messagebox.showinfo("Sucesso", f"Legendas salvas em:\n{output_path}")
-                outputs = self.model.generate(
+        except Exception as e:
-                    inputs.input_features,
+            messagebox.showerror("Erro", f"Erro ao salvar legendas: {str(e)}")
-                    **generate_kwargs
+
-                )
+if __name__ == "__main__":
-            
+    root = tk.Tk()
-            print("Transcrição gerada, decodificando...")
+    app = VideoSubtitleApp(root)
            # Decodificar saída com timestamp_begin=True
            transcription = self.processor.batch_decode(
                outputs, 
                skip_special_tokens=True,
                output_offsets=True
            )[0]
            print(f"Transcrição decodificada: {len(transcription.text)} caracteres")
            if not transcription.text.strip():
                raise Exception("Transcrição vazia retornada pelo modelo")
            # Formatar segmentos com timestamps
            segments = []
            for i, segment in enumerate(transcription.offsets, start=1):
                start_time = self.format_timestamp(segment['timestamp'][0])
                end_time = self.format_timestamp(segment['timestamp'][1])
                text = segment['text'].strip()
                if text:  # Só adicionar se houver texto
                    segment_str = f"{i}\n{start_time} --> {end_time}\n{text}\n\n"
                    segments.append(segment_str)
            print(f"Segmentos formatados: {len(segments)}")
            return segments
        except Exception as e:
            print(f"Erro detalhado no processamento do áudio: {str(e)}")
            raise Exception(f"Erro no processamento do áudio: {str(e)}")
    def format_timestamp(self, seconds):
        """Convert an offset in seconds to an SRT timestamp (HH:MM:SS,mmm).

        Args:
            seconds: Offset from the start of the media, as an int or float.
                Negative values are clamped to zero.

        Returns:
            The offset formatted as a zero-padded ``HH:MM:SS,mmm`` string.
        """
        # Round once to whole milliseconds and do the rest in integers.
        # Taking the fractional part of a float and truncating it loses a
        # millisecond for values such as 1.001 (which became ",000").
        total_ms = max(0, int(round(seconds * 1000)))
        total_secs, milliseconds = divmod(total_ms, 1000)
        total_mins, secs = divmod(total_secs, 60)
        hours, minutes = divmod(total_mins, 60)
        return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"
    def format_whisper_output(self, transcription):
        """Turn Whisper's bracketed-timestamp text into a list of SRT cues.

        Every ``[start --> end] text`` span found in ``transcription``
        becomes one string of the form ``"N\\nstart --> end\\ntext\\n\\n"``,
        numbered from 1, with both timestamps passed through
        ``convert_timestamp_to_srt``.
        """
        pattern = r"\[(\d+:\d+\.\d+) --> (\d+:\d+\.\d+)\](.*?)(?=\[|$)"
        return [
            "{}\n{} --> {}\n{}\n\n".format(
                number,
                self.convert_timestamp_to_srt(found.group(1)),
                self.convert_timestamp_to_srt(found.group(2)),
                found.group(3).strip(),
            )
            for number, found in enumerate(
                re.finditer(pattern, transcription, re.DOTALL), 1
            )
        ]
    def convert_timestamp_to_srt(self, timestamp):
        """Convert a Whisper text timestamp to SRT format (HH:MM:SS,mmm).

        Args:
            timestamp: A ``MM:SS.mmm`` string. An ``HH:MM:SS.mmm`` string is
                accepted as well. Minutes above 59 are carried into hours.

        Returns:
            The timestamp as a zero-padded ``HH:MM:SS,mmm`` string.

        Raises:
            ValueError: If the string does not have two or three
                colon-separated fields, or a field is not numeric.
        """
        fields = timestamp.split(":")
        if len(fields) == 2:
            fields.insert(0, "0")
        elif len(fields) != 3:
            raise ValueError(f"Invalid timestamp: {timestamp!r}")
        hours_part, minutes_part, seconds_part = fields

        whole_seconds, _, fraction = seconds_part.partition(".")
        # The fraction is a decimal part (".5" means 500 ms), so pad or
        # truncate it to exactly three digits before converting.
        milliseconds = int((fraction + "000")[:3])

        # The pieces come out of str.split as strings; they must be converted
        # to int before being used with the "d" format codes below.
        total_minutes = int(hours_part) * 60 + int(minutes_part)
        hours, minutes = divmod(total_minutes, 60)
        return f"{hours:02d}:{minutes:02d}:{int(whole_seconds):02d},{milliseconds:03d}"
    def process_video(self):
        """Extract the audio track, transcribe it and show the subtitles.

        Steps, in order: extract audio from the selected video into
        ``temp_audio.wav`` via ``extract_audio``; look up the language code for
        the selected language name in ``self.languages`` (falling back to
        ``'en'``); transcribe with ``process_audio_with_whisper`` and store the
        result in ``self.subtitles_list``; schedule the editor update through
        ``self.root.after``.

        Any exception is printed and reported to the user with an error dialog
        scheduled through ``self.root.after``; nothing is re-raised. The
        ``finally`` clause always stops the progress bar, releases
        ``self.video`` if one is set, and removes the temporary audio file.
        """
        try:
            # Extract audio
            audio_path = "temp_audio.wav"
            print("Iniciando extração de áudio...")
            if not self.extract_audio(self.video_path.get(), audio_path):
                raise Exception("Falha na extração do áudio")
            print("Áudio extraído com sucesso")
            # Get the language code for the selected language name
            selected_name = self.selected_language.get()
            language_code = self.languages.get(selected_name, 'en')
            print(f"Idioma selecionado: {selected_name} ({language_code})")
            # Process the audio with Whisper
            print("Iniciando reconhecimento de fala...")
            self.subtitles_list = self.process_audio_with_whisper(audio_path, language_code)
            if not self.subtitles_list:
                raise Exception("Nenhum texto foi reconhecido")
            print(f"Texto reconhecido com sucesso: {len(self.subtitles_list)} segmentos")
            # Show the subtitles in the UI (scheduled via the Tk event loop)
            self.root.after(0, self.update_subtitle_text, ''.join(self.subtitles_list))
        except Exception as e:
            print(f"Erro no processamento: {str(e)}")
            self.root.after(0, messagebox.showerror, "Erro", f"Erro ao gerar legendas: {str(e)}")
        finally:
            # Clean up
            self.root.after(0, self.progress.stop)
            if self.video is not None:
                self.video.release()
            # Removing the temporary file is best-effort: a failure is only logged.
            try:
                if os.path.exists(audio_path):
                    print(f"Removendo arquivo temporário: {audio_path}")
                    os.remove(audio_path)
            except Exception as e:
                print(f"Erro ao remover arquivo temporário: {str(e)}")
    def update_subtitle_text(self, text):
        """Replace the whole content of the subtitle editor with ``text``."""
        editor = self.subtitle_text
        # Clear everything from the first character to the end, then append.
        editor.delete(1.0, tk.END)
        editor.insert(tk.END, text)
    def save_subtitles(self):
        """Validate the editor contents and save them as an .srt file.

        The output file is written next to the selected video, using the
        video's path with its extension replaced by ``.srt`` (UTF-8 encoded).
        A success dialog is shown afterwards. Any failure (no video selected,
        invalid subtitle format, I/O error) is reported in an error dialog
        instead of being raised to the caller.
        """
        try:
            video_path = self.video_path.get()
            if not video_path:
                # Without this guard os.path.splitext("") yields "" and the
                # subtitles would be silently written to a stray ".srt" file
                # in the current working directory.
                raise ValueError("Nenhum vídeo selecionado.")

            # Current editor text
            current_text = self.subtitle_text.get(1.0, tk.END).strip()

            # Basic check of the subtitle structure before writing anything
            if not self.validate_subtitle_format(current_text):
                raise ValueError("Formato de legendas inválido. Mantenha o formato: número + tempo + texto")

            # Write the file next to the video
            output_path = os.path.splitext(video_path)[0] + ".srt"
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(current_text)

            messagebox.showinfo("Sucesso", f"Legendas salvas com sucesso em:\n{output_path}")
        except Exception as e:
            messagebox.showerror("Erro", f"Erro ao salvar legendas: {str(e)}")
    def validate_subtitle_format(self, text):
        """Check that ``text`` is structured as a sequence of SRT cues.

        Each cue must consist of a line containing only digits (the cue
        number), a timing line containing exactly one ``' --> '`` separator
        and four colons (``HH:MM:SS,mmm --> HH:MM:SS,mmm``), and at least one
        non-blank text line. Additional non-blank lines up to the next blank
        line are treated as further text lines of the same cue. Blank lines
        between cues are ignored.

        Args:
            text: The full subtitle text to validate.

        Returns:
            True if every cue has the expected structure, False otherwise
            (including when ``text`` is empty or whitespace only).
        """
        if not text.strip():
            return False

        lines = [line.strip() for line in text.split('\n')]
        total = len(lines)
        i = 0
        while i < total:
            # Skip blank separator lines between cues.
            if not lines[i]:
                i += 1
                continue

            # Cue number.
            if not lines[i].isdigit():
                return False

            # Timing line.
            i += 1
            if i >= total:
                return False
            timing = lines[i]
            if (' --> ' not in timing
                    or timing.count(':') != 4
                    or len(timing.split(' --> ')) != 2):
                return False

            # At least one line of subtitle text is required.
            i += 1
            if i >= total or not lines[i]:
                return False

            # A cue may span several text lines; consume them up to the blank
            # separator so that they are not mistaken for the next cue number.
            while i < total and lines[i]:
                i += 1
        return True
 # Script entry point: create the Tk root window, attach the application to it
 # and run the Tk event loop.
 if __name__ == "__main__":
    root = tk.Tk()
    app = VideoSubtitleApp(root)
    root.mainloop()