예제 #1
0
        /// <summary>
        /// Transcribes one sample WAV file with DeepSpeech and writes the recognized text
        /// to the console.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            using (IDeepSpeech sttClient = new DeepSpeech(DEEPSPEECH_MODEL))
            {
                sttClient.EnableExternalScorer(DEEPSPEECH_SCORER);

                // The whole file is loaded into memory and handed to the recognizer in one call.
                byte[] rawAudio = File.ReadAllBytes(@"audio\2830-3980-0043.wav");
                var    samples  = new WaveBuffer(rawAudio);

                // NOTE(review): MaxSize is halved before being passed as the buffer size,
                // presumably converting a byte count into a 16-bit sample count - confirm.
                string transcript = sttClient.SpeechToText(
                    samples.ShortBuffer,
                    Convert.ToUInt32(samples.MaxSize / 2));

                Console.WriteLine(transcript);

                samples.Clear();
            }
        }
예제 #2
0
        /// <summary>
        /// Loads a DeepSpeech model (and optionally a language model), runs inference on a
        /// WAV file and prints the transcription together with timing information.
        /// </summary>
        /// <param name="args">
        /// Optional switches read through <c>GetArgument</c>: <c>--model</c>, <c>--lm</c>,
        /// <c>--trie</c>, <c>--audio</c> and <c>--extended</c>. Missing values fall back to
        /// the default file names used below.
        /// </param>
        static void Main(string[] args)
        {
            string model    = null;
            string lm       = null;
            string trie     = null;
            string audio    = null;
            bool   extended = false;

            if (args.Length > 0)
            {
                model    = GetArgument(args, "--model");
                lm       = GetArgument(args, "--lm");
                trie     = GetArgument(args, "--trie");
                audio    = GetArgument(args, "--audio");
                extended = !string.IsNullOrWhiteSpace(GetArgument(args, "--extended"));
            }

            const uint  BEAM_WIDTH = 500;
            const float LM_ALPHA   = 0.75f;
            const float LM_BETA    = 1.85f;

            Stopwatch stopwatch = new Stopwatch();

            using (IDeepSpeech sttClient = new DeepSpeech())
            {
                try
                {
                    Console.WriteLine("Loading model...");
                    stopwatch.Start();
                    sttClient.CreateModel(
                        model ?? "output_graph.pbmm",
                        BEAM_WIDTH);
                    stopwatch.Stop();

                    // ElapsedMilliseconds is the total duration; Elapsed.Milliseconds would only
                    // report the 0-999 ms component and under-report loads longer than a second.
                    Console.WriteLine($"Model loaded - {stopwatch.ElapsedMilliseconds} ms");
                    stopwatch.Reset();

                    // The language model is only enabled when one was explicitly requested.
                    if (lm != null)
                    {
                        Console.WriteLine("Loading LM...");
                        sttClient.EnableDecoderWithLM(
                            lm,
                            trie ?? "trie",
                            LM_ALPHA, LM_BETA);
                    }

                    string audioFile  = audio ?? "arctic_a0024.wav";
                    var    waveBuffer = new WaveBuffer(File.ReadAllBytes(audioFile));

                    // The reader is opened only to report the audio duration.
                    using (var waveInfo = new WaveFileReader(audioFile))
                    {
                        Console.WriteLine("Running inference....");

                        stopwatch.Start();

                        uint   bufferSize = Convert.ToUInt32(waveBuffer.MaxSize / 2);
                        string speechResult;
                        if (extended)
                        {
                            Metadata metaResult = sttClient.SpeechToTextWithMetadata(waveBuffer.ShortBuffer, bufferSize);
                            speechResult = MetadataToString(metaResult);
                        }
                        else
                        {
                            speechResult = sttClient.SpeechToText(waveBuffer.ShortBuffer, bufferSize);
                        }

                        stopwatch.Stop();

                        Console.WriteLine($"Audio duration: {waveInfo.TotalTime.ToString()}");
                        Console.WriteLine($"Inference took: {stopwatch.Elapsed.ToString()}");
                        Console.WriteLine((extended ? "Extended result: " : "Recognized text: ") + speechResult);
                    }
                    waveBuffer.Clear();
                }
                catch (Exception ex)
                {
                    // Any failure (missing file, model error, ...) is reported and ends the run.
                    Console.WriteLine(ex.Message);
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Loads a DeepSpeech model with an explicit alphabet, optionally enables a language
        /// model, runs inference on a WAV file and prints the transcription with timings.
        /// </summary>
        /// <param name="args">
        /// Optional switches read through <c>GetArgument</c>: <c>--model</c>,
        /// <c>--alphabet</c>, <c>--lm</c>, <c>--trie</c> and <c>--audio</c>. Missing values
        /// fall back to the default file names used below.
        /// </param>
        static void Main(string[] args)
        {
            string model    = null;
            string alphabet = null;
            string lm       = null;
            string trie     = null;
            string audio    = null;

            if (args.Length > 0)
            {
                model    = GetArgument(args, "--model");
                alphabet = GetArgument(args, "--alphabet");
                lm       = GetArgument(args, "--lm");
                trie     = GetArgument(args, "--trie");
                audio    = GetArgument(args, "--audio");
            }

            const uint  N_CEP      = 26;
            const uint  N_CONTEXT  = 9;
            const uint  BEAM_WIDTH = 200;
            const float LM_ALPHA   = 0.75f;
            const float LM_BETA    = 1.85f;

            Stopwatch stopwatch = new Stopwatch();

            using (IDeepSpeech sttClient = new DeepSpeech())
            {
                // Starts as non-zero so that an exception during loading is treated as a failure.
                var result = 1;
                Console.WriteLine("Loading model...");
                stopwatch.Start();
                try
                {
                    result = sttClient.CreateModel(
                        model ?? "output_graph.pbmm",
                        N_CEP, N_CONTEXT,
                        alphabet ?? "alphabet.txt",
                        BEAM_WIDTH);
                }
                catch (IOException ex)
                {
                    // Only the detail is printed here; the generic model-failure line follows
                    // below because result is still non-zero.
                    Console.WriteLine(ex.Message);
                }

                stopwatch.Stop();
                if (result != 0)
                {
                    Console.WriteLine("Error loading the model.");
                    return;
                }

                // ElapsedMilliseconds is the total duration; Elapsed.Milliseconds would only
                // report the 0-999 ms component.
                Console.WriteLine($"Model loaded - {stopwatch.ElapsedMilliseconds} ms");
                stopwatch.Reset();

                if (lm != null)
                {
                    Console.WriteLine("Loading LM...");
                    try
                    {
                        int lmResult = sttClient.EnableDecoderWithLM(
                            alphabet ?? "alphabet.txt",
                            lm,
                            trie ?? "trie",
                            LM_ALPHA, LM_BETA);

                        // NOTE(review): assumes the same "0 means success" convention that the
                        // CreateModel check above relies on - confirm against the binding.
                        // Inference still proceeds without the language model, as before.
                        if (lmResult != 0)
                        {
                            Console.WriteLine("Error loading lm.");
                        }
                    }
                    catch (IOException ex)
                    {
                        Console.WriteLine("Error loading lm.");
                        Console.WriteLine(ex.Message);
                    }
                }

                string audioFile  = audio ?? "arctic_a0024.wav";
                var    waveBuffer = new WaveBuffer(File.ReadAllBytes(audioFile));

                // The reader is opened only to report the audio duration.
                using (var waveInfo = new WaveFileReader(audioFile))
                {
                    Console.WriteLine("Running inference....");

                    stopwatch.Start();

                    string speechResult = sttClient.SpeechToText(waveBuffer.ShortBuffer, Convert.ToUInt32(waveBuffer.MaxSize / 2), 16000);

                    stopwatch.Stop();

                    Console.WriteLine($"Audio duration: {waveInfo.TotalTime.ToString()}");
                    Console.WriteLine($"Inference took: {stopwatch.Elapsed.ToString()}");
                    Console.WriteLine($"Recognized text: {speechResult}");
                }
                waveBuffer.Clear();
            }
        }
예제 #4
0
        /// <summary>
        /// Loads a DeepSpeech model, optionally enables an external scorer, runs inference on
        /// a WAV file and prints the transcription together with timing information.
        /// </summary>
        /// <param name="args">
        /// Optional switches read through <c>GetArgument</c>: <c>--model</c>, <c>--scorer</c>,
        /// <c>--audio</c> and <c>--extended</c>. Missing model/audio values fall back to the
        /// default file names used below.
        /// </param>
        static void Main(string[] args)
        {
            string model    = null;
            string scorer   = null;
            string audio    = null;
            bool   extended = false;

            if (args.Length > 0)
            {
                model    = GetArgument(args, "--model");
                scorer   = GetArgument(args, "--scorer");
                audio    = GetArgument(args, "--audio");
                extended = !string.IsNullOrWhiteSpace(GetArgument(args, "--extended"));
            }

            Stopwatch stopwatch = new Stopwatch();

            try
            {
                Console.WriteLine("Loading model...");
                stopwatch.Start();
                // sphinx-doc: csharp_ref_model_start
                using IDeepSpeech sttClient = new DeepSpeech(model ?? "output_graph.pbmm");
                // sphinx-doc: csharp_ref_model_stop
                stopwatch.Stop();

                // ElapsedMilliseconds is the total duration; Elapsed.Milliseconds would only
                // report the 0-999 ms component and under-report loads longer than a second.
                Console.WriteLine($"Model loaded - {stopwatch.ElapsedMilliseconds} ms");
                stopwatch.Reset();

                // The scorer is only enabled when one was explicitly requested.
                if (scorer != null)
                {
                    Console.WriteLine("Loading scorer...");
                    sttClient.EnableExternalScorer(scorer);
                }

                string audioFile  = audio ?? "new-home-in-the-stars-16k.wav";
                var    waveBuffer = new WaveBuffer(File.ReadAllBytes(audioFile));

                // The reader is opened only to report the audio duration.
                using (var waveInfo = new WaveFileReader(audioFile))
                {
                    Console.WriteLine("Running inference....");

                    stopwatch.Start();

                    string speechResult;
                    // sphinx-doc: csharp_ref_inference_start
                    if (extended)
                    {
                        Metadata metaResult = sttClient.SpeechToTextWithMetadata(waveBuffer.ShortBuffer,
                                                                                 Convert.ToUInt32(waveBuffer.MaxSize / 2), 1);
                        speechResult = MetadataToString(metaResult.Transcripts[0]);
                    }
                    else
                    {
                        speechResult = sttClient.SpeechToText(waveBuffer.ShortBuffer,
                                                              Convert.ToUInt32(waveBuffer.MaxSize / 2));
                    }
                    // sphinx-doc: csharp_ref_inference_stop

                    stopwatch.Stop();

                    Console.WriteLine($"Audio duration: {waveInfo.TotalTime.ToString()}");
                    Console.WriteLine($"Inference took: {stopwatch.Elapsed.ToString()}");
                    Console.WriteLine((extended ? "Extended result: " : "Recognized text: ") + speechResult);
                }
                waveBuffer.Clear();
            }
            catch (Exception ex)
            {
                // Any failure (missing file, model error, ...) is reported and ends the run.
                Console.WriteLine(ex.Message);
            }
        }
        /// <summary>
        /// Runs DeepSpeech on the WAV track that belongs to the video named by
        /// <paramref name="s"/>, using hard-coded model, scorer and video directory paths.
        /// </summary>
        /// <remarks>
        /// When the matching .mp3 or .wav file does not exist yet, the method only launches
        /// the corresponding ffmpeg conversion (without waiting for it) and returns " ".
        /// Inference runs only on a call where both files already exist.
        /// </remarks>
        /// <param name="s">Video file name; the text before the first '.' is used as the base name.</param>
        /// <returns>
        /// With extended metadata (always enabled here): the recognized token texts
        /// concatenated with the space tokens left out. " " when a conversion was started
        /// instead of inference, or when an exception was caught.
        /// </returns>
        public static string Main1(string s)
        {
            string model          = "C:/Users/Nour El-Din/Documents/deepspeech/deepspeech-0.9.3-models.pbmm";
            string scorer         = "C:/Users/Nour El-Din/Documents/deepspeech/deepspeech-0.9.3-models.scorer";
            string videoDirectory = "C:/Users/Nour El-Din/Downloads/Graduation-Project-Dragons-master/Graduation Project Dragons/wwwroot/Video";

            // Hot-words are never supplied in this method; the handling below stays
            // in place for when a source for them is wired in.
            string hotwords = null;
            bool   extended = true;

            Stopwatch stopwatch = new Stopwatch();

            try
            {
                Console.WriteLine("Loading model...");
                stopwatch.Start();
                // sphinx-doc: csharp_ref_model_start
                using (IDeepSpeech sttClient = new DeepSpeech(model))
                {
                    // sphinx-doc: csharp_ref_model_stop
                    stopwatch.Stop();

                    // ElapsedMilliseconds is the total duration; Elapsed.Milliseconds would only
                    // report the 0-999 ms component.
                    Console.WriteLine($"Model loaded - {stopwatch.ElapsedMilliseconds} ms");
                    stopwatch.Reset();
                    if (scorer != null)
                    {
                        Console.WriteLine("Loading scorer...");
                        sttClient.EnableExternalScorer(scorer);
                    }

                    if (hotwords != null)
                    {
                        // Expected format: "word:boost,word:boost,...".
                        Console.WriteLine($"Adding hot-words {hotwords}");
                        char[]   sep         = { ',' };
                        string[] word_boosts = hotwords.Split(sep);
                        foreach (string word_boost in word_boosts)
                        {
                            char[]   sep1 = { ':' };
                            string[] word = word_boost.Split(sep1);
                            sttClient.AddHotWord(word[0], float.Parse(word[1]));
                        }
                    }

                    // ffmpeg is invoked with file names relative to the video directory.
                    Directory.SetCurrentDirectory(videoDirectory);
                    string baseName         = s.Split('.')[0];
                    string projectDirectory = Directory.GetParent(System.Environment.CurrentDirectory).Parent.FullName;
                    Console.WriteLine(projectDirectory);

                    string mp3Path = $"{videoDirectory}/{baseName}.mp3";
                    string wavPath = $"{videoDirectory}/{baseName}.wav";

                    // NOTE(review): baseName is interpolated unquoted into a cmd.exe command
                    // line below. If s can come from an untrusted source (or contain spaces),
                    // this needs quoting/validation - confirm against the callers.
                    if (!File.Exists(mp3Path))
                    {
                        System.Diagnostics.Process.Start("CMD.exe", $"/c ffmpeg -i {baseName}.mp4 {baseName}.mp3");
                        return " ";
                    }

                    if (!File.Exists(wavPath))
                    {
                        System.Diagnostics.Process.Start("CMD.exe", $"/c ffmpeg -i {baseName}.mp3 -acodec pcm_s16le -ac 1 -ar 16000 {baseName}.wav");
                        return " ";
                    }

                    var waveBuffer = new WaveBuffer(File.ReadAllBytes(wavPath));
                    try
                    {
                        // The reader is opened only to report the audio duration.
                        using (var waveInfo = new WaveFileReader(wavPath))
                        {
                            Console.WriteLine("Running inference....");

                            stopwatch.Start();
                            string w = "";
                            string speechResult;
                            // sphinx-doc: csharp_ref_inference_start
                            if (extended)
                            {
                                Metadata metaResult = sttClient.SpeechToTextWithMetadata(waveBuffer.ShortBuffer,
                                                                                         Convert.ToUInt32(waveBuffer.MaxSize / 2), 1);
                                speechResult = MetadataToString(metaResult.Transcripts[0]);

                                // Group the tokens into words with start/end times.
                                // NOTE(review): wordTimings is not read after the loop in this
                                // method; only w is returned.
                                List<data> wordTimings = new List<data>();
                                int        tokenCount  = metaResult.Transcripts[0].Tokens.Length;
                                int        position    = 0;
                                bool       inWord      = false;
                                double     wordStart   = 0;
                                string     currentWord = "";
                                foreach (var token in metaResult.Transcripts[0].Tokens)
                                {
                                    position++;
                                    if (!inWord)
                                    {
                                        wordStart = token.StartTime;
                                        inWord    = true;
                                    }
                                    currentWord += token.Text;

                                    bool isSpace = token.Text == " ";
                                    if (!isSpace)
                                    {
                                        w += token.Text;
                                    }

                                    // A word ends at a space token or at the final token. The
                                    // final token is position == tokenCount because position is
                                    // incremented before the check.
                                    if (isSpace || position == tokenCount)
                                    {
                                        double wordEnd = token.StartTime;
                                        data   entry   = new data();
                                        entry.start = wordStart;
                                        entry.end   = wordEnd;
                                        entry.word  = currentWord;
                                        wordTimings.Add(entry);
                                        currentWord = "";
                                        inWord      = false;
                                    }
                                }
                            }
                            else
                            {
                                speechResult = sttClient.SpeechToText(waveBuffer.ShortBuffer,
                                                                      Convert.ToUInt32(waveBuffer.MaxSize / 2));
                            }
                            // sphinx-doc: csharp_ref_inference_stop
                            stopwatch.Stop();
                            Console.WriteLine($"Audio duration: {waveInfo.TotalTime.ToString()}");
                            Console.WriteLine($"Inference took: {stopwatch.Elapsed.ToString()}");
                            Console.WriteLine((extended ? "Extended result: " : "Recognized text: ") + speechResult);
                            return w;
                        }
                    }
                    finally
                    {
                        // Runs on every exit path; the original call sat after the return
                        // statement and was never reached.
                        waveBuffer.Clear();
                    }
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
            return " ";
        }
예제 #6
0
        /// <summary>
        /// Converts the video named by <paramref name="s"/> to a 16 kHz mono WAV file via
        /// ffmpeg (when the intermediate files are missing), runs DeepSpeech on it and
        /// returns the recognized words with their timings.
        /// </summary>
        /// <param name="s">
        /// Video file name located in the "Video" folder under the web root; the text before
        /// the first '.' is used as the base name.
        /// </param>
        /// <returns>
        /// With extended metadata (always enabled here): one <c>word,start,end/</c> record per
        /// word, where <c>word</c> keeps the trailing space token that ended it, <c>start</c>
        /// is the start time of its first token and <c>end</c> the start time of its last
        /// token. Numbers are written with the invariant culture so that ',' stays an
        /// unambiguous field separator. Returns " " when an exception was caught.
        /// </returns>
        public static string Main1(string s)
        {
            string model    = "C:/Users/Nour El-Din/Documents/deepspeech/deepspeech-0.9.3-models.pbmm";
            string scorer   = "C:/Users/Nour El-Din/Documents/deepspeech/deepspeech-0.9.3-models.scorer";
            bool   extended = true;

            string    path      = Path.Combine(HomeController.Environment.WebRootPath, "Video");
            Stopwatch stopwatch = new Stopwatch();

            // Runs a command line through a hidden cmd.exe and blocks until it has finished.
            void RunCmd(string cmdArguments)
            {
                using (Process process = new Process())
                {
                    process.StartInfo.FileName    = "cmd.exe";
                    process.StartInfo.WindowStyle = ProcessWindowStyle.Hidden;
                    process.StartInfo.Arguments   = cmdArguments;
                    process.Start();
                    process.WaitForExit();
                }
            }

            try
            {
                Console.WriteLine("Loading model...");
                stopwatch.Start();
                // sphinx-doc: csharp_ref_model_start
                using (IDeepSpeech sttClient = new DeepSpeech(model))
                {
                    // sphinx-doc: csharp_ref_model_stop
                    stopwatch.Stop();

                    // ElapsedMilliseconds is the total duration; Elapsed.Milliseconds would only
                    // report the 0-999 ms component.
                    Console.WriteLine($"Model loaded - {stopwatch.ElapsedMilliseconds} ms");
                    stopwatch.Reset();
                    if (scorer != null)
                    {
                        Console.WriteLine("Loading scorer...");
                        sttClient.EnableExternalScorer(scorer);
                    }

                    // ffmpeg is invoked with file names relative to the video directory.
                    Directory.SetCurrentDirectory(path);
                    string baseName = s.Split('.')[0];

                    // Existence is sampled once, before any conversion, as in the original flow.
                    bool mp3Found = File.Exists($"{path}/{baseName}.mp3");
                    bool wavFound = File.Exists($"{path}/{baseName}.wav");

                    // NOTE(review): baseName is interpolated unquoted into a cmd.exe command
                    // line below. If s can come from an untrusted source (or contain spaces),
                    // this needs quoting/validation - confirm against the callers.
                    if (!mp3Found)
                    {
                        RunCmd($"/c ffmpeg -i {baseName}.mp4 {baseName}.mp3");
                    }

                    if (!wavFound)
                    {
                        RunCmd($"/c ffmpeg -i {baseName}.mp3 -acodec pcm_s16le -ac 1 -ar 16000 {baseName}.wav");
                    }

                    string audioFile  = $"{path}/{baseName}.wav";
                    var    waveBuffer = new WaveBuffer(File.ReadAllBytes(audioFile));

                    // The reader is opened only to report the audio duration.
                    using (var waveInfo = new WaveFileReader(audioFile))
                    {
                        Console.WriteLine("Running inference....");
                        stopwatch.Start();
                        var    output = new System.Text.StringBuilder();
                        string speechResult;
                        // sphinx-doc: csharp_ref_inference_start
                        if (extended)
                        {
                            Metadata metaResult = sttClient.SpeechToTextWithMetadata(waveBuffer.ShortBuffer,
                                                                                     Convert.ToUInt32(waveBuffer.MaxSize / 2), 1);
                            speechResult = MetadataToString(metaResult.Transcripts[0]);

                            var    invariant   = System.Globalization.CultureInfo.InvariantCulture;
                            int    tokenCount  = metaResult.Transcripts[0].Tokens.Length;
                            int    position    = 0;
                            bool   inWord      = false;
                            double wordStart   = 0;
                            string currentWord = "";
                            foreach (var token in metaResult.Transcripts[0].Tokens)
                            {
                                position++;
                                if (!inWord)
                                {
                                    wordStart = token.StartTime;
                                    inWord    = true;
                                }
                                currentWord += token.Text;

                                // A word ends at a space token or at the final token.
                                if (token.Text == " " || position == tokenCount)
                                {
                                    double wordEnd = token.StartTime;
                                    output.Append(currentWord).Append(',')
                                          .Append(wordStart.ToString(invariant)).Append(',')
                                          .Append(wordEnd.ToString(invariant)).Append('/');
                                    currentWord = "";
                                    inWord      = false;
                                }
                            }
                        }
                        else
                        {
                            speechResult = sttClient.SpeechToText(waveBuffer.ShortBuffer,
                                                                  Convert.ToUInt32(waveBuffer.MaxSize / 2));
                        }
                        // sphinx-doc: csharp_ref_inference_stop
                        stopwatch.Stop();
                        Console.WriteLine($"Audio duration: {waveInfo.TotalTime.ToString()}");
                        Console.WriteLine($"Inference took: {stopwatch.Elapsed.ToString()}");
                        Console.WriteLine((extended ? "Extended result: " : "Recognized text: ") + speechResult);
                        waveBuffer.Clear();
                        return output.ToString();
                    }
                }
            }
            catch (Exception ex)
            {
                // Covers a missing WAV file after a failed conversion as well as model errors.
                Console.WriteLine(ex.Message);
            }
            return " ";
        }