Exemplo n.º 1
0
        /// <summary>
        /// Renders one audio buffer: DirectSound channels first, then reverb,
        /// then GameBoy channels, and finally mixes the unmuted tracks into the
        /// interleaved stereo output at the master volume.
        /// </summary>
        public void Process()
        {
            // Nothing to do until the mixer has been given a track count.
            if (numTracks == 0)
            {
                return;
            }

            // Start from silence: wipe every per-track scratch buffer and the output.
            for (int t = 0; t < trackBuffers.Length; t++)
            {
                Array.Clear(trackBuffers[t], 0, trackBuffers[t].Length);
            }
            audio.Clear();

            // Render every owned DirectSound channel into its track's buffer.
            foreach (var channel in dsChannels)
            {
                if (channel.OwnerIdx == 0xFF)
                {
                    continue; // 0xFF marks a channel with no owning track
                }
                channel.Process(trackBuffers[channel.OwnerIdx]);
            }

            // Reverb only applies to DirectSound, so it runs before the GB channels render.
            for (int t = 0; t < numTracks; t++)
            {
                reverbs[t]?.Process(trackBuffers[t], SamplesPerBuffer);
            }

            // Render every owned GameBoy (PSG) channel into its track's buffer.
            foreach (var channel in gbChannels)
            {
                if (channel.OwnerIdx == 0xFF)
                {
                    continue;
                }
                channel.Process(trackBuffers[channel.OwnerIdx]);
            }

            // Accumulate unmuted tracks into the interleaved (L,R) output buffer.
            for (int t = 0; t < numTracks; t++)
            {
                if (mutes[t])
                {
                    continue;
                }

                float[] src = trackBuffers[t];
                for (int s = 0; s < SamplesPerBuffer; s++)
                {
                    int left = s * 2;
                    audio.FloatBuffer[left]     += src[left] * MasterVolume;
                    audio.FloatBuffer[left + 1] += src[left + 1] * MasterVolume;
                }
            }

            buffer.AddSamples(audio, 0, audio.ByteBufferCount);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Renders one audio buffer for the given tracks, ramping the master
        /// volume per sample while a fade is in progress, and optionally sends
        /// the mixed buffer to the output and/or the wave writer.
        /// </summary>
        public void Process(Track[] tracks, bool output, bool recording)
        {
            _audio.Clear();

            // Master volume at the start of this buffer, plus the per-sample
            // increment that ramps it toward the end-of-buffer value.
            float masterLevel;
            float masterStep;
            if (_isFading && _fadeMicroFramesLeft == 0)
            {
                // Fade has completed: everything is silent.
                masterLevel = 0;
                masterStep  = 0;
            }
            else
            {
                float from = 1f;
                float to   = 1f;
                if (_fadeMicroFramesLeft > 0)
                {
                    const float curve = 10f / 6f; // fade exponent: level = pos^(10/6)
                    from *= _fadePos < 0f ? 0f : (float)Math.Pow(_fadePos, curve);
                    _fadePos += _fadeStepPerMicroframe;
                    to *= _fadePos < 0f ? 0f : (float)Math.Pow(_fadePos, curve);
                    _fadeMicroFramesLeft--;
                }
                masterLevel = from;
                masterStep  = (to - from) * _samplesReciprocal;
            }

            for (int trackIdx = 0; trackIdx < Player.NumTracks; trackIdx++)
            {
                Track track = tracks[trackIdx];
                bool audible = track.Enabled && track.NoteDuration != 0 && !track.Channel.Stopped && !Mutes[trackIdx];
                if (!audible)
                {
                    continue; // nothing to render or mix for this track
                }

                float[] trackBuf = _trackBuffers[trackIdx];
                Array.Clear(trackBuf, 0, trackBuf.Length);
                track.Channel.Process(trackBuf);

                float level = masterLevel;
                for (int sample = 0; sample < SamplesPerBuffer; sample++)
                {
                    int left = sample * 2;
                    _audio.FloatBuffer[left]     += trackBuf[left] * level;
                    _audio.FloatBuffer[left + 1] += trackBuf[left + 1] * level;
                    level += masterStep;
                }
            }
            if (output)
            {
                _buffer.AddSamples(_audio.ByteBuffer, 0, _audio.ByteBufferCount);
            }
            if (recording)
            {
                _waveWriter.Write(_audio.ByteBuffer, 0, _audio.ByteBufferCount);
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Renders one audio buffer: DirectSound channels, then reverb, then
        /// GameBoy channels, then mixes unmuted tracks into the stereo output.
        /// </summary>
        internal static void Process()
        {
            // Wipe every per-track scratch buffer and the interleaved output.
            for (int i = 0; i < trackBuffers.Length; i++)
            {
                Array.Clear(trackBuffers[i], 0, trackBuffers[i].Length);
            }
            audio.Clear();

            // DirectSound channels render first...
            foreach (var channel in dsChannels)
            {
                if (channel.OwnerIdx == 0xFF)
                {
                    continue; // no owning track
                }
                channel.Process(trackBuffers[channel.OwnerIdx]);
            }

            // ...because reverb only applies to DirectSound output.
            for (int i = 0; i < trackBuffers.Length; i++)
            {
                reverbs[i]?.Process(trackBuffers[i], (int)SamplesPerBuffer);
            }

            // GameBoy (PSG) channels render after the reverb pass, so their
            // contribution is not run through reverb here.
            foreach (var channel in gbChannels)
            {
                if (channel.OwnerIdx == 0xFF)
                {
                    continue;
                }
                channel.Process(trackBuffers[channel.OwnerIdx]);
            }

            // Sum unmuted tracks into the interleaved (L,R) output at master volume.
            for (int i = 0; i < MAX_TRACKS; i++)
            {
                if (mutes[i])
                {
                    continue;
                }

                float[] src = trackBuffers[i];
                for (int j = 0; j < SamplesPerBuffer; j++)
                {
                    int left = j * 2;
                    audio.FloatBuffer[left]     += src[left] * MasterVolume;
                    audio.FloatBuffer[left + 1] += src[left + 1] * MasterVolume;
                }
            }

            buffer.AddSamples(audio, 0, audio.ByteBufferCount);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Minimal DeepSpeech example: loads the model and scorer, transcribes a
        /// bundled WAV file and prints the recognized text.
        /// </summary>
        static void Main(string[] args)
        {
            using (IDeepSpeech sttClient = new DeepSpeech(DEEPSPEECH_MODEL))
            {
                sttClient.EnableExternalScorer(DEEPSPEECH_SCORER);

                // Read the whole WAV into memory; the 16-bit sample count is the
                // byte count divided by two.
                string audioFilePath = @"audio\2830-3980-0043.wav";
                var    waveBuffer    = new WaveBuffer(File.ReadAllBytes(audioFilePath));
                string speechResult  = sttClient.SpeechToText(
                    waveBuffer.ShortBuffer,
                    Convert.ToUInt32(waveBuffer.MaxSize / 2));

                Console.WriteLine(speechResult);

                waveBuffer.Clear();
            }
        }
        /// <summary>
        /// Transcribes the temporary WAV file offline and returns every candidate
        /// transcription, ordered from most to least confident.
        /// </summary>
        /// <returns>Candidate transcriptions as strings, best candidate first.</returns>
        private List <String> Transcribe_Offline()
        {
            List <String> result = new List <string>();

            var waveBuffer = new WaveBuffer(File.ReadAllBytes(tmpWavFilePath));

            using (var waveInfo = new WaveFileReader(tmpWavFilePath))
            {
                Metadata metaResult = _sttClient.SpeechToTextWithMetadata(waveBuffer.ShortBuffer, Convert.ToUInt32(waveBuffer.MaxSize / 2), 16000);

                // BUG FIX: OrderByDescending returns a NEW ordered sequence; the old
                // code discarded it, so candidates were emitted in engine order, not
                // by confidence. Iterate the ordered sequence instead.
                IEnumerable <CandidateTranscript> candidateTranscriptions =
                    metaResult.Transcripts.OrderByDescending(x => x.Confidence);
                foreach (CandidateTranscript ct in candidateTranscriptions)
                {
                    result.Add(MetadataToString(ct));
                }
            }
            waveBuffer.Clear();
            return(result);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Renders one audio buffer for up to 16 tracks, ramping the master
        /// volume per sample while a fade is active, then optionally outputs
        /// and/or records the mixed buffer.
        /// </summary>
        public void Process(Track[] tracks, bool output, bool recording)
        {
            audio.Clear();

            // Start- and end-of-buffer master volumes; they differ only mid-fade.
            float fromMaster = 1f;
            float toMaster   = 1f;
            if (fadeMicroFramesLeft > 0)
            {
                const float curve = 10f / 6f; // fade exponent: level = pos^(10/6)
                fromMaster *= fadePos < 0f ? 0f : (float)Math.Pow(fadePos, curve);
                fadePos += fadeStepPerMicroframe;
                toMaster *= fadePos < 0f ? 0f : (float)Math.Pow(fadePos, curve);
                fadeMicroFramesLeft--;
            }
            // Per-sample increment that ramps fromMaster toward toMaster.
            float masterStep = (toMaster - fromMaster) * samplesReciprocal;

            for (int trackIdx = 0; trackIdx < 0x10; trackIdx++)
            {
                Track track = tracks[trackIdx];
                if (!track.Enabled || track.NoteDuration == 0 || track.Channel.Stopped || Mutes[trackIdx])
                {
                    continue; // silent or muted track: nothing to render or mix
                }

                float[] trackBuf = trackBuffers[trackIdx];
                Array.Clear(trackBuf, 0, trackBuf.Length);
                track.Channel.Process(trackBuf);

                float level = fromMaster;
                for (int sample = 0; sample < SamplesPerBuffer; sample++)
                {
                    int left = sample * 2;
                    audio.FloatBuffer[left]     += trackBuf[left] * level;
                    audio.FloatBuffer[left + 1] += trackBuf[left + 1] * level;
                    level += masterStep;
                }
            }
            if (output)
            {
                buffer.AddSamples(audio.ByteBuffer, 0, audio.ByteBufferCount);
            }
            if (recording)
            {
                waveWriter.Write(audio.ByteBuffer, 0, audio.ByteBufferCount);
            }
        }
        // Called by the soundcard when it needs the next block of samples.
        public int Read(byte[] buffer, int offset, int count)
        {
            // 4 bytes per 32-bit float sample.
            int samplesNeeded = count / 4;
            var wb            = new WaveBuffer(buffer);

            // Grow the intermediate mix buffer if needed, then start from silence.
            FMixerBuffer = BufferHelpers.Ensure(FMixerBuffer, samplesNeeded);
            wb.Clear();

            lock (source)
            {
                int inputCount = source.Count;
                for (int i = 0; i < inputCount; i++)
                {
                    var input = source[i];
                    if (input == null)
                    {
                        continue;
                    }

                    // Pull this input's audio graph into the scratch buffer...
                    input.Read(FMixerBuffer, offset / 4, samplesNeeded);

                    // ...then accumulate into the output, resetting the scratch as we go.
                    for (int j = 0; j < samplesNeeded; j++)
                    {
                        wb.FloatBuffer[j] += FMixerBuffer[j];
                        FMixerBuffer[j]    = 0;
                    }
                }

                // Tell the engine that reading has finished.
                FReadingFinished();
            }
            return(count); // always report the full request as satisfied
        }
Exemplo n.º 8
0
        /// <summary>
        /// Command-line entry point: loads a DeepSpeech model (optionally with a
        /// KenLM language model) and transcribes a WAV file, printing the timings
        /// and the recognized text.
        /// Arguments: --model, --lm, --trie, --audio, --extended.
        /// </summary>
        static void Main(string[] args)
        {
            string model    = null;
            string lm       = null;
            string trie     = null;
            string audio    = null;
            bool   extended = false;

            if (args.Length > 0)
            {
                model    = GetArgument(args, "--model");
                lm       = GetArgument(args, "--lm");
                trie     = GetArgument(args, "--trie");
                audio    = GetArgument(args, "--audio");
                extended = !string.IsNullOrWhiteSpace(GetArgument(args, "--extended"));
            }

            const uint  BEAM_WIDTH = 500;
            const float LM_ALPHA   = 0.75f;
            const float LM_BETA    = 1.85f;

            Stopwatch stopwatch = new Stopwatch();

            using (IDeepSpeech sttClient = new DeepSpeech())
            {
                try
                {
                    Console.WriteLine("Loading model...");
                    stopwatch.Start();
                    sttClient.CreateModel(
                        model ?? "output_graph.pbmm",
                        BEAM_WIDTH);
                    stopwatch.Stop();

                    Console.WriteLine($"Model loaded - {stopwatch.Elapsed.Milliseconds} ms");
                    stopwatch.Reset();
                    if (lm != null)
                    {
                        // FIX: message typo ("Loadin"); "lm ?? ..." was dead code since
                        // lm is known to be non-null inside this branch.
                        Console.WriteLine("Loading LM...");
                        sttClient.EnableDecoderWithLM(
                            lm,
                            trie ?? "trie",
                            LM_ALPHA, LM_BETA);
                    }

                    string audioFile  = audio ?? "arctic_a0024.wav";
                    var    waveBuffer = new WaveBuffer(File.ReadAllBytes(audioFile));
                    using (var waveInfo = new WaveFileReader(audioFile))
                    {
                        Console.WriteLine("Running inference....");

                        stopwatch.Start();

                        string speechResult;
                        if (extended)
                        {
                            // Extended mode returns per-token metadata for formatting.
                            Metadata metaResult = sttClient.SpeechToTextWithMetadata(waveBuffer.ShortBuffer, Convert.ToUInt32(waveBuffer.MaxSize / 2));
                            speechResult = MetadataToString(metaResult);
                        }
                        else
                        {
                            speechResult = sttClient.SpeechToText(waveBuffer.ShortBuffer, Convert.ToUInt32(waveBuffer.MaxSize / 2));
                        }

                        stopwatch.Stop();

                        Console.WriteLine($"Audio duration: {waveInfo.TotalTime}");
                        Console.WriteLine($"Inference took: {stopwatch.Elapsed}");
                        // FIX: the "Extended result: " literal needed no interpolation.
                        Console.WriteLine((extended ? "Extended result: " : "Recognized text: ") + speechResult);
                    }
                    waveBuffer.Clear();
                }
                catch (Exception ex)
                {
                    Console.WriteLine(ex.Message);
                }
            }
        }
Exemplo n.º 9
0
        /// <summary>
        /// Renders one audio buffer: clears the scratch buffers, lets every owned
        /// PCM8 and PSG channel render into its owner track's buffer, then mixes
        /// the unmuted tracks into the interleaved stereo output with the current
        /// master (fade) volume applied per sample.
        /// </summary>
        /// <param name="output">If true, the mixed buffer is passed to the output buffer.</param>
        /// <param name="recording">If true, the mixed buffer is written to the wave writer.</param>
        public void Process(bool output, bool recording)
        {
            // Start from silence: clear every per-track buffer and the stereo output.
            for (int i = 0; i < _trackBuffers.Length; i++)
            {
                float[] buf = _trackBuffers[i];
                Array.Clear(buf, 0, buf.Length);
            }
            _audio.Clear();

            // Render PCM8 channels; a null Owner marks an unused channel.
            for (int i = 0; i < _pcm8Channels.Length; i++)
            {
                PCM8Channel c = _pcm8Channels[i];
                if (c.Owner != null)
                {
                    c.Process(_trackBuffers[c.Owner.Index]);
                }
            }

            // Render PSG channels the same way.
            for (int i = 0; i < _psgChannels.Length; i++)
            {
                PSGChannel c = _psgChannels[i];
                if (c.Owner != null)
                {
                    c.Process(_trackBuffers[c.Owner.Index]);
                }
            }

            // Master volume at the start of this buffer plus the per-sample
            // increment used to ramp it across the buffer while fading.
            float masterStep;
            float masterLevel;

            if (_isFading && _fadeMicroFramesLeft == 0)
            {
                // Fade has completed: output silence.
                masterStep  = 0;
                masterLevel = 0;
            }
            else
            {
                float fromMaster = 1f;
                float toMaster   = 1f;
                if (_fadeMicroFramesLeft > 0)
                {
                    // Fade curve: level = pos^(10/6); negative positions clamp to 0.
                    const float scale = 10f / 6f;
                    fromMaster *= (_fadePos < 0f) ? 0f : (float)Math.Pow(_fadePos, scale);
                    _fadePos   += _fadeStepPerMicroframe;
                    toMaster   *= (_fadePos < 0f) ? 0f : (float)Math.Pow(_fadePos, scale);
                    _fadeMicroFramesLeft--;
                }
                masterStep  = (toMaster - fromMaster) * _samplesReciprocal;
                masterLevel = fromMaster;
            }
            // Mix every unmuted track into the interleaved (L,R) output buffer.
            for (int i = 0; i < _trackBuffers.Length; i++)
            {
                if (!Mutes[i])
                {
                    float   level = masterLevel;
                    float[] buf   = _trackBuffers[i];
                    for (int j = 0; j < SamplesPerBuffer; j++)
                    {
                        _audio.FloatBuffer[j * 2]       += buf[j * 2] * level;
                        _audio.FloatBuffer[(j * 2) + 1] += buf[(j * 2) + 1] * level;
                        level += masterStep;
                    }
                }
            }
            if (output)
            {
                _buffer.AddSamples(_audio.ByteBuffer, 0, _audio.ByteBufferCount);
            }
            if (recording)
            {
                _waveWriter.Write(_audio.ByteBuffer, 0, _audio.ByteBufferCount);
            }
        }
Exemplo n.º 10
0
        /// <summary>
        /// Command-line entry point for the DeepSpeech 0.4.x client: loads the
        /// acoustic model (and optionally a KenLM language model), runs inference
        /// on a WAV file and prints the timings and the recognized text.
        /// Arguments: --model, --alphabet, --lm, --trie, --audio.
        /// </summary>
        static void Main(string[] args)
        {
            string model    = null;
            string alphabet = null;
            string lm       = null;
            string trie     = null;
            string audio    = null;

            if (args.Length > 0)
            {
                model    = GetArgument(args, "--model");
                alphabet = GetArgument(args, "--alphabet");
                lm       = GetArgument(args, "--lm");
                trie     = GetArgument(args, "--trie");
                audio    = GetArgument(args, "--audio");
            }

            const uint  N_CEP      = 26;
            const uint  N_CONTEXT  = 9;
            const uint  BEAM_WIDTH = 200;
            const float LM_ALPHA   = 0.75f;
            const float LM_BETA    = 1.85f;

            Stopwatch stopwatch = new Stopwatch();

            using (IDeepSpeech sttClient = new DeepSpeech())
            {
                var result = 1; // non-zero until CreateModel reports success
                Console.WriteLine("Loading model...");
                stopwatch.Start();
                try
                {
                    result = sttClient.CreateModel(
                        model ?? "output_graph.pbmm",
                        N_CEP, N_CONTEXT,
                        alphabet ?? "alphabet.txt",
                        BEAM_WIDTH);
                }
                catch (IOException ex)
                {
                    // FIX: this failure is about the model, not the LM (the old
                    // message said "Error loading lm.").
                    Console.WriteLine("Error loading the model.");
                    Console.WriteLine(ex.Message);
                }

                stopwatch.Stop();
                if (result == 0)
                {
                    Console.WriteLine($"Model loaded - {stopwatch.Elapsed.Milliseconds} ms");
                    stopwatch.Reset();
                    if (lm != null)
                    {
                        Console.WriteLine("Loading LM..."); // FIX: typo "Loadin"
                        try
                        {
                            // lm is non-null in this branch, so no fallback is needed for it.
                            result = sttClient.EnableDecoderWithLM(
                                alphabet ?? "alphabet.txt",
                                lm,
                                trie ?? "trie",
                                LM_ALPHA, LM_BETA);
                        }
                        catch (IOException ex)
                        {
                            Console.WriteLine("Error loading lm.");
                            Console.WriteLine(ex.Message);
                        }
                    }

                    string audioFile  = audio ?? "arctic_a0024.wav";
                    var    waveBuffer = new WaveBuffer(File.ReadAllBytes(audioFile));
                    using (var waveInfo = new WaveFileReader(audioFile))
                    {
                        Console.WriteLine("Running inference....");

                        stopwatch.Start();

                        // 16000 = expected sample rate of the input WAV.
                        string speechResult = sttClient.SpeechToText(waveBuffer.ShortBuffer, Convert.ToUInt32(waveBuffer.MaxSize / 2), 16000);

                        stopwatch.Stop();

                        Console.WriteLine($"Audio duration: {waveInfo.TotalTime}");
                        Console.WriteLine($"Inference took: {stopwatch.Elapsed}");
                        Console.WriteLine($"Recognized text: {speechResult}");
                    }
                    waveBuffer.Clear();
                }
                else
                {
                    Console.WriteLine("Error loading the model."); // FIX: typo "loding"
                }
            }
        }
Exemplo n.º 11
0
        /// <summary>
        /// Command-line entry point: loads a DeepSpeech 0.9.x model (optionally
        /// with an external scorer) and transcribes a WAV file, printing timings
        /// and the recognized text.
        /// Arguments: --model, --scorer, --audio, --extended.
        /// </summary>
        static void Main(string[] args)
        {
            string model    = null;
            string scorer   = null;
            string audio    = null;
            bool   extended = false;

            if (args.Length > 0)
            {
                model    = GetArgument(args, "--model");
                scorer   = GetArgument(args, "--scorer");
                audio    = GetArgument(args, "--audio");
                extended = !string.IsNullOrWhiteSpace(GetArgument(args, "--extended"));
            }

            Stopwatch stopwatch = new Stopwatch();

            try
            {
                Console.WriteLine("Loading model...");
                stopwatch.Start();
                // sphinx-doc: csharp_ref_model_start
                using IDeepSpeech sttClient = new DeepSpeech(model ?? "output_graph.pbmm");
                // sphinx-doc: csharp_ref_model_stop
                stopwatch.Stop();

                Console.WriteLine($"Model loaded - {stopwatch.Elapsed.Milliseconds} ms");
                stopwatch.Reset();
                if (scorer != null)
                {
                    Console.WriteLine("Loading scorer...");
                    // FIX: "scorer ?? ..." was dead code — scorer is non-null here.
                    sttClient.EnableExternalScorer(scorer);
                }

                string audioFile  = audio ?? "new-home-in-the-stars-16k.wav";
                var    waveBuffer = new WaveBuffer(File.ReadAllBytes(audioFile));
                using (var waveInfo = new WaveFileReader(audioFile))
                {
                    Console.WriteLine("Running inference....");

                    stopwatch.Start();

                    string speechResult;
                    // sphinx-doc: csharp_ref_inference_start
                    if (extended)
                    {
                        // Request a single candidate transcript with token metadata.
                        Metadata metaResult = sttClient.SpeechToTextWithMetadata(waveBuffer.ShortBuffer,
                                                                                 Convert.ToUInt32(waveBuffer.MaxSize / 2), 1);
                        speechResult = MetadataToString(metaResult.Transcripts[0]);
                    }
                    else
                    {
                        speechResult = sttClient.SpeechToText(waveBuffer.ShortBuffer,
                                                              Convert.ToUInt32(waveBuffer.MaxSize / 2));
                    }
                    // sphinx-doc: csharp_ref_inference_stop

                    stopwatch.Stop();

                    Console.WriteLine($"Audio duration: {waveInfo.TotalTime}");
                    Console.WriteLine($"Inference took: {stopwatch.Elapsed}");
                    // FIX: the "Extended result: " literal needed no interpolation.
                    Console.WriteLine((extended ? "Extended result: " : "Recognized text: ") + speechResult);
                }
                waveBuffer.Clear();
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
        }
Exemplo n.º 12
0
        /// <summary>
        /// Benchmarks DeepSpeech against the LibriSpeech test-clean set:
        /// transcribes every utterance, computes per-sentence WER via a Python
        /// helper, prints a report per sample and writes the aggregate WER to
        /// result.txt.
        /// </summary>
        static void Main(string[] args)
        {
            FFMPEG ffmpeg = new FFMPEG("ffmpeg.exe");

            pythonProcess = new ProcessStartInfo
            {
                FileName               = "python.exe",
                CreateNoWindow         = true, // No window
                UseShellExecute        = false,
                RedirectStandardOutput = true
            };

            // Build audio-file -> reference-sentence pairs from the *.txt transcripts.
            var dirs = Directory.GetFileSystemEntries("test-clean/LibriSpeech/test-clean", "*.txt", SearchOption.AllDirectories);
            IDictionary <string, string> dataset = new Dictionary <string, string>();

            foreach (var transcriptionFile in dirs)
            {
                FileInfo fileInf = new FileInfo(transcriptionFile);
                foreach (var sentenceLine in File.ReadAllLines(transcriptionFile))
                {
                    // Each line is "<utterance-id> <words...>"; the audio lives next
                    // to the transcript as "<utterance-id>.flac".
                    var    sentenceSplit = sentenceLine.Split(' ');
                    string audioName     = fileInf.FullName.Replace(fileInf.Name, $"{sentenceSplit[0]}.flac");
                    string sentence      = string.Join(" ", sentenceSplit.ToList().Skip(1).ToArray()).ToLower();
                    dataset.Add(audioName, sentence);
                }
            }

            const uint  N_CEP      = 26;
            const uint  N_CONTEXT  = 9;
            const uint  BEAM_WIDTH = 200;
            const float LM_ALPHA   = 0.75f;
            const float LM_BETA    = 1.85f;

            const string    modelVersion = "0.4.1";
            List <Sentence> samples      = new List <Sentence>();

            using (var sttClient = new DeepSpeechClient.DeepSpeech())
            {
                var result = 1; // non-zero until CreateModel reports success
                Console.WriteLine("Loading model...");
                try
                {
                    result = sttClient.CreateModel($"{modelVersion}/output_graph.pbmm",
                                                   N_CEP, N_CONTEXT,
                                                   $"{modelVersion}/alphabet.txt",
                                                   BEAM_WIDTH);
                }
                catch (IOException ex)
                {
                    Console.WriteLine("Error loading lm.");
                    Console.WriteLine(ex.Message);
                }
                if (result == 0)
                {
                    Console.WriteLine("Loading LM..."); // FIX: typo "Loadin"
                    try
                    {
                        result = sttClient.EnableDecoderWithLM(
                            $"{modelVersion}/alphabet.txt",
                            $"{modelVersion}/lm.binary",
                            $"{modelVersion}/trie",
                            LM_ALPHA, LM_BETA);
                    }
                    catch (IOException ex)
                    {
                        Console.WriteLine("Error loading lm.");
                        Console.WriteLine(ex.Message);
                    }

                    foreach (var sentencePair in dataset)
                    {
                        // DeepSpeech 0.4.x expects 16 kHz mono WAV, so convert first.
                        ConvertFileToWav(sentencePair.Key, ffmpeg);

                        var waveBuffer = new WaveBuffer(File.ReadAllBytes(TempAudioFile));
                        Console.WriteLine("Running inference....");

                        string speechResult = sttClient.SpeechToText(waveBuffer.ShortBuffer, Convert.ToUInt32(waveBuffer.MaxSize / 2), 16000);

                        Sentence sentenceResult = RunPythonWER(sentencePair.Value, speechResult);

                        Console.WriteLine("================================================================================");
                        Console.WriteLine($"Recognized text: {speechResult}");
                        Console.WriteLine($"Correct text: {sentencePair.Value}");
                        Console.WriteLine($"WER {Math.Round(sentenceResult.Wer,2)*100} %");
                        Console.WriteLine("================================================================================");
                        Console.WriteLine();
                        samples.Add(sentenceResult);

                        waveBuffer.Clear();
                    }
                }
                else
                {
                    Console.WriteLine("Error loading the model."); // FIX: typo "loding"
                }
            }
            // NOTE(review): if the dataset is empty this divides 0/0 and prints NaN,
            // and the printed value is a fraction despite the "%" suffix — confirm
            // the intended formatting before changing output.
            double totalLevenshtein = samples.Select(x => x.Levenshtein).Sum();
            int    totalLabelLength = samples.Select(x => x.Length).Sum();
            double finalWer         = totalLevenshtein / totalLabelLength;

            File.WriteAllText("result.txt", finalWer.ToString(), Encoding.UTF8);
            Console.WriteLine($"Final WER: {finalWer} %");
            Console.ReadKey();
        }
Exemplo n.º 13
0
        /// <summary>
        /// Called by the soundcard to fill <paramref name="buffer"/> with the next
        /// block of 32-bit float samples. Under the source lock it notifies
        /// listeners, evaluates sink nodes, then reads and accumulates every input
        /// signal into its assigned channel of the output buffer.
        /// </summary>
        /// <param name="buffer">Destination byte buffer supplied by the driver.</param>
        /// <param name="offset">Byte offset into the destination buffer.</param>
        /// <param name="count">Number of bytes requested.</param>
        /// <returns>Always <paramref name="count"/>, so playback never stalls.</returns>
        public int Read(byte[] buffer, int offset, int count)
        {
            // 4 bytes per float sample, interleaved across all channels.
            var        channels      = WaveFormat.Channels;
            int        samplesNeeded = count / (4 * channels);
            WaveBuffer wb            = new WaveBuffer(buffer);

            //fix buffer size
            FMixerBuffer = BufferHelpers.Ensure(FMixerBuffer, samplesNeeded);

            //empty buffer
            wb.Clear();

            lock (FSourceLock)
            {
                //first notify to prepare for buffer
                foreach (var notify in FNotifys)
                {
                    try
                    {
                        notify.NotifyProcess(samplesNeeded);
                    }
                    catch (Exception e)
                    {
                        // Per-node errors are logged and swallowed so one bad node
                        // cannot silence the whole mixer.
                        System.Diagnostics.Debug.WriteLine(e.Message);
                        System.Diagnostics.Debug.WriteLine(e.Source);
                        System.Diagnostics.Debug.WriteLine(e.StackTrace);
                    }
                }

                //evaluate the sinks,
                //e.g. buffer writers should write first to have the latest data in the buffer storage
                foreach (var sink in FSinks)
                {
                    try
                    {
                        sink.Read(offset / 4, samplesNeeded);
                    }
                    catch (Exception e)
                    {
                        System.Diagnostics.Debug.WriteLine(e.Message);
                        System.Diagnostics.Debug.WriteLine(e.Source);
                        System.Diagnostics.Debug.WriteLine(e.StackTrace);
                    }
                }

                //evaluate the inputs
                var inputCount = FSources.Count;
                for (int i = 0; i < inputCount; i++)
                {
                    try
                    {
                        if (FSources[i].Signal != null)
                        {
                            //starts the calculation of the audio graph
                            FSources[i].Signal.Read(FMixerBuffer, offset / 4, samplesNeeded);
                            // Wrap the source's channel index into the output's channel count.
                            var chan = FSources[i].Channel % channels;

                            //add to output buffer (interleaved), resetting the scratch buffer
                            for (int j = 0; j < samplesNeeded; j++)
                            {
                                wb.FloatBuffer[j * channels + chan] += FMixerBuffer[j];
                                FMixerBuffer[j] = 0;
                            }
                        }
                    }
                    catch (Exception e)
                    {
                        System.Diagnostics.Debug.WriteLine(e.Message);
                        System.Diagnostics.Debug.WriteLine(e.Source);
                        System.Diagnostics.Debug.WriteLine(e.StackTrace);
                    }
                }

                //tell the engine that reading has finished
                FReadingFinished(samplesNeeded);
            }
            return(count);            //always run
        }
        public static string Main1(string s)
        {
            string model    = null;
            string audio    = null;
            string hotwords = null;
            bool   extended = false;

            model = "C:/Users/Nour El-Din/Documents/deepspeech/deepspeech-0.9.3-models.pbmm";
            var scorer = "C:/Users/Nour El-Din/Documents/deepspeech/deepspeech-0.9.3-models.scorer";

            audio = "C:/Users/Nour El-Din/Documents/deepspeech/audio";
            //hotwords = GetArgument(args, "--hot_words");
            extended = true;
            Stopwatch stopwatch = new Stopwatch();

            try
            {
                Console.WriteLine("Loading model...");
                stopwatch.Start();
                // sphinx-doc: csharp_ref_model_start
                using (IDeepSpeech sttClient = new DeepSpeech("C:/Users/Nour El-Din/Documents/deepspeech/deepspeech-0.9.3-models.pbmm"))
                {
                    // sphinx-doc: csharp_ref_model_stop
                    stopwatch.Stop();
                    Console.WriteLine($"Model loaded - {stopwatch.Elapsed.Milliseconds} ms");
                    stopwatch.Reset();
                    if (scorer != null)
                    {
                        Console.WriteLine("Loading scorer...");
                        sttClient.EnableExternalScorer("C:/Users/Nour El-Din/Documents/deepspeech/deepspeech-0.9.3-models.scorer");
                    }

                    if (hotwords != null)
                    {
                        Console.WriteLine($"Adding hot-words {hotwords}");
                        char[]   sep         = { ',' };
                        string[] word_boosts = hotwords.Split(sep);
                        foreach (string word_boost in word_boosts)
                        {
                            char[]   sep1 = { ':' };
                            string[] word = word_boost.Split(sep1);
                            sttClient.AddHotWord(word[0], float.Parse(word[1]));
                        }
                    }
                    Directory.SetCurrentDirectory("C:/Users/Nour El-Din/Downloads/Graduation-Project-Dragons-master/Graduation Project Dragons/wwwroot/Video");
                    string v_Name           = s.Split('.')[0];
                    var    enviroment       = System.Environment.CurrentDirectory;
                    string projectDirectory = Directory.GetParent(enviroment).Parent.FullName;
                    Console.WriteLine(projectDirectory);
                    string m_Path    = @$ "C:/Users/Nour El-Din/Downloads/Graduation-Project-Dragons-master/Graduation Project Dragons/wwwroot/Video/{v_Name}.mp3";
                    bool   mp3_Found = File.Exists(m_Path) ? true : false;
                    string w_Path    = @$ "C:/Users/Nour El-Din/Downloads/Graduation-Project-Dragons-master/Graduation Project Dragons/wwwroot/Video/{v_Name}.wav";
                    bool   wav_Found = File.Exists(w_Path) ? true : false;
                    string strCmdText;
                    if (!mp3_Found)
                    {
                        strCmdText = $"/c ffmpeg -i {v_Name}.mp4 {v_Name}.mp3";
                        System.Diagnostics.Process.Start("CMD.exe", strCmdText);
                    }
                    else if (!wav_Found)
                    {
                        strCmdText = $"/c ffmpeg -i {v_Name}.mp3 -acodec pcm_s16le -ac 1 -ar 16000 {v_Name}.wav";
                        System.Diagnostics.Process.Start("CMD.exe", strCmdText);
                    }
                    else
                    {
                        string audioFile  = $"C:/Users/Nour El-Din/Downloads/Graduation-Project-Dragons-master/Graduation Project Dragons/wwwroot/Video/{v_Name}.wav";
                        var    waveBuffer = new WaveBuffer(File.ReadAllBytes(audioFile));
                        using (var waveInfo = new WaveFileReader(audioFile))
                        {
                            Console.WriteLine("Running inference....");

                            stopwatch.Start();
                            string w = "";
                            string speechResult;
                            // sphinx-doc: csharp_ref_inference_start
                            if (extended)
                            {
                                Metadata metaResult = sttClient.SpeechToTextWithMetadata(waveBuffer.ShortBuffer,
                                                                                         Convert.ToUInt32(waveBuffer.MaxSize / 2), 1);
                                speechResult = MetadataToString(metaResult.Transcripts[0]);
                                data        new_Data   = new data();
                                List <data> full_Text  = new List <data>();
                                double      word_Start = 0;;
                                double      word_End;
                                string      temp     = "";
                                int         counter  = 0;
                                int         counter2 = 0;
                                foreach (var c in metaResult.Transcripts[0].Tokens)
                                {
                                    counter2++;
                                    if (counter == 0)
                                    {
                                        word_Start = c.StartTime;
                                        counter++;
                                    }
                                    temp += c.Text;
                                    if (c.Text == " ")
                                    {
                                        counter        = 0;
                                        word_End       = c.StartTime;
                                        new_Data.start = word_Start;
                                        new_Data.end   = word_End;
                                        new_Data.word  = temp;
                                        full_Text.Add(new_Data);
                                        temp     = "";
                                        new_Data = new data();

                                        continue;
                                    }
                                    w += c.Text;
                                    if (counter2 == metaResult.Transcripts[0].Tokens.Length - 1)
                                    {
                                        word_End       = c.StartTime;
                                        new_Data.start = word_Start;
                                        new_Data.end   = word_End;
                                        new_Data.word  = temp;
                                        full_Text.Add(new_Data);
                                        temp = "";
                                    }
                                }
                            }
                            else
                            {
                                speechResult = sttClient.SpeechToText(waveBuffer.ShortBuffer,
                                                                      Convert.ToUInt32(waveBuffer.MaxSize / 2));
                            }
                            // sphinx-doc: csharp_ref_inference_stop
                            stopwatch.Stop();
                            Console.WriteLine($"Audio duration: {waveInfo.TotalTime.ToString()}");
                            Console.WriteLine($"Inference took: {stopwatch.Elapsed.ToString()}");
                            Console.WriteLine((extended ? $"Extended result: " : "Recognized text: ") + speechResult);
                            return(w);
                        }
                        waveBuffer.Clear();
                    }
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
            return(" ");
        }
Exemplo n.º 15
0
        /// <summary>
        /// Transcribes the audio track of a video in the site's wwwroot/Video folder
        /// with DeepSpeech, converting mp4 -> mp3 -> 16 kHz mono wav via ffmpeg as needed.
        /// </summary>
        /// <param name="s">Video file name (e.g. "clip.mp4"); only the part before the first '.' is used.</param>
        /// <returns>
        /// Word timings encoded as "word ,start,end/word ,start,end/..." (each word keeps
        /// its trailing space, as produced by the token stream), or " " on failure.
        /// </returns>
        public static string Main1(string s)
        {
            string model    = "C:/Users/Nour El-Din/Documents/deepspeech/deepspeech-0.9.3-models.pbmm";
            var    scorer   = "C:/Users/Nour El-Din/Documents/deepspeech/deepspeech-0.9.3-models.scorer";
            bool   extended = true;

            string    path      = Path.Combine(HomeController.Environment.WebRootPath, "Video");
            Stopwatch stopwatch = new Stopwatch();

            try
            {
                Console.WriteLine("Loading model...");
                stopwatch.Start();
                // sphinx-doc: csharp_ref_model_start
                using (IDeepSpeech sttClient = new DeepSpeech(model))
                {
                    // sphinx-doc: csharp_ref_model_stop
                    stopwatch.Stop();
                    // Elapsed.Milliseconds only reports the 0-999 ms component of the elapsed
                    // time; ElapsedMilliseconds is the full model-load duration.
                    Console.WriteLine($"Model loaded - {stopwatch.ElapsedMilliseconds} ms");
                    stopwatch.Reset();
                    if (scorer != null)
                    {
                        Console.WriteLine("Loading scorer...");
                        sttClient.EnableExternalScorer(scorer);
                    }
                    Directory.SetCurrentDirectory(path);
                    string v_Name = s.Split('.')[0];
                    // NOTE: the original used `@$ "..."` (space between prefix and quote),
                    // which is not valid C#; plain interpolation produces the same path.
                    string m_Path = $"{path}/{v_Name}.mp3";
                    string w_Path = $"{path}/{v_Name}.wav";

                    // DeepSpeech expects 16-bit, 16 kHz, mono PCM, so derive the wav from the
                    // mp3, which in turn is extracted from the mp4. File names are quoted so
                    // names containing spaces survive the shell.
                    if (!File.Exists(m_Path))
                    {
                        RunHiddenCommand($"/c ffmpeg -i \"{v_Name}.mp4\" \"{v_Name}.mp3\"");
                    }
                    if (!File.Exists(w_Path))
                    {
                        RunHiddenCommand($"/c ffmpeg -i \"{v_Name}.mp3\" -acodec pcm_s16le -ac 1 -ar 16000 \"{v_Name}.wav\"");
                    }

                    var waveBuffer = new WaveBuffer(File.ReadAllBytes(w_Path));
                    using (var waveInfo = new WaveFileReader(w_Path))
                    {
                        Console.WriteLine("Running inference....");
                        stopwatch.Start();
                        string w = "";
                        string speechResult;
                        // sphinx-doc: csharp_ref_inference_start
                        if (extended)
                        {
                            Metadata metaResult = sttClient.SpeechToTextWithMetadata(waveBuffer.ShortBuffer,
                                                                                     Convert.ToUInt32(waveBuffer.MaxSize / 2), 1);
                            speechResult = MetadataToString(metaResult.Transcripts[0]);

                            // Walk the per-character tokens of the best transcript and group
                            // them into words; a " " token (or the final token) closes the
                            // current word and appends "word,start,end/" to the result.
                            double word_Start = 0;
                            string word       = "";
                            bool   inWord     = false;
                            int    tokenIndex = 0;
                            var    tokens     = metaResult.Transcripts[0].Tokens;
                            foreach (var c in tokens)
                            {
                                tokenIndex++;
                                if (!inWord)
                                {
                                    word_Start = c.StartTime;
                                    inWord     = true;
                                }
                                word += c.Text;
                                if (c.Text == " ")
                                {
                                    // Word boundary: the space's start time is the word's end.
                                    w     += word + ',';
                                    w     += word_Start + ",";
                                    w     += c.StartTime + "/";
                                    word   = "";
                                    inWord = false;
                                    continue;
                                }
                                if (tokenIndex == tokens.Length)
                                {
                                    // Final token is not a space: flush the last word.
                                    w   += word + ',';
                                    w   += word_Start + ",";
                                    w   += c.StartTime + "/";
                                    word = "";
                                }
                            }
                        }
                        else
                        {
                            speechResult = sttClient.SpeechToText(waveBuffer.ShortBuffer,
                                                                  Convert.ToUInt32(waveBuffer.MaxSize / 2));
                        }
                        // sphinx-doc: csharp_ref_inference_stop
                        stopwatch.Stop();
                        Console.WriteLine($"Audio duration: {waveInfo.TotalTime}");
                        Console.WriteLine($"Inference took: {stopwatch.Elapsed}");
                        Console.WriteLine((extended ? "Extended result: " : "Recognized text: ") + speechResult);
                        waveBuffer.Clear();
                        return(w);
                    }
                }
            }
            catch (Exception ex)
            {
                // Log the full exception (message alone loses the stack trace).
                Console.WriteLine(ex);
            }
            return(" ");
        }

        /// <summary>
        /// Runs a cmd.exe argument string in a hidden window and blocks until it
        /// finishes, so the converted file exists before the caller reads it.
        /// </summary>
        private static void RunHiddenCommand(string arguments)
        {
            Process process = new Process();
            process.StartInfo.FileName    = "cmd.exe";
            process.StartInfo.WindowStyle = ProcessWindowStyle.Hidden;
            process.StartInfo.Arguments   = arguments;
            process.Start();
            process.WaitForExit();
        }
Exemplo n.º 16
0
        /// <summary>
        /// Renders one buffer of audio: clears the track buffers, lets every owned
        /// channel write into its track, then mixes the unmuted tracks into the
        /// interleaved stereo output, applying the current fade ramp.
        /// </summary>
        /// <param name="output">When true, the mixed buffer is queued for playback.</param>
        /// <param name="recording">When true, the mixed buffer is also written to the wave file.</param>
        public void Process(bool output, bool recording)
        {
            // Zero every per-track buffer and the shared output buffer before mixing.
            foreach (float[] trackBuf in trackBuffers)
            {
                Array.Clear(trackBuf, 0, trackBuf.Length);
            }
            audio.Clear();

            // Each channel that has an owning track renders into that track's buffer.
            foreach (PCM8Channel chan in pcm8Channels)
            {
                if (chan.Owner != null)
                {
                    chan.Process(trackBuffers[chan.Owner.Index]);
                }
            }
            foreach (PSGChannel chan in psgChannels)
            {
                if (chan.Owner != null)
                {
                    chan.Process(trackBuffers[chan.Owner.Index]);
                }
            }

            // Master level at the start and end of this buffer; the two only differ
            // while a fade is active, in which case fadePos advances one step.
            float levelAtStart = 1f;
            float levelAtEnd   = 1f;
            if (fadeMicroFramesLeft > 0)
            {
                const float scale = 10f / 6f;
                levelAtStart *= fadePos < 0f ? 0f : (float)Math.Pow(fadePos, scale);
                fadePos      += fadeStepPerMicroframe;
                levelAtEnd   *= fadePos < 0f ? 0f : (float)Math.Pow(fadePos, scale);
                fadeMicroFramesLeft--;
            }
            // Per-sample increment that linearly interpolates the level across the buffer.
            float levelStep = (levelAtEnd - levelAtStart) * samplesReciprocal;

            // Accumulate every unmuted track into the interleaved stereo output.
            for (int track = 0; track < trackBuffers.Length; track++)
            {
                if (Mutes[track])
                {
                    continue;
                }
                float[] src   = trackBuffers[track];
                float   level = levelAtStart;
                for (int sample = 0; sample < SamplesPerBuffer; sample++)
                {
                    int left = sample * 2;
                    audio.FloatBuffer[left]     += src[left] * level;
                    audio.FloatBuffer[left + 1] += src[left + 1] * level;
                    level += levelStep;
                }
            }

            if (output)
            {
                buffer.AddSamples(audio.ByteBuffer, 0, audio.ByteBufferCount);
            }
            if (recording)
            {
                waveWriter.Write(audio.ByteBuffer, 0, audio.ByteBufferCount);
            }
        }