public void Transcribe_UsingGrammar_Continuous()
        {
            _configuration = new Configuration
            {
                AcousticModelPath = ModelsDirectory,
                DictionaryPath    = _dictionaryPath,
                LanguageModelPath = _languageModelPath,
                UseGrammar        = true,
                GrammarName       = "hello",
                GrammarPath       = ModelsDirectory
            };

            _speechRecognizer = new StreamSpeechRecognizer(_configuration);

            for (int i = 1; i <= 3; i++) // TODO: reading 3 or more files in a row causes the test to fail (the same happens in CMU Sphinx4)
            {
                var audioFile = Path.Combine(Directory.GetCurrentDirectory(), "Audio", string.Format("Long Audio {0}.wav", i));
                var stream    = new FileStream(audioFile, FileMode.Open);
                _speechRecognizer.StartRecognition(stream);
                var result = _speechRecognizer.GetResult();
                _speechRecognizer.StopRecognition();
                stream.Close();
                Assert.IsNotNull(result);
                var hypothesis = result.GetHypothesis();
                Assert.IsTrue(hypothesis.Contains("the time is now exactly twenty five to one") || hypothesis.Contains("there's three left on the left side the one closest to us"));
            }
        }
        public void LoadModels()
        {
            try
            {
                CanLoadModels  = false;
                CanLoadGrammar = true;

                var modelPath         = Path.Combine(Directory.GetCurrentDirectory(), "Models");
                var dictionaryPath    = Path.Combine(modelPath, "cmudict-en-us.dict");
                var languageModelPath = Path.Combine(modelPath, "en-us.lm.dmp");
                var configuration     = new Configuration
                {
                    AcousticModelPath = modelPath,
                    DictionaryPath    = dictionaryPath,
                    LanguageModelPath = languageModelPath
                };
                _recognizer = new StreamSpeechRecognizer(configuration);

                CanTranscribe = true;
                CanListen     = false;
            }
            catch (Exception exception)
            {
                this.LogError(exception);
            }
        }
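A LoadModels routine like this one is usually paired with a transcription action gated by the CanTranscribe flag it sets. A minimal sketch of such a companion method, reusing the _recognizer field configured above (the Transcribe method itself is hypothetical, not part of the original source):

        public string Transcribe(string audioFile)
        {
            // Feed a WAV file to the recognizer configured in LoadModels.
            using (var stream = new FileStream(audioFile, FileMode.Open))
            {
                _recognizer.StartRecognition(stream);
                var result = _recognizer.GetResult();
                _recognizer.StopRecognition();
                return result != null ? result.GetHypothesis() : string.Empty;
            }
        }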
Example #3
        private static string GetTranscription(string audioDirectory, string audioFile, string modelsDirectory)
        {
            try
            {
                if (!Directory.Exists(modelsDirectory) || !Directory.Exists(audioDirectory))
                {
                    return("No Models or Audio directory found!! Aborting...");
                }

                var speechConfiguration = new Configuration
                {
                    AcousticModelPath = modelsDirectory,
                    DictionaryPath    = Path.Combine(modelsDirectory, "cmudict-en-us.dict"),
                    LanguageModelPath = Path.Combine(modelsDirectory, "en-us.lm.dmp"),
                    UseGrammar        = true,
                    GrammarPath       = modelsDirectory,
                    GrammarName       = "hello"
                };
                var speechRecognizer = new StreamSpeechRecognizer(speechConfiguration);
                using (var stream = new FileStream(audioFile, FileMode.Open))
                {
                    speechRecognizer.StartRecognition(stream);

                    Console.WriteLine("Transcribing...");
                    var result = speechRecognizer.GetResult();

                    return result != null ? result.GetHypothesis() : "Sorry! Couldn't Transcribe";
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e);
                throw;
            }
        }
        public static void speakerAdaptiveDecoding(ArrayList speakers, URL url)
        {
            Configuration configuration = new Configuration();

            configuration.setAcousticModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us");
            configuration.setDictionaryPath("resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict");
            configuration.setLanguageModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us.lm.bin");
            StreamSpeechRecognizer streamSpeechRecognizer = new StreamSpeechRecognizer(configuration);
            Iterator iterator = speakers.iterator();

            while (iterator.hasNext())
            {
                SpeakerCluster speakerCluster   = (SpeakerCluster)iterator.next();
                Stats          stats            = streamSpeechRecognizer.createStats(1);
                ArrayList      speakerIntervals = speakerCluster.getSpeakerIntervals();
                Iterator       iterator2        = speakerIntervals.iterator();
                while (iterator2.hasNext())
                {
                    Segment   segment   = (Segment)iterator2.next();
                    long      start     = (long)segment.getStartTime();
                    long      end       = (long)(segment.getStartTime() + segment.getLength());
                    TimeFrame timeFrame = new TimeFrame(start, end);
                    streamSpeechRecognizer.startRecognition(url.openStream(), timeFrame);
                    SpeechResult result;
                    while ((result = streamSpeechRecognizer.getResult()) != null)
                    {
                        stats.collect(result);
                    }
                    streamSpeechRecognizer.stopRecognition();
                }
                Transform transform = stats.createTransform();
                streamSpeechRecognizer.setTransform(transform);
                Iterator iterator3 = speakerIntervals.iterator();
                while (iterator3.hasNext())
                {
                    Segment   segment2  = (Segment)iterator3.next();
                    long      start2    = (long)segment2.getStartTime();
                    long      end2      = (long)(segment2.getStartTime() + segment2.getLength());
                    TimeFrame timeFrame = new TimeFrame(start2, end2);
                    streamSpeechRecognizer.startRecognition(url.openStream(), timeFrame);
                    SpeechResult result;
                    while ((result = streamSpeechRecognizer.getResult()) != null)
                    {
                        [email protected]("Hypothesis: %s\n", new object[]
                        {
                            result.getHypothesis()
                        });
                    }
                    streamSpeechRecognizer.stopRecognition();
                }
            }
        }
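This routine is a two-pass adaptive decode: the first pass over each speaker cluster collects statistics, createStats/createTransform derive a feature transform for that speaker, and the second pass decodes the same intervals with the transform applied. A sketch of how it might be driven, assuming the clusters come from Sphinx4's speaker diarization (the SpeakerIdentification call is an assumption about the caller and is not part of the snippet above):

        public static void main(string[] args)
        {
            // Hypothetical driver: diarize the audio into speaker clusters,
            // then run adaptive decoding over each cluster's intervals.
            URL url = new URL("file:speech.wav");
            SpeakerIdentification sd = new SpeakerIdentification();
            ArrayList speakers = sd.cluster(url.openStream());
            speakerAdaptiveDecoding(speakers, url);
        }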
Example #5
        public VoiceToTextService()
        {
            var speechModelsDir = Path.Combine(Directory.GetCurrentDirectory(), "SpeechModels");

            config = new Syn.Speech.Api.Configuration()
            {
                AcousticModelPath = speechModelsDir,
                DictionaryPath    = Path.Combine(speechModelsDir, "cmudict-en-us.dict"),
                LanguageModelPath = Path.Combine(speechModelsDir, "en-us.lm.dmp"),
                SampleRate        = InputRate
            };
            this.speechRecognizer = new StreamSpeechRecognizer(config);
        }
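A minimal sketch of how this service might expose transcription, using only the Syn.Speech calls already seen in the other examples (the Transcribe method is hypothetical; the caller is assumed to supply audio matching the configured SampleRate):

        public string Transcribe(Stream audioStream)
        {
            // Decode a single utterance from the supplied stream.
            speechRecognizer.StartRecognition(audioStream);
            var result = speechRecognizer.GetResult();
            speechRecognizer.StopRecognition();
            return result != null ? result.GetHypothesis() : string.Empty;
        }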
Example #6
        private static void Main()
        {
            Logger.LogReceived += LogReceived;
            var modelsDirectory = Path.Combine(Directory.GetCurrentDirectory(), "Models");
            var audioDirectory  = Path.Combine(Directory.GetCurrentDirectory(), "Audio");

            if (!Directory.Exists(modelsDirectory) || !Directory.Exists(audioDirectory))
            {
                Console.WriteLine("No Models or Audio directory found!! Aborting...");
                Console.ReadLine();
                return;
            }

            _speechConfiguration = new Configuration
            {
                AcousticModelPath = modelsDirectory,
                DictionaryPath    = Path.Combine(modelsDirectory, "cmudict-en-us.dict"),
                LanguageModelPath = Path.Combine(modelsDirectory, "en-us.lm.dmp"),
                UseGrammar        = true,
                GrammarPath       = modelsDirectory,
                GrammarName       = "hello"
            };

            _speechRecognizer = new StreamSpeechRecognizer(_speechConfiguration);
            for (int i = 1; i <= 3; i++)
            {
                var audioFile = Path.Combine(audioDirectory, String.Format("Long Audio {0}.wav", i));
                var stream    = new FileStream(audioFile, FileMode.Open);
                if (i == 3)
                {
                    System.Diagnostics.Trace.WriteLine("checking");
                }
                _speechRecognizer.StartRecognition(stream);
                var result = _speechRecognizer.GetResult();
                _speechRecognizer.StopRecognition();
                if (result != null)
                {
                    Console.WriteLine("Result {0}: {1}", i, result.GetHypothesis());
                }
                else
                {
                    Console.WriteLine("Result: {0}", "Sorry! Coudn't Transcribe");
                }
                var instance = ConfigurationManager.GetInstance<SpeechMarker>();
                Console.WriteLine(instance.ToString());
                stream.Close();
            }
            Console.WriteLine("DONE!");
            Console.ReadLine();
        }
        public static void Main(string[] args)
        {
            Logger.LogReceived += LogReceived;

            var modelsDirectory = Path.Combine(Directory.GetCurrentDirectory(), "Models");
            var audioDirectory  = Path.Combine(Directory.GetCurrentDirectory(), "Audio");
            var audioFile       = Path.Combine(audioDirectory, "Long Audio 2.wav");

            if (!Directory.Exists(modelsDirectory) || !Directory.Exists(audioDirectory))
            {
                Console.WriteLine("No Models or Audio directory found!! Aborting...");
                Console.ReadLine();
                return;
            }

            speechConfiguration = new Configuration();
            speechConfiguration.AcousticModelPath = modelsDirectory;
            speechConfiguration.DictionaryPath    = Path.Combine(modelsDirectory, "cmudict-en-us.dict");
            speechConfiguration.LanguageModelPath = Path.Combine(modelsDirectory, "en-us.lm.dmp");

            speechConfiguration.UseGrammar  = true;
            speechConfiguration.GrammarPath = modelsDirectory;
            speechConfiguration.GrammarName = "hello";


            speechRecognizer = new StreamSpeechRecognizer(speechConfiguration);
            var stream = new FileStream(audioFile, FileMode.Open);

            speechRecognizer.StartRecognition(stream);

            Console.WriteLine("Transcribing...");
            var result = speechRecognizer.GetResult();

            if (result != null)
            {
                Console.WriteLine("Result: " + result.GetHypothesis());
            }
            else
            {
                Console.WriteLine("Sorry! Coudn't Transcribe");
            }

            Console.ReadLine();
        }
Example #8
        public static void main(string[] args)
        {
            Configuration configuration = new Configuration();

            configuration.setAcousticModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us");
            configuration.setDictionaryPath("resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict");
            configuration.setLanguageModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us.lm.bin");
            StreamSpeechRecognizer streamSpeechRecognizer = new StreamSpeechRecognizer(configuration);
            FileInputStream        fileInputStream        = new FileInputStream(new File(args[0]));

            fileInputStream.skip(44L); // skip the 44-byte WAV (RIFF) header
            streamSpeechRecognizer.startRecognition(fileInputStream);
            SpeechResult result;

            while ((result = streamSpeechRecognizer.getResult()) != null)
            {
                [email protected](result.getHypothesis());
            }
            streamSpeechRecognizer.stopRecognition();
        }
        public void Transcribe_UsingGrammar()
        {
            _configuration = new Configuration
            {
                AcousticModelPath = ModelsDirectory,
                DictionaryPath    = _dictionaryPath,
                LanguageModelPath = _languageModelPath,
                UseGrammar        = true,
                GrammarName       = "hello",
                GrammarPath       = ModelsDirectory
            };

            var audioFile = Path.Combine(Directory.GetCurrentDirectory(), "Audio", "robot.wav");

            _speechRecognizer = new StreamSpeechRecognizer(_configuration);
            _speechRecognizer.StartRecognition(new FileStream(audioFile, FileMode.Open));

            var result = _speechRecognizer.GetResult();

            Assert.IsNotNull(result);
            Assert.AreEqual("the time is now exactly twenty five to one", result.GetHypothesis());
        }
Example #11
File: Recognition.cs Project: Ouay/MVP
        public Recognition()
        {
            waveInStream = new WaveInEvent()
            {
                NumberOfBuffers = 2,
                WaveFormat      = new WaveFormat(16000, 1)
            };
            mem    = new MemoryStream();
            writer = new WaveFileWriter(mem, waveInStream.WaveFormat);
            waveInStream.DataAvailable += WaveInStream_DataAvailable;
            var modelPath         = Path.Combine(Directory.GetCurrentDirectory(), "VoiceModel/");
            var dictionaryPath    = Path.Combine(modelPath, "cmudict-en-us.dict");
            var languageModelPath = Path.Combine(modelPath, "en-us.lm.dmp");
            var configuration     = new Configuration
            {
                AcousticModelPath = modelPath,
                DictionaryPath    = dictionaryPath,
                LanguageModelPath = languageModelPath,
                UseGrammar        = false
            };

            _recognizer = new StreamSpeechRecognizer(configuration);
        }
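The WaveInStream_DataAvailable handler wired in this constructor is not shown; a plausible body, buffering the captured PCM into the WaveFileWriter so the MemoryStream can later be rewound and handed to _recognizer.StartRecognition (this implementation is an assumption):

        private void WaveInStream_DataAvailable(object sender, WaveInEventArgs e)
        {
            // Append the captured samples to the in-memory WAV.
            writer.Write(e.Buffer, 0, e.BytesRecorded);
            writer.Flush();
        }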
Example #12
        static void InitializeEngine()
        {
            Console.WriteLine(@"Started...");
            Logger.LogReceived += Logger_LogReceived;

            _configuration = new Configuration
            {
                AcousticModelPath = ("Models"),
                DictionaryPath    = ("cmudict-en-us.dict"),
                LanguageModelPath = ("en-us.lm.dmp"),
            };

            Console.WriteLine(@"Use Grammar ? (Y/N)");
            var answer = Console.ReadLine();

            if (answer != null && answer.ToLower().Contains("y"))
            {
                _configuration.UseGrammar  = true;
                _configuration.GrammarPath = "Models";
                _configuration.GrammarName = "hello";
            }

            _recognizer = new StreamSpeechRecognizer(_configuration);
        }
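A short usage sketch to pair with InitializeEngine, reusing the recognizer it creates (the RecognizeFile helper and its argument are hypothetical):

        static void RecognizeFile(string audioFile)
        {
            using (var stream = new FileStream(audioFile, FileMode.Open))
            {
                _recognizer.StartRecognition(stream);
                var result = _recognizer.GetResult();
                _recognizer.StopRecognition();
                Console.WriteLine(result != null ? result.GetHypothesis() : @"No result");
            }
        }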
Example #13
        public static void main(string[] args)
        {
            [email protected]("Loading models...");
            Configuration configuration = new Configuration();

            configuration.setAcousticModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us");
            configuration.setDictionaryPath("resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict");
            configuration.setLanguageModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us.lm.bin");
            StreamSpeechRecognizer streamSpeechRecognizer = new StreamSpeechRecognizer(configuration);
            InputStream resourceAsStream = ClassLiteral<TranscriberDemo>.Value.getResourceAsStream("/edu/cmu/sphinx/demo/aligner/10001-90210-01803.wav");

            resourceAsStream.skip(44L); // skip the 44-byte WAV (RIFF) header
            streamSpeechRecognizer.startRecognition(resourceAsStream);
            SpeechResult result;

            while ((result = streamSpeechRecognizer.getResult()) != null)
            {
                [email protected]("Hypothesis: %s\n", new object[]
                {
                    result.getHypothesis()
                });
                [email protected]("List of recognized words and their times:");
                Iterator iterator = result.getWords().iterator();
                while (iterator.hasNext())
                {
                    WordResult wordResult = (WordResult)iterator.next();
                    [email protected](wordResult);
                }
                [email protected]("Best 3 hypotheses:");
                iterator = result.getNbest(3).iterator();
                while (iterator.hasNext())
                {
                    string text = (string)iterator.next();
                    [email protected](text);
                }
                }
            }
            streamSpeechRecognizer.stopRecognition();
            resourceAsStream = ClassLiteral<TranscriberDemo>.Value.getResourceAsStream("/edu/cmu/sphinx/demo/aligner/10001-90210-01803.wav");

            resourceAsStream.skip(44L);
            Stats stats = streamSpeechRecognizer.createStats(1);

            streamSpeechRecognizer.startRecognition(resourceAsStream);
            while ((result = streamSpeechRecognizer.getResult()) != null)
            {
                stats.collect(result);
            }
            streamSpeechRecognizer.stopRecognition();
            Transform transform = stats.createTransform();

            streamSpeechRecognizer.setTransform(transform);
            resourceAsStream = ClassLiteral<TranscriberDemo>.Value.getResourceAsStream("/edu/cmu/sphinx/demo/aligner/10001-90210-01803.wav");

            resourceAsStream.skip(44L);
            streamSpeechRecognizer.startRecognition(resourceAsStream);
            while ((result = streamSpeechRecognizer.getResult()) != null)
            {
                [email protected]("Hypothesis: %s\n", new object[]
                {
                    result.getHypothesis()
                });
            }
            streamSpeechRecognizer.stopRecognition();
        }