Ejemplo n.º 1
0
        /// <summary>
        /// Writes one grammar for every speaker-list / sample-list pair of every training-set
        /// configuration, filled with the pronunciations that <c>gr</c> returns for that pair.
        /// </summary>
        public void convert()
        {
            int speakerCount = 1;
            while (speakerCount < MSpeakers)
            {
                int samplesPerSpeaker = startingNumberOfSamples;
                while (samplesPerSpeaker <= MSample)
                {
                    // Rebuilds trainingSet for this configuration before it is enumerated below.
                    setUpTrainingSet(speakerCount, samplesPerSpeaker);

                    foreach (List<int> speakers in trainingSet[0])
                    {
                        foreach (List<int> samples in trainingSet[1])
                        {
                            // gr is handed the sample indices shifted up by one
                            // (presumably it expects 1-based sample numbers - TODO confirm).
                            List<int> shiftedSamples = new List<int>();
                            for (int s = 0; s < samples.Count; s++)
                            {
                                shiftedSamples.Add(samples[s] + 1);
                            }

                            string grammarName = gr.getFileName(speakers, shiftedSamples);
                            gc = new GrammarCreator(directory, grammarName, "allcombinations");
                            Dictionary<string, List<string>> pronunciations = gr.output(speakers, shiftedSamples);

                            // Open the grammar and its rule.
                            gc.boundgr(true);
                            gc.boundrule(true, gc.grammarRuleName);

                            // One <item> per pronunciation, tagged "<word>_<alternate index>".
                            foreach (KeyValuePair<string, List<string>> entry in pronunciations)
                            {
                                List<string> alternates = entry.Value;
                                for (int alt = 0; alt < alternates.Count; alt++)
                                {
                                    string item = "<item><token sapi:pron=\"" + alternates[alt] + "\">" + entry.Key + "_" + alt + "</token></item>";
                                    gc.LogGrammar(item);
                                }
                            }

                            // Close the rule, then the grammar.
                            gc.boundrule(false, "");
                            gc.boundgr(false);
                        }
                    }

                    samplesPerSpeaker++;
                }

                speakerCount++;
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Entry point. For each recording session listed below, builds a <c>Data</c> set restricted
        /// to that single speaker and runs <c>TrainingAlgorithm.LearnAllWords</c> on it.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Single definition of the test-data location; every path below is derived from it.
            const string testDataRoot = "C:\\Users\\Administrator\\Documents\\Visual Studio 2008\\Projects\\cmuspeechrecognition_cmuspeechmain\\test_data";
            const string audioRoot = testDataRoot + "\\audio\\";
            const string wordListPath = testDataRoot + "\\config_files\\config.txt.100.english";

            const int wordCount = 100;
            // Third argument of Data (the number of samples per word).
            const int samplesPerWord = 5;
            // Third and fourth arguments of TrainingAlgorithm; the third is the number of
            // alternates, the meaning of the fourth is not visible here - TODO confirm.
            const int alternatesPerWord = 10;
            const int trainingParameter = 15;

            // words to be tested: 1..wordCount
            List<int> wordsToBeTested = new List<int>(wordCount);
            for (int word = 1; word <= wordCount; word++)
            {
                wordsToBeTested.Add(word);
            }

            // samples to be tested
            List<int> samplesToBeTested = new List<int>(new int[] { 1, 2, 3, 4, 5 });

            // audio directories, one per recording session
            List<string> audioDirectory = new List<string>(new string[]
            {
                audioRoot + "092910_123758_Hebrew",
                audioRoot + "101410_140344_Hebrew",
                audioRoot + "101510_111237_Hebrew"
            });

            // audio file name prefixes, in the same order as audioDirectory
            List<string> audioFileNames = new List<string>(new string[]
            {
                "[Wed_(Sep_29_2010)_12-37-58]_4124143701_",
                "[Thu_(Oct_14_2010)_14-03-44]_4122688595_",
                "[Fri_(Oct_15_2010)_11-12-37]_4126203298_"
            });

            // One training run per recording session. The bound follows the list so that adding
            // a session above is enough to include it (assumes speaker indices map one-to-one
            // onto audioDirectory entries - confirm against Data).
            for (int speaker = 0; speaker < audioDirectory.Count; speaker++)
            {
                List<int> speakersToBeTested = new List<int>();
                speakersToBeTested.Add(speaker);

                // set up data
                data = new Data(audioDirectory, audioFileNames, samplesPerWord, speakersToBeTested, wordsToBeTested, samplesToBeTested,
                    wordListPath);

                // setup grammar
                GrammarCreator gc = new GrammarCreator(testDataRoot, "Hebrew", "allcombinations");

                // setup training
                TrainingAlgorithm ta = new TrainingAlgorithm(gc, data, alternatesPerWord, trainingParameter);

                ta.LearnAllWords();
            }
        }
Ejemplo n.º 3
0
        /**************************************************************************************/
        /*
        /* Testing algorithms
        /*
        /***************************************************************************************/
        /// <summary>
        /// Runs the discriminative testing algorithm.
        /// For every training configuration (number of training speakers, samples per speaker) and
        /// every speaker-list / sample-list pair produced by <c>setUpTrainingSet</c>, the speakers
        /// that are not in the training list are used as test speakers. For each vocabulary size in
        /// <c>V</c>, each test speaker, each word type and each repeat, a grammar containing the
        /// tested word plus competing words is built, the test speaker's recordings of the word are
        /// recognised against it, and <c>ConfusionMatrix</c> and <c>SampleAccuracy</c> are updated.
        /// </summary>
        public void testingAlgorithmDiscriminative()
        {
            // One generator for the whole run. Creating a time-seeded Random per repeat can hand
            // back the same sequence when instances are constructed in quick succession.
            Random random = new Random();

            for (int NTrainingSpeakers = 1; NTrainingSpeakers < MSpeakers; NTrainingSpeakers++)
            {
                for (int NTrainingSamplesPerSpeaker = startingNumberOfSamples; NTrainingSamplesPerSpeaker <= MSample; NTrainingSamplesPerSpeaker++)
                {
                    setUpTrainingSet(NTrainingSpeakers, NTrainingSamplesPerSpeaker);

                    foreach (List<int> speakerList in trainingSet[0])
                    {
                        foreach (List<int> sampleList in trainingSet[1])
                        {
                            // set up testSpeakerSet: every speaker that is not used for training
                            List<int> testSpeakerSet = new List<int>();
                            for (int speakerNum = 0; speakerNum < MSpeakers; speakerNum++)
                            {
                                if (!speakerList.Contains(speakerNum))
                                {
                                    testSpeakerSet.Add(speakerNum);
                                }
                            }

                            // begin testing
                            foreach (int vocabSize in V)
                            {
                                int vocabIndex = Array.IndexOf(V, vocabSize);

                                foreach (int iTestSpeaker in testSpeakerSet)
                                {
                                    // NOTE(review): the pronunciations are recomputed for every vocabulary
                                    // size although neither branch depends on vocabSize; left as is because
                                    // the side effects of training are not visible from here.
                                    Dictionary<string, List<string>> pronunciations;
                                    if (input == null)
                                    {
                                        pronunciations = trainAllWords(speakerList, sampleList);
                                    }
                                    else
                                    {
                                        // input is handed the sample indices and the test speaker shifted up by one
                                        List<int> trueSampleList = new List<int>();
                                        foreach (int sample in sampleList)
                                        {
                                            trueSampleList.Add(sample + 1);
                                        }
                                        pronunciations = input.outputDiscriminative(speakerList, trueSampleList, (iTestSpeaker + 1));
                                        System.Diagnostics.Debug.WriteLine(pronunciations.Count);
                                    }

                                    for (int iWordType = 1; iWordType <= MWordTypes; iWordType++)
                                    {
                                        System.Diagnostics.Debug.WriteLine("Testing: word" + iWordType);
                                        for (int iRepeat = 1; iRepeat <= repeats; iRepeat++)
                                        {
                                            // create the vocabulary grammar for this repeat
                                            GrammarCreator gc = new GrammarCreator(grammarDirectory, "repeat_" + grammarCounter, "allcombinations");
                                            grammarCounter++;
                                            gc.boundgr(true);
                                            gc.boundrule(true, gc.grammarRuleName);

                                            // add correct word
                                            addWordTypeToTestGrammar(gc, pronunciations, NTrainingSpeakers - 1, iTestSpeaker, iWordType, iWordType);

                                            // add the competing words
                                            if (vocabSize < MWordTypes)
                                            {
                                                // vocabSize - 1 distinct random word types, none equal to the tested one
                                                List<int> wordTypesChosen = new List<int>();
                                                wordTypesChosen.Add(iWordType);

                                                while (wordTypesChosen.Count < vocabSize)
                                                {
                                                    int randomWordType;
                                                    do
                                                    {
                                                        randomWordType = random.Next(1, MWordTypes + 1); // 1 to MWordTypes
                                                    } while (wordTypesChosen.Contains(randomWordType));

                                                    wordTypesChosen.Add(randomWordType);
                                                    addWordTypeToTestGrammar(gc, pronunciations, NTrainingSpeakers - 1, iTestSpeaker, iWordType, randomWordType);
                                                }
                                            }
                                            else if (vocabSize > 1)
                                            {
                                                // the vocabulary covers every word type: add all of them except the
                                                // "correct" word, which is already in the grammar
                                                for (int k = 1; k <= MWordTypes; k++)
                                                {
                                                    if (k != iWordType)
                                                    {
                                                        addWordTypeToTestGrammar(gc, pronunciations, NTrainingSpeakers - 1, iTestSpeaker, iWordType, k);
                                                    }
                                                }
                                            }

                                            gc.boundrule(false, "");
                                            gc.boundgr(false);

                                            // setup recognizer; it is disposable and one is created per repeat,
                                            // so it has to be released before the next one is built
                                            using (SpeechRecognitionEngine rec = new SpeechRecognitionEngine())
                                            {
                                                rec.LoadGrammar(gc.getGrammar());

                                                // recognizing
                                                for (int iSample = 1; iSample <= MSample; iSample++)
                                                {
                                                    rec.SetInputToWaveFile(data.getAudioName(iTestSpeaker, iWordType, iSample));
                                                    RecognitionResult result;
                                                    try
                                                    {
                                                        result = rec.Recognize();
                                                    }
                                                    catch (Exception e)
                                                    {
                                                        // A failed recognition is logged and skipped; the sample is then
                                                        // not counted in the SampleAccuracy denominator either.
                                                        System.Diagnostics.Debug.WriteLine(e.Message);
                                                        System.Diagnostics.Debug.WriteLine(data.getAudioName(iTestSpeaker, iWordType, iSample));
                                                        continue;
                                                    }

                                                    if (result == null)
                                                    {
                                                        // nothing recognised: counted in the column after the last word/alternate column
                                                        ConfusionMatrix[NTrainingSpeakers - 1, iTestSpeaker, MSample * (iWordType - 1) + iSample - 1, numberOfAlternates * MWordTypes].num++;
                                                    }
                                                    else
                                                    {
                                                        // word[0] is read as the word name and word[1] as the alternate index
                                                        // (presumably the two halves of the "<word>_<index>" token - TODO confirm)
                                                        string[] word = wordTypeResult(2, result);
                                                        ConfusionMatrix[NTrainingSpeakers - 1, iTestSpeaker, MSample * (iWordType - 1) + iSample - 1, numberOfAlternates * (Array.IndexOf(data.listOfWords.ToArray(), word[0]) - 1) + int.Parse(word[1])].num++;

                                                        if (word[0].Equals(data.listOfWords[iWordType]))
                                                        {
                                                            SampleAccuracy[NTrainingSpeakers - 1, NTrainingSamplesPerSpeaker - 1, vocabIndex, iTestSpeaker, iWordType - 1, iSample - 1].num++;
                                                        }
                                                    }

                                                    SampleAccuracy[NTrainingSpeakers - 1, NTrainingSamplesPerSpeaker - 1, vocabIndex, iTestSpeaker, iWordType - 1, iSample - 1].den++;
                                                }
                                            }

                                            gc.Destroy();
                                        }
                                    }
                                }
                            }

                            // end testing
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Adds every pronunciation of one word type to the grammar being built and, for each of
        /// them, increments the <c>ConfusionMatrix</c> denominator of every sample row of the word
        /// type under test.
        /// </summary>
        /// <param name="gc">grammar that receives the items</param>
        /// <param name="pronunciations">pronunciations keyed by word</param>
        /// <param name="trainingSpeakerIndex">first index into <c>ConfusionMatrix</c></param>
        /// <param name="iTestSpeaker">test speaker, second index into <c>ConfusionMatrix</c></param>
        /// <param name="testedWordType">word type currently under test; selects the rows</param>
        /// <param name="grammarWordType">word type being added to the grammar; selects the columns</param>
        private void addWordTypeToTestGrammar(GrammarCreator gc, Dictionary<string, List<string>> pronunciations,
            int trainingSpeakerIndex, int iTestSpeaker, int testedWordType, int grammarWordType)
        {
            for (int i = 0; i < pronunciations[data.listOfWords[grammarWordType]].Count; i++)
            {
                for (int j = 0; j < MSample; j++)
                {
                    ConfusionMatrix[trainingSpeakerIndex, iTestSpeaker, MSample * (testedWordType - 1) + j, numberOfAlternates * (grammarWordType - 1) + i].den++;
                }
                gc.LogGrammar("<item><token sapi:pron=\"" + pronunciations[data.listOfWords[grammarWordType]][i] + "\">" + data.listOfWords[grammarWordType] + "_" + i + "</token></item>");
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Trains all word types on the given speakers and samples and returns the pronunciations
        /// produced by <c>TrainingAlgorithm.LearnAllWords</c>.
        /// </summary>
        /// <param name="speakerList">list of speakers for training</param>
        /// <param name="sampleList">list of samples for training; each entry is incremented by one
        /// before it is passed on, and the list itself is left unmodified</param>
        /// <returns>Dictionary of pronunciation lists keyed by string</returns>
        private Dictionary<string, List<string>> trainAllWords(List<int> speakerList, List<int> sampleList)
        {
            // Tags used for the debug trace and for the grammar name.
            string speakerTag = "speaker-";
            foreach (int speakerId in speakerList)
            {
                speakerTag += speakerId.ToString() + "_";
            }

            string sampleTag = "sample-";

            // Work on a copy so that the caller's list keeps its original values.
            List<int> shiftedSamples = new List<int>(sampleList);
            for (int pos = 0; pos < shiftedSamples.Count; pos++)
            {
                shiftedSamples[pos] = shiftedSamples[pos] + 1;
                sampleTag += shiftedSamples[pos].ToString() + "_";
            }
            System.Diagnostics.Debug.WriteLine("training " + speakerTag + sampleTag);

            // words to be tested: every word type, 1..MWordTypes
            List<int> allWordTypes = new List<int>();
            int wordType = 1;
            while (wordType <= MWordTypes)
            {
                allWordTypes.Add(wordType);
                wordType++;
            }

            // set up data: same audio source as the current data set, restricted to the training selection
            Data trainingData = new Data(
                data.audioDirectory,
                data.audioFileName,
                data.numberOfSamplesPerWord,
                speakerList,
                allWordTypes,
                shiftedSamples,
                data.wordListPath);

            // setup grammar
            string grammarName = data.language + speakerTag + sampleTag;
            GrammarCreator grammar = new GrammarCreator(grammarDirectory, grammarName, "allcombinations");

            // setup training
            TrainingAlgorithm trainer = new TrainingAlgorithm(grammar, trainingData, numberOfAlternates, 15);

            return trainer.LearnAllWords();
        }