/// <summary>
/// Writes one "allcombinations" grammar per training configuration.
/// For every speaker count (1 up to, but excluding, MSpeakers) and every samples-per-speaker
/// count (startingNumberOfSamples up to and including MSample), the training set is rebuilt
/// with setUpTrainingSet and a grammar is logged for each (speaker list, sample list) pair,
/// using the pronunciations returned by gr.output.
/// </summary>
public void convert()
{
    for (int speakerCount = 1; speakerCount < MSpeakers; speakerCount++)
    {
        for (int samplesPerSpeaker = startingNumberOfSamples; samplesPerSpeaker <= MSample; samplesPerSpeaker++)
        {
            setUpTrainingSet(speakerCount, samplesPerSpeaker);

            foreach (List<int> speakerList in trainingSet[0])
            {
                foreach (List<int> sampleList in trainingSet[1])
                {
                    // gr is queried with every sample index shifted up by one.
                    List<int> trueSampleList = sampleList.ConvertAll(index => index + 1);

                    // The grammar file is created before the pronunciations are fetched.
                    gc = new GrammarCreator(directory, gr.getFileName(speakerList, trueSampleList), "allcombinations");
                    Dictionary<string, List<string>> pronunciations = gr.output(speakerList, trueSampleList);

                    gc.boundgr(true);
                    gc.boundrule(true, gc.grammarRuleName);

                    // One <item> per pronunciation; the token text is "<word>_<pronunciation index>".
                    foreach (KeyValuePair<string, List<string>> entry in pronunciations)
                    {
                        List<string> variants = entry.Value;
                        for (int variant = 0; variant < variants.Count; variant++)
                        {
                            gc.LogGrammar("<item><token sapi:pron=\"" + variants[variant] + "\">" + entry.Key + "_" + variant + "</token></item>");
                        }
                    }

                    gc.boundrule(false, "");
                    gc.boundgr(false);
                }
            }
        }
    }
}
/// <summary>
/// Entry point. For each of the three audio directories in turn, builds a Data set containing
/// only that speaker (words 1..100, samples 1..5) and runs TrainingAlgorithm.LearnAllWords on it.
/// </summary>
/// <param name="args">
/// Optional. When at least one argument is given, args[0] replaces the built-in test_data root
/// directory; with no arguments the original hard-coded location is used.
/// </param>
static void Main(string[] args)
{
    // Single source of truth for the test_data location (previously pasted into every path).
    const string defaultTestDataRoot = "C:\\Users\\Administrator\\Documents\\Visual Studio 2008\\Projects\\cmuspeechrecognition_cmuspeechmain\\test_data";
    const int wordTypeCount = 100;
    // Passed in the Data constructor position that trainAllWords fills with data.numberOfSamplesPerWord.
    const int samplesPerWord = 5;
    // Passed in the TrainingAlgorithm position that trainAllWords fills with numberOfAlternates.
    const int alternatesPerWord = 10;
    // NOTE(review): meaning of the last TrainingAlgorithm argument is not visible from here.
    const int trainingParameter = 15;

    string testDataRoot = (args != null && args.Length > 0) ? args[0] : defaultTestDataRoot;

    // words to be tested
    List<int> wordsToBeTested = new List<int>();
    for (int word = 1; word <= wordTypeCount; word++)
    {
        wordsToBeTested.Add(word);
    }

    // samples to be tested
    List<int> samplesToBeTested = new List<int> { 1, 2, 3, 4, 5 };

    // audio directories, one per speaker
    List<string> audioDirectory = new List<string>
    {
        testDataRoot + "\\audio\\092910_123758_Hebrew",
        testDataRoot + "\\audio\\101410_140344_Hebrew",
        testDataRoot + "\\audio\\101510_111237_Hebrew"
    };

    // audio file name prefixes, parallel to audioDirectory
    List<string> audioFileNames = new List<string>
    {
        "[Wed_(Sep_29_2010)_12-37-58]_4124143701_",
        "[Thu_(Oct_14_2010)_14-03-44]_4122688595_",
        "[Fri_(Oct_15_2010)_11-12-37]_4126203298_"
    };

    // Train each speaker on its own; the bound follows the list instead of a literal 3.
    for (int speaker = 0; speaker < audioDirectory.Count; speaker++)
    {
        List<int> speakersToBeTested = new List<int> { speaker };

        // set up data
        data = new Data(audioDirectory, audioFileNames, samplesPerWord, speakersToBeTested, wordsToBeTested, samplesToBeTested,
            testDataRoot + "\\config_files\\config.txt.100.english");

        // setup grammar
        GrammarCreator gc = new GrammarCreator(testDataRoot, "Hebrew", "allcombinations");

        // setup training
        TrainingAlgorithm ta = new TrainingAlgorithm(gc, data, alternatesPerWord, trainingParameter);
        ta.LearnAllWords();
    }
}
/**************************************************************************************/
/*  Testing algorithms                                                                */
/**************************************************************************************/

/// <summary>
/// Runs the discriminative testing algorithm.
/// For every training configuration (speaker count, samples per speaker, and each speaker/sample
/// list produced by setUpTrainingSet), every vocabulary size in V and every speaker that is not
/// in the training speaker list, pronunciations are obtained (trained here when input is null,
/// otherwise read via input.outputDiscriminative). Each word type is then tested "repeats" times:
/// a grammar holding the correct word plus distractor words is built, every sample of the test
/// speaker is recognised against it, and ConfusionMatrix / SampleAccuracy are updated.
/// </summary>
/// <remarks>
/// ConfusionMatrix is indexed as [training speaker count - 1, test speaker,
/// MSample * (spoken word type - 1) + sample, numberOfAlternates * (grammar word type - 1) + pronunciation index];
/// the column numberOfAlternates * MWordTypes counts samples for which the recogniser returned null.
/// </remarks>
public void testingAlgorithmDiscriminative()
{
    // One generator for the whole run: creating a time-seeded Random per repeat can hand
    // back identical sequences when two instances are constructed close together.
    Random random = new Random();

    for (int NTrainingSpeakers = 1; NTrainingSpeakers < MSpeakers; NTrainingSpeakers++)
    {
        for (int NTrainingSamplesPerSpeaker = startingNumberOfSamples; NTrainingSamplesPerSpeaker <= MSample; NTrainingSamplesPerSpeaker++)
        {
            setUpTrainingSet(NTrainingSpeakers, NTrainingSamplesPerSpeaker);

            foreach (List<int> speakerList in trainingSet[0])
            {
                foreach (List<int> sampleList in trainingSet[1])
                {
                    // set up testSpeakerSet: every speaker that was not used for training
                    List<int> testSpeakerSet = new List<int>();
                    for (int speakerNum = 0; speakerNum < MSpeakers; speakerNum++)
                    {
                        if (!speakerList.Contains(speakerNum))
                        {
                            testSpeakerSet.Add(speakerNum);
                        }
                    }

                    // begin testing
                    foreach (int vocabSize in V)
                    {
                        // Position of this vocabulary size inside V; used as a SampleAccuracy index.
                        int vocabIndex = Array.IndexOf(V, vocabSize);

                        foreach (int iTestSpeaker in testSpeakerSet)
                        {
                            Dictionary<string, List<string>> pronunciations;
                            if (input == null)
                            {
                                // NOTE(review): this call does not depend on vocabSize or iTestSpeaker;
                                // it is left here because trainAllWords may have side effects - confirm before hoisting.
                                pronunciations = trainAllWords(speakerList, sampleList);
                            }
                            else
                            {
                                // input is queried with sample indices and the test speaker shifted up by one.
                                List<int> trueSampleList = new List<int>();
                                foreach (int sample in sampleList)
                                {
                                    trueSampleList.Add(sample + 1);
                                }
                                pronunciations = input.outputDiscriminative(speakerList, trueSampleList, (iTestSpeaker + 1));
                                System.Diagnostics.Debug.WriteLine(pronunciations.Count);
                            }

                            for (int iWordType = 1; iWordType <= MWordTypes; iWordType++)
                            {
                                System.Diagnostics.Debug.WriteLine("Testing: word" + iWordType);

                                for (int iRepeat = 1; iRepeat <= repeats; iRepeat++)
                                {
                                    // create random vocab
                                    GrammarCreator gc = new GrammarCreator(grammarDirectory, "repeat_" + grammarCounter, "allcombinations");
                                    grammarCounter++;
                                    gc.boundgr(true);
                                    gc.boundrule(true, gc.grammarRuleName);

                                    // add correct word
                                    addWordTypeToTestGrammar(gc, pronunciations, NTrainingSpeakers - 1, iTestSpeaker, iWordType, iWordType);

                                    if (vocabSize < MWordTypes)
                                    {
                                        // add V - 1 distinct random words (1 to MWordTypes), never the correct one
                                        List<int> wordTypesChosen = new List<int>();
                                        wordTypesChosen.Add(iWordType);
                                        for (int wordCount = 1; wordCount < vocabSize; wordCount++)
                                        {
                                            int randomWordType;
                                            do
                                            {
                                                randomWordType = random.Next(1, MWordTypes + 1);
                                            } while (wordTypesChosen.Contains(randomWordType));

                                            wordTypesChosen.Add(randomWordType);
                                            addWordTypeToTestGrammar(gc, pronunciations, NTrainingSpeakers - 1, iTestSpeaker, iWordType, randomWordType);
                                        }
                                    }
                                    else if (vocabSize > 1)
                                    {
                                        // The vocabulary covers every word type: add all of them except the correct one.
                                        for (int otherWordType = 1; otherWordType <= MWordTypes; otherWordType++)
                                        {
                                            if (otherWordType != iWordType)
                                            {
                                                addWordTypeToTestGrammar(gc, pronunciations, NTrainingSpeakers - 1, iTestSpeaker, iWordType, otherWordType);
                                            }
                                        }
                                    }

                                    gc.boundrule(false, "");
                                    gc.boundgr(false);

                                    // setup recognizer; disposed after this repeat so engines do not pile up
                                    using (SpeechRecognitionEngine rec = new SpeechRecognitionEngine())
                                    {
                                        rec.LoadGrammar(gc.getGrammar());

                                        // recognizing
                                        for (int iSample = 1; iSample <= MSample; iSample++)
                                        {
                                            rec.SetInputToWaveFile(data.getAudioName(iTestSpeaker, iWordType, iSample));

                                            RecognitionResult result;
                                            try
                                            {
                                                result = rec.Recognize();
                                            }
                                            catch (Exception e)
                                            {
                                                // A failed recognition is logged and the sample is skipped:
                                                // neither the confusion matrix nor SampleAccuracy is touched.
                                                System.Diagnostics.Debug.WriteLine(e.Message);
                                                System.Diagnostics.Debug.WriteLine(data.getAudioName(iTestSpeaker, iWordType, iSample));
                                                continue;
                                            }

                                            int sampleRow = MSample * (iWordType - 1) + iSample - 1;
                                            if (result == null)
                                            {
                                                // Nothing recognised: count it in the extra last column.
                                                ConfusionMatrix[NTrainingSpeakers - 1, iTestSpeaker, sampleRow, numberOfAlternates * MWordTypes].num++;
                                            }
                                            else
                                            {
                                                // word[0] is the recognised word, word[1] its pronunciation index.
                                                string[] word = wordTypeResult(2, result);

                                                ConfusionMatrix[NTrainingSpeakers - 1, iTestSpeaker, sampleRow,
                                                    numberOfAlternates * (Array.IndexOf(data.listOfWords.ToArray(), word[0]) - 1) + int.Parse(word[1])].num++;

                                                if (word[0].Equals(data.listOfWords[iWordType]))
                                                {
                                                    SampleAccuracy[NTrainingSpeakers - 1, NTrainingSamplesPerSpeaker - 1, vocabIndex, iTestSpeaker, iWordType - 1, iSample - 1].num++;
                                                }
                                            }

                                            SampleAccuracy[NTrainingSpeakers - 1, NTrainingSamplesPerSpeaker - 1, vocabIndex, iTestSpeaker, iWordType - 1, iSample - 1].den++;
                                        }
                                    }

                                    gc.Destroy();
                                }
                            }
                        }
                    }
                    // end testing
                }
            }
        }
    }
}

/// <summary>
/// Adds every pronunciation of one word type to the test grammar and, for each pronunciation,
/// increments the confusion-matrix denominator of every sample row of the spoken word type.
/// </summary>
/// <param name="gc">grammar being built for the current repeat</param>
/// <param name="pronunciations">pronunciations keyed by word name</param>
/// <param name="trainingSpeakerIndex">first ConfusionMatrix index (training speaker count - 1)</param>
/// <param name="iTestSpeaker">second ConfusionMatrix index (speaker under test)</param>
/// <param name="spokenWordType">1-based word type whose samples are being recognised (selects the rows)</param>
/// <param name="grammarWordType">1-based word type being added to the grammar (selects the columns)</param>
private void addWordTypeToTestGrammar(GrammarCreator gc, Dictionary<string, List<string>> pronunciations,
    int trainingSpeakerIndex, int iTestSpeaker, int spokenWordType, int grammarWordType)
{
    string wordName = data.listOfWords[grammarWordType];
    List<string> variants = pronunciations[wordName];

    for (int i = 0; i < variants.Count; i++)
    {
        for (int j = 0; j < MSample; j++)
        {
            ConfusionMatrix[trainingSpeakerIndex, iTestSpeaker, MSample * (spokenWordType - 1) + j, numberOfAlternates * (grammarWordType - 1) + i].den++;
        }

        // The token text is "<word>_<pronunciation index>"; the recognition code splits it back apart.
        gc.LogGrammar("<item><token sapi:pron=\"" + variants[i] + "\">" + wordName + "_" + i + "</token></item>");
    }
}
/// <summary>
/// Trains pronunciations for word types 1..MWordTypes using only the given speakers and samples.
/// A temporary Data set and a grammar named after the language, speakers and samples are created
/// for the run.
/// </summary>
/// <param name="speakerList">speakers to train on; passed unchanged to the temporary Data set</param>
/// <param name="sampleList">samples to train on; a copy with every index incremented by one is used, the argument itself is not modified</param>
/// <returns>whatever TrainingAlgorithm.LearnAllWords returns for this configuration</returns>
private Dictionary<string, List<string>> trainAllWords(List<int> speakerList, List<int> sampleList)
{
    // Tag of the form "speaker-<id>_<id>_..." used in the log line and the grammar name.
    string speakerTag = "speaker-";
    foreach (int speakerId in speakerList)
    {
        speakerTag += speakerId.ToString() + "_";
    }

    // Work on a copy so the caller's list keeps its original indices.
    string sampleTag = "sample-";
    List<int> trueSampleList = new List<int>(sampleList);
    for (int position = 0; position < trueSampleList.Count; position++)
    {
        int shifted = trueSampleList[position] + 1;
        trueSampleList[position] = shifted;
        sampleTag += shifted.ToString() + "_";
    }

    System.Diagnostics.Debug.WriteLine("training " + speakerTag + sampleTag);

    // words to be tested: every word type
    List<int> allWordTypes = new List<int>();
    for (int wordType = 1; wordType <= MWordTypes; wordType++)
    {
        allWordTypes.Add(wordType);
    }

    // Same audio source and word list as the main data set, restricted to the training speakers/samples.
    Data trainingData = new Data(data.audioDirectory, data.audioFileName, data.numberOfSamplesPerWord,
        speakerList, allWordTypes, trueSampleList, data.wordListPath);

    GrammarCreator trainingGrammar = new GrammarCreator(grammarDirectory, data.language + speakerTag + sampleTag, "allcombinations");

    return new TrainingAlgorithm(trainingGrammar, trainingData, numberOfAlternates, 15).LearnAllWords();
}