// Scans bestPhrase for runs of consecutive entries whose first parent is not the "Word"
// label and turns each qualifying run into a newly added Word.
//
// UKS        - knowledge store used to look up the "Word" label and to add the new words.
// bestPhrase - the phrase to process; it is handed to ReplaceWordInPhrase for every word created.
//
// A run qualifies when it is longer than one entry and shorter than maxPhonemesPerWord,
// or when it is a single entry that makes up the whole phrase.
private void ConvertUnmatchedPhonemesToWords(ModuleUKS2 UKS, List<Thing> bestPhrase)
{
    // The label lookup does not depend on loop state, so do it once.
    Thing wordLabel = UKS.Labeled("Word");

    int startOfWord = 0;
    while (startOfWord < bestPhrase.Count)
    {
        // Measure the run of non-Word entries that begins at startOfWord.
        int wordLength = 0;
        while (startOfWord + wordLength < bestPhrase.Count &&
               bestPhrase[startOfWord + wordLength].Parents[0] != wordLabel)
        {
            wordLength++;
        }

        bool isMultiPhonemeRun = wordLength > 1 && wordLength < maxPhonemesPerWord;
        //only add a single-phoneme word if it's the only word in the phrase
        bool isLonePhoneme = wordLength == 1 && bestPhrase.Count == 1;

        if (isMultiPhonemeRun || isLonePhoneme)
        {
            Thing[] newRefs = bestPhrase.GetRange(startOfWord, wordLength).ToArray();
            Thing newWord = UKS.AddThing("w" + wordCount++, wordLabel, null, newRefs);

            // NOTE(review): presumably this collapses the run inside bestPhrase into newWord,
            // which is why the cursor only advances by one afterwards - confirm against
            // ReplaceWordInPhrase.
            ReplaceWordInPhrase(bestPhrase, newWord);
            startOfWord++;
        }
        else
        {
            // Skip the run plus the entry that terminated it (a Word, or the end of the phrase).
            startOfWord += wordLength + 1;
        }
    }
}
// Adds the whole input phrase as a new Word when it is shorter than maxPhonemesPerWord
// and UKS.ReferenceMatch finds no match for it among the supplied words.
//
// UKS    - knowledge store used for the match lookup and for adding the word.
// phrase - the input sequence that may become a word.
// words  - the candidates passed to UKS.ReferenceMatch.
private void MakeWordOfShortInput(ModuleUKS2 UKS, List<Thing> phrase, List<Thing> words)
{
    // Inputs at or above the per-word limit are left alone.
    if (phrase.Count >= maxPhonemesPerWord)
    {
        return;
    }

    // A non-null result means a match was found, so nothing new is added.
    Thing existingMatch = UKS.ReferenceMatch(phrase, words);
    if (existingMatch != null)
    {
        return;
    }

    string newLabel = "w" + wordCount++;
    UKS.AddThing(newLabel, UKS.Labeled("Word"), null, phrase.ToArray());
}
// Returns the Phrase that UKS.ReferenceMatch finds for bestPhrase among the children of the
// "Phrase" label, adding a new one when there is no match. The useCount of every entry in
// bestPhrase is incremented, as is the useCount of the returned phrase when it is not null.
//
// UKS        - knowledge store used for the lookup and for adding the phrase.
// bestPhrase - the sequence of Things making up the phrase.
private Thing AddPhrase(ModuleUKS2 UKS, List<Thing> bestPhrase)
{
    List<Thing> knownPhrases = UKS.GetChildren(UKS.Labeled("Phrase"));
    Thing result = UKS.ReferenceMatch(bestPhrase, knownPhrases);

    // No match: add the phrase under the next "ph<N>" label.
    if (result == null)
    {
        string newLabel = "ph" + phraseCount++;
        result = UKS.AddThing(newLabel, UKS.Labeled("Phrase"), null, bestPhrase.ToArray());
    }

    // Every constituent counts as used once more.
    for (int i = 0; i < bestPhrase.Count; i++)
    {
        bestPhrase[i].useCount++;
    }

    // Still guarded against null in case the add above produced nothing.
    if (result == null)
    {
        return null;
    }

    result.useCount++;
    return result;
}
//TODO: make this happen when new phrases are added
// Re-analyzes every child of the "Phrase" label and swaps in a re-segmented reference list
// when it compares favorably with the phrase's current references.
//
// UKS - knowledge store supplying the "Phrase", "Word" and "Phoneme" labels and their children.
//
// Steps, in order:
//  1. For each child of "Word", tally which Thing follows it inside the Things that reference it.
//     The tallies are local and the threshold branch is empty, so this step currently changes nothing.
//  2. For each phrase, build a candidate segmentation through the helper calls below and, if it
//     passes the comparison, replace the phrase's references with it.
//  3. An unconditional `return;` ends the method there; the two passes that follow it never run.
private void PrunePhrases(ModuleUKS2 UKS)
{
    //find phrases which differ by a single word...can this be a grammar exemplar?
    //find phrases which have phonemes in them and see if there are now words to put in them
    List <Thing> phrases = UKS.GetChildren(UKS.Labeled("Phrase"));
    List <Thing> words = UKS.GetChildren(UKS.Labeled("Word"));

    //if a word is usually followed by another specific word, create a new bigger word out of two smaller ones.
    // NOTE: only the counting half of that idea exists; nothing is created from the result yet.
    for (int i = 0; i < words.Count; i++)
    {
        Thing word = words[i];
        // One Link per distinct follower seen so far; its weight is the running tally.
        List <Link> followingWords = new List <Link>();
        int greatestWeight = -1;
        int greatestWeightIndex = -1;
        for (int j = 0; j < word.ReferencedBy.Count; j++)
        {
            // NOTE(review): named "phrase", but this is whatever Thing references the word - confirm
            // that only phrases reference words.
            Thing phrase = word.ReferencedBy[j].T;
            // FindIndex returns the first position only, so a word that occurs more than once in the
            // same referencing Thing is examined at its first occurrence.
            int k = phrase.References.FindIndex(x => x.T == word);
            Debug.Assert(k >= 0);
            // Only look ahead when the word is not the last reference.
            if (k < phrase.References.Count - 1)
            {
                Thing followingWord = phrase.References[k + 1].T;
                int index = followingWords.FindIndex(x => x.T == followingWord);
                if (index != -1)
                {
                    followingWords[index].weight++;
                    if (followingWords[index].weight > greatestWeight)
                    {
                        greatestWeight = (int)followingWords[index].weight;
                        greatestWeightIndex = index;
                    }
                }
                else
                {
                    // First sighting starts at weight 0, so a weight of n means n + 1 sightings.
                    followingWords.Add(new Link() { T = followingWord, weight = 0 });
                }
            }
        }
        // Placeholder: the threshold is checked but no action is taken.
        if (greatestWeight > 3)
        {
        }
    }

    // Computed but not read anywhere else in this method; the loop below walks `phrases`.
    List <Thing> phrasesWithPhonemes = phrases.FindAll(x => x.References.Any(l => l.T.Parents[0] == UKS.Labeled("Phoneme")));
    foreach (Thing phrase in phrases)//hrasesWithPhonemes) -- NOTE(review): looks like a remnant of iterating phrasesWithPhonemes
    {
        // possiblePhrases is declared outside this method; it is reset before each phrase is analyzed.
        possiblePhrases.Clear();
        //convert phrase back to phonemes
        Thing expandedPhrase = new Thing();
        ExpandToClass(expandedPhrase, phrase, UKS.Labeled("Phoneme"));
        // NOTE(review): FindPossibleWords / FindBestPhrase presumably communicate through
        // possiblePhrases - confirm against their definitions.
        FindPossibleWords(UKS, expandedPhrase.ReferencesAsThings);
        List <Thing> bestPhrase = FindBestPhrase(UKS);
        ConvertUnmatchedPhonemesToWords(UKS, bestPhrase);
        ExtendWordsWithSinglePhonemes(UKS, bestPhrase, words);

        // Totals used to compare the candidate segmentation with the current references.
        int newUseCount = bestPhrase.Sum(x => x.useCount);
        int oldUseCount = phrase.ReferencesAsThings.Sum(x => x.useCount);
        int newWordCount = GetWordCount(UKS, bestPhrase);
        int oldWordCount = GetWordCount(UKS, phrase.ReferencesAsThings);
        // Accept the candidate when its word count equals its length, or its word count is lower
        // than the current one, or its summed useCount is higher.
        if (newWordCount == bestPhrase.Count || newWordCount < oldWordCount || newUseCount > oldUseCount)
        {
            //replace the references in the phrase
            while (phrase.References.Count > 0)
            {
                phrase.RemoveReference(phrase.References[0].T);
            }
            foreach (Thing t in bestPhrase)
            {
                phrase.AddReference(t);
            }
        }
    }
    // Everything below this return is unreachable; the pragmas silence compiler warning CS0162.
    return;

    //find phrases which incorporate other phrases
#pragma warning disable CS0162 // Unreachable code detected
    for (int i = 0; i < phrases.Count; i++)
#pragma warning restore CS0162 // Unreachable code detected
    {
        Thing phrase1 = phrases[i];
        for (int j = i + 1; j < phrases.Count; j++)
        {
            Thing phrase2 = phrases[j];
            if (phrase1.References.Count < phrase2.References.Count)
            {
                // phrase1 is the shorter one: look for it inside phrase2.
                int index = IndexOfSequence(phrase2.ReferencesAsThings, phrase1.ReferencesAsThings);
                if (index > -1)
                {
                    ReplaceReferences(phrase1, phrase2, index);
                }
            }
            else if (phrase1.References.Count > phrase2.References.Count)
            {
                // phrase2 is the shorter one: look for it inside phrase1.
                int index = IndexOfSequence(phrase1.ReferencesAsThings, phrase2.ReferencesAsThings);
                if (index > -1)
                {
                    // Point the first matched reference at phrase2, then drop the following
                    // phrase2.References.Count - 1 references.
                    phrase1.References[index].T = phrase2;
                    for (int k = 0; k < phrase2.References.Count - 1; k++)
                    {
                        phrase1.RemoveReferenceAt(index + 1);
                    }
                }
            }
            else if (phrase1.References.Count == phrase2.References.Count)
            {
                // Same length and a sequence match: phrase2 is deleted (presumably as a duplicate).
                int index = IndexOfSequence(phrase2.ReferencesAsThings, phrase1.ReferencesAsThings);
                if (index > -1)
                {
                    UKS.DeleteThing(phrase2);
                }
            }
        }
    }

    //search for common sub-phrases and convert them into phrases
    //l is the length of commonality we're searching for
    // (also unreachable, see the return above)
    for (int i = 0; i < phrases.Count; i++)
    {
        Thing phrase1 = phrases[i];
        // Try the longest proper sub-range first, down to a length of 2.
        for (int l = phrase1.References.Count - 1; l > 1; l--)
        {
            for (int offset = 0; offset < phrase1.References.Count - l + 1; offset++)
            {
                List <Thing> subRange = phrase1.ReferencesAsThings.GetRange(offset, l).ToList();
                // Created on the first later phrase that contains subRange, then reused for the rest.
                Thing newPhrase = null;
                for (int j = i + 1; j < phrases.Count; j++)
                {
                    Thing phrase2 = phrases[j];
                    int index = IndexOfSequence(phrase2.ReferencesAsThings, subRange);
                    // Only phrases strictly longer than the sub-range are rewritten.
                    if (index > -1 && phrase2.References.Count > subRange.Count)
                    {
                        if (newPhrase == null)
                        {
                            newPhrase = UKS.AddThing("ph" + phraseCount++, UKS.Labeled("Phrase"), null, subRange.ToArray());
                            // Collapse the sub-range inside phrase1 into a single reference to newPhrase.
                            phrase1.References[offset].T = newPhrase;
                            for (int k = 0; k < newPhrase.References.Count - 1; k++)
                            {
                                phrase1.RemoveReferenceAt(offset + 1);
                            }
                        }
                        // Collapse the same sub-range inside phrase2.
                        phrase2.References[index].T = newPhrase;
                        for (int k = 0; k < newPhrase.References.Count - 1; k++)
                        {
                            phrase2.RemoveReferenceAt(index + 1);
                        }
                    }
                }
            }
        }
    }
}
// Merges stray single phonemes in bestPhrase into a neighbouring word.
//
// UKS        - knowledge store used for label lookups, match lookups and adding words.
// bestPhrase - the phrase to edit in place.
// words      - the candidates passed to UKS.ReferenceMatch.
//
// A phoneme is handled only when each side of it is either a Word or the edge of the phrase.
// For such a phoneme up to two candidate words are considered: the preceding word with the
// phoneme appended, and the following word with the phoneme prepended. A candidate is added
// to the UKS only when UKS.ReferenceMatch finds no match for it among `words`. Both candidates
// may be added, but only one is spliced into the phrase, and the appended form wins.
private void ExtendWordsWithSinglePhonemes(ModuleUKS2 UKS, List<Thing> bestPhrase, List<Thing> words)
{
    for (int pos = 0; pos < bestPhrase.Count; pos++)
    {
        Thing phoneme = bestPhrase[pos];
        if (phoneme.Parents[0] != UKS.Labeled("Phoneme"))
        {
            continue;
        }

        bool isFirst = pos == 0;
        bool isLast = pos == bestPhrase.Count - 1;
        bool boundedBefore = isFirst || bestPhrase[pos - 1].Parents[0] == UKS.Labeled("Word");
        bool boundedAfter = isLast || bestPhrase[pos + 1].Parents[0] == UKS.Labeled("Word");
        if (!boundedBefore || !boundedAfter)
        {
            continue;
        }

        // Candidate 1: preceding word's references followed by the phoneme.
        Thing appendedWord = null;
        if (!isFirst)
        {
            List<Thing> appendedRefs = bestPhrase[pos - 1].References.Select(link => link.T).ToList();
            appendedRefs.Add(phoneme);
            if (UKS.ReferenceMatch(appendedRefs, words) == null)
            {
                appendedWord = UKS.AddThing("w" + wordCount++, UKS.Labeled("Word"), null, appendedRefs.ToArray());
            }
        }

        // Candidate 2: the phoneme followed by the following word's references.
        Thing prependedWord = null;
        if (!isLast)
        {
            List<Thing> prependedRefs = new List<Thing>();
            prependedRefs.Add(phoneme);
            prependedRefs.AddRange(bestPhrase[pos + 1].References.Select(link => link.T));
            if (UKS.ReferenceMatch(prependedRefs, words) == null)
            {
                prependedWord = UKS.AddThing("w" + wordCount++, UKS.Labeled("Word"), null, prependedRefs.ToArray());
            }
        }

        if (appendedWord != null)
        {
            // The new word takes the preceding word's slot and the phoneme's slot is dropped.
            bestPhrase[pos - 1] = appendedWord;
            bestPhrase.RemoveAt(pos);
        }
        else if (prependedWord != null)
        {
            // The new word takes the phoneme's slot and the following word's slot is dropped.
            bestPhrase[pos] = prependedWord;
            bestPhrase.RemoveAt(pos + 1);
        }
    }
}