// Prunes the vocabulary: when the number of "Word" children is at least maxWords / 2,
// the words are ranked by useCount and the lower-ranked half is deleted from the UKS.
// Before a word is deleted, UnReplaceReferences is called for each thing listed in its ReferencedBy.
//   UKS: the knowledge store whose "Word" children are pruned.
private void PruneWords(ModuleUKS2 UKS)
{
    List<Thing> words = UKS.GetChildren(UKS.Labeled("Word"));

    // Nothing to do until the vocabulary reaches half of the configured maximum.
    if (words.Count < maxWords / 2)
    {
        return;
    }

    // Highest useCount first, so the second half of the list holds the pruning candidates.
    List<Thing> sortedWords = words.OrderByDescending(x => x.useCount).ToList();

    for (int j = sortedWords.Count / 2; j < sortedWords.Count; j++)
    {
        Thing word = sortedWords[j];

        // Every word in the lower half is removed unconditionally. A narrower filter
        // (word.useCount < 1 || word.ReferencedBy.Count == 0) existed here but is disabled.
        // NOTE(review): if UnReplaceReferences removes entries from word.ReferencedBy,
        // this index-based loop will skip elements -- confirm against UnReplaceReferences.
        for (int i = 0; i < word.ReferencedBy.Count; i++)
        {
            Thing phrase = word.ReferencedBy[i].T;
            UnReplaceReferences(word, phrase);
        }

        UKS.DeleteThing(word);
    }
}
//TODO: make this happen when new phrases are added
// Re-segments every "Phrase" child of the UKS: the phrase is expanded to the "Phoneme" class,
// candidate words are searched for, and the phrase's references are replaced by the best
// candidate sequence when that sequence is judged better (see the acceptance test below).
// Two further passes (phrase-in-phrase merging and common sub-phrase extraction) follow the
// early return and are currently disabled.
//   UKS: the knowledge store holding the "Phrase", "Word" and "Phoneme" things.
private void PrunePhrases(ModuleUKS2 UKS)
{
    //find phrases which differ by a single word...can this be a grammar exemplar?
    //find phrases which have phonemes in them and see if there are now words to put in them
    List<Thing> phrases = UKS.GetChildren(UKS.Labeled("Phrase"));
    List<Thing> words = UKS.GetChildren(UKS.Labeled("Word"));

    //TODO: if a word is usually followed by another specific word, create a new bigger word
    //out of two smaller ones. (A per-word tally of following words used to be computed here,
    //but its result was never acted upon, so it has been removed until the merge is implemented.)

    foreach (Thing phrase in phrases)
    {
        // possiblePhrases is shared scratch state; start each phrase from empty.
        possiblePhrases.Clear();

        //convert phrase back to phonemes
        Thing expandedPhrase = new Thing();
        ExpandToClass(expandedPhrase, phrase, UKS.Labeled("Phoneme"));

        FindPossibleWords(UKS, expandedPhrase.ReferencesAsThings);
        List<Thing> bestPhrase = FindBestPhrase(UKS);
        ConvertUnmatchedPhonemesToWords(UKS, bestPhrase);
        ExtendWordsWithSinglePhonemes(UKS, bestPhrase, words);

        int newUseCount = bestPhrase.Sum(x => x.useCount);
        int oldUseCount = phrase.ReferencesAsThings.Sum(x => x.useCount);
        int newWordCount = GetWordCount(UKS, bestPhrase);
        int oldWordCount = GetWordCount(UKS, phrase.ReferencesAsThings);

        // Accept the new segmentation when its word count equals its length, when it has a
        // smaller word count than the current references, or when its summed useCount is higher.
        if (newWordCount == bestPhrase.Count || newWordCount < oldWordCount || newUseCount > oldUseCount)
        {
            //replace the references in the phrase
            while (phrase.References.Count > 0)
            {
                phrase.RemoveReference(phrase.References[0].T);
            }
            foreach (Thing t in bestPhrase)
            {
                phrase.AddReference(t);
            }
        }
    }

    // Everything below is intentionally disabled by this early return; it is kept for future use.
    return;

    //find phrases which incorporate other phrases
#pragma warning disable CS0162 // Unreachable code detected
    for (int i = 0; i < phrases.Count; i++)
#pragma warning restore CS0162 // Unreachable code detected
    {
        Thing phrase1 = phrases[i];
        for (int j = i + 1; j < phrases.Count; j++)
        {
            Thing phrase2 = phrases[j];
            if (phrase1.References.Count < phrase2.References.Count)
            {
                // phrase1 is shorter: look for it inside phrase2.
                int index = IndexOfSequence(phrase2.ReferencesAsThings, phrase1.ReferencesAsThings);
                if (index > -1)
                {
                    ReplaceReferences(phrase1, phrase2, index);
                }
            }
            else if (phrase1.References.Count > phrase2.References.Count)
            {
                // phrase2 is shorter: collapse its occurrence inside phrase1 into one reference.
                int index = IndexOfSequence(phrase1.ReferencesAsThings, phrase2.ReferencesAsThings);
                if (index > -1)
                {
                    phrase1.References[index].T = phrase2;
                    for (int k = 0; k < phrase2.References.Count - 1; k++)
                    {
                        phrase1.RemoveReferenceAt(index + 1);
                    }
                }
            }
            else if (phrase1.References.Count == phrase2.References.Count)
            {
                // Same length and one contains the other: phrase2 is deleted from the UKS.
                int index = IndexOfSequence(phrase2.ReferencesAsThings, phrase1.ReferencesAsThings);
                if (index > -1)
                {
                    UKS.DeleteThing(phrase2);
                }
            }
        }
    }

    //search for common subphrases and convert them into phrases
    //l is the length of commonality we're searching for
    for (int i = 0; i < phrases.Count; i++)
    {
        Thing phrase1 = phrases[i];
        for (int l = phrase1.References.Count - 1; l > 1; l--)
        {
            for (int offset = 0; offset < phrase1.References.Count - l + 1; offset++)
            {
                List<Thing> subRange = phrase1.ReferencesAsThings.GetRange(offset, l).ToList();
                Thing newPhrase = null;
                for (int j = i + 1; j < phrases.Count; j++)
                {
                    Thing phrase2 = phrases[j];
                    int index = IndexOfSequence(phrase2.ReferencesAsThings, subRange);
                    if (index > -1 && phrase2.References.Count > subRange.Count)
                    {
                        // Create the shared sub-phrase on the first match only, and fold it into phrase1.
                        if (newPhrase == null)
                        {
                            newPhrase = UKS.AddThing("ph" + phraseCount++, UKS.Labeled("Phrase"), null, subRange.ToArray());
                            phrase1.References[offset].T = newPhrase;
                            for (int k = 0; k < newPhrase.References.Count - 1; k++)
                            {
                                phrase1.RemoveReferenceAt(offset + 1);
                            }
                        }

                        // Fold the same sub-phrase into phrase2.
                        phrase2.References[index].T = newPhrase;
                        for (int k = 0; k < newPhrase.References.Count - 1; k++)
                        {
                            phrase2.RemoveReferenceAt(index + 1);
                        }
                    }
                }
            }
        }
    }
}