Example #1
0
        /// <summary>
        /// Trims the vocabulary once it has reached at least half of <c>maxWords</c>:
        /// the children of the "Word" thing are ordered by descending <c>useCount</c>
        /// and every word in the lower half of that ordering is deleted.
        /// </summary>
        /// <param name="UKS">Knowledge store that supplies the words and performs the deletion.</param>
        private void PruneWords(ModuleUKS2 UKS)
        {
            List <Thing> words = UKS.GetChildren(UKS.Labeled("Word"));

            //nothing to prune while the vocabulary is below half of the allowed size
            if (words.Count < maxWords / 2)
            {
                return;
            }

            //most-used words first, so the second half of the list holds the least-used ones
            List <Thing> sortedWords = words.OrderByDescending(x => x.useCount).ToList();

            for (int j = sortedWords.Count / 2; j < sortedWords.Count; j++)
            {
                Thing word = sortedWords[j];

                //every word in the lower half is removed unconditionally; the earlier filter
                //(useCount < 1 or not referenced by any phrase) is no longer applied
                //NOTE(review): if UnReplaceReferences removes entries from word.ReferencedBy,
                //this index loop will skip phrases - confirm against that helper
                for (int i = 0; i < word.ReferencedBy.Count; i++)
                {
                    Thing phrase = word.ReferencedBy[i].T;
                    UnReplaceReferences(word, phrase);
                }
                UKS.DeleteThing(word);
            }
        }
Example #2
0
        //TODO: make this happen when new phrases are added
        /// <summary>
        /// Re-segments every child of the "Phrase" thing. Each phrase is expanded via
        /// <c>ExpandToClass</c> against the "Phoneme" thing, candidate words are collected with
        /// <c>FindPossibleWords</c>, and the result of <c>FindBestPhrase</c> replaces the phrase's
        /// references when it consists only of words, uses fewer words, or has a higher total useCount.
        /// </summary>
        /// <param name="UKS">Knowledge store that holds the phrases, words and phonemes.</param>
        private void PrunePhrases(ModuleUKS2 UKS)
        {
            //find phrases which differ by a single word...can this be a grammar exemplar?
            //find phrases which have phonemes in them and see if there are now words to put in them
            List <Thing> phrases = UKS.GetChildren(UKS.Labeled("Phrase"));
            List <Thing> words   = UKS.GetChildren(UKS.Labeled("Word"));

            //TODO: if a word is usually followed by another specific word, create a new bigger word
            //out of two smaller ones. FindMostFrequentFollower gathers the statistics for this, but
            //nothing acts on the result yet, so it is not called here.

            foreach (Thing phrase in phrases)
            {
                possiblePhrases.Clear();
                //convert phrase back to phonemes
                Thing expandedPhrase = new Thing();
                ExpandToClass(expandedPhrase, phrase, UKS.Labeled("Phoneme"));
                FindPossibleWords(UKS, expandedPhrase.ReferencesAsThings);
                List <Thing> bestPhrase = FindBestPhrase(UKS);
                ConvertUnmatchedPhonemesToWords(UKS, bestPhrase);
                ExtendWordsWithSinglePhonemes(UKS, bestPhrase, words);

                int newUseCount  = bestPhrase.Sum(x => x.useCount);
                int oldUseCount  = phrase.ReferencesAsThings.Sum(x => x.useCount);
                int newWordCount = GetWordCount(UKS, bestPhrase);
                int oldWordCount = GetWordCount(UKS, phrase.ReferencesAsThings);

                //accept the new segmentation when every element is counted as a word by GetWordCount,
                //when it needs fewer words than before, or when its elements are used more often in total
                if (newWordCount == bestPhrase.Count || newWordCount < oldWordCount || newUseCount > oldUseCount)
                {
                    //replace the references in the phrase
                    while (phrase.References.Count > 0)
                    {
                        phrase.RemoveReference(phrase.References[0].T);
                    }
                    foreach (Thing t in bestPhrase)
                    {
                        phrase.AddReference(t);
                    }
                }
            }

            //The two follow-up passes below were already disabled (they sat behind an unconditional
            //return). They are kept as separate helpers so they can be re-enabled one at a time:
            //  MergeContainedPhrases(UKS, phrases);
            //  ExtractCommonSubPhrases(UKS, phrases);
        }

        /// <summary>
        /// Tallies which things come directly after the first occurrence of <paramref name="word"/>
        /// in each thing that references it. Not called yet.
        /// </summary>
        /// <param name="word">The thing whose followers are counted.</param>
        /// <returns>
        /// The follower link with the greatest weight, or null when no follower was seen more than
        /// once. A follower's weight starts at 0 on first sighting and grows by one per repeat.
        /// </returns>
        private static Link FindMostFrequentFollower(Thing word)
        {
            List <Link> followingWords      = new List <Link>();
            int         greatestWeight      = -1;
            int         greatestWeightIndex = -1;
            for (int j = 0; j < word.ReferencedBy.Count; j++)
            {
                Thing phrase = word.ReferencedBy[j].T;
                int   k      = phrase.References.FindIndex(x => x.T == word);
                Debug.Assert(k >= 0);
                //only positions that are not last in the phrase have a follower
                if (k < phrase.References.Count - 1)
                {
                    Thing followingWord = phrase.References[k + 1].T;

                    int index = followingWords.FindIndex(x => x.T == followingWord);
                    if (index != -1)
                    {
                        followingWords[index].weight++;
                        if (followingWords[index].weight > greatestWeight)
                        {
                            greatestWeight      = (int)followingWords[index].weight;
                            greatestWeightIndex = index;
                        }
                    }
                    else
                    {
                        followingWords.Add(new Link()
                        {
                            T = followingWord, weight = 0
                        });
                    }
                }
            }
            return greatestWeightIndex >= 0 ? followingWords[greatestWeightIndex] : null;
        }

        /// <summary>
        /// Disabled pass (not called): find phrases which incorporate other phrases.
        /// For each pair, the shorter phrase is searched for inside the longer one with
        /// <c>IndexOfSequence</c>; equal-length matches delete the second phrase.
        /// </summary>
        /// <param name="UKS">Knowledge store used to delete duplicate phrases.</param>
        /// <param name="phrases">The phrases to compare pairwise.</param>
        private void MergeContainedPhrases(ModuleUKS2 UKS, List <Thing> phrases)
        {
            for (int i = 0; i < phrases.Count; i++)
            {
                Thing phrase1 = phrases[i];
                for (int j = i + 1; j < phrases.Count; j++)
                {
                    Thing phrase2 = phrases[j];
                    if (phrase1.References.Count < phrase2.References.Count)
                    {
                        int index = IndexOfSequence(phrase2.ReferencesAsThings, phrase1.ReferencesAsThings);
                        if (index > -1)
                        {
                            ReplaceReferences(phrase1, phrase2, index);
                        }
                    }
                    else if (phrase1.References.Count > phrase2.References.Count)
                    {
                        int index = IndexOfSequence(phrase1.ReferencesAsThings, phrase2.ReferencesAsThings);
                        if (index > -1)
                        {
                            //collapse the matched run in phrase1 into a single reference to phrase2
                            phrase1.References[index].T = phrase2;
                            for (int k = 0; k < phrase2.References.Count - 1; k++)
                            {
                                phrase1.RemoveReferenceAt(index + 1);
                            }
                        }
                    }
                    else if (phrase1.References.Count == phrase2.References.Count)
                    {
                        int index = IndexOfSequence(phrase2.ReferencesAsThings, phrase1.ReferencesAsThings);
                        if (index > -1)
                        {
                            UKS.DeleteThing(phrase2);
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Disabled pass (not called): search for common subphrases and convert them into phrases.
        /// A run of references shared between a phrase and a later, longer phrase is turned into a
        /// new thing under "Phrase" (named "ph" + <c>phraseCount</c>), and the run is replaced in
        /// both phrases by a single reference to that new thing.
        /// </summary>
        /// <param name="UKS">Knowledge store used to create the new phrase things.</param>
        /// <param name="phrases">The phrases to scan for shared runs.</param>
        private void ExtractCommonSubPhrases(ModuleUKS2 UKS, List <Thing> phrases)
        {
            //l is the length of commonality we're searching for
            for (int i = 0; i < phrases.Count; i++)
            {
                Thing phrase1 = phrases[i];
                for (int l = phrase1.References.Count - 1; l > 1; l--)
                {
                    for (int offset = 0; offset < phrase1.References.Count - l + 1; offset++)
                    {
                        List <Thing> subRange  = phrase1.ReferencesAsThings.GetRange(offset, l).ToList();
                        Thing        newPhrase = null;
                        for (int j = i + 1; j < phrases.Count; j++)
                        {
                            Thing phrase2 = phrases[j];
                            int   index   = IndexOfSequence(phrase2.ReferencesAsThings, subRange);
                            if (index > -1 && phrase2.References.Count > subRange.Count)
                            {
                                //the shared phrase is created once, on the first match, and phrase1 is rewritten then
                                if (newPhrase == null)
                                {
                                    newPhrase = UKS.AddThing("ph" + phraseCount++, UKS.Labeled("Phrase"), null, subRange.ToArray());
                                    phrase1.References[offset].T = newPhrase;
                                    for (int k = 0; k < newPhrase.References.Count - 1; k++)
                                    {
                                        phrase1.RemoveReferenceAt(offset + 1);
                                    }
                                }
                                phrase2.References[index].T = newPhrase;
                                for (int k = 0; k < newPhrase.References.Count - 1; k++)
                                {
                                    phrase2.RemoveReferenceAt(index + 1);
                                }
                            }
                        }
                    }
                }
            }
        }