예제 #1
0
        /// <summary>
        /// Reads every (PhraseID, WordIndex) row stored for a document and, for each row,
        /// adds a proximity weight of 1 / sqrt(distance) to the phrase's entry in
        /// <c>frequencyDictionary</c>, where distance is the smallest absolute difference
        /// between the row's word index and any index in <paramref name="searchPhraseIndex"/>.
        /// The phrase's <c>StandardFrequency</c> is incremented once per row.
        /// </summary>
        /// <param name="docID">Value matched against <c>PhraseLocation.DocumentID</c> in the query.</param>
        /// <param name="searchPhraseIndex">Word indices each row's <c>WordIndex</c> is measured against.</param>
        private void ProcessLinearDistanceMeasure(int docID, List <int> searchPhraseIndex)
        {
            string sql = "SELECT PhraseID, WordIndex FROM PhraseLocation WHERE DocumentID = " + docID + ";";

            // The using block releases the reader even when an exception escapes the loop.
            using (SQLiteDataReader reader = dataBase.PerformQuery(sql))
            {
                while (reader.Read())
                {
                    // Convert instead of unboxing: a direct (int) cast throws InvalidCastException
                    // when the provider hands back a boxed Int64 for an integer column.
                    int phraseID  = Convert.ToInt32(reader["PhraseID"]);
                    int wordIndex = Convert.ToInt32(reader["WordIndex"]);

                    // Fetch the phrase's data set with a single lookup, creating it on first sight.
                    FreqDataSet freqDataSet;
                    if (!frequencyDictionary.TryGetValue(phraseID, out freqDataSet))
                    {
                        freqDataSet = new FreqDataSet();
                        frequencyDictionary.Add(phraseID, freqDataSet);
                    }

                    // Find the smallest linear distance to any of the search indices.
                    // With an empty list this stays at int.MaxValue, giving a near-zero weight.
                    int minDist = int.MaxValue;
                    foreach (int searchIndex in searchPhraseIndex)
                    {
                        minDist = Math.Min(minDist, Math.Abs(wordIndex - searchIndex));
                    }

                    // A distance of 0 would make 1.0 / Math.Sqrt(0) evaluate to Infinity and
                    // poison the running sum, so a co-located row is weighted like an adjacent one.
                    int effectiveDist = Math.Max(minDist, 1);

                    freqDataSet.SumLinearDistance += 1.0 / Math.Sqrt(effectiveDist);
                    freqDataSet.StandardFrequency++;
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Rebuilds the <c>frequency</c> list from <c>frequencyDictionary</c>, producing one
        /// <c>ProximityWordFreq</c> per accepted phrase. A phrase is skipped when it is at most
        /// one character long, equals "rrb" or "lrb", or equals the search phrase text.
        /// </summary>
        private void CreateFrequency()
        {
            frequency = new List <ProximityWordFreq>();

            foreach (KeyValuePair <int, FreqDataSet> entry in frequencyDictionary)
            {
                // Filter out phrases that should not be reported.
                // NOTE(review): "rrb"/"lrb" are presumably bracket tokens left by the tokenizer - confirm.
                String phrase = GetPhrase(entry.Key).Phrase;
                if (phrase.Length <= 1 || phrase.Equals("rrb") || phrase.Equals("lrb") || phrase.Equals(searchPhrase.Phrase))
                {
                    continue;
                }

                FreqDataSet freqDataSet = entry.Value;

                // An entry can exist with StandardFrequency == 0 (ProcessParagraphAndSentence adds
                // entries without touching it); dividing then would yield NaN, so report 0 instead.
                double averageWeight = 0.0;
                if (freqDataSet.StandardFrequency > 0)
                {
                    averageWeight = freqDataSet.SumLinearDistance / freqDataSet.StandardFrequency;
                }

                // Scale the average by 100 and truncate to two decimal places. Math.Truncate stays
                // in double, so a NaN, infinite or very large value is never pushed through a
                // double-to-int cast, whose result is unspecified when the value is out of range.
                double linearDistance = Math.Truncate(10000.0 * averageWeight) / 100.0;

                // Add the data to the list.
                ProximityWordFreq proximityWordFreq = new ProximityWordFreq(phrase,
                                                                            freqDataSet.StandardFrequency,
                                                                            freqDataSet.NumMatchingParagraphs,
                                                                            freqDataSet.NumMatchingSentences,
                                                                            linearDistance);
                frequency.Add(proximityWordFreq);
            }
        }
예제 #3
0
        /// <summary>
        /// Reads every (PhraseID, SentenceIndex) row stored for one paragraph of a document and
        /// updates the co-occurrence counters in <c>frequencyDictionary</c>: each row other than
        /// the search phrase increments <c>NumMatchingParagraphs</c>, and additionally
        /// <c>NumMatchingSentences</c> when its sentence index is in
        /// <paramref name="uniqueSentenceIndex"/>.
        /// </summary>
        /// <param name="docID">Value matched against <c>PhraseLocation.DocumentID</c> in the query.</param>
        /// <param name="paraIndex">Value matched against <c>PhraseLocation.ParagraphIndex</c> in the query.</param>
        /// <param name="uniqueSentenceIndex">Sentence indices that count as containing the target.</param>
        private void ProcessParagraphAndSentence(int docID, int paraIndex, HashSet <int> uniqueSentenceIndex)
        {
            // Get the sentence data for the paragraph.
            String sql = "SELECT PhraseID, SentenceIndex FROM PhraseLocation WHERE DocumentID = " + docID + " AND ParagraphIndex = " + paraIndex + ";";

            // The using block releases the reader even when an exception escapes the loop.
            using (SQLiteDataReader reader = dataBase.PerformQuery(sql))
            {
                while (reader.Read())
                {
                    // Convert instead of unboxing: a direct (int) cast throws InvalidCastException
                    // when the provider hands back a boxed Int64 for an integer column.
                    int phraseID      = Convert.ToInt32(reader["phraseID"]);
                    int sentenceIndex = Convert.ToInt32(reader["SentenceIndex"]);

                    // Don't process the search word when found.
                    if (phraseID == searchPhrase.PhraseID)
                    {
                        continue;
                    }

                    // Fetch the phrase's data set with a single lookup, creating it on first sight.
                    FreqDataSet freqDataSet;
                    if (!frequencyDictionary.TryGetValue(phraseID, out freqDataSet))
                    {
                        freqDataSet = new FreqDataSet();
                        frequencyDictionary.Add(phraseID, freqDataSet);
                    }

                    // NOTE(review): incremented once per row, so a phrase occurring several times
                    // in this paragraph is counted several times - confirm that is intended.
                    freqDataSet.NumMatchingParagraphs++;

                    // If the sentence has the target, count the sentence-level match as well.
                    if (uniqueSentenceIndex.Contains(sentenceIndex))
                    {
                        freqDataSet.NumMatchingSentences++;
                    }
                }
            }
        }