Ejemplo n.º 1
0
        /// <summary>
        /// Indexes a list of document texts into a temporary in-memory posting dictionary.
        /// </summary>
        /// <remarks>
        /// For every document this method:
        /// parses the text into a term-to-frequency dictionary via <c>controller.parser.ParseText</c>;
        /// stores the number of parsed term entries, the most frequent term and its frequency
        /// in <c>controller.DocumentsDataList[DocID]</c>;
        /// adds each term's frequency to the running total in <c>termsTFs</c>;
        /// appends a posting (frequency, DocID) to the term's posting list;
        /// and finally increments <c>DocID</c>.
        /// NOTE(review): <c>controller</c>, <c>termsTFs</c> and <c>DocID</c> are declared outside this
        /// method; it assumes <c>DocumentsDataList</c> already has an entry at every DocID it visits.
        /// </remarks>
        /// <param name="docs">List of texts</param>
        /// <returns>
        /// Dictionary mapping each term (compared with invariant-culture, case-insensitive rules)
        /// to the posting list collected from <paramref name="docs"/>.
        /// </returns>
        public Dictionary <string, TermPostingList> IndexDocumentsToTemporalPostingFile(List <string> docs)
        {
            Dictionary<string, TermPostingList> temporalPostingFile =
                new Dictionary<string, TermPostingList>(StringComparer.InvariantCultureIgnoreCase);

            foreach (string doc in docs)
            {
                // Parse the document text into a dictionary of terms and their frequencies;
                // the parser reports the most frequent term through the out argument.
                KeyValuePair<string, int> maxTerm;
                Dictionary<string, int> termsInDocument = controller.parser.ParseText(doc, out maxTerm);

                // Update the current document's entry in the documents list.
                controller.DocumentsDataList[DocID].DocLength        = termsInDocument.Count;
                controller.DocumentsDataList[DocID].MostFrequentTerm = maxTerm.Key;
                controller.DocumentsDataList[DocID].FrequentTermNumberOfInstances = maxTerm.Value;

                foreach (KeyValuePair<string, int> entry in termsInDocument)
                {
                    // Single lookup: fetch the term's posting list, creating and registering it
                    // on first sight, then reuse the reference instead of re-indexing the dictionary.
                    TermPostingList postings;
                    if (!temporalPostingFile.TryGetValue(entry.Key, out postings))
                    {
                        postings = new TermPostingList();
                        temporalPostingFile[entry.Key] = postings;
                    }

                    // Accumulate the term's total frequency across all indexed documents.
                    if (!termsTFs.ContainsKey(entry.Key))
                    {
                        termsTFs[entry.Key] = entry.Value;
                    }
                    else
                    {
                        termsTFs[entry.Key] += entry.Value;
                    }

                    // Record this document's occurrence count for the term.
                    TermPostingData termData = new TermPostingData(entry.Value, DocID);
                    postings.AddToTermPostingList(termData);
                }

                DocID++; // move to the next document in the corpus
            }

            return temporalPostingFile;
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Appends the given posting entry to the end of <c>PostingList</c>.
 /// </summary>
 /// <param name="termData">Posting entry to append.</param>
 public void AddToTermPostingList(TermPostingData termData) => PostingList.Add(termData);