Beispiel #1
0
 /// <summary>
 /// Writes one batch of parsed terms to the posting files.
 /// When <paramref name="hasWritten"/> is false the batch is sorted by posting number and
 /// line, every term is registered in the in-memory dictionaries and its posting line is
 /// written through the writer selected by <c>SwitchWriterForPosting(..., "Posting")</c>.
 /// Otherwise the work is delegated to <c>UpdatePosting</c> followed by <c>DeleteOldFiles</c>.
 /// </summary>
 /// <param name="documentTermsDic">input dictionary of the Parse class</param>
 /// <param name="hasWritten">false for the very first write, true once posting files already exist</param>
 public void WriteToPostingFile(Dictionary <string, DocumentsTerm> documentTermsDic, bool hasWritten)
 {
     tempDic = documentTermsDic;
     if (!hasWritten)
     {
         // The sorted copy replaces the field, so the Clear() below empties the copy and
         // not the dictionary instance handed in by the caller.
         // NOTE(review): the loop relies on the new dictionary enumerating in insertion
         // order, which Dictionary<TKey,TValue> does not formally guarantee - confirm.
         this.tempDic = tempDic.OrderBy(i => i.Value.postNum).ThenBy(i => i.Value.line).ToDictionary(p => p.Key, p => p.Value);
         StringBuilder data    = new StringBuilder();
         int           postNum = -1;
         foreach (KeyValuePair <string, DocumentsTerm> pair in tempDic)
         {
             if (StreamShouldBeChanged(pair.Value.postNum, postNum))
             {
                 // Finish the previous posting stream before selecting the next one.
                 if (Writer != null)
                 {
                     Writer.Write(data);
                     data.Clear();
                     Writer.Flush();
                     Writer.Close();
                 }
                 SwitchWriterForPosting(pair.Value.postNum, "Posting");
             }
             postNum = pair.Value.postNum;

             // Register the term under the next free line counter of its posting number.
             IndexTerm currentTerm = new IndexTerm(pair.Value.m_valueOfTerm, postNum, currentLine[postNum]);
             currentTerm.IncreaseTfc(pair.Value.countTotalFrequency());
             currentTerm.IncreaseDf(pair.Value.m_Terms.Count);
             dictionaries[postNum].Add(pair.Value.m_valueOfTerm, currentTerm);
             data.AppendLine(pair.Value.WriteToPostingFileDocDocTerm(false).ToString());
             currentLine[postNum]++;
         }

         // Writer is still null when the batch was empty (no stream was ever selected),
         // so flushing and closing must stay inside the guard to avoid a NullReferenceException.
         if (Writer != null)
         {
             Writer.Write(data);
             data.Clear();
             Writer.Flush();
             Writer.Close();
             Writer = null;
         }
         tempDic.Clear();
     }
     else
     {
         UpdatePosting();
         DeleteOldFiles();
     }
     Console.WriteLine("-----------" + m_indexCounter++);
 }
Beispiel #2
0
        /// <summary>
        /// Loads the persisted term dictionary from disk into memory.
        /// The file is "WithStem\Dictionary.txt" or "WithOutStem\Dictionary.txt" below the output
        /// path, chosen by the stemming flag; when that file does not exist the method returns
        /// without touching the in-memory state.
        /// Each line is split on "(#)" into five fields: the term, followed by four fields of
        /// which the second ':'-separated token is read (document frequency, total frequency,
        /// posting number and line number, in that order).
        /// Blank lines are skipped. The loaded dictionaries are published only after the whole
        /// file has been parsed.
        /// </summary>
        public void LoadDictionary()
        {
            // Number of per-posting dictionaries; posting numbers index into this array.
            const int postingCount = 27;

            string relativePath   = m_doStemming ? "WithStem\\Dictionary.txt" : "WithOutStem\\Dictionary.txt";
            string dictionaryPath = Path.Combine(outPutPath, relativePath);
            if (!File.Exists(dictionaryPath))
            {
                return;
            }
            string[] allLines = File.ReadAllLines(dictionaryPath);

            Dictionary <string, IndexTerm>[] loaded = new Dictionary <string, IndexTerm> [postingCount];
            for (int i = 0; i < postingCount; i++)
            {
                loaded[i] = new Dictionary <string, IndexTerm>();
            }

            string[] separator = new string[] { "(#)" };
            foreach (string line in allLines)
            {
                // A blank line carries no fields; indexing into its split result would throw.
                if (string.IsNullOrWhiteSpace(line))
                {
                    continue;
                }

                string[] fields  = line.Split(separator, StringSplitOptions.None);
                string   term    = fields[0];
                int      df      = Int32.Parse(fields[1].Split(':')[1]);
                int      tfc     = Int32.Parse(fields[2].Split(':')[1]);
                int      postNum = Int32.Parse(fields[3].Split(':')[1]);
                int      lineNum = Int32.Parse(fields[4].Split(':')[1]);

                IndexTerm entry = new IndexTerm(term, postNum, lineNum);
                entry.IncreaseDf(df);
                entry.IncreaseTfc(tfc);
                loaded[postNum].Add(term, entry);
            }
            _dictionaries = loaded;
        }
Beispiel #3
0
        /// <summary>
        /// Merges the terms currently held in <c>tempDic</c> into the existing posting files.
        /// For every posting number the old "Posting{n}.txt" is read and rewritten through the
        /// writer selected by <c>SwitchWriterForPosting(..., "NewPosting")</c>:
        /// terms that already have a line get their new data appended to that line, terms whose
        /// line is <c>Int32.MaxValue</c> are added after all old lines and registered in the
        /// in-memory dictionaries. Old lines that are not touched are copied unchanged.
        /// </summary>
        public void UpdatePosting()
        {
            InitTerms();
            CreateNewPostingFiles();
            int           PostNumber = -1, LineNumberOfTerm;
            int           oldLineIndex = 0;   // next line of the old posting file not yet copied
            StringBuilder writeData = new StringBuilder();
            string[]      lines = null;       // content of the old posting file being merged

            // NOTE(review): the merge assumes tempDic enumerates grouped by posting number and
            // ascending by line within a group - presumably arranged by InitTerms; confirm.
            foreach (KeyValuePair <string, DocumentsTerm> pair in tempDic)
            {
                if (StreamShouldBeChanged(pair.Value.postNum, PostNumber))
                {
                    if (Writer != null)
                    {
                        // Old lines after the last updated term must survive even when no
                        // brand-new term followed them in this posting file.
                        if (lines != null)
                        {
                            oldLineIndex = AppendRemainingOldLines(writeData, lines, oldLineIndex);
                        }
                        Writer.Write(writeData);
                        writeData.Clear();
                        Writer.Flush();
                        Writer.Close();
                    }
                    SwitchWriterForPosting(pair.Value.postNum, "NewPosting");
                }
                if (StreamHasChanged)
                {
                    // A new stream was selected: load the matching old file and restart the cursor.
                    lines            = File.ReadAllLines(Path.Combine(m_outPutPath, "Posting" + pair.Value.postNum + ".txt"));
                    oldLineIndex     = 0;
                    StreamHasChanged = false;
                }
                PostNumber       = pair.Value.postNum;
                LineNumberOfTerm = pair.Value.line;
                if (LineNumberOfTerm != Int32.MaxValue) // exist in the posting file
                {
                    // Copy the untouched old lines that precede this term's line.
                    while (LineNumberOfTerm > oldLineIndex)
                    {
                        writeData.AppendLine(lines[oldLineIndex++]);
                    }
                    writeData.AppendLine(lines[oldLineIndex++] + pair.Value.WriteToPostingFileDocDocTerm(true));
                    dictionaries[PostNumber][pair.Value.m_valueOfTerm].IncreaseTfc(pair.Value.countTotalFrequency());
                    dictionaries[PostNumber][pair.Value.m_valueOfTerm].IncreaseDf(pair.Value.m_Terms.Count);
                }
                else
                {
                    // New terms go after every old line; the copy is a no-op once the
                    // remainder has already been emitted for this posting file.
                    oldLineIndex = AppendRemainingOldLines(writeData, lines, oldLineIndex);

                    IndexTerm currentTerm = new IndexTerm(pair.Value.m_valueOfTerm, PostNumber, currentLine[PostNumber]);
                    currentLine[PostNumber]++;
                    currentTerm.IncreaseTfc(pair.Value.countTotalFrequency());
                    currentTerm.IncreaseDf(pair.Value.m_Terms.Count);
                    dictionaries[PostNumber].Add(pair.Value.m_valueOfTerm, currentTerm);
                    writeData.AppendLine(pair.Value.WriteToPostingFileDocDocTerm(false).ToString());
                }
            }

            // Writer can still be null here (e.g. nothing was iterated), so flushing and
            // closing stay inside the guard to avoid a NullReferenceException.
            if (Writer != null)
            {
                if (lines != null)
                {
                    oldLineIndex = AppendRemainingOldLines(writeData, lines, oldLineIndex);
                }
                Writer.Write(writeData);
                writeData.Clear();
                Writer.Flush();
                Writer.Close();
                Writer = null;
            }
            tempDic.Clear();
        }

        /// <summary>
        /// Appends every old line from <paramref name="startIndex"/> to the end of
        /// <paramref name="oldLines"/> to <paramref name="target"/> and returns the index
        /// just past the last line, so a repeated call appends nothing.
        /// </summary>
        private static int AppendRemainingOldLines(StringBuilder target, string[] oldLines, int startIndex)
        {
            int index = startIndex;
            while (index < oldLines.Length)
            {
                target.AppendLine(oldLines[index++]);
            }
            return index;
        }