// Parses the corpus at DocumentPath and places its terms in the inverted index.
//
// A fresh InvertedIndex is created and invertedIndexWatch is reset, then every
// token returned by the SGML parser is processed:
//   - tokens that are not stop words and not numbers are stemmed and added to
//     the inverted index (weight 2 when the stemmed term starts with a capital
//     letter, otherwise 1); only the time spent inside InvertedIndex.Add is
//     accumulated in invertedIndexWatch;
//   - the first token seen for a document id registers a DocumentVector in
//     Documents and a zero entry in DocumentLengths;
//   - every token (indexed or not) increments the document's length;
//   - ParseIteration is raised whenever the document id differs from the one
//     seen on the previous token.
//
// DocumentPath: path handed to the SgmlParser constructor.
private void Parse( String DocumentPath )
{
    InvertedIndex = new SuffixNode(' ');
    invertedIndexWatch.Reset();

    Parser = new SgmlParser(DocumentPath);
    try
    {
        String value;
        String prevDocId = "";

        while ((value = Parser.Next()) != null)
        {
            // Read the id once per token; it is used several times below.
            String docId = Parser.DocID;

            if (!StopWordsIndex.HasWord(value) && !isNumber(value))
            {
                value = Stem(value);

                // Guard against a stem that is empty (or null): indexing value[0]
                // would otherwise throw and abort the whole parse.
                if (!String.IsNullOrEmpty(value))
                {
                    // NOTE(review): capitalisation is tested on the *stemmed* term;
                    // if Stem lower-cases its input the weight is always 1 - confirm.
                    int weight = isCapital(value[0]) ? 2 : 1;

                    invertedIndexWatch.Start();
                    InvertedIndex.Add(value, new DocumentIndex(docId, 0), weight);
                    invertedIndexWatch.Stop();
                }
            }

            // First token of a document we have not seen yet: register it.
            if (!Documents.Contains(docId))
            {
                Documents[docId] = new DocumentVector(docId, Parser.HeadLine, Parser.DateLine, Parser.DocumentPosition);
                DocumentLengths[docId] = 0;
            }

            // Document length counts every token, including skipped ones.
            DocumentLengths[docId] = ((int)DocumentLengths[docId]) + 1;

            // Fire an event out to any attached methods when the document changes.
            if (prevDocId != docId && ParseIteration != null)
            {
                ParseIteration(this);
            }

            prevDocId = docId;
        }
    }
    finally
    {
        // Always release the parser, even when indexing throws part-way through.
        Parser.Close();
    }
}