/// <summary>
/// Create a ResultFile by copying the descriptive properties of an
/// existing File. Rank is initialised to -1
/// (NOTE(review): presumably meaning "not yet ranked" - confirm against the ranking code).
/// </summary>
/// <param name="sourceFile">File whose properties are copied; must not be null</param>
/// <exception cref="ArgumentNullException">sourceFile is null</exception>
public ResultFile(File sourceFile)
{
    // Fail fast with a descriptive exception rather than a NullReferenceException
    // on the first property access below.
    if (sourceFile == null)
    {
        throw new ArgumentNullException("sourceFile");
    }

    this.PathAndName = sourceFile.PathAndName;
    this.Title = sourceFile.Title;
    this.Description = sourceFile.Description;
    this.CrawledDate = sourceFile.CrawledDate;
    this.Size = sourceFile.Size;
    this.Rank = -1;
    this.KeywordString = sourceFile.KeywordString;
    this.Extension = sourceFile.Extension;
}
/// <summary>
/// Add the Document subclass to the catalog, by first 'copying' the main
/// properties into a File class. The distinction is a bit arbitrary: Documents
/// are downloaded and indexed, but their content is modelled as a File
/// class in the Catalog.
/// </summary>
/// <remarks>
/// Each word is lower-cased, then (unless it is a "Go word") stripped of
/// punctuation and, if it is not a number, stemmed and stop-word filtered.
/// Words that end up null or empty after processing are not catalogued.
/// </remarks>
/// <param name="downloadDocument">Document whose words are added to the catalog</param>
/// <returns>Number of words catalogued in the Document</returns>
protected int AddToCatalog(Document downloadDocument)
{
    File infile = new File(downloadDocument.PathAndName
        , downloadDocument.Title
        , downloadDocument.Description
        , DateTime.Now
        , downloadDocument.Length
        , downloadDocument.Extension
        , downloadDocument.KeywordString);

    // ### Loop through words in the file ###
    int i = 0;      // count of words catalogued; also passed to the catalog as the word's position
    string key;     // the word as it will be stored in the catalog
    foreach (string word in downloadDocument.WordsArray)
    {
        // Invariant lower-casing keeps catalog keys independent of the
        // current thread culture (e.g. the Turkish dotless-i mapping).
        // NOTE(review): query-side normalisation should lower-case the same way - confirm.
        key = word.ToLowerInvariant();

        if (!_GoChecker.IsGoWord(key))
        {   // not a special case, parse like any other word
            RemovePunctuation(ref key);

            if (!IsNumber(ref key))
            {   // not a number, so get rid of numeric separators and catalog as a word
                // TODO: remove inline punctuation, split hyphenated words?
                // http://blogs.msdn.com/ericgu/archive/2006/01/16/513645.aspx
                key = key.Replace(",", String.Empty).Replace(".", String.Empty);

                // Apply Stemmer (set by preferences)
                key = _Stemmer.StemWord(key);

                // Apply Stopper (set by preferences)
                key = _Stopper.StopWord(key);
            }
        }
        else
        {   // Go words are catalogued as-is (lower-cased only)
            ProgressEvent(this, new ProgressEventArgs(4, "Found GoWord " + key + " in " + downloadDocument.Title));
        }

        // Skip words that were reduced to nothing; the null check also guards
        // against a stemmer/stopper that returns null.
        if (!String.IsNullOrEmpty(key))
        {
            _Catalog.Add(key, infile, i);
            i++;
        }
    }
    return i;
}