/// <summary>
/// Builds a result entry from a catalogued file by copying its descriptive
/// properties one by one.
/// </summary>
/// <param name="sourceFile">File whose property values are copied; it is only read here.</param>
public ResultFile(File sourceFile)
{
    Url = sourceFile.Url;
    Title = sourceFile.Title;
    Description = sourceFile.Description;
    CrawledDate = sourceFile.CrawledDate;
    Size = sourceFile.Size;

    // Rank is the one value not taken from the source file; it is initialised to -1.
    Rank = -1;

    GpsLocation = sourceFile.GpsLocation;
    KeywordString = sourceFile.KeywordString;
    Extension = sourceFile.Extension;
}
/// <summary>
/// Adds a downloaded Document to the catalog, BY FIRST 'copying' the main
/// properties into a File object. The distinction is a bit arbitrary: Documents
/// are downloaded and indexed, but their content is modelled as a File
/// class in the Catalog (and represented as a ResultFile object in the search ASPX page).
/// </summary>
/// <param name="downloadDocument">Downloaded document whose words are added to the catalog.</param>
/// <returns>
/// The number of words read from the document's WordsArray. Every word is counted,
/// including words whose key ends up empty and is therefore not added to the catalog.
/// </returns>
protected int AddToCatalog(Document downloadDocument)
{
    File infile = new File(
        downloadDocument.Uri.AbsoluteUri,
        downloadDocument.Title.UnicodeToCharacter(),
        downloadDocument.Description.UnicodeToCharacter(),
        DateTime.Now,
        downloadDocument.Length,
        downloadDocument.GpsLocation,
        downloadDocument.Extension,
        downloadDocument.KeywordString.UnicodeToCharacter());

    // ### Loop through words in the file ###
    // Zero-based position of the current word; after the loop it equals the number of words seen.
    int wordPosition = 0;
    foreach (string word in downloadDocument.WordsArray)
    {
        // NOTE(review): ToLower() uses the current culture; confirm that the search side
        // normalises query terms the same way before changing this.
        string key = word.UnicodeToCharacter().ToLower();

        if (_GoChecker.IsGoWord(key))
        {
            // Go words bypass punctuation removal, stemming and stopping, and are catalogued as-is.
            ProgressEvent(this, new ProgressEventArgs(4, "Found GoWord " + key + " in " + downloadDocument.Title));
        }
        else
        {
            // not a special case, parse like any other word
            RemovePunctuation(ref key);
            if (!IsNumber(ref key))
            {
                // not a number, so get rid of numeric separators and catalog as a word
                // TODO: remove inline punctuation, split hyphenated words?
                // http://blogs.msdn.com/ericgu/archive/2006/01/16/513645.aspx
                // The pattern contains no letters, so no case-insensitive option is needed.
                key = System.Text.RegularExpressions.Regex.Replace(key, "[,.]", "");

                // Apply Stemmer (set by preferences)
                key = _Stemmer.StemWord(key);

                // Apply Stopper (set by preferences)
                key = _Stopper.StopWord(key);
            }
        }

        // Only non-empty keys are catalogued. A null key is skipped as well, in case one
        // of the helpers above hands back null instead of an empty string.
        if (!String.IsNullOrEmpty(key))
        {
            _Catalog.Add(key, infile, wordPosition);
        }

        wordPosition++;
    }

    // Hand the document's word array to the catalog's file cache alongside the File entry.
    _Catalog.FileCache.Add(downloadDocument.WordsArray, infile);

    return wordPosition;
}
/// <summary>
/// Caches the words of a file under the file's URL, unless an entry for that
/// URL already exists.
/// </summary>
/// <param name="words">
/// Words of the file. Each element is replaced IN PLACE with its UnicodeToCharacter()
/// form, and this same array instance is stored in the cache entry.
/// </param>
/// <param name="infile">File whose Url identifies the cache entry.</param>
/// <returns>
/// true when a new entry was added; false when the URL was already cached, in which
/// case <paramref name="words"/> is left untouched.
/// </returns>
public bool Add(string[] words, File infile)
{
    // Normalise the URL once and use that single value for the lookup, the entry and
    // the stored key. Previously the lookup used the normalised URL while the entry was
    // stored under the raw one, so the duplicate check could miss whenever they differed.
    string url = infile.Url.UnicodeToCharacter();

    // ### Make sure the file is in the index ONCE only
    if (_Index.ContainsKey(url))
    {
        // already cached
        return false;
    }

    for (int i = 0; i < words.Length; i++)
    {
        words[i] = words[i].UnicodeToCharacter();
    }

    CachedFile cf = new CachedFile();
    cf.Url = url;
    cf.Words = words;
    _Index.Add(url, cf);

    return true;
}