/// <summary>
/// Runs a query against the inverted index and returns the documents found for it.
/// </summary>
/// <param name="query">The raw query text.</param>
/// <returns>
/// An empty list when no term is left after stemming and stop-word filtering;
/// the found documents, unranked, when fewer than two were found;
/// otherwise the list produced by <c>Ranker.RankQuery</c>.
/// </returns>
public static List<Document> Search(String query)
{
    // Split the query into words; ":" is forwarded as the second argument of Splitwords.
    List<string> tokens = Semanter.Splitwords(query, ":").ToList();

    // The possible types are derived from the raw words, before any stemming happens.
    HashSet<string> typesPossible = TypeChecker(tokens);

    // Stem every word and keep only the stems that are not stop words.
    List<string> terms = new List<string>();
    foreach (string token in tokens)
    {
        string stemmed = invt.Samantha.StemWord(token);
        if (invt.Stopwords.Contains(stemmed))
        {
            continue;
        }

        terms.Add(stemmed);
    }

    // Nothing left to search for.
    if (terms.Count == 0)
    {
        return new List<Document>();
    }

    // Look up the documents for the remaining terms, restricted by the possible types.
    Dictionary<Document, Dictionary<string, List<int>>> matches = DocsFound(terms, typesPossible);

    // With zero or one match there is nothing to order, so ranking is skipped.
    if (matches.Count < 2)
    {
        return matches.Keys.ToList();
    }

    return Ranker.RankQuery(terms, matches, invt.DocumentCount);
}
/// <summary>
/// Re-reads a modified document and updates its entry in the inverted index table.
/// </summary>
/// <param name="doc">The document that was modified; its <c>Address</c> is used to extract the text.</param>
/// <returns>The value returned by <c>invt.ModifyDocument</c> for this document.</returns>
/// <remarks>
/// NOTE(review): extraction failures presumably surface as exceptions from <c>x.Extract</c>;
/// confirm which exception types callers should expect.
/// </remarks>
public static Document ModifyFile(Document doc)
{
    // Extract the document's content from its address, then split the text into words.
    var extraction = x.Extract(doc.Address);
    String[] tokens = Semanter.Splitwords(extraction.Text);

    return invt.ModifyDocument(tokens, doc);
}
/// <summary>
/// Extracts the text of the given document, splits it into words and adds them
/// to the inverted index table.
/// </summary>
/// <param name="doc">The document to tokenize; its <c>Address</c> is used to extract the text.</param>
/// <exception cref="TextExtractionException">The text could not be extracted from the document.</exception>
public static void AddFileFrom(Document doc)
{
    // Pull the text out of the document first, then split it into words.
    var text = x.Extract(doc.Address).Text;
    String[] tokens = Semanter.Splitwords(text);

    invt.AddDocument(tokens, doc);
}