/// <summary>
/// Builds the union of the document sets the index returns for each of this
/// query's tokens.
/// </summary>
/// <param name="index">Index that is asked for the documents of every token.</param>
/// <returns>A new set holding every document returned for at least one token.</returns>
public override HashSet<Document> GetResults(IInvertedIndex index)
{
    var matches = new HashSet<Document>();

    foreach (var token in Tokens)
    {
        matches.UnionWith(index.GetDocumentsOfToken(token));
    }

    return matches;
}
/// <summary>
/// Fetches all pages, preprocesses each one into tokens and adds it to a newly
/// created inverted index of the requested kind.
/// </summary>
/// <param name="kind">
/// Which index implementation to build: <c>IndexType.Boolean</c> or
/// <c>IndexType.ContentBased</c>.
/// </param>
/// <returns>The populated index; content-based indexes are also initialised.</returns>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="kind"/> is neither of the supported index types.
/// </exception>
public IInvertedIndex CreateIndex(IndexType kind)
{
    var languageFactory = new LanguageFactory();
    var processer = new PageProcesser();

    // Materialise once: the pages are counted (content-based index) and then
    // iterated, and re-enumerating the fetched sequence would repeat the fetch.
    List<Page> allPages = fetchPages().ToList();

    IInvertedIndex index;
    if (kind.Equals(IndexType.Boolean))
    {
        index = new InvertedIndexBoolean();
    }
    else if (kind.Equals(IndexType.ContentBased))
    {
        index = new InvertedIndexTF(allPages.Count);
    }
    else
    {
        // Fail explicitly instead of dereferencing a null index inside the parallel loop.
        throw new System.ArgumentOutOfRangeException(nameof(kind), kind, "Unsupported index type");
    }

    // NOTE(review): pages are added from multiple threads; this assumes
    // AddDocumentToIndex, GetLanguage and PreprocessPage are thread-safe - confirm.
    Parallel.ForEach(allPages, p =>
    {
        ILanguageBehaviour language = languageFactory.GetLanguage(p.Url);
        p.Tokens = processer.PreprocessPage(p.SiteText, language);
        index.AddDocumentToIndex(p);
    });

    if (kind.Equals(IndexType.ContentBased))
    {
        index.InitialiseIndex();
    }

    return index;
}
/// <summary>
/// Returns the index's full document iterator, or an empty iterator when this
/// node is negated and negation is not being ignored.
/// </summary>
/// <param name="index">Index whose <c>All()</c> iterator is returned.</param>
/// <param name="ignoreNegation">When true, the <c>Negated</c> flag is not consulted.</param>
/// <returns>An <c>EmptyIterator</c> for an honoured negation; otherwise <c>index.All()</c>.</returns>
public override DocSetIterator Search(IInvertedIndex index, bool ignoreNegation = false)
{
    // Negation only takes effect when the caller has not asked to ignore it.
    if (!ignoreNegation && Negated)
    {
        return new EmptyIterator();
    }

    return index.All();
}
/// <summary>
/// Runs a query against the index after rejecting queries that have neither
/// AND nor OR terms.
/// </summary>
/// <param name="query">Query whose <c>Ands</c>, <c>Ors</c> and <c>Excs</c> parts are evaluated.</param>
/// <param name="index">Index to search.</param>
/// <returns>The documents produced by <c>GetSubQueriesResults</c>.</returns>
/// <exception cref="ArgumentException">Both <c>query.Ands</c> and <c>query.Ors</c> are empty.</exception>
public static HashSet<Document> GetQueryResults(QueryBuilder query, IInvertedIndex index)
{
    bool hasNoPositiveTerms = query.Ands.IsEmpty() && query.Ors.IsEmpty();
    if (hasNoPositiveTerms)
    {
        throw new ArgumentException("Cannot search the whole internet!");
    }

    HashSet<Document> found = GetSubQueriesResults(query, index);
    return found;
}
/// <summary>
/// Combines the three parts of a query: the AND results and OR results are
/// unioned, then the EXC results are removed.
/// </summary>
/// <param name="query">Query providing the <c>Ands</c>, <c>Ors</c> and <c>Excs</c> sub-queries.</param>
/// <param name="index">Index each sub-query is evaluated against.</param>
/// <returns>A new set containing (AND results + OR results) minus EXC results.</returns>
private static HashSet<Document> GetSubQueriesResults(QueryBuilder query, IInvertedIndex index)
{
    var combined = new HashSet<Document>();

    // Include everything the AND sub-query yields.
    var andMatches = query.Ands.GetResults(index);
    combined.UnionWith(andMatches);

    // Include everything the OR sub-query yields.
    var orMatches = query.Ors.GetResults(index);
    combined.UnionWith(orMatches);

    // Drop everything the EXC sub-query yields.
    var excluded = query.Excs.GetResults(index);
    combined.ExceptWith(excluded);

    return combined;
}
/// <summary>
/// Builds the intersection of the document sets the index returns for each of
/// this query's tokens.
/// </summary>
/// <param name="index">Index that is asked for the documents of every token.</param>
/// <returns>
/// A new set holding the documents returned for every token; empty when there
/// are no tokens.
/// </returns>
public override HashSet<Document> GetResults(IInvertedIndex index)
{
    var results = new HashSet<Document>();
    if (!Tokens.Any())
    {
        return results;
    }

    // Seed with the first token's documents, then narrow with the remaining
    // tokens only; intersecting the seed with itself would be a wasted lookup.
    results.UnionWith(index.GetDocumentsOfToken(Tokens[0]));
    foreach (var token in Tokens.Skip(1))
    {
        results.IntersectWith(index.GetDocumentsOfToken(token));
    }

    return results;
}
/// <summary>
/// Creates the service, validates its arguments, queues every <c>Created</c>
/// event raised by the directory observer, and starts the observer.
/// </summary>
/// <param name="index">Index the service works with; must not be null.</param>
/// <param name="directoryObserver">Source of file-system events; must not be null.</param>
/// <param name="buildTasksCount">Number of index build tasks; must be positive.</param>
/// <exception cref="ArgumentException">
/// <paramref name="buildTasksCount"/> is zero or negative, or either reference argument is null.
/// </exception>
public IndexService(IInvertedIndex index, IDirectoryObserver directoryObserver, int buildTasksCount = 2)
{
    // Guards run in the same order as the checks they replace: count, index, observer.
    if (buildTasksCount <= 0)
    {
        throw new ArgumentException("Invalid index build tasks count");
    }

    if (index is null)
    {
        throw new ArgumentException("Index should not be null");
    }

    if (directoryObserver is null)
    {
        throw new ArgumentException("Directory observer should not be null");
    }

    this.index = index;
    this.directoryObserver = directoryObserver;
    this.buildTasksCount = buildTasksCount;
    this.eventsQueue = new ConcurrentQueue<FileSystemEventArgs>();

    // Subscribe before starting so the handler is attached when Start() runs.
    this.directoryObserver.Created += (source, e) => this.eventsQueue.Enqueue(e);
    this.directoryObserver.Start();
}
/// <summary>
/// Looks up this node's literal in the index and, when the node is negated and
/// negation is honoured, returns the complement relative to all documents.
/// </summary>
/// <param name="index">Index queried via <c>Get</c>, <c>AllowAll</c> and <c>All()</c>.</param>
/// <param name="ignoreNegation">When true, the plain literal iterator is returned even if negated.</param>
/// <returns>
/// The iterator from <c>index.Get(Literal)</c>, or a <c>ButNotIterator</c> of
/// <c>index.All()</c> minus that iterator for an honoured negation.
/// </returns>
/// <exception cref="NotSupportedException">
/// The negation must be applied but <c>index.AllowAll</c> is false.
/// </exception>
public override DocSetIterator Search(IInvertedIndex index, bool ignoreNegation = false)
{
    // The lookup always happens first, even if the negation check below throws.
    var literalMatches = index.Get(Literal);

    bool applyNegation = Negated && !ignoreNegation;
    if (!applyNegation)
    {
        return literalMatches;
    }

    if (!index.AllowAll)
    {
        throw new NotSupportedException("Negative literal outside conjunction not supported");
    }

    return new ButNotIterator(index.All(), literalMatches);
}
/// <summary>
/// Constructs the form: calls <c>InitializeComponent</c>, creates the document
/// corpus, inverted index and progress reporter, subscribes the form's own
/// handlers to its build/save/progress events, and finally calls
/// <c>InitBackGrounWorkers</c>.
/// </summary>
public fmInvIndex() {
    InitializeComponent();

    // Fresh, empty model objects for this form instance.
    DocumentCorpus = new DocumentCorpus();
    InvertedIndex = new InvertedIndex();

    // Index build notifications are handled by this form's own methods.
    InvertedIndexBuildBegin += OnInvertedIndexBuildBegin;
    InvertedIndexHasBeenBuilt += OnInvertedIndexHasBeenBuilt;

    // Progress reporter; its change notifications go to ProgressInfoIsChanged.
    TextProgress = new DisplayTextProgressMergeToBegin();
    TextProgress.IsChanged += ProgressInfoIsChanged;

    // Save start/end notifications.
    SavingBegin += OnSavingBegin;
    SavingEnd += OnSavingEnd;

    // NOTE(review): presumably configures the background workers used for
    // building/saving; defined elsewhere - confirm.
    InitBackGrounWorkers();
}
/// <summary>
/// Evaluates this node against the given index and returns an iterator over
/// the resulting document set. Implemented by each concrete node type.
/// </summary>
/// <param name="index">Index to search.</param>
/// <param name="ignoreNegation">
/// Defaults to false. NOTE(review): presumably asks the implementation to
/// evaluate the node as if it were not negated - confirm against the overrides.
/// </param>
/// <returns>An iterator over the documents selected by this node.</returns>
public abstract DocSetIterator Search(IInvertedIndex index, bool ignoreNegation = false);
/// <summary>
/// Creates the controller with a new English Porter2 stemmer and the supplied
/// index, which is cast to the concrete <c>InvertedIndex</c> type.
/// </summary>
/// <param name="invertedIndex">
/// Index to use; the cast fails with <c>InvalidCastException</c> if it is a
/// non-null instance of any other implementation.
/// </param>
public IndexController(IInvertedIndex invertedIndex)
{
    ps = new EnglishPorter2Stemmer();

    // The controller stores the concrete type, so the interface is downcast here.
    var concreteIndex = (InvertedIndex)invertedIndex;
    invIndex = concreteIndex;
}
/// <summary>
/// Evaluates this query part against the given index. Implemented by each
/// concrete query type.
/// </summary>
/// <param name="index">Index to look documents up in.</param>
/// <returns>The set of documents selected by this query part.</returns>
public abstract HashSet <Document> GetResults(IInvertedIndex index);
/// <summary>
/// Adds the given lines to the index by calling <c>Add</c> with the fixed
/// second argument <c>"doc"</c>.
/// </summary>
/// <param name="index">Index that receives the lines.</param>
/// <param name="lines">Lines passed to <c>index.Add</c> as-is.</param>
private static void BuildIndex(IInvertedIndex index, string[] lines)
    => index.Add(lines, "doc");
/// <summary>
/// Test fixture setup: creates a substitute <c>IInvertedIndex</c> and a
/// <c>Processor</c> under test that uses it.
/// </summary>
public ProcessorTest()
{
    var indexSubstitute = Substitute.For<IInvertedIndex>();

    _hashInvertedIndex = indexSubstitute;
    _sut = new Processor(_hashInvertedIndex);
}
/// <summary>
/// Creates a processor that stores the given inverted index for later use.
/// </summary>
/// <param name="hashInvertedIndex">Index kept in the <c>_hashInvertedIndex</c> field; not validated.</param>
public Processor(IInvertedIndex hashInvertedIndex)
    => _hashInvertedIndex = hashInvertedIndex;
/// <summary>
/// Flattens the documents the index returns for each of this query's tokens
/// into a single de-duplicated set.
/// </summary>
/// <param name="index">Index that is asked for the documents of every token.</param>
/// <returns>A new set holding every document returned for at least one token.</returns>
public override HashSet<Document> GetResults(IInvertedIndex index)
{
    // Query syntax with two 'from' clauses is translated to SelectMany.
    var documentsOfAllTokens =
        from token in Tokens
        from document in index.GetDocumentsOfToken(token)
        select document;

    return new HashSet<Document>(documentsOfAllTokens);
}