// Document frequency of a term present in the corpus: "whatever" occurs in
// exactly two of the fixture documents.
public void GettingDocFrequency_ProvidingATerm_ShouldReturnCorrectResult5()
{
    // Arrange
    var sut = new InvertedIndex(this.Documents);

    // Act
    var frequency = sut.GetDocumentFrequency("whatever");

    // Assert
    Assert.AreEqual(2, frequency);
}
// Postings of a deleted document must disappear from query results while
// postings belonging to other documents remain visible.
public void ShouldMarkAsDeleted()
{
    // Arrange: two values under one field, spread over three documents.
    var sut = new InvertedIndex();
    var field = Utils.RandomString();
    var firstValue = Utils.RandomString();
    var secondValue = Utils.RandomString();
    var firstDoc = Utils.RandomInteger();
    var secondDoc = Utils.RandomInteger();
    var thirdDoc = Utils.RandomInteger();
    sut.AddPosting(field, firstValue, firstDoc);
    sut.AddPosting(field, secondValue, secondDoc);
    sut.AddPosting(field, secondValue, thirdDoc);

    // Act: delete only the middle document.
    sut.MarkAsDeleted(secondDoc);

    // Assert: postings for the first value are untouched.
    var firstPostings = sut.GetPostings(field, firstValue).ToList();
    Assert.AreEqual(1, firstPostings.Count);
    Assert.AreEqual(firstDoc, firstPostings[0]);

    // Assert: only the surviving document remains for the second value.
    var secondPostings = sut.GetPostings(field, secondValue).ToList();
    Assert.AreEqual(1, secondPostings.Count);
    Assert.AreEqual(thirdDoc, secondPostings[0]);
}
// A term that never occurs in the corpus has a document frequency of zero.
public void GettingDocFrequency_ProvidingUnseenTerm_ShouldReturnZero()
{
    // Arrange
    var sut = new InvertedIndex(this.Documents);

    // Act
    var frequency = sut.GetDocumentFrequency("navigator");

    // Assert
    Assert.AreEqual(0, frequency);
}
/// <summary>
/// Handles the "index" console command: builds an inverted index over all
/// indexable files under the given path, caches it in memory and serializes
/// it to "&lt;md5(path)&gt;.txt".
/// </summary>
/// <param name="tokens">Command tokens; tokens[1..] joined form the path (quote characters are stripped).</param>
/// <exception cref="ArgumentException">Thrown when no path argument was supplied.</exception>
private void handleIndexCommand(string[] tokens)
{
    var watch = System.Diagnostics.Stopwatch.StartNew();

    if (tokens.Length < 2)
    {
        throw new ArgumentException("No path provided for indexing.");
    }

    // Re-join the path (it may contain spaces) and strip surrounding quotes.
    var path = String.Join(" ", tokens, 1, tokens.Length - 1)
        .Replace("'", String.Empty).Replace("\"", String.Empty);

    var pathHash = HashUtilities.CreateMD5Hash(path);
    Console.WriteLine($"pathHash = {pathHash}");
    Console.WriteLine($"Indexing path '{path}' started at {DateTime.Now}");

    var invertedIndex = new InvertedIndex(path, pathHash);
    // Consistency fix: field accesses now uniformly use `this.` as the rest
    // of this method does (was a mix of bare and `this.`-qualified access).
    var filepaths = this.fileUtilities.GetIndexableFilePaths(path, this.configuration.AllowedExtensions);
    // Fix: the build result was assigned to an unused local; the call's
    // side effect (populating the index) is what matters here.
    invertedIndex.BuildIndex(filepaths, this.fileUtilities, stopWords: this.configuration.StopWords);

    this.invertedIndexCache.set(invertedIndex.MD5, invertedIndex); // save to memory, force overwrite
    this.lastIndexedPath = invertedIndex.Path;

    var savePath = $"{pathHash}.txt";
    this.fileUtilities.SerializeInvertedIndex(savePath, invertedIndex);
    Console.WriteLine($"Index written to '{savePath}'");

    watch.Stop();
    var elapsedMs = watch.ElapsedMilliseconds;
    Console.WriteLine($"Indexed Path '{path}' with {filepaths.Count} files in {elapsedMs} ms");
}
// Looking up a term absent from the corpus yields an empty document set.
public void GettingDocuments_ProvidingUnseenTerm_ShouldReturnEmptyCollection()
{
    // Arrange
    var sut = new InvertedIndex(this.Documents);

    // Act
    var documentIds = sut.GetDocumentsContainingTerm("bear").Select(x => x.Id).ToList();

    // Assert
    CollectionAssert.AreEquivalent(new string[] { }, documentIds);
}
// Concurrently adding postings must not lose any: the sum of postings
// retrievable over all (field, value) pairs equals the number added.
public void ShouldAddPostingsInParallel()
{
    const int count = 1000;
    long nextDocId = -1;

    // Random pools of field names and values to spread postings across.
    var fields = Enumerable.Repeat(0, count).Select(_ => Utils.RandomString()).ToList();
    var terms = Enumerable.Repeat(0, count).Select(_ => Utils.RandomString()).ToList();

    var sut = new InvertedIndex(cleanUpPeriod: 1);

    // Hammer the index from many threads; Interlocked keeps doc ids unique.
    Parallel.For(0, count, _ =>
    {
        var field = Utils.RandomElement(fields);
        var term = Utils.RandomElement(terms);
        sut.AddPosting(field, term, Interlocked.Increment(ref nextDocId));
    });

    var total = 0;
    foreach (var field in fields)
    {
        foreach (var term in terms)
        {
            total += sut.GetPostings(field, term).Count();
        }
    }

    Assert.AreEqual(count, total);
}
/// <summary>
/// Restores this engine's state from a previously exported snapshot:
/// index, per-document term counts, and corpus-level counters.
/// </summary>
/// <param name="searchExport">The snapshot to copy state from.</param>
public void Import(SearchExport searchExport)
{
    NumberOfDocuments = searchExport.NumberOfDocuments;
    NumberOfTerms = searchExport.NumberOfTerms;
    _invertedIndex = searchExport.InvertedIndex;
    _documentNumberOfTerms = searchExport.DocumentNumberOfTerms;
}
/// <summary>
/// Console entry point: builds an inverted index with the user's chosen
/// stemmer, runs a boolean AND query over the user's message, then applies
/// latent semantic indexing to the term matrix.
/// </summary>
private static void Main(string[] args)
{
    // URL input is currently disabled; _url is taken from its static default.
    //Console.WriteLine("Enter url");
    //_url = Console.ReadLine();
    DocumentData document = new DocumentData(_url);
    //document.CreateXMLDoc();

    Console.WriteLine("Choose stemmer: porter or mystem");
    _stemmer = Console.ReadLine();

    InvertedIndex invertedIndex = new InvertedIndex();
    invertedIndex.CreateInvertedIndex(_stemmer);
    document.XMLForInvertedIndex(invertedIndex.GetInvertedIndex());

    // Fix: corrected typo in the user-facing prompt ("messege" -> "message").
    Console.WriteLine("enter message: ");
    _mess = Console.ReadLine();
    invertedIndex.And(_mess);

    LSIClass c = new LSIClass(invertedIndex.FillMatrix(), invertedIndex.q, _mess, invertedIndex.n);
    c.LatentSemanticIndexing();
}
// Smoke test: AddDocument should accept a document plus its raw content
// without throwing. The trailing Assert.True(true) only marks the test as
// passed when no exception occurred — it asserts nothing about index state.
public void AddDocumentTest() {
    var invertedIndex = new InvertedIndex();
    invertedIndex.AddDocument(new Document("doc"), "this is content in doc");
    Assert.True(true);
}
/// <summary>
/// Creates an InvertedIndex backed by the configured Elasticsearch
/// server, port, and index name.
/// </summary>
/// <returns>A ready-to-use inverted index over the Elastic backend.</returns>
private static InvertedIndex InitializeInvertedIndex()
{
    var backend = new ElasticIndex(elasticServerUrl, elasticServerPort, indexName);
    return new InvertedIndex(backend);
}
// Note: fill test data into the inverted index -- in progress.
public void Init()
{
    index = new InvertedIndex();

    // Each document carries four terms; several terms overlap between
    // documents so intersection/union queries have non-trivial results.
    index.Add(doc1, new List<string> { "word1", "word2", "word3", "word4" });
    index.Add(doc2, new List<string> { "word2", "word4", "word6", "word8" });
    index.Add(doc3, new List<string> { "word3", "word6", "word9", "word12" });
    index.Add(doc4, new List<string> { "word4", "word8", "word12", "word16" });
    index.Add(doc5, new List<string> { "word5", "word10", "word15", "word20" });
    index.Add(doc6, new List<string> { "word6", "word12", "word18", "word24" });
}
/// <summary>
/// Evaluates a conjunctive (AND) query: returns the document ids present in
/// the postings of every sub-query.
/// </summary>
/// <param name="index">The inverted index to evaluate the sub-queries against.</param>
/// <returns>Ids matching all sub-queries; empty as soon as any sub-query matches nothing.</returns>
public override IEnumerable<int> Execute(InvertedIndex index)
{
    var postings = new List<List<int>>();
    foreach (var query in _queries)
    {
        var l = query.Execute(index).ToList();
        if (!l.Any())
        {
            // Short-circuit: one empty postings list empties the intersection.
            return Enumerable.Empty<int>();
        }
        postings.Add(l);
    }

    // Intersect smallest-first to keep intermediate results small.
    postings.Sort((l1, l2) => l1.Count.CompareTo(l2.Count));
    var results = postings[0];
    for (var i = 1; i < postings.Count; i++)
    {
        // BUG FIX: the loop previously intersected with postings[1] on every
        // iteration, silently ignoring every list beyond the second.
        results = results.Intersect(postings[i]).ToList();
    }
    return results;
}
// "you" occurs in fixture documents 2, 5 and 6; the lookup must return
// exactly those ids (order-insensitive).
public void GettingDocuments_ContainingATerm_ShouldReturnCorrectResult5()
{
    // Arrange
    var sut = new InvertedIndex(this.Documents);

    // Act
    var documentIds = sut.GetDocumentsContainingTerm("you").Select(x => x.Id).ToList();

    // Assert
    CollectionAssert.AreEquivalent(new[] { "2", "5", "6" }, documentIds);
}
// idf = ln(N / df) with N = 6 documents and df = 3 (docs containing "you").
public void GettingTheInverseDocFrequency_ProvidingATerm_ShouldReturnCorrectResult5()
{
    // Arrange
    var sut = new InvertedIndex(this.Documents);

    // 6 / 3 is integer division, but it is exact here (= 2), so ln(2) results.
    var expected = Math.Round(Math.Log(6 / 3), 2);

    // Act
    var actual = Math.Round(sut.GetInverseDocumentFrequency("you"), 2);

    // Assert
    Assert.AreEqual(expected, actual);
}
// An unseen term has no document frequency, so its idf is defined as 0
// rather than the mathematical infinity of log(N / 0).
public void GettingTheInverseDocFrequency_ProvidingUnseenTerm_ShouldReturnZero()
{
    // Arrange
    var sut = new InvertedIndex(this.Documents);

    // Act
    var actual = sut.GetInverseDocumentFrequency("tree");

    // Assert
    Assert.AreEqual(0, actual);
}
// CreateIndex over the sample document list must produce exactly the
// reference index from the sample factory.
public void CreateIndexTest()
{
    // Arrange
    var sut = new InvertedIndex();
    var documents = SampleCreator.CreateStringList();

    // Act
    sut.CreateIndex(documents);

    // Assert
    Assert.Equal(sut.Index, SampleCreator.CreateIndex());
}
// Seeds the fixture: a fresh index plus one document ("test1") built from
// the _data1 lines joined with CRLF line endings.
public void Initialize()
{
    _index = new InvertedIndex();

    // Join with CRLF so the byte count matches a Windows-style text file.
    var payload = string.Join("\r\n", _data1);
    var payloadSize = Encoding.Default.GetByteCount(payload);
    Add("test1", string.Empty, payloadSize, _timestamp, TokenizeString("test1", string.Empty, payload));
}
/// <summary>
/// Evaluates a disjunctive (OR) query: returns the union of the document ids
/// produced by every sub-query (duplicates removed by Union).
/// </summary>
/// <param name="index">The inverted index to evaluate the sub-queries against.</param>
/// <returns>Ids of documents matching at least one sub-query.</returns>
public override IEnumerable<int> Execute(InvertedIndex index)
{
    // Fix: removed an unused local (`results`) that was assigned the empty
    // sequence and never read.
    return _queries.Aggregate(
        seed: Enumerable.Empty<int>(),
        func: (current, query) => current.Union(query.Execute(index))
    );
}
// Smoke test: the Dictionary<Document, string> constructor overload should
// build an index from in-memory content without throwing. The discard plus
// Assert.True(true) make the "no exception" expectation explicit — nothing
// about the resulting index state is verified here.
public void ConstructorDictionaryTest() {
    var inputData = new Dictionary <Document, string> {
        { new Document("doc"), "this is content of the file" }
    };
    _ = new InvertedIndex(inputData);
    Assert.True(true);
}
// Searching for a token absent from the indexed sample data must yield an
// empty document set, not null and not an exception.
public void QueryEmptyResultTest()
{
    // Arrange
    var sut = new InvertedIndex();
    sut.AddDocuments(sampleData);
    var expected = new DocumentSet(new HashSet<Document>());

    // Act
    var actual = sut.SearchTokenInDocuments(new Token("Mohammad"));

    // Assert
    Assert.Equal(expected, actual);
}
// After indexing the sentence as document 1, "sky" and "sunset" reach a
// posting count of 2, while "tree" and "flower" end at 1.
// Fix: xUnit's Assert.Equal signature is (expected, actual); the original
// calls passed the actual value first, producing misleading failure output.
public void UpdateTheMapTest()
{
    // Arrange
    string str = "The sky , at sunset , looked like a carnivorous flower .";
    InvertedIndex invertedIndex = new InvertedIndex(mapOfTheWords);

    // Act
    invertedIndex.UpdateTheMap(str, 1);

    // Assert
    Assert.Equal(2, mapOfTheWords["sky"].Count);
    Assert.Equal(2, mapOfTheWords["sunset"].Count);
    Assert.Equal(1, mapOfTheWords["tree"].Count);
    Assert.Equal(1, mapOfTheWords["flower"].Count);
}
// Per-test-class setup: a fresh EF Core in-memory database backing the
// inverted index under test.
public InvertedIndexTest()
{
    var builder = new DbContextOptionsBuilder<InvertedIndexDbContext>();
    var options = builder.UseInMemoryDatabase("Test").Options;
    invertedIndexContext = new InvertedIndexDbContext(options);

    // The named in-memory store is shared; wipe and recreate it so state
    // from earlier runs cannot leak into this one.
    invertedIndexContext.Database.EnsureDeleted();
    invertedIndexContext.Database.EnsureCreated();

    invertedIndex = new InvertedIndex(invertedIndexContext);
}
/// <summary>
/// Wires up the DynamoDB-backed index repository and starts its
/// asynchronous initialisation.
/// </summary>
/// <param name="options">AWS credentials used to build the DynamoDB client.</param>
public IndexRepository(IOptions<AWSOptions> options)
{
    index = new InvertedIndex<string>();

    var aws = options.Value;
    client = new AmazonDynamoDBClient(aws.AwsAccessKey, aws.AwsSecretKey, RegionEndpoint.EUWest1);
    context = new DynamoDBContext(client);

    // NOTE(review): 60 and 100 look like a time window and an item limit —
    // confirm against DBThrottler's constructor.
    throttler = new DBThrottler<IndexItem>(context, 60, 100);

    Initialisation = AsyncInitialise();
}
/// <summary>
/// BackgroundWorker callback: builds the inverted index, expression logic,
/// and log-entry model for the configured log file, reporting progress via
/// the ProgressUpdate callback.
/// </summary>
private void bw_DoWork(object sender, DoWorkEventArgs e)
{
    // Fix: removed an unused local that cast `sender` to BackgroundWorker,
    // and a tautological null check — `new LogEntries(...)` can never
    // return null, so Build() was always invoked.
    index = new InvertedIndex();
    logic = new Expressions(index, persistentFileName);
    logEntries = new LogEntries(logFilename, index, ProgressUpdate);
    logEntries.Build();
}
/// <summary>
/// Initial test InvertedIndex with default values.
/// </summary>
/// <returns>
/// An InvertedIndex where "test" is posted under file1/file2 and "test2"
/// under file1/file3.
/// </returns>
public static InvertedIndex InitialInvertedIndex()
{
    // Seed pairs are (file name, term).
    var seedData = new List<Tuple<string, string>>
    {
        Tuple.Create("file1", "test"),
        Tuple.Create("file2", "test"),
        Tuple.Create("file1", "test2"),
        Tuple.Create("file3", "test2"),
    };

    var invertedIndex = new InvertedIndex();
    invertedIndex.InsertDatas(seedData);
    return invertedIndex;
}
/// <summary>
/// Demo driver: precomputes the static word dictionary from a fixed word
/// list (with repeats and case variants) and prints it.
/// </summary>
public static void Run()
{
    // Fix: removed an unused InvertedIndex instance (`ii`); only the static
    // PreCompute/PrintDictionary API is exercised here.
    var words = new System.String[]
    {
        "Kanishka", "Kanishk", "ken", "Kanishka", "Ken", "ken",
        "Kanishka", "Kanishk", "ken", "Kanishka", "Ken", "ken"
    };

    InvertedIndex.PreCompute(words);
    InvertedIndex.PrintDictionary();
}
// AddToIndex("chocolate", 3) must produce an index mapping the word to a
// set containing exactly document id 3.
public void AddToIndexTest()
{
    // Arrange: the reference index we expect to end up with.
    var expected = new Dictionary<string, HashSet<int>>
    {
        ["chocolate"] = new HashSet<int> { 3 }
    };

    // Act
    var sut = new InvertedIndex();
    sut.AddToIndex("chocolate", 3);

    // Assert
    Assert.Equal(expected, sut.Index);
}
// Both repository-seeded documents contain "content", so a search for that
// token must return exactly doc1 and doc2.
public void QuerySimpleWithConsturctorDataTest()
{
    // Arrange: the constructor loads documents from the repository.
    var sut = new InvertedIndex(repository);
    var expected = new DocumentSet(new HashSet<Document> { doc1, doc2 });

    // Act
    var actual = sut.SearchTokenInDocuments(new Token("content"));

    // Assert
    Assert.Equal(expected, actual);
}
// GetPostings must return an empty sequence — never null, never throw —
// for unknown fields and for known fields with unknown values.
public void ShouldReturnEmptyIfNotFound()
{
    var sut = new InvertedIndex();

    // An empty index yields no postings for any (field, value) pair.
    CollectionAssert.IsEmpty(sut.GetPostings(Utils.RandomString(), Utils.RandomString()));

    // A known field still yields nothing for a value never posted under it.
    var knownField = Utils.RandomString();
    sut.AddPosting(knownField, Utils.RandomString(), Utils.RandomInteger());
    CollectionAssert.IsEmpty(sut.GetPostings(knownField, Utils.RandomString()));
}
/// <summary>
/// Splits <paramref name="content"/> on single spaces and records the
/// site's URL under each normalized token in the index.
/// </summary>
/// <param name="index">Target index mapping tokens to sets of URLs.</param>
/// <param name="content">Raw page text to tokenize.</param>
/// <param name="web">Website whose URL is posted under every token.</param>
public void PopulateIndex(InvertedIndex<string> index, string content, Website web)
{
    var words = content.Split(' ');
    foreach (var word in words)
    {
        var key = ProcessKey(word);

        // Lazily create the posting set the first time a key is seen.
        if (!index.ContainsKey(key))
        {
            index.Add(key, new HashSet<string>());
        }

        index[key].Add(web.Url);
    }
}
/// <summary>
/// Converts a plain inverted index into a sequence-based one: each item id
/// is assigned the symbol whose postings list contains it, and the symbol
/// sequence is then encoded with <paramref name="builder"/>.
/// </summary>
/// <param name="invindex">Source index; indexer yields the postings list per symbol.</param>
/// <param name="builder">Encoder invoked over the symbol sequence and alphabet size.</param>
/// <returns>The sequence-encoded inverted index.</returns>
public static SequenceInvertedIndex Build(InvertedIndex invindex, SequenceBuilder builder)
{
    var result = new SequenceInvertedIndex();

    // Invert the postings: symbolOf[objID] = symbol whose list holds objID.
    // NOTE(review): assumes each item id occurs in exactly one postings list.
    var symbolOf = new int[invindex.NumberOfItems];
    for (int sym = 0; sym < invindex.Count; ++sym)
    {
        foreach (var objID in invindex[sym])
        {
            symbolOf[objID] = sym;
        }
    }

    result.seq = builder.Invoke(symbolOf, invindex.Count);
    return result;
}
/// <summary>Creates a search engine sized for the given number of documents.</summary>
/// <param name="numDocuments">Corpus size used to dimension the inverted index.</param>
public ElasticSearch(int numDocuments)
{
    _numDocuments = numDocuments;
    _invertedIndex = new InvertedIndex(numDocuments);
}
/// <summary>
/// Copies every postings list of <paramref name="invindex"/>, in symbol
/// order, into a fresh Plain64InvertedIndex.
/// </summary>
public static Plain64InvertedIndex Build(InvertedIndex invindex)
{
    var result = new Plain64InvertedIndex();
    var symbols = invindex.Count;
    for (var sym = 0; sym < symbols; ++sym)
    {
        result.Add(invindex[sym]);
    }
    return result;
}
/// <summary>
/// Deserializes this instance from a binary stream: base-class state first,
/// then Width, then the inverted index. The read order must mirror the
/// corresponding Save(), so these statements cannot be reordered.
/// </summary>
public override void Load(BinaryReader Input)
{
    base.Load(Input);
    this.Width = Input.ReadInt32 ();
    this.invindex = GenericIO<InvertedIndex>.Load (Input);
}
/// <summary>
/// Builds the hash-table index over every object in <paramref name="db"/>:
/// each object is hashed into a Plain64InvertedIndex bucket, which is then
/// trimmed and optionally converted via <paramref name="create_invertedindex"/>.
/// Progress is written to the console roughly once per percent.
/// </summary>
/// <param name="db">Database of objects to index.</param>
/// <param name="width">Requested hash width in bits; table size is capped at 2^32 buckets.</param>
/// <param name="rand">Randomness source passed to PreBuild.</param>
/// <param name="create_invertedindex">Optional converter applied to the filled table; null keeps the plain table.</param>
/// <param name="get_item">Optional accessor hashed instead of DB[objID]; null hashes the DB object directly.</param>
public virtual void Build(MetricDB db, int width, Random rand, Func<InvertedIndex,InvertedIndex> create_invertedindex = null, Func<int,object> get_item = null)
{
    this.DB = db;
    this.Width = width;
    int len = this.DB.Count;
    // pc = items per 1% of progress; +1 avoids division by zero for tiny DBs.
    int pc = len / 100 + 1;
    // Cap the table at 2^32 buckets regardless of the requested width.
    int numbits = width > 32 ? 32 : width;
    Plain64InvertedIndex table = new Plain64InvertedIndex ();
    table.Initialize (1 << numbits);
    int maxhash = 0;
    // NOTE(review): PreBuild appears to seed hashing state from a sample
    // object — confirm against its implementation.
    this.PreBuild (rand, this.DB [0]);
    for (int objID = 0; objID < len; objID++) {
        if (objID % pc == 0) {
            Console.WriteLine ("Advance: {0:0.00}%, docid: {1}, total: {2}", objID * 100.0 / len, objID, len);
        }
        int hash;
        // Hash either the raw DB object or the caller-provided projection.
        if (get_item == null) {
            hash = this.ComputeHash (this.DB [objID]);
        } else {
            hash = this.ComputeHash (get_item (objID));
        }
        table.AddItem(hash, objID);
        // Track the largest bucket actually used so the table can be trimmed.
        if (hash > maxhash) {
            maxhash = hash;
        }
    }
    table.Trim (maxhash + 1);
    if (create_invertedindex == null) {
        this.invindex = table;
    } else {
        this.invindex = create_invertedindex (table);
    }
}
/// <summary>
/// Re-encodes every postings list of <paramref name="invindex"/>, in symbol
/// order, into a new CompressedInvertedIndex.
/// </summary>
public static CompressedInvertedIndex Build(InvertedIndex invindex)
{
    var compressed = new CompressedInvertedIndex();
    var symbols = invindex.Count;
    for (var sym = 0; sym < symbols; ++sym)
    {
        compressed.Add(invindex[sym]);
    }
    return compressed;
}