/// <summary>
/// Round-trip check for a <c>BTreeSetOidShort</c> of <c>Oid</c> values: one session adds the
/// Oids 0 .. number-1 and commits, a second session reopens the set by its Oid and asserts that
/// enumeration yields 0, 1, 2, ... and that exactly <paramref name="number"/> elements come back.
/// </summary>
/// <param name="number">How many Oids to store; also used as the first Placement argument.</param>
public void CreateCompareStruct(int number)
{
    Oid treeId;

    // Write phase: build, persist and fill the set.
    using (SessionNoServer writeSession = new SessionNoServer(systemDir))
    {
        Placement treePlacement = new Placement((UInt32)number, 1, 1, UInt16.MaxValue, UInt16.MaxValue);
        writeSession.BeginUpdate();
        BTreeSetOidShort<Oid> oidSet = new BTreeSetOidShort<Oid>(null, writeSession);
        oidSet.Persist(treePlacement, writeSession);
        treeId = oidSet.Oid;
        int next = 0;
        while (next < number)
        {
            oidSet.Add(new Oid((ulong)next));
            next++;
        }
        writeSession.Commit();
    }

    // Read phase: reopen and verify order and element count.
    using (SessionNoServer readSession = new SessionNoServer(systemDir))
    {
        readSession.BeginRead();
        BTreeSetOidShort<Oid> oidSet = (BTreeSetOidShort<Oid>)readSession.Open(treeId);
        int seen = 0;
        foreach (Oid current in oidSet)
        {
            // The element at position 'seen' must carry the id 'seen'.
            Assert.True(current.Id == (ulong)seen);
            seen++;
        }
        Assert.True(number == seen);
        readSession.Commit();
    }
}
/// <summary>
/// Builds the top level inverted index: for every word of every document in the repository the
/// matching lexicon entry gets its global count increased by the document's hit count (a new
/// <c>WordGlobal</c> is created, persisted and added when the lexicon has no entry yet), the
/// document is added to that entry's <c>DocumentHit</c>, and finally the document is marked as
/// indexed. Everything runs in a single update transaction; start and end are logged to the console.
/// </summary>
static void createTopLevelInvertedIndex()
{
    Console.WriteLine(DateTime.Now.ToString() + ", start creating top level inverted index");
    using (SessionNoServer session = new SessionNoServer(s_systemDir))
    {
        Placement globalWordPlacement = new Placement(Lexicon.PlaceInDatabase, 2, 1, 1000, 50000, true, false, UInt32.MaxValue, false);
        session.BeginUpdate();
        IndexRoot root = (IndexRoot)session.Open(Oid.Encode(IndexRoot.PlaceInDatabase, 1, 1));
        BTreeSetOidShort<Word> lexiconWords = root.lexicon.WordSet;
        BTreeSet<Document> allDocuments = root.repository.documentSet;
        Word globalWord = null;
        foreach (Document document in allDocuments)
        {
            foreach (Word localWord in document.WordSet)
            {
                WordHit localHit = document.WordHit[localWord];
                bool alreadyInLexicon = lexiconWords.TryGetKey(localWord, ref globalWord);
                if (!alreadyInLexicon)
                {
                    // First sighting of this word: create its global entry seeded with this document's count.
                    globalWord = new WordGlobal(localWord.aWord, session, (uint)localHit.Count);
                    globalWord.Persist(globalWordPlacement, session);
                    root.lexicon.WordSet.Add(globalWord);
                }
                else
                {
                    globalWord.GlobalCount += (uint)localHit.Count;
                }
                globalWord.DocumentHit.AddFast(document);
            }
            document.Indexed = true;
        }
        session.Commit();
        Console.WriteLine(DateTime.Now.ToString() + ", done creating top level inverted index");
    }
}
/// <summary>
/// Merges every not-yet-indexed document of <paramref name="indexRoot"/>'s repository into the
/// lexicon: each of the document's words either bumps the global count of the existing lexicon
/// entry or is added as a new persisted <c>WordGlobal</c>; the document is then recorded in the
/// entry's <c>DocumentHit</c> and flagged as indexed. Uses the instance's <c>session</c>.
/// </summary>
/// <param name="indexRoot">Root object giving access to the lexicon and the document repository.</param>
public void createGlobalInvertedIndex(IndexRoot indexRoot)
{
    Placement globalWordPlacement = new Placement(Lexicon.PlaceInDatabase, 2);
    BTreeSetOidShort<Word> lexiconWords = indexRoot.lexicon.WordSet;
    BTreeSet<Document> documents = indexRoot.repository.documentSet;
    Word globalWord = null;
    foreach (Document document in documents)
    {
        if (document.Indexed)
        {
            continue; // already merged into the lexicon
        }

        foreach (Word localWord in document.WordSet)
        {
            WordHit localHit = document.WordHit[localWord];
            if (!lexiconWords.TryGetKey(localWord, ref globalWord))
            {
                globalWord = new WordGlobal(localWord.aWord, session, (uint)localHit.Count);
                globalWord.Persist(globalWordPlacement, session);
                lexiconWords.Add(globalWord);
            }
            else
            {
                globalWord.GlobalCount += (uint)localHit.Count;
            }
            globalWord.DocumentHit.AddFast(document);
        }
        document.Indexed = true;
    }
}
/// <summary>Creates a Document that only carries an id; intended for lookups.</summary>
/// <param name="id">Id forwarded to the base constructor.</param>
public Document(UInt64 id)
    : base(id)
{
} // for lookups

/// <summary>
/// Creates a Document for <paramref name="url"/> with an empty word-hit map and an empty word set
/// that is ordered by a <c>HashCodeComparer</c>.
/// </summary>
/// <param name="url">Stored in the <c>url</c> field.</param>
/// <param name="indexRoot">Not used by this constructor.</param>
/// <param name="session">Session handed to the two BTree collections created here.</param>
public Document(string url, IndexRoot indexRoot, SessionBase session)
{
    this.url = url;
    HashCodeComparer<Word> wordComparer = new HashCodeComparer<Word>();

    m_wordHit = new BTreeMapOidShort<Word, WordHit>(null, session);
    m_wordHit.TransientBatchSize = 10000;

    wordSet = new BTreeSetOidShort<Word>(wordComparer, session, 1500, sizeof(int));
}
} // for lookups public Document(string url, IndexRoot indexRoot, SessionBase session) { this.url = url; HashCodeComparer <Word> hashCodeComparer = new HashCodeComparer <Word>(); m_wordHit = new BTreeMapOidShort <Word, WordHit>(null, session); m_wordHit.TransientBatchSize = 10000; wordSet = new BTreeSetOidShort <Word>(hashCodeComparer, session, 1500, sizeof(int)); }
/// <summary>
/// Round-trip check for a <c>BTreeSetOidShort</c> of <c>int</c> using the default comparer: the
/// set is filled with 0 .. number-1 (values strictly between 1000 and 20000 go through
/// <c>Add</c>, all others through <c>AddFast</c>), cleared, filled a second time the same way and
/// committed. A second session reopens it and asserts the values enumerate as 0, 1, 2, ... with
/// exactly <paramref name="number"/> elements.
/// </summary>
/// <param name="number">How many ints to store; also used as the first Placement argument.</param>
public void CreateDefaultCompareIntKey(int number)
{
    Oid treeId;
    using (SessionNoServer writeSession = new SessionNoServer(systemDir))
    {
        Placement treePlacement = new Placement((UInt32)number, 1, 1, UInt16.MaxValue, UInt16.MaxValue);
        writeSession.BeginUpdate();
        BTreeSetOidShort<int> intSet = new BTreeSetOidShort<int>(null, writeSession);
        intSet.Persist(treePlacement, writeSession);
        treeId = intSet.Oid;

        // One fill pass; run once before and once after Clear().
        Action fill = () =>
        {
            for (int value = 0; value < number; value++)
            {
                if (value <= 1000 || value >= 20000)
                {
                    intSet.AddFast(value);
                }
                else
                {
                    intSet.Add(value);
                }
            }
        };

        fill();
        intSet.Clear();
        fill();
        writeSession.Commit();
    }

    using (SessionNoServer readSession = new SessionNoServer(systemDir))
    {
        readSession.BeginRead();
        BTreeSetOidShort<int> intSet = (BTreeSetOidShort<int>)readSession.Open(treeId);
        int seen = 0;
        foreach (int value in intSet)
        {
            // The element at position 'seen' must equal 'seen'.
            Assert.True(value == seen);
            seen++;
        }
        Assert.True(number == seen);
        readSession.Commit();
    }
}
/// <summary>
/// Builds the per-document word index for the documents of one database. Starting at the
/// iterator position reached by <c>GoTo</c> on a lookup Document created from <c>db.Id</c>, it walks
/// <paramref name="documentSet"/> while the current document's page belongs to database
/// <paramref name="db"/>. Each document's lower-cased text is split with the regex
/// <c>[a-z][a-z.$]+</c>; every word not yet in the document's word set is persisted, added to
/// the set and given a new <c>WordHit</c>. Afterwards the database's updates are flushed and
/// its cached objects cleared.
/// </summary>
/// <param name="session">Session used to open documents and persist words.</param>
/// <param name="db">Database whose documents are indexed.</param>
/// <param name="documentSet">Set of all documents, iterated from the lookup position.</param>
static void createDocumentInvertedIndex(SessionBase session, Database db, BTreeSet<Document> documentSet)
{
    UInt32 targetDbNumber = db.DatabaseNumber;
    Document lookupKey = new Document(db.Id);
    Placement wordPlacement = new Placement(lookupKey.DatabaseNumber, 20000, 1, 25000, 65000, true, false, 1, false);
    Placement wordHitPlacement = new Placement(lookupKey.DatabaseNumber, 40000, 1, 25000, 65500, true, false, 1, false);
    //session.SetTraceDbActivity(db.DatabaseNumber);
    BTreeSetIterator<Document> iterator = documentSet.Iterator();
    iterator.GoTo(lookupKey);

    for (Document cursor = iterator.Current();
         cursor != null && cursor.Page.Database.DatabaseNumber == targetDbNumber;
         cursor = iterator.Next())
    {
        // if matching database is available, use it to speed up lookup
        Document doc = (Document)session.Open(cursor.Page.Database, cursor.Id);
        string text = doc.Content.Text.ToLower();
        MatchCollection wordMatches = Regex.Matches(text, "[a-z][a-z.$]+");
        UInt64 wordCt = 0;

        if (++s_docCountIndexed % 50000 == 0)
        {
            Console.WriteLine(DateTime.Now.ToString() + ", done indexing article: " + s_docCountIndexed);
        }

        BTreeSetOidShort<Word> wordSet = doc.WordSet;
        foreach (Match match in wordMatches)
        {
            Word word = new Word(match.Value);
            if (wordSet.TryGetKey(word, ref word))
            {
                // Word already known for this document. Recording the additional hit
                // (doc.WordHit[word].Add(wordCt)) is too costly right now - figure out a better way?
                continue;
            }

            // TryGetKey received 'word' by ref, so start again from a fresh instance.
            word = new Word(match.Value);
            word.Persist(wordPlacement, session);
            wordSet.Add(word);
            WordHit wordHit = new WordHit(doc, wordCt++, session);
            //wordHit.Persist(wordHitPlacement, session);
            doc.WordHit.ValuePlacement = wordHitPlacement;
            doc.WordHit.AddFast(word, wordHit);
        }
    }

    session.FlushUpdates(db);
    session.ClearCachedObjects(db); // free up memory for objects we no longer need to have cached
    Console.WriteLine(DateTime.Now.ToString() + ", done indexing article: " + s_docCountIndexed + " Database: " + targetDbNumber + " is completed.");
}
/// <summary>
/// Writes a summary of the lexicon to the file "Wikipedia.txt": first the number of words, then
/// one line per word containing the word text and the count of its <c>DocumentHit</c> collection.
/// </summary>
/// <param name="session">Session used to open the IndexRoot object.</param>
static void outputSomeInfo(SessionNoServer session)
{
    IndexRoot indexRoot = (IndexRoot)session.Open(Oid.Encode(IndexRoot.PlaceInDatabase, 1, 1));
    BTreeSetOidShort<Word> wordSet = indexRoot.lexicon.WordSet;

    // The using statement disposes (flushes and closes) the writer on every exit path,
    // so no explicit Close() call is needed.
    using (StreamWriter writer = new StreamWriter("Wikipedia.txt"))
    {
        writer.WriteLine("Number of words in Lexicon is: " + wordSet.Count);
        foreach (Word word in wordSet)
        {
            writer.WriteLine(word.aWord + " " + word.DocumentHit.Count);
        }
    }
}
/// <summary>
/// Click handler that exercises the BTree collections. In an update transaction it creates
/// several string sets/maps, builds a <c>BTreeSet</c> of <c>Person</c> ordered by the
/// "idNumber" field, persists the set, adds 10000 new Person objects with <c>AddFast</c> and
/// commits. A second session then fetches the first <c>BTreeSet</c> of Person it finds and
/// prints the first person whose <c>IdNumber</c> exceeds 196988888791402.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void Button_Click(object sender, RoutedEventArgs e)
{
    using (SessionNoServer writeSession = new SessionNoServer(systemDir))
    {
        Console.WriteLine("Running with databases in directory: " + writeSession.SystemDirectory);
        const UInt32 numberOfPersons = 10000;
        const ushort nodeMaxSize = 5000;
        const ushort comparisonByteArraySize = sizeof(UInt64); // enough room to hold entire idNumber of a Person
        const bool comparisonArrayIsCompleteKey = true;
        const bool addIdCompareIfEqual = false;

        writeSession.BeginUpdate();
        writeSession.DefaultDatabaseLocation().CompressPages = PageInfo.compressionKind.None;
        //mySession.SetTraceAllDbActivity();

        // These string collections are only constructed; nothing below reads them.
        BTreeSet<string> stringSet = new BTreeSet<string>(null, writeSession);
        BTreeSetOidShort<string> stringSetShort = new BTreeSetOidShort<string>(null, writeSession);
        BTreeMap<string, string> stringMap = new BTreeMap<string, string>(null, writeSession);
        BTreeMapOidShort<string, string> stringMapShort = new BTreeMapOidShort<string, string>(null, writeSession);

        CompareByField<Person> byIdNumber = new CompareByField<Person>("idNumber", writeSession, addIdCompareIfEqual);
        BTreeSet<Person> personTree = new BTreeSet<Person>(byIdNumber, writeSession, nodeMaxSize, comparisonByteArraySize, comparisonArrayIsCompleteKey);

        // Persist the root of the BTree so that we have something persisted that can be
        // flushed to disk if memory available becomes low
        writeSession.Persist(personTree);

        int added = 0;
        while (added < numberOfPersons)
        {
            // session.Persist(person);
            personTree.AddFast(new Person());
            added++;
        }
        writeSession.Commit();
    }

    using (SessionNoServer readSession = new SessionNoServer(systemDir))
    {
        readSession.UseExternalStorageApi = true;
        readSession.BeginRead();
        BTreeSet<Person> personTree = readSession.AllObjects<BTreeSet<Person>>().First();
        IEnumerable<Person> people = (IEnumerable<Person>)personTree;
        foreach (Person candidate in people)
        {
            if (candidate.IdNumber <= 196988888791402)
            {
                continue;
            }

            // Only the first match is printed.
            Console.WriteLine(candidate);
            break;
        }
        readSession.Commit();
    }
}
/// <summary>
/// Click handler demonstrating the BTree collections: the first session (update transaction)
/// constructs a few string sets and maps, creates a <c>BTreeSet</c> of <c>Person</c> compared
/// by the "idNumber" field, persists it, fills it with 10000 new Person objects via
/// <c>AddFast</c> and commits. The second session (read transaction) takes the first
/// <c>BTreeSet</c> of Person returned by <c>AllObjects</c> and writes the first person with an
/// <c>IdNumber</c> above 196988888791402 to the console.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void Button_Click(object sender, RoutedEventArgs e)
{
    // Phase 1: create and populate.
    using (SessionNoServer updateSession = new SessionNoServer(systemDir))
    {
        Console.WriteLine("Running with databases in directory: " + updateSession.SystemDirectory);
        const UInt32 numberOfPersons = 10000;
        const ushort nodeMaxSize = 5000;
        const ushort comparisonByteArraySize = sizeof(UInt64); // enough room to hold entire idNumber of a Person
        const bool comparisonArrayIsCompleteKey = true;
        const bool addIdCompareIfEqual = false;

        updateSession.BeginUpdate();
        updateSession.DefaultDatabaseLocation().CompressPages = PageInfo.compressionKind.None;
        //mySession.SetTraceAllDbActivity();

        // Constructed but not referenced again in this method.
        BTreeSet<string> stringSet = new BTreeSet<string>(null, updateSession);
        BTreeSetOidShort<string> stringSetShort = new BTreeSetOidShort<string>(null, updateSession);
        BTreeMap<string, string> stringMap = new BTreeMap<string, string>(null, updateSession);
        BTreeMapOidShort<string, string> stringMapShort = new BTreeMapOidShort<string, string>(null, updateSession);

        CompareByField<Person> idComparer = new CompareByField<Person>("idNumber", updateSession, addIdCompareIfEqual);
        BTreeSet<Person> persons = new BTreeSet<Person>(idComparer, updateSession, nodeMaxSize, comparisonByteArraySize, comparisonArrayIsCompleteKey);

        // Persist the root of the BTree so that we have something persisted that can be
        // flushed to disk if memory available becomes low
        updateSession.Persist(persons);

        for (int remaining = 0; remaining < numberOfPersons; remaining++)
        {
            Person newPerson = new Person();
            // session.Persist(person);
            persons.AddFast(newPerson);
        }
        updateSession.Commit();
    }

    // Phase 2: reopen and scan.
    using (SessionNoServer querySession = new SessionNoServer(systemDir))
    {
        querySession.UseExternalStorageApi = true;
        querySession.BeginRead();
        BTreeSet<Person> persons = querySession.AllObjects<BTreeSet<Person>>().First();
        foreach (Person current in (IEnumerable<Person>)persons)
        {
            bool aboveThreshold = current.IdNumber > 196988888791402;
            if (aboveThreshold)
            {
                Console.WriteLine(current);
                break; // stop at the first match
            }
        }
        querySession.Commit();
    }
}
/// <summary>
/// Records one occurrence of <paramref name="word"/> in <paramref name="doc"/>. If the
/// document's word set already holds the word, <paramref name="wordCt"/> is added to its
/// existing <c>WordHit</c>; otherwise the word is persisted, added to the word set and a new
/// <c>WordHit</c> created from <paramref name="wordCt"/> is stored in the document's word-hit map.
/// Uses the instance's <c>session</c>.
/// </summary>
/// <param name="doc">Document being indexed.</param>
/// <param name="word">Word to record; replaced by the stored instance when one is found.</param>
/// <param name="wordCt">
/// Value recorded in the hit. It is passed by value, so this method cannot advance the
/// caller's counter.
/// </param>
/// <param name="wordPlacement">Placement used when the word is persisted.</param>
/// <param name="wordHitPlacement">Assigned to the word-hit map's <c>ValuePlacement</c> before a new hit is added.</param>
public void createLocalInvertedIndex(Document doc, Word word, UInt64 wordCt, Placement wordPlacement, Placement wordHitPlacement)
{
    WordHit wordHit;
    BTreeSetOidShort<Word> wordSet = doc.WordSet;
    if (wordSet.TryGetKey(word, ref word))
    {
        wordHit = doc.WordHit[word];
        wordHit.Add(wordCt);
    }
    else
    {
        word.Persist(wordPlacement, session);
        wordSet.Add(word);
        // No post-increment here: wordCt is a by-value parameter that is not read again,
        // so incrementing it had no effect and only suggested otherwise.
        wordHit = new WordHit(doc, wordCt, session);
        doc.WordHit.ValuePlacement = wordHitPlacement;
        doc.WordHit.AddFast(word, wordHit);
    }
}
/// <summary>
/// Creates a <c>BTreeSetOidShort</c> of <c>Tick</c> ordered by the Bid backing field and fills
/// it with randomly generated ticks. A first read session issues four <c>Open</c> calls and
/// commits; the second session persists every generated tick, tries to add it to the set and
/// counts how many were added versus rejected by <c>Add</c>, then commits and reports both
/// counts on the console.
/// </summary>
/// <param name="numberOfTicks">Number of ticks to generate; also the first argument of both placements.</param>
/// <param name="nodeSize">Node size handed to the BTreeSetOidShort constructor (cast to UInt16).</param>
public void CreateTicksCompareFieldsOidShort(int numberOfTicks, int nodeSize)
{
    using (SessionNoServer warmUpSession = new SessionNoServer(systemDir))
    {
        warmUpSession.BeginRead();
        warmUpSession.Open(10, 1, 1, false);
        warmUpSession.Open(10, 1, 2, false);
        warmUpSession.Open(10, 2, 1, false);
        warmUpSession.Open(10, 2, 2, false);
        warmUpSession.Commit();
    }

    using (SessionNoServer session = new SessionNoServer(systemDir, 2000, false))
    {
        //session.SetTraceAllDbActivity();
        //session.ClientCache.MinimumAvailableMegaBytes = 1100;
        session.BeginUpdate();
        CompareByField<Tick> byBid = new CompareByField<Tick>("<Bid>k__BackingField", session, true);
        //compareByField.AddFieldToCompare("<Timestamp>k__BackingField");
        BTreeSetOidShort<Tick> tickTree = new BTreeSetOidShort<Tick>(byBid, session, (UInt16)nodeSize, sizeof(double), true);
        Placement treePlacement = new Placement((UInt32)numberOfTicks, 1, 1, UInt16.MaxValue, UInt16.MaxValue);
        Placement tickPlacement = new Placement((UInt32)numberOfTicks, 10000, 1, UInt16.MaxValue, UInt16.MaxValue);
        tickTree.Persist(treePlacement, session);

        int addedCount = 0;
        int rejectedCount = 0;
        foreach (Tick tick in Tick.GenerateRandom((ulong)numberOfTicks))
        {
            session.Persist(tick, tickPlacement);
            bool wasAdded = tickTree.Add(tick);
            if (!wasAdded)
            {
                rejectedCount++;
                continue;
            }
            addedCount++;
        }

        session.Commit();
        Console.WriteLine("Done creating and sorting with BTreeSetOidShort<Tick>" + addedCount + " Tick objects by Bid value. Number of dublicates (not added to BTreeSet): " + rejectedCount);
    }
}
/// <summary>
/// Filters a BTree collection with the predicate <paramref name="expr"/>, using the
/// collection's index when <c>canUseIndex</c> and the operator-specific <c>*UseIndex</c> check
/// allow it and otherwise enumerating the whole collection through the compiled predicate.
/// A null <paramref name="sourceCollection"/> yields an empty sequence.
/// </summary>
/// <remarks>
/// For an <c>AndAlso</c> predicate that cannot be answered by <c>And</c>, the left operand
/// alone is tried against the index and its candidates are re-filtered with the complete
/// predicate. If the left operand is not a binary expression, or its comparison cannot use
/// the index, the method falls back to the full scan.
/// </remarks>
/// <typeparam name="Key">Element type of the collection.</typeparam>
/// <param name="sourceCollection">Collection to query; may be null.</param>
/// <param name="expr">Predicate expression to evaluate.</param>
/// <returns>The elements of the collection that satisfy <paramref name="expr"/>.</returns>
public static IEnumerable<Key> Where<Key>(this BTreeBase<Key, Key> sourceCollection, Expression<Func<Key, bool>> expr)
#if WINDOWS_PHONE
    where Key : new()
#endif
{
    if (sourceCollection != null)
    {
        bool noIndex = true;
        SessionBase session = sourceCollection.Session;
        CompareByField<Key> comparer = sourceCollection.Comparer as CompareByField<Key>;
        BinaryExpression binExp = expr.Body as BinaryExpression;
        if (binExp != null && canUseIndex<Key>(sourceCollection, binExp, comparer))
        {
            session.WaitForIndexUpdates();
            switch (expr.Body.NodeType)
            {
                case ExpressionType.AndAlso:
                {
                    noIndex = AndUseIndex(sourceCollection, binExp) == false;
                    if (noIndex == false)
                    {
                        foreach (var x in And<Key>(sourceCollection, binExp))
                        {
                            yield return x;
                        }
                    }
                    else
                    {
                        // Try to narrow the scan with the left operand only. 'as' instead of a hard
                        // cast: a non-binary left operand simply means "no index" rather than an
                        // InvalidCastException.
                        BinaryExpression leftExpr = binExp.Left as BinaryExpression;
                        IEnumerable<Key> candidates = null;
                        if (leftExpr != null)
                        {
                            switch (leftExpr.NodeType)
                            {
                                case ExpressionType.Equal:
                                    if (EqualUseIndex<Key>(sourceCollection, leftExpr))
                                    {
                                        candidates = Equal<Key>(sourceCollection, leftExpr);
                                    }
                                    break;
                                case ExpressionType.LessThan:
                                    if (LessThanUseIndex<Key>(sourceCollection, leftExpr))
                                    {
                                        candidates = LessThan<Key>(sourceCollection, leftExpr);
                                    }
                                    break;
                                case ExpressionType.LessThanOrEqual:
                                    if (LessThanOrEqualUseIndex<Key>(sourceCollection, leftExpr))
                                    {
                                        candidates = LessThanOrEqual<Key>(sourceCollection, leftExpr);
                                    }
                                    break;
                                case ExpressionType.GreaterThan:
                                    if (GreaterThanUseIndex<Key>(sourceCollection, leftExpr))
                                    {
                                        candidates = GreaterThan<Key>(sourceCollection, leftExpr);
                                    }
                                    break;
                                case ExpressionType.GreaterThanOrEqual:
                                    if (GreaterThanOrEqualUseIndex<Key>(sourceCollection, leftExpr))
                                    {
                                        candidates = GreaterThanOrEqual<Key>(sourceCollection, leftExpr);
                                    }
                                    break;
                            }
                        }

                        if (candidates != null)
                        {
                            // The index only satisfied the left operand, so apply the complete
                            // predicate to the candidates.
                            noIndex = false;
                            foreach (Key resultItem in candidates.Where<Key>(expr.Compile()))
                            {
                                yield return resultItem;
                            }
                        }
                        // Otherwise noIndex stays true and the full scan below runs. Previously an
                        // unconditional 'yield break' ended the sequence here with no results.
                    }
                }
                break;
                case ExpressionType.Equal:
                {
                    noIndex = EqualUseIndex<Key>(sourceCollection, binExp) == false;
                    if (noIndex == false)
                    {
                        foreach (var x in Equal<Key>(sourceCollection, binExp))
                        {
                            yield return x;
                        }
                    }
                }
                break;
                case ExpressionType.GreaterThan:
                {
                    noIndex = GreaterThanUseIndex<Key>(sourceCollection, binExp) == false;
                    if (noIndex == false)
                    {
                        foreach (var x in GreaterThan<Key>(sourceCollection, binExp))
                        {
                            yield return x;
                        }
                    }
                }
                break;
                case ExpressionType.GreaterThanOrEqual:
                {
                    noIndex = GreaterThanOrEqualUseIndex<Key>(sourceCollection, binExp) == false;
                    if (noIndex == false)
                    {
                        foreach (var x in GreaterThanOrEqual<Key>(sourceCollection, binExp))
                        {
                            yield return x;
                        }
                    }
                }
                break;
                case ExpressionType.LessThan:
                {
                    noIndex = LessThanUseIndex<Key>(sourceCollection, binExp) == false;
                    if (noIndex == false)
                    {
                        foreach (var x in LessThan<Key>(sourceCollection, binExp))
                        {
                            yield return x;
                        }
                    }
                }
                break;
                case ExpressionType.LessThanOrEqual:
                {
                    noIndex = LessThanOrEqualUseIndex<Key>(sourceCollection, binExp) == false;
                    if (noIndex == false)
                    {
                        foreach (var x in LessThanOrEqual<Key>(sourceCollection, binExp))
                        {
                            yield return x;
                        }
                    }
                }
                break;
            }
        }

        if (noIndex) //no index? just do it the normal slow way then...
        {
            IEnumerable<Key> sourceEnum;
            if (sourceCollection.UsesOidShort)
            {
                BTreeSetOidShort<Key> c = (BTreeSetOidShort<Key>)sourceCollection;
                sourceEnum = c.AsEnumerable<Key>();
            }
            else
            {
                BTreeSet<Key> c = (BTreeSet<Key>)sourceCollection;
                sourceEnum = c.AsEnumerable<Key>();
            }
            IEnumerable<Key> result = sourceEnum.Where<Key>(expr.Compile());
            foreach (Key resultItem in result)
            {
                yield return resultItem;
            }
        }
    }
}
/// <summary>
/// Fills a <c>BTreeSetOidShort</c> of <c>Tick</c>, compared by the Bid backing field, with
/// randomly generated ticks. First a read session performs four <c>Open</c> calls and commits.
/// Then an update session persists each generated tick, attempts to add it to the set, tallies
/// accepted and rejected ticks, commits, and prints both tallies.
/// </summary>
/// <param name="numberOfTicks">Number of ticks to generate; also the first argument of both placements.</param>
/// <param name="nodeSize">Node size handed to the BTreeSetOidShort constructor (cast to UInt16).</param>
public void CreateTicksCompareFieldsOidShort(int numberOfTicks, int nodeSize)
{
    using (SessionNoServer readSession = new SessionNoServer(systemDir))
    {
        readSession.BeginRead();
        readSession.Open(10, 1, 1, false);
        readSession.Open(10, 1, 2, false);
        readSession.Open(10, 2, 1, false);
        readSession.Open(10, 2, 2, false);
        readSession.Commit();
    }

    using (SessionNoServer updateSession = new SessionNoServer(systemDir, 2000, false))
    {
        //session.SetTraceAllDbActivity();
        //session.ClientCache.MinimumAvailableMegaBytes = 1100;
        updateSession.BeginUpdate();
        CompareByField<Tick> bidComparer = new CompareByField<Tick>("<Bid>k__BackingField", updateSession, true);
        //compareByField.AddFieldToCompare("<Timestamp>k__BackingField");
        BTreeSetOidShort<Tick> ticksByBid = new BTreeSetOidShort<Tick>(bidComparer, updateSession, (UInt16)nodeSize, sizeof(double), true);
        Placement setPlacement = new Placement((UInt32)numberOfTicks, 1, 1, UInt16.MaxValue, UInt16.MaxValue);
        Placement recordPlacement = new Placement((UInt32)numberOfTicks, 10000, 1, UInt16.MaxValue, UInt16.MaxValue);
        ticksByBid.Persist(setPlacement, updateSession);

        int accepted = 0;
        int rejected = 0;
        foreach (var record in Tick.GenerateRandom((ulong)numberOfTicks))
        {
            updateSession.Persist(record, recordPlacement);
            if (ticksByBid.Add(record) == false)
            {
                rejected++; // Add reported the tick was not inserted
            }
            else
            {
                accepted++;
            }
        }

        updateSession.Commit();
        Console.WriteLine("Done creating and sorting with BTreeSetOidShort<Tick>" + accepted + " Tick objects by Bid value. Number of dublicates (not added to BTreeSet): " + rejected);
    }
}
/// <summary>
/// Stores the Oids 0 .. number-1 in a persisted <c>BTreeSetOidShort</c> of <c>Oid</c>, commits,
/// then reopens the set in a new session and asserts that the elements enumerate as
/// 0, 1, 2, ... and that their total equals <paramref name="number"/>.
/// </summary>
/// <param name="number">How many Oids to store; also used as the first Placement argument.</param>
public void CreateCompareStruct(int number)
{
    Oid setOid;
    using (SessionNoServer updateSession = new SessionNoServer(systemDir))
    {
        Placement setPlacement = new Placement((UInt32)number, 1, 1, UInt16.MaxValue, UInt16.MaxValue);
        updateSession.BeginUpdate();
        BTreeSetOidShort<Oid> oids = new BTreeSetOidShort<Oid>(null, updateSession);
        oids.Persist(setPlacement, updateSession);
        setOid = oids.Oid;
        for (int index = 0; index != number && index < number; index++)
        {
            Oid element = new Oid((ulong)index);
            oids.Add(element);
        }
        updateSession.Commit();
    }

    using (SessionNoServer verifySession = new SessionNoServer(systemDir))
    {
        verifySession.BeginRead();
        BTreeSetOidShort<Oid> oids = (BTreeSetOidShort<Oid>)verifySession.Open(setOid);
        int position = 0;
        foreach (Oid element in oids)
        {
            ulong expectedId = (ulong)position;
            position++;
            Assert.True(element.Id == expectedId);
        }
        // 'position' now equals the number of elements enumerated.
        Assert.True(number == position);
        verifySession.Commit();
    }
}
/// <summary>
/// Fills a persisted <c>BTreeSetOidShort</c> of <c>int</c> (default comparer) with
/// 0 .. number-1, clears it, fills it again and commits; values strictly between 1000 and 20000
/// are inserted with <c>Add</c>, the rest with <c>AddFast</c>. A second session reopens the set
/// and asserts that it enumerates 0, 1, 2, ... and contains <paramref name="number"/> elements.
/// </summary>
/// <param name="number">How many ints to store; also used as the first Placement argument.</param>
public void CreateDefaultCompareIntKey(int number)
{
    Oid setOid;
    using (SessionNoServer updateSession = new SessionNoServer(systemDir))
    {
        Placement setPlacement = new Placement((UInt32)number, 1, 1, UInt16.MaxValue, UInt16.MaxValue);
        updateSession.BeginUpdate();
        BTreeSetOidShort<int> numbers = new BTreeSetOidShort<int>(null, updateSession);
        numbers.Persist(setPlacement, updateSession);
        setOid = numbers.Oid;

        // Two identical fill passes with a Clear() between them.
        for (int pass = 0; pass < 2; pass++)
        {
            if (pass == 1)
            {
                numbers.Clear();
            }

            for (int key = 0; key < number; key++)
            {
                bool useCheckedAdd = key > 1000 && key < 20000;
                if (useCheckedAdd)
                {
                    numbers.Add(key);
                }
                else
                {
                    numbers.AddFast(key);
                }
            }
        }
        updateSession.Commit();
    }

    using (SessionNoServer verifySession = new SessionNoServer(systemDir))
    {
        verifySession.BeginRead();
        BTreeSetOidShort<int> numbers = (BTreeSetOidShort<int>)verifySession.Open(setOid);
        int position = 0;
        foreach (int key in numbers)
        {
            int expected = position;
            position++;
            Assert.True(key == expected);
        }
        // 'position' now equals the number of elements enumerated.
        Assert.True(number == position);
        verifySession.Commit();
    }
}
/// <summary>
/// Creates a lexicon whose word set is a new <c>BTreeSetOidShort</c> of <c>Word</c> built from
/// the given comparer, session and node size.
/// </summary>
/// <param name="nodeSize">Node size passed to the word set constructor.</param>
/// <param name="hashComparer">Comparer passed to the word set constructor.</param>
/// <param name="session">Session passed to the word set constructor.</param>
public Lexicon(ushort nodeSize, HashCodeComparer<Word> hashComparer, SessionBase session)
{
    // Fourth constructor argument; presumably the comparison key size in bytes - TODO confirm.
    const int comparisonBytes = sizeof(int);
    wordSet = new BTreeSetOidShort<Word>(hashComparer, session, nodeSize, comparisonBytes);
}
/// <summary>
/// Initializes the lexicon by creating its word set: a <c>BTreeSetOidShort</c> of <c>Word</c>
/// constructed with <paramref name="hashComparer"/>, <paramref name="session"/>,
/// <paramref name="nodeSize"/> and <c>sizeof(int)</c>.
/// </summary>
/// <param name="nodeSize">Node size forwarded to the word set.</param>
/// <param name="hashComparer">Comparer forwarded to the word set.</param>
/// <param name="session">Session forwarded to the word set.</param>
public Lexicon(ushort nodeSize, HashCodeComparer<Word> hashComparer, SessionBase session)
{
    BTreeSetOidShort<Word> words = new BTreeSetOidShort<Word>(
        hashComparer,
        session,
        nodeSize,
        sizeof(int));
    wordSet = words;
}