// End-to-end throughput check: splits the sample code text into lines, splits every line with
// AlphanumericSplitter, and adds each not-yet-seen part with Length > 2 to a StringStore and a MemberIndex.
public void AlphanumericSplitter_EndToEndPerformance()
{
    String8 code = AllCodeText.AllCode8;
    PartialArray<int> matchContainer = new PartialArray<int>(2048);
    HashSet<String8> uniqueWords = new HashSet<String8>();
    StringStore strings = new StringStore();
    MemberIndex index = new MemberIndex();
    int iterations = 10;
    int totalWordsSplit = 0;

    // Split, Add, Index Goal: 30k per millisecond [30 MB/sec]
    Verify.PerformanceByBytes(30 * LongExtensions.Megabyte, () =>
    {
        for (int pass = 0; pass < iterations; ++pass)
        {
            String8Set lines = code.Split(UTF8.Newline, new PartialArray<int>());

            for (int lineIndex = 0; lineIndex < lines.Count; ++lineIndex)
            {
                // Split the current line; matchContainer is reused across lines
                String8Set parts = AlphanumericSplitter.Split(lines[lineIndex], ref matchContainer);
                totalWordsSplit += parts.Count;

                if (parts.Count == 0)
                {
                    continue;
                }

                // Visit every other part: start at part 0 when it begins with an
                // alphanumeric character, otherwise start at part 1
                int partIndex;
                if (AlphanumericSplitter.IsAlphaNumeric(parts[0][0]))
                {
                    partIndex = 0;
                }
                else
                {
                    partIndex = 1;
                }

                while (partIndex < parts.Count)
                {
                    String8 word = parts[partIndex];
                    partIndex += 2;

                    // Skip short words and words which were already recorded
                    if (word.Length <= 2 || uniqueWords.Contains(word))
                    {
                        continue;
                    }

                    // Remember the copy held by the StringStore, not the slice of the line
                    int wordIdentifier = strings.FindOrAddString(word);
                    uniqueWords.Add(strings[wordIdentifier]);
                    index.AddItem(wordIdentifier, lineIndex);
                }
            }
        }

        return (iterations * code.Length);
    });
}
// Builds a small MemberIndex keyed by number words and checks exact-word and prefix lookups.
public void MemberIndex_Basic()
{
    StringStore strings = new StringStore();
    MemberIndex index = new MemberIndex();

    // Put six sample words into the StringStore, remembering the ID returned for each
    string[] words = { "Zero", "One", "Two", "Three", "Four", "Five" };
    int[] wordIDs = new int[words.Length];
    for (int i = 0; i < words.Length; ++i)
    {
        wordIDs[i] = strings.FindOrAddString(words[i]);
    }

    // Index items 1..19: each item is added under the word for every divisor 1..5 it has
    // (so 10 is under "One", "Two" and "Five"); nothing is ever added under "Zero"
    for (int itemId = 1; itemId < 20; ++itemId)
    {
        for (int divisor = 1; divisor < wordIDs.Length; ++divisor)
        {
            if (itemId % divisor != 0)
            {
                continue;
            }

            index.AddItem(wordIDs[divisor], itemId);
        }
    }

    // Convert both structures for search
    strings.ConvertToImmutable();
    index.ConvertToImmutable(strings);

    // "Three": exact word, then the same text used as a prefix
    String8 threeWord = strings[wordIDs[3]];
    Assert.AreEqual("3, 6, 9, 12, 15, 18", MatchesForWordToString(index, strings, threeWord));
    String8 threePrefix = String8.Convert("Three", new byte[String8.GetLength("Three")]);
    Assert.AreEqual("3, 6, 9, 12, 15, 18", MatchesForPrefixToString(index, strings, threePrefix));

    // "Five": exact word, then prefix
    String8 fiveWord = strings[wordIDs[5]];
    Assert.AreEqual("5, 10, 15", MatchesForWordToString(index, strings, fiveWord));
    String8 fivePrefix = String8.Convert("Five", new byte[String8.GetLength("Five")]);
    Assert.AreEqual("5, 10, 15", MatchesForPrefixToString(index, strings, fivePrefix));

    // "Zero" has no items, by word or by prefix
    String8 zeroWord = strings[wordIDs[0]];
    Assert.AreEqual("", MatchesForWordToString(index, strings, zeroWord));
    String8 zeroPrefix = String8.Convert("Zero", new byte[String8.GetLength("Zero")]);
    Assert.AreEqual("", MatchesForPrefixToString(index, strings, zeroPrefix));

    // Prefix "F" is expected to report the "Five" items followed by the "Four" items
    String8 prefixF = String8.Convert("F", new byte[String8.GetLength("F")]);
    Assert.AreEqual("5, 10, 15, 4, 8, 12, 16", MatchesForPrefixToString(index, strings, prefixF));
}
/// <summary>
///  Walk 'path' one part at a time starting from 'rootIndex'. For each part, the name is
///  found in or added to 'strings', and the child with that name under the current node
///  is located with TryFindChildByName or, when missing, created with Add.
/// </summary>
/// <param name="rootIndex">Index of the node to start from</param>
/// <param name="path">Path parts to follow, in order</param>
/// <param name="strings">StringStore used to get the identifier for each part name</param>
/// <returns>Index of the node reached for the last part; rootIndex if path has no parts</returns>
public int AddPath(int rootIndex, String8Set path, StringStore strings)
{
    int nodeIndex = rootIndex;

    for (int depth = 0; depth < path.Count; ++depth)
    {
        int nameIdentifier = strings.FindOrAddString(path[depth]);

        // Reuse an existing child with this name, otherwise add one
        int childIndex;
        bool childExists = TryFindChildByName(nodeIndex, nameIdentifier, out childIndex);
        if (!childExists)
        {
            childIndex = Add(nodeIndex, nameIdentifier);
        }

        // Descend into the child before handling the next part
        nodeIndex = childIndex;
    }

    return nodeIndex;
}
// Checks that word lookups in a converted MemberIndex return the items of every casing of the word.
public void MemberIndex_CaseSensitivity()
{
    StringStore strings = new StringStore();
    MemberIndex index = new MemberIndex();
    byte[] buffer = new byte[20];

    // Ten values, some of which differ only by casing
    string[] words = { "null", "bool", "Bool", "array", "ARRAY", "Collections", "Dictionary", "int", "Int", "friend" };
    int[] wordIDs = new int[words.Length];
    for (int i = 0; i < words.Length; ++i)
    {
        wordIDs[i] = strings.FindOrAddString(words[i]);
    }

    // Three passes over the words with a running item ID:
    // [0, 10, 20 => "null", 1, 11, 21 => "bool", 2, 12, 22 => "Bool", ...]
    int itemCount = 3 * wordIDs.Length;
    for (int itemId = 0; itemId < itemCount; ++itemId)
    {
        index.AddItem(wordIDs[itemId % wordIDs.Length], itemId);
    }

    // Convert for search. The assertions below expect casings of one word to be merged in item ID order
    strings.ConvertToImmutable();
    index.ConvertToImmutable(strings);

    // "BOOL" should report the items of "bool" and "Bool"
    String8 boolQuery = String8.Convert("BOOL", buffer);
    Assert.AreEqual("1, 2, 11, 12, 21, 22", MatchesForWordToString(index, strings, boolQuery));

    // "array" should report the items of "array" and "ARRAY"
    String8 arrayQuery = String8.Convert("array", buffer);
    Assert.AreEqual("3, 4, 13, 14, 23, 24", MatchesForWordToString(index, strings, arrayQuery));

    // "Dictionary" has a single casing, so only its own items come back
    String8 dictionaryQuery = String8.Convert("Dictionary", buffer);
    Assert.AreEqual("6, 16, 26", MatchesForWordToString(index, strings, dictionaryQuery));
}
// Checks that StringStore keeps exact casing on add and after ConvertToImmutable, that the
// converted store is ordered, and that TryFindString reports the expected ranges.
public void StringStore_CaseSensitivity()
{
    // Samples are unordered, contain casing variants, and contain exact duplicates
    StringStore store = new StringStore();
    string[] samples = { "bool", "bool", "boolean", "Boolean", "BOOLEAN", "array", "Array", "aRRay", "ARRAY", "Array", "Collections", "ARR", "BIT" };

    // Add every sample, keeping the identifier returned for it
    int[] addedIDs = new int[samples.Length];
    for (int i = 0; i < samples.Length; ++i)
    {
        addedIDs[i] = store.FindOrAddString(samples[i]);
    }

    // Each identifier must give back the exact casing which was added
    for (int i = 0; i < samples.Length; ++i)
    {
        Assert.AreEqual(samples[i], store[addedIDs[i]].ToString());
    }

    // Convert, then translate each identifier with GetSerializationIdentifier
    store.ConvertToImmutable();

    int[] convertedIDs = new int[samples.Length];
    for (int i = 0; i < samples.Length; ++i)
    {
        convertedIDs[i] = store.GetSerializationIdentifier(addedIDs[i]);
    }

    // The exact casing must survive the conversion
    for (int i = 0; i < samples.Length; ++i)
    {
        Assert.AreEqual(samples[i], store[convertedIDs[i]].ToString());
    }

    // Walk the store by ascending ID, comparing each value to the one before it
    string previous = store[0].ToString();
    for (int id = 1; id < store.Count; ++id)
    {
        string current = store[id].ToString();

        // Values must be in case-insensitive order
        int insensitiveOrder = string.Compare(previous, current, StringComparison.OrdinalIgnoreCase);
        Assert.IsTrue(insensitiveOrder <= 0);

        // Values which tie case-insensitively must be in strict ordinal order
        if (insensitiveOrder == 0)
        {
            int ordinalOrder = string.Compare(previous, current, StringComparison.Ordinal);
            Assert.IsTrue(ordinalOrder < 0);
        }

        previous = current;
    }

    // Search for every sample and check the reported ranges
    byte[] buffer = new byte[20];
    for (int i = 0; i < samples.Length; ++i)
    {
        String8 value8 = String8.Convert(samples[i], buffer);

        // The value must be found, and the range must include the ID of this exact casing
        Range range;
        bool found = store.TryFindString(value8, out range);
        Assert.IsTrue(found);
        Assert.IsTrue(range.Contains(convertedIDs[i]));

        // Everything inside the range equals the value, ignoring case
        for (int id = range.Start; id <= range.End; ++id)
        {
            String8 otherMatch = store[id];
            Assert.AreEqual(0, value8.CompareTo(otherMatch, true), String.Format("'{0}' in match range wasn't reported equal to '{1}' being matched", otherMatch, value8));
        }

        // The neighbors just outside the range sort strictly before and after the value
        if (range.Start > 0)
        {
            String8 valueBefore = store[range.Start - 1];
            Assert.IsTrue(value8.CompareTo(valueBefore, true) > 0, String.Format("'{0}' before match range wasn't reported before '{1}' being matched", valueBefore, value8));
        }

        if (range.End < store.Count - 1)
        {
            String8 valueAfter = store[range.End + 1];
            Assert.IsTrue(value8.CompareTo(valueAfter, true) < 0, String.Format("'{0}' after match range wasn't reported after '{1}' being matched", valueAfter, value8));
        }

        // Repeat the search with 'false' passed to the three-argument overload (the case-sensitive range)
        Range caseSensitive;
        bool foundExact = store.TryFindString(value8, false, out caseSensitive);
        Assert.IsTrue(foundExact);

        // Everything inside the case-sensitive range equals the value exactly
        for (int id = caseSensitive.Start; id <= caseSensitive.End; ++id)
        {
            String8 otherMatch = store[id];
            Assert.AreEqual(0, value8.CompareTo(otherMatch, false), String.Format("'{0}' in case sensitive range wasn't reported equal to '{1}' being matched", otherMatch, value8));
        }

        // The neighbors just outside the case-sensitive range must not equal the value
        if (caseSensitive.Start > 0)
        {
            String8 valueBefore = store[caseSensitive.Start - 1];
            Assert.IsTrue(value8.CompareTo(valueBefore, false) != 0, String.Format("'{0}' before case sensitive range still matches '{1}'", valueBefore, value8));
        }

        if (caseSensitive.End < store.Count - 1)
        {
            String8 valueAfter = store[caseSensitive.End + 1];
            Assert.IsTrue(value8.CompareTo(valueAfter, false) != 0, String.Format("'{0}' after case sensitive range still matches '{1}'", valueAfter, value8));
        }
    }

    // A casing which was never added must not be found by the case-sensitive search
    String8 unseenCasing = String8.Convert("BOOLean", buffer);
    Range unseenRange;
    bool unseenFound = store.TryFindString(unseenCasing, false, out unseenRange);
    Assert.IsFalse(unseenFound);
}
// Exercises ItemTree with five file paths: add, rebuild path, find by name and by path,
// depth and ancestor lookups, sort, and a serialization round trip.
public void ItemTree_Basic()
{
    // Scratch buffers shared by every String8.Convert / Split call below
    byte[] byteBuffer = new byte[100];
    int[] intBuffer = new int[10];
    String8Set pathParts;

    StringStore strings = new StringStore();
    ItemTree fileTree = new ItemTree();

    string[] filePaths =
    {
        @"C:\Code\Arriba\Arriba\Diagnostics\DailyLogTraceListener.cs",
        @"C:\Code\Arriba\Arriba\Diagnostics\ProgressWriter.cs",
        @"C:\Code\Arriba\Arriba\Diagnostics\Log4NetDiagnosticConsumer.cs",
        @"C:\Code\Arriba\Arriba\Diagnostics\Memory.cs",
        @"C:\Code\Arriba\Arriba\Diagnostics\TraceWriter.cs"
    };

    // Add every file path under node 0 and remember the index returned for it
    int[] fileTreeIndexes = new int[filePaths.Length];
    for (int i = 0; i < filePaths.Length; ++i)
    {
        pathParts = String8.Convert(filePaths[i], byteBuffer).Split('\\', intBuffer);
        fileTreeIndexes[i] = fileTree.AddPath(0, pathParts, strings);
    }

    for (int i = 0; i < filePaths.Length; ++i)
    {
        // The path rebuilt from the returned index must equal the original path
        string rebuiltPath = fileTree.GetPath(fileTreeIndexes[i], strings, '\\').ToString();
        Assert.AreEqual(filePaths[i], rebuiltPath);

        // Finding by the full path must land on the same index
        pathParts = String8.Convert(filePaths[i], byteBuffer).Split('\\', intBuffer);
        int foundAtIndex;
        bool foundByPath = fileTree.TryFindByPath(0, pathParts, strings, out foundAtIndex);
        Assert.IsTrue(foundByPath);
        Assert.AreEqual(fileTreeIndexes[i], foundAtIndex);
    }

    // Find a child by name
    int foundIndex;

    // "C:" is a child of node 0, at index 1
    Assert.IsTrue(fileTree.TryFindChildByName(0, strings.FindOrAddString("C:"), out foundIndex));
    Assert.AreEqual(1, foundIndex);

    // "C:" is not a child of node 1
    Assert.IsFalse(fileTree.TryFindChildByName(1, strings.FindOrAddString("C:"), out foundIndex));

    // "Code" is not a child of node 0
    Assert.IsFalse(fileTree.TryFindChildByName(0, strings.FindOrAddString("Code"), out foundIndex));

    // "Code" is a child of node 1, at index 2
    Assert.IsTrue(fileTree.TryFindChildByName(1, strings.FindOrAddString("Code"), out foundIndex));
    Assert.AreEqual(2, foundIndex);

    // TryFindByPath also works for a partial path relative to an inner node
    pathParts = String8.Convert(@"Code\Arriba", byteBuffer).Split('\\', intBuffer);
    int arribaIndex;
    Assert.IsTrue(fileTree.TryFindByPath(1, pathParts, strings, out arribaIndex));

    pathParts = String8.Convert(@"Arriba\Diagnostics\DailyLogTraceListener.cs", byteBuffer).Split('\\', intBuffer);
    int dailyLogIndex;
    Assert.IsTrue(fileTree.TryFindByPath(arribaIndex, pathParts, strings, out dailyLogIndex));
    Assert.AreEqual(fileTreeIndexes[0], dailyLogIndex);

    // A failed TryFindByPath is expected to report the deepest node it did reach
    pathParts = String8.Convert(@"C:\Nope", byteBuffer).Split('\\', intBuffer);
    int nopeIndex;
    Assert.IsFalse(fileTree.TryFindByPath(0, pathParts, strings, out nopeIndex));
    Assert.AreEqual(1, nopeIndex, @"Failed find for C:\Nope should return 'C:' index; the successful portion of the search.");

    pathParts = String8.Convert(@"C:\Code\Arriba\Arriba\Diagnostics\TraceWriter.cs\Nope", byteBuffer).Split('\\', intBuffer);
    Assert.IsFalse(fileTree.TryFindByPath(0, pathParts, strings, out nopeIndex));
    Assert.AreEqual(fileTreeIndexes[4], nopeIndex);

    // Depth of node 0, of "C:", and of the first file
    Assert.AreEqual(0, fileTree.GetDepth(0));
    Assert.AreEqual(1, fileTree.GetDepth(1));
    Assert.AreEqual(6, fileTree.GetDepth(fileTreeIndexes[0]));

    // Names of the first file's ancestors at depths 1 through 4
    string[] expectedAncestorNames = { "C:", "Code", "Arriba", "Arriba" };
    for (int depth = 1; depth <= expectedAncestorNames.Length; ++depth)
    {
        int ancestorIndex = fileTree.GetAncestorAtDepth(fileTreeIndexes[0], depth);
        int ancestorNameIdentifier = fileTree.GetNameIdentifier(ancestorIndex);
        Assert.AreEqual(expectedAncestorNames[depth - 1], strings[ancestorNameIdentifier].ToString());
    }

    // Sort the tree by name
    fileTree.SortByName(strings);

    // Write the tree to the trace output
    Trace.WriteLine(Write.ToString((w) => fileTree.WriteTree(w, strings, 1)));

    // Round trip into a fresh tree and continue with the copy which was read back
    ItemTree readTree = new ItemTree();
    Verify.RoundTrip(fileTree, readTree);
    fileTree = readTree;

    // The original indexes must still rebuild the original paths
    for (int i = 0; i < filePaths.Length; ++i)
    {
        string rebuiltPath = fileTree.GetPath(fileTreeIndexes[i], strings, '\\').ToString();
        Assert.AreEqual(filePaths[i], rebuiltPath);
    }
}