public void AlphanumericSplitter_EndToEndPerformance()
{
    // End-to-end throughput check: split the sample code into lines, split each
    // line into alphanumeric / non-alphanumeric parts, and add every previously
    // unseen word longer than two bytes to a StringStore and a MemberIndex.
    String8 allCode = AllCodeText.AllCode8;
    String8Set parts = default(String8Set);
    PartialArray<int> splitPositions = new PartialArray<int>(2048);

    HashSet<String8> seenWords = new HashSet<String8>();
    StringStore store = new StringStore();
    MemberIndex memberIndex = new MemberIndex();

    int passCount = 10;
    int partsSeen = 0;

    // Goal for split + add + index: 30k per millisecond [30 MB/sec].
    // The delegate reports how much text it processed (passes * text length).
    Verify.PerformanceByBytes(30 * LongExtensions.Megabyte, () =>
    {
        for (int pass = 0; pass < passCount; ++pass)
        {
            String8Set lines = allCode.Split(UTF8.Newline, new PartialArray<int>());

            for (int lineNumber = 0; lineNumber < lines.Count; ++lineNumber)
            {
                // Split the current line, reusing the same position container each time
                parts = AlphanumericSplitter.Split(lines[lineNumber], ref splitPositions);
                partsSeen += parts.Count;

                if (parts.Count == 0)
                {
                    continue;
                }

                // Start on the first alphanumeric part and step by two from there
                // (presumably word and separator parts alternate - confirm against the splitter).
                int firstWord;
                if (AlphanumericSplitter.IsAlphaNumeric(parts[0][0]))
                {
                    firstWord = 0;
                }
                else
                {
                    firstWord = 1;
                }

                for (int partIndex = firstWord; partIndex < parts.Count; partIndex += 2)
                {
                    String8 candidate = parts[partIndex];

                    // Skip short words and words which were already recorded
                    if (candidate.Length <= 2 || seenWords.Contains(candidate))
                    {
                        continue;
                    }

                    // Remember the store's own instance of the word and index it for this line
                    int wordId = store.FindOrAddString(candidate);
                    seenWords.Add(store[wordId]);
                    memberIndex.AddItem(wordId, lineNumber);
                }
            }
        }

        return (iterations: passCount, length: allCode.Length).iterations * allCode.Length;
    });
}
public void MemberIndex_Basic()
{
    // Builds a small index keyed by number words, converts it to immutable form,
    // and checks exact-word and prefix lookups against the expected item lists.
    StringStore store = new StringStore();
    MemberIndex memberIndex = new MemberIndex();

    // Register six sample words; the array position of each word is its numeric value
    string[] words = { "Zero", "One", "Two", "Three", "Four", "Five" };
    int[] wordIds = new int[words.Length];
    for (int position = 0; position < words.Length; ++position)
    {
        wordIds[position] = store.FindOrAddString(words[position]);
    }

    // Index items 1 through 19: each item gets every word (One..Five) whose number divides
    // it evenly, so 10 gets "One", "Two" and "Five". "Zero" is never added to any item.
    for (int itemId = 1; itemId < 20; ++itemId)
    {
        for (int divisor = 1; divisor < wordIds.Length; ++divisor)
        {
            if (itemId % divisor != 0)
            {
                continue;
            }

            memberIndex.AddItem(wordIds[divisor], itemId);
        }
    }

    // Switch both structures to their searchable form
    store.ConvertToImmutable();
    memberIndex.ConvertToImmutable(store);

    // "Three": multiples of three, by word and by prefix
    const string multiplesOfThree = "3, 6, 9, 12, 15, 18";
    Assert.AreEqual(multiplesOfThree, MatchesForWordToString(memberIndex, store, store[wordIds[3]]));
    String8 threePrefix = String8.Convert("Three", new byte[String8.GetLength("Three")]);
    Assert.AreEqual(multiplesOfThree, MatchesForPrefixToString(memberIndex, store, threePrefix));

    // "Five": multiples of five, by word and by prefix
    const string multiplesOfFive = "5, 10, 15";
    Assert.AreEqual(multiplesOfFive, MatchesForWordToString(memberIndex, store, store[wordIds[5]]));
    String8 fivePrefix = String8.Convert("Five", new byte[String8.GetLength("Five")]);
    Assert.AreEqual(multiplesOfFive, MatchesForPrefixToString(memberIndex, store, fivePrefix));

    // "Zero": was stored as a string but never indexed, so nothing matches
    Assert.AreEqual("", MatchesForWordToString(memberIndex, store, store[wordIds[0]]));
    String8 zeroPrefix = String8.Convert("Zero", new byte[String8.GetLength("Zero")]);
    Assert.AreEqual("", MatchesForPrefixToString(memberIndex, store, zeroPrefix));

    // Prefix "F": expects the "Five" matches followed by the "Four" matches
    String8 letterF = String8.Convert("F", new byte[String8.GetLength("F")]);
    Assert.AreEqual("5, 10, 15, 4, 8, 12, 16", MatchesForPrefixToString(memberIndex, store, letterF));
}
public void MemberIndex_CaseSensitivity()
{
    // Indexes words which differ only by casing and checks that a word lookup
    // returns the items of every casing variant together.
    StringStore store = new StringStore();
    MemberIndex memberIndex = new MemberIndex();
    byte[] scratch = new byte[20];

    // Ten words; "bool"/"Bool", "array"/"ARRAY" and "int"/"Int" differ only by case
    string[] words =
    {
        "null", "bool", "Bool", "array", "ARRAY",
        "Collections", "Dictionary", "int", "Int", "friend"
    };

    int[] wordIds = new int[words.Length];
    for (int position = 0; position < words.Length; ++position)
    {
        wordIds[position] = store.FindOrAddString(words[position]);
    }

    // Three passes over the words, handing out consecutive item IDs:
    // "null" => 0, 10, 20; "bool" => 1, 11, 21; "Bool" => 2, 12, 22; ...
    int nextItemId = 0;
    for (int pass = 0; pass < 3; ++pass)
    {
        foreach (int wordId in wordIds)
        {
            memberIndex.AddItem(wordId, nextItemId);
            nextItemId++;
        }
    }

    // Convert for search. The assertions below expect case variants to be merged
    // into one group, with matches listed in insertion (ID) order.
    store.ConvertToImmutable();
    memberIndex.ConvertToImmutable(store);

    // Each query is converted into the shared buffer immediately before it is used.

    // "BOOL" should find the items of both "bool" and "Bool"
    String8 query = String8.Convert("BOOL", scratch);
    Assert.AreEqual("1, 2, 11, 12, 21, 22", MatchesForWordToString(memberIndex, store, query));

    // "array" should find the items of both "array" and "ARRAY"
    query = String8.Convert("array", scratch);
    Assert.AreEqual("3, 4, 13, 14, 23, 24", MatchesForWordToString(memberIndex, store, query));

    // "Dictionary" has no case variants, so only its own items match
    query = String8.Convert("Dictionary", scratch);
    Assert.AreEqual("6, 16, 26", MatchesForWordToString(memberIndex, store, query));
}