/// <summary>
/// Selects up to fifty entries of <c>Groups</c> that rank first against <paramref name="value"/>,
/// keeping the running candidates in a bounded <c>BinaryHeap</c>.
/// </summary>
/// <param name="value">The group every entry of <c>Groups</c> is measured against.</param>
/// <returns>
/// An array of length 50 ordered by <c>TagsSimilarityInfo</c>'s ordering (ascending).
/// When fewer than 50 groups exist, only the leading slots are filled and the rest keep
/// their default value, matching the SortedSet and Count variants.
/// </returns>
public TagsGroup[] GetFiftyMostSimilarGroupsHeap(TagsGroup value)
{
    const int resultLength = 50;
    BinaryHeap<TagsSimilarityInfo> binaryHeap = new BinaryHeap<TagsSimilarityInfo>(resultLength);
    for (int groupIndex = 0; groupIndex < Groups.Length; groupIndex++)
    {
        TagsGroup tagsGroup = Groups[groupIndex];
        int similarityValue = TagsGroup.MeasureSimilarity(value, tagsGroup);
        TagsSimilarityInfo newInfo = new TagsSimilarityInfo(groupIndex, similarityValue);

        // Fast path: the heap is full and its worst retained entry still orders before
        // the candidate, so the candidate cannot make the cut. Any negative CompareTo
        // result means "orders before", not only -1.
        if (binaryHeap.Count == resultLength && binaryHeap.Max.CompareTo(newInfo) < 0)
        {
            continue;
        }

        binaryHeap.Add(newInfo);
        if (binaryHeap.Count > resultLength)
        {
            // Evict the worst entry to keep the heap bounded.
            binaryHeap.RemoveMax();
        }
    }

    // The heap only guarantees access to its maximum, so copy and sort to get the full order.
    List<TagsSimilarityInfo> list = new List<TagsSimilarityInfo>(binaryHeap);
    list.Sort();

    TagsGroup[] result = new TagsGroup[resultLength];
    // Bound by list.Count as well: with fewer than resultLength groups there is less to copy.
    for (int i = 0; i < resultLength && i < list.Count; i++)
    {
        result[i] = Groups[list[i].Index];
    }

    return result;
}
/// <summary>
/// Selects up to fifty entries of <c>Groups</c> that rank first against <paramref name="value"/>,
/// maintaining a sorted list of at most fifty candidates via binary-search insertion.
/// </summary>
/// <param name="value">The group every entry of <c>Groups</c> is measured against.</param>
/// <returns>
/// An array of length 50 ordered by <c>TagsSimilarityInfo</c>'s ordering (ascending).
/// When fewer than 50 groups exist, only the leading slots are filled and the rest keep
/// their default value.
/// </returns>
public TagsGroup[] GetFiftyMostSimilarGroups(TagsGroup value)
{
    const int resultLength = 50;
    List<TagsSimilarityInfo> list = new List<TagsSimilarityInfo>(resultLength);
    for (int groupIndex = 0; groupIndex < Groups.Length; groupIndex++)
    {
        TagsGroup tagsGroup = Groups[groupIndex];
        int similarityValue = TagsGroup.MeasureSimilarity(value, tagsGroup);
        TagsSimilarityInfo newInfo = new TagsSimilarityInfo(groupIndex, similarityValue);

        // Fast path: the list is full and its last (worst) entry still orders before the
        // candidate. Any negative CompareTo result means "orders before", not only -1.
        if (list.Count == resultLength && list[resultLength - 1].CompareTo(newInfo) < 0)
        {
            continue;
        }

        // BinarySearch returns the bitwise complement of the insertion point when no equal
        // element exists, and a non-negative index when one does. Complement only in the
        // first case so an equal element can never produce a negative insert index.
        int index = list.BinarySearch(newInfo);
        if (index < 0)
        {
            index = ~index;
        }

        list.Insert(index, newInfo);
        if (list.Count > resultLength)
        {
            // Drop the entry that fell off the end to keep the list bounded.
            list.RemoveAt(resultLength);
        }
    }

    TagsGroup[] result = new TagsGroup[resultLength];
    // Bound by list.Count as well: with fewer than resultLength groups there is less to copy.
    for (int i = 0; i < resultLength && i < list.Count; i++)
    {
        result[i] = Groups[list[i].Index];
    }

    return result;
}
/// <summary>
/// Selects up to fifty entries of <c>Groups</c> that rank first against <paramref name="value"/>,
/// keeping the running candidates in a <see cref="SortedSet{T}"/> capped at fifty items.
/// </summary>
/// <param name="value">The group every entry of <c>Groups</c> is measured against.</param>
/// <returns>
/// An array of length 50 filled in the set's ascending order; slots beyond the number of
/// retained candidates keep their default value.
/// </returns>
public TagsGroup[] GetFiftyMostSimilarGroupsSortedSet(TagsGroup value)
{
    const int resultLength = 50;
    var best = new SortedSet<TagsSimilarityInfo>();

    for (int groupIndex = 0; groupIndex < Groups.Length; groupIndex++)
    {
        var candidate = new TagsSimilarityInfo(
            groupIndex,
            TagsGroup.MeasureSimilarity(value, Groups[groupIndex]));

        // Only consider the candidate while there is room, or when the current worst
        // entry does not already order before it.
        bool accept = best.Count != resultLength || best.Max.CompareTo(candidate) != -1;
        if (accept)
        {
            best.Add(candidate);
            if (best.Count > resultLength)
            {
                // Trim back to the cap by evicting the worst entry.
                best.Remove(best.Max);
            }
        }
    }

    var result = new TagsGroup[resultLength];
    int next = 0;
    foreach (TagsSimilarityInfo entry in best)
    {
        result[next] = Groups[entry.Index];
        next++;
    }

    return result;
}
/// <summary>
/// Selects up to fifty entries of <c>Groups</c> with the highest similarity to
/// <paramref name="value"/> by bucketing group indices per similarity score and then
/// reading the buckets from the highest score downwards.
/// </summary>
/// <param name="value">The group every entry of <c>Groups</c> is measured against.</param>
/// <returns>
/// An array of length 50; within one score, groups keep their order in <c>Groups</c>.
/// Slots beyond the number of available groups keep their default value.
/// </returns>
public TagsGroup[] GetFiftyMostSimilarGroupsCount(TagsGroup value)
{
    const int resultLength = 50;

    // One bucket per possible score, 0..TagsGroupLength inclusive; created lazily.
    var indicesByScore = new List<int>[TagsGroup.TagsGroupLength + 1];
    for (int position = 0; position < Groups.Length; position++)
    {
        int score = TagsGroup.MeasureSimilarity(value, Groups[position]);
        if (indicesByScore[score] == null)
        {
            indicesByScore[score] = new List<int>();
        }

        indicesByScore[score].Add(position);
    }

    var result = new TagsGroup[resultLength];
    int written = 0;
    for (int score = TagsGroup.TagsGroupLength; score >= 0 && written < resultLength; score--)
    {
        List<int> members = indicesByScore[score];
        if (members == null)
        {
            continue;
        }

        foreach (int position in members)
        {
            if (written >= resultLength)
            {
                break;
            }

            result[written++] = Groups[position];
        }
    }

    return result;
}
/// <summary>
/// Builds an array of <paramref name="groupsCount"/> groups, each constructed from a fresh
/// <c>GetRandomBools()</c> result.
/// </summary>
/// <param name="groupsCount">Number of groups to create.</param>
/// <returns>The newly created array.</returns>
internal static TagsGroup[] CreateRandomGroups(int groupsCount)
{
    var randomGroups = new TagsGroup[groupsCount];
    int next = 0;
    while (next < groupsCount)
    {
        randomGroups[next] = new TagsGroup(GetRandomBools());
        next++;
    }

    return randomGroups;
}
/// <summary>
/// Adds up the similarity between each element of <paramref name="tagsGroups"/> and the
/// <c>etalon</c> field.
/// </summary>
/// <param name="tagsGroups">Groups to measure.</param>
/// <returns>The sum of the individual similarity values.</returns>
int GetSimilaritySum(TagsGroup[] tagsGroups)
{
    int total = 0;
    for (int i = 0; i < tagsGroups.Length; i++)
    {
        total += TagsGroup.MeasureSimilarity(tagsGroups[i], etalon);
    }

    return total;
}
/// <summary>
/// Counts the bits that are set in both <paramref name="a"/> and <paramref name="b"/>
/// across the first <c>TagsGroupLength / BucketSize</c> words of their <c>InnerTags</c>.
/// </summary>
/// <param name="a">First group.</param>
/// <param name="b">Second group.</param>
/// <returns>The number of bit positions set in both groups.</returns>
/// <remarks>
/// Uses <see cref="System.Numerics.BitOperations.PopCount(ulong)"/> rather than calling the
/// x86 POPCNT intrinsic directly: the intrinsic throws <c>PlatformNotSupportedException</c>
/// where it is unavailable, while <c>BitOperations</c> falls back to a software
/// implementation.
/// </remarks>
public static int MeasureSimilarity(TagsGroup a, TagsGroup b)
{
    int result = 0;

    // Integer division: assumes TagsGroupLength is a multiple of BucketSize — TODO confirm.
    int wordCount = TagsGroupLength / BucketSize;
    for (int i = 0; i < wordCount; i++)
    {
        ulong common = a.InnerTags[i] & b.InnerTags[i];
        result += System.Numerics.BitOperations.PopCount(common);
    }

    return result;
}
/// <summary>
/// Prepares the benchmark fields: one calculator over random groups, one over the
/// ascendant test groups, one over those same groups in reverse order, and an
/// all-tags-true group.
/// </summary>
public void GlobalSetup()
{
    randomCalculator = new SimilarTagsCalculator(Program.CreateRandomGroups(GroupsCount));

    TagsGroup[] ascending = Program.CreateAscendantTestGroups(GroupsCount);

    // Build the reversed copy before either calculator is constructed from these arrays.
    int count = ascending.Length;
    var descending = new TagsGroup[count];
    for (int i = 0; i < count; i++)
    {
        descending[count - 1 - i] = ascending[i];
    }

    ascendantCalculator = new SimilarTagsCalculator(ascending);
    descendantCalculator = new SimilarTagsCalculator(descending);
    allTagsTrue = new TagsGroup(Program.CreateAllTagsTrue());
}
/// <summary>
/// Produces one <c>TagsSimilarityInfo</c> per input group, pairing the group's position
/// with its similarity to an all-tags-true group.
/// </summary>
/// <param name="tagsGroups">Groups to measure.</param>
/// <returns>An array parallel to <paramref name="tagsGroups"/>.</returns>
SimilarTagsCalculator.TagsSimilarityInfo[] CalcTagsSimilarityInfo(TagsGroup[] tagsGroups)
{
    var infos = new SimilarTagsCalculator.TagsSimilarityInfo[tagsGroups.Length];
    var allTags = new TagsGroup(Program.CreateAllTagsTrue());

    int position = 0;
    while (position < tagsGroups.Length)
    {
        int similarity = TagsGroup.MeasureSimilarity(allTags, tagsGroups[position]);
        infos[position] = new SimilarTagsCalculator.TagsSimilarityInfo(position, similarity);
        position++;
    }

    return infos;
}
/// <summary>
/// Computes the reference result for <paramref name="groups"/> and
/// <paramref name="etalon"/> via <c>GetDummyResult</c>, passes it together with the
/// calculator's result to <c>TestCoreCore</c>, and then reports the test as passed.
/// </summary>
/// <param name="groups">Groups handed to both the reference and the calculator.</param>
/// <param name="etalon">The group to measure against.</param>
/// <param name="testName">Name printed in the success message.</param>
static void TestCore(TagsGroup[] groups, TagsGroup etalon, string testName)
{
    var dummyResult = GetDummyResult(groups, etalon);
    SimilarTagsCalculator calculator = new SimilarTagsCalculator(groups);

    // Only the multi-threaded variant is exercised; the other variants are left disabled.
    TestCoreCore(dummyResult, calculator.GetFiftyMostSimilarGroupsMultiThread(etalon));
    // TestCoreCore(dummyResult, calculator.GetFiftyMostSimilarGroups(etalon));
    // TestCoreCore(dummyResult, calculator.GetFiftyMostSimilarGroupsSortedSet(etalon));
    // TestCoreCore(dummyResult, calculator.GetFiftyMostSimilarGroupsHeap(etalon));
    // TestCoreCore(dummyResult, calculator.GetFiftyMostSimilarGroupsCount(etalon));

    Console.WriteLine($"{testName} passed!");
}
/// <summary>
/// Runs <c>TestCore</c> on random groups in which every 100th slot (for the first
/// <c>resultLength</c> multiples of 100) is overwritten with an all-tags-true group,
/// using an all-tags-true group as the etalon.
/// </summary>
static void SpecialTest()
{
    var groups = new TagsGroup[testGroupCount];
    for (int slot = 0; slot < testGroupCount; slot++)
    {
        groups[slot] = new TagsGroup(GetRandomBools());
    }

    bool[] allTrue = CreateAllTagsTrue();

    // Plant the full-tag groups at indices 0, 100, 200, ...
    for (int planted = 0; planted < resultLength; planted++)
    {
        int slot = planted * 100;
        groups[slot] = new TagsGroup(allTrue);
    }

    var etalon = new TagsGroup(allTrue);
    TestCore(groups, etalon, "Special test");
}
/// <summary>
/// Reference implementation: measures every group against <paramref name="etalon"/>,
/// orders by similarity descending with ties broken by original index ascending, and
/// returns the first <c>resultLength</c> groups.
/// </summary>
/// <param name="groups">Groups to rank.</param>
/// <param name="etalon">The group to measure against.</param>
/// <returns>An array of <c>resultLength</c> groups in ranked order.</returns>
static TagsGroup[] GetDummyResult(TagsGroup[] groups, TagsGroup etalon)
{
    List<(int index, TagsGroup tagsGroup, int similarity)> ranked = groups
        .Select((tagsGroup, index) => (index: index, tagsGroup: tagsGroup, similarity: TagsGroup.MeasureSimilarity(tagsGroup, etalon)))
        .OrderByDescending(entry => entry.similarity)
        .ThenBy(entry => entry.index)
        .ToList();

    var top = new TagsGroup[resultLength];
    int position = 0;
    while (position < resultLength)
    {
        top[position] = ranked[position].tagsGroup;
        position++;
    }

    return top;
}
/// <summary>
/// Compares <c>TagsGroup.MeasureSimilarity</c> with <c>CalcSimilarityDummy</c> on 1000
/// pairs of random tag arrays; throws on the first mismatch, otherwise prints a success
/// message.
/// </summary>
/// <exception cref="Exception">The two implementations disagree for some pair.</exception>
static void RandomMeasureSimilarityTest()
{
    const int rounds = 1000;
    for (int round = 0; round < rounds; round++)
    {
        bool[] left = GetRandomBools();
        bool[] right = GetRandomBools();

        int actual = TagsGroup.MeasureSimilarity(new TagsGroup(left), new TagsGroup(right));
        int expected = CalcSimilarityDummy(left, right);
        if (actual == expected)
        {
            continue;
        }

        throw new Exception("Test failed");
    }

    Console.WriteLine("Measure similarity test passed");
}
/// <summary>
/// Splits <c>Groups</c> into four contiguous index ranges, runs
/// <c>GetFiftyMostSimilarGroupsMultiThreadCore</c> on each range in its own task, and
/// merges the per-range candidate lists with <c>MergeTaskResults</c>.
/// </summary>
/// <param name="value">The group every entry of <c>Groups</c> is measured against.</param>
/// <returns>The merged result produced by <c>MergeTaskResults</c>.</returns>
public TagsGroup[] GetFiftyMostSimilarGroupsMultiThread(TagsGroup value)
{
    const int resultLength = 50;
    const int threadsCount = 4;

    int bucketSize = Groups.Length / threadsCount;
    Task<List<TagsSimilarityInfo>>[] tasks = new Task<List<TagsSimilarityInfo>>[threadsCount];
    for (int i = 0; i < threadsCount; i++)
    {
        // Declared inside the loop so each lambda captures its own bounds.
        int leftIndex = i * bucketSize;

        // The division above truncates; the last range absorbs the remainder so the
        // trailing Groups.Length % threadsCount groups are not skipped.
        int rightIndex = i == threadsCount - 1 ? Groups.Length : leftIndex + bucketSize;

        tasks[i] = Task<List<TagsSimilarityInfo>>.Factory.StartNew(
            () => GetFiftyMostSimilarGroupsMultiThreadCore(value, leftIndex, rightIndex));
    }

    Task.WaitAll(tasks);

    List<TagsSimilarityInfo>[] taskResults = new List<TagsSimilarityInfo>[threadsCount];
    for (int i = 0; i < threadsCount; i++)
    {
        taskResults[i] = tasks[i].Result;
    }

    return MergeTaskResults(resultLength, threadsCount, taskResults);
}
/// <summary>
/// Builds <paramref name="groupsCount"/> groups in runs: for each <c>j</c> from 0 to
/// <c>TagsGroup.TagsGroupLength</c>, up to <c>groupsCount / TagsGroupLength</c> groups are
/// created from <c>GetTrueBools(j)</c>. Any slots still empty afterwards are filled with
/// all-tags-true groups.
/// </summary>
/// <param name="groupsCount">Total number of groups to create.</param>
/// <returns>The newly created array.</returns>
internal static TagsGroup[] CreateAscendantTestGroups(int groupsCount)
{
    var groups = new TagsGroup[groupsCount];
    int perLevel = groupsCount / TagsGroup.TagsGroupLength;
    int filled = 0;

    for (int level = 0; level <= TagsGroup.TagsGroupLength && filled < groupsCount; level++)
    {
        // Presumably a tag array with `level` tags set — GetTrueBools is not visible here.
        bool[] tags = GetTrueBools(level);
        for (int repeat = 0; repeat < perLevel && filled < groupsCount; repeat++)
        {
            groups[filled] = new TagsGroup(tags);
            filled++;
        }
    }

    bool[] fullTags = CreateAllTagsTrue();
    for (; filled < groupsCount; filled++)
    {
        groups[filled] = new TagsGroup(fullTags);
    }

    return groups;
}
/// <summary>
/// Merges the per-task candidate lists into one ranked result by repeatedly taking the
/// head that orders first among all lists (a k-way merge).
/// </summary>
/// <param name="resultLength">Length of the returned array.</param>
/// <param name="threadsCount">Number of lists in <paramref name="taskResults"/> to merge.</param>
/// <param name="taskResults">
/// Candidate lists, each expected to be sorted ascending by <c>TagsSimilarityInfo</c>'s ordering.
/// </param>
/// <returns>
/// An array of length <paramref name="resultLength"/>. If the lists together hold fewer
/// entries than that, the trailing slots keep their default value.
/// </returns>
TagsGroup[] MergeTaskResults(int resultLength, int threadsCount, List<TagsSimilarityInfo>[] taskResults)
{
    TagsGroup[] result = new TagsGroup[resultLength];

    // indices[j] is the position of the next unconsumed entry in taskResults[j].
    int[] indices = new int[threadsCount];
    for (int i = 0; i < resultLength; i++)
    {
        int bestList = -1;
        TagsSimilarityInfo best = default(TagsSimilarityInfo);
        for (int j = 0; j < threadsCount; j++)
        {
            // Skip lists that have been fully consumed (or were short to begin with).
            if (indices[j] >= taskResults[j].Count)
            {
                continue;
            }

            TagsSimilarityInfo current = taskResults[j][indices[j]];

            // Strictly-before comparison keeps the lowest-numbered list on ties.
            if (bestList < 0 || current.CompareTo(best) < 0)
            {
                bestList = j;
                best = current;
            }
        }

        if (bestList < 0)
        {
            // Every list is exhausted; nothing more to merge.
            break;
        }

        result[i] = Groups[best.Index];
        indices[bestList]++;
    }

    return result;
}
/// <summary>
/// Scans <c>Groups[leftIndex .. rightIndex)</c> and returns up to fifty candidates that
/// rank first against <paramref name="value"/>, as a list sorted ascending by
/// <c>TagsSimilarityInfo</c>'s ordering.
/// </summary>
/// <param name="value">The group every scanned entry is measured against.</param>
/// <param name="leftIndex">Inclusive start index into <c>Groups</c>.</param>
/// <param name="rightIndex">Exclusive end index into <c>Groups</c>.</param>
/// <returns>The sorted candidate list; it holds fewer than fifty entries for short ranges.</returns>
List<TagsSimilarityInfo> GetFiftyMostSimilarGroupsMultiThreadCore(TagsGroup value, int leftIndex, int rightIndex)
{
    const int resultLength = 50;
    List<TagsSimilarityInfo> list = new List<TagsSimilarityInfo>(resultLength);
    for (int groupIndex = leftIndex; groupIndex < rightIndex; groupIndex++)
    {
        TagsGroup tagsGroup = Groups[groupIndex];
        int similarityValue = TagsGroup.MeasureSimilarity(value, tagsGroup);
        TagsSimilarityInfo newInfo = new TagsSimilarityInfo(groupIndex, similarityValue);

        // Fast path: the list is full and its last (worst) entry still orders before the
        // candidate. Any negative CompareTo result means "orders before", not only -1.
        if (list.Count == resultLength && list[resultLength - 1].CompareTo(newInfo) < 0)
        {
            continue;
        }

        // BinarySearch returns the bitwise complement of the insertion point when no equal
        // element exists, and a non-negative index when one does. Complement only in the
        // first case so an equal element can never produce a negative insert index.
        int index = list.BinarySearch(newInfo);
        if (index < 0)
        {
            index = ~index;
        }

        list.Insert(index, newInfo);
        if (list.Count > resultLength)
        {
            // Drop the entry that fell off the end to keep the list bounded.
            list.RemoveAt(resultLength);
        }
    }

    return list;
}
/// <summary>
/// Prepares the benchmark fields: one million groups from
/// <c>Program.CreateRandomAscendantTestGroups</c>, one million from
/// <c>Program.CreateAscendantTestGroups</c>, and an all-tags-true etalon.
/// </summary>
public void GlobalSetup()
{
    const int groupsCount = 1000000;

    unsortedGroups = Program.CreateRandomAscendantTestGroups(groupsCount);
    sortedGroups = Program.CreateAscendantTestGroups(groupsCount);

    var allTags = Program.CreateAllTagsTrue();
    etalon = new TagsGroup(allTags);
}
/// <summary>
/// Assigns <c>randomValue</c> a new group built from <c>Program.GetRandomBools()</c>.
/// </summary>
public void IterationSetup()
{
    var randomBools = Program.GetRandomBools();
    randomValue = new TagsGroup(randomBools);
}