/// <summary>
/// Weights the target grouping's vector by how rarely each position is populated
/// across the other groupings in the corpus (a TF-IDF style weighting).
/// </summary>
/// <remarks>
/// For every vector position the method counts the corpus entries whose
/// GroupingName differs from the target's and whose access vector is greater than
/// zero at that position. The count is turned into
/// <c>log(Corpus.Count / (count + 0.1))</c> and multiplied pointwise with the
/// target's vector.
/// </remarks>
/// <param name="TargetGroup">Grouping whose vector is being weighted.</param>
/// <param name="Corpus">All groupings used to derive the per-position weights.</param>
/// <param name="ByRawCount">
/// When true the weights are applied to <c>ReturnRawCountVector()</c>; otherwise to
/// <c>ReturnAccessVector()</c>.
/// </param>
/// <returns>The pointwise product of the weight vector and the chosen target vector.</returns>
public static Vector CalculateTFIDFVector(GroupingQueryResult TargetGroup, List<GroupingQueryResult> Corpus, bool ByRawCount = false)
{
    Vector TargetAccess = TargetGroup.ReturnAccessVector();
    Vector IDFVector = new DenseVector(TargetAccess.Count);

    // Resolve the comparison vectors once, instead of re-fetching them and
    // re-comparing names for every vector position inside the parallel loop.
    List<Vector> OtherVectors = new List<Vector>(Corpus.Count);
    foreach (GroupingQueryResult CurrentGQR in Corpus)
    {
        if (CurrentGQR.GroupingName != TargetGroup.GroupingName)
        {
            OtherVectors.Add(CurrentGQR.ReturnAccessVector());
        }
    }

    // Each iteration writes only its own index of IDFVector.
    Parallel.For(0, IDFVector.Count, i =>
    {
        double GroupsWithAccess = 0;
        foreach (Vector Other in OtherVectors)
        {
            if (Other[i] > 0)
            {
                GroupsWithAccess = GroupsWithAccess + 1;
            }
        }

        // 0.1 has been added to the denominator to prevent divide by zero issues
        IDFVector[i] = Math.Log(Corpus.Count / (GroupsWithAccess + 0.1));
    });

    if (ByRawCount)
    {
        return (Vector)IDFVector.PointwiseMultiply(TargetGroup.ReturnRawCountVector());
    }

    return (Vector)IDFVector.PointwiseMultiply(TargetAccess);
}
/// <summary>
/// Copy constructor: initialises this instance from the values exposed by
/// <paramref name="GQR"/>.
/// </summary>
/// <remarks>
/// The base class is initialised with <c>GQR.ReturnAccessVector()</c>. Each field is
/// assigned exactly what the corresponding accessor on <paramref name="GQR"/>
/// returns; no additional cloning is performed here.
/// </remarks>
/// <param name="GQR">The grouping result to copy from.</param>
public GroupingQueryResult(GroupingQueryResult GQR)
    : base(GQR.ReturnAccessVector())
{
    // Descriptive information about the grouping.
    groupingName = GQR.GroupingName;
    groupingType = GQR.GroupingType;
    groupMemberCount = GQR.MemberCount;
    members = GQR.Members;
    aDGroupsRepresented = GQR.ADGroupsRepresented;

    // Vector data.
    accessSummaryVector = GQR.ReturnAccessVector();
    rawCountVector = GQR.ReturnRawCountVector();
    tF_IDFVector = GQR.ReturnTF_IDFVector();
}
/// <summary>
/// Produces a plain-text summary of a grouping: a dated header followed by one line
/// for every AD group whose access-vector entry is greater than zero.
/// </summary>
/// <param name="GQR">The grouping being summarised.</param>
/// <param name="AllADGroupsList">
/// AD group tuples, index-aligned with the grouping's access vector; Item1 is printed
/// as the group label.
/// </param>
/// <returns>The formatted summary text, terminated by two blank lines.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// The access vector and <paramref name="AllADGroupsList"/> differ in length.
/// </exception>
public static string ReturnFormattedGroupSummary(GroupingQueryResult GQR, List<Tuple<string, string>> AllADGroupsList)
{
    string Summary = DateTime.Now.ToLongDateString() + "\r\n";
    Summary += GQR.GroupingType + " = " + GQR.GroupingName.ToUpper() + "\r\n";
    Summary += $"{GQR.MemberCount} Members\r\n------------------------------------------------------------------------------------------------------------------------------\r\n\r\nGroup Representations:\r\n\r\n";

    Vector AccessShares = GQR.ReturnAccessVector();
    Vector RarityWeights = GQR.ReturnTF_IDFVector();

    if (AccessShares.Count != AllADGroupsList.Count)
    {
        throw new ArgumentOutOfRangeException();
    }

    for (int Position = 0; Position < AccessShares.Count; Position++)
    {
        if (AccessShares[Position] > 0)
        {
            // Column layout: label padded to 80 characters, percentage padded to 88.
            string Row = AllADGroupsList[Position].Item1.PadRight(80);
            Row += (AccessShares[Position] * 100).ToString("F2") + "%";
            Row = Row.PadRight(88);
            Row += $"(Weighted for rarity {RarityWeights[Position].ToString("F2")})\r\n";
            Summary += Row;
        }
    }

    Summary += "\r\n\r\n";
    return Summary;
}
/// <summary>
/// Produces a plain-text report for a grouping: the dated header and per-AD-group
/// representation lines, followed by the formatted details of every user in
/// <paramref name="UsersList"/> whose AccountName matches one of the grouping's members.
/// </summary>
/// <remarks>
/// Member sections are appended sequentially, in the order the names appear in
/// <c>GQR.Members</c> and, within a name, in <paramref name="UsersList"/> order.
/// The previous implementation appended to a shared string from inside
/// <c>Parallel.ForEach</c>, which could lose or reorder sections. One consequence of
/// running sequentially is that an exception from <c>ReturnFormattedPersonInfo</c>
/// now propagates directly rather than wrapped in an AggregateException.
/// </remarks>
/// <param name="UsersList">Users searched for the grouping's members.</param>
/// <param name="GQR">The grouping being reported; <c>Members</c> is split on ','.</param>
/// <param name="AllADGroupsList">
/// AD group tuples, index-aligned with the grouping's access vector; Item1 is printed
/// as the group label.
/// </param>
/// <returns>The formatted report text.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// The access vector and <paramref name="AllADGroupsList"/> differ in length.
/// </exception>
public static string ReturnFormattedGroupInfo(List<UserQueryResult> UsersList, GroupingQueryResult GQR, List<Tuple<string, string>> AllADGroupsList)
{
    string Header = DateTime.Now.ToLongDateString() + "\r\n" + GQR.GroupingType + " = " + GQR.GroupingName.ToUpper() + "\r\n" + $"{GQR.MemberCount} Members\r\n------------------------------------------------------------------------------------------------------------------------------\r\n\r\nGroup Representations:\r\n\r\n";

    Vector RefVect = GQR.ReturnAccessVector();
    Vector RefTFIDF = GQR.ReturnTF_IDFVector();
    if (RefVect.Count != AllADGroupsList.Count)
    {
        throw new ArgumentOutOfRangeException();
    }

    // Fully qualified so the block does not depend on a using directive for System.Text.
    System.Text.StringBuilder Report = new System.Text.StringBuilder(Header);

    for (int i = 0; i < RefVect.Count; i++)
    {
        if (RefVect[i] > 0)
        {
            // Column layout: label padded to 80 characters, percentage padded to 88.
            string Line = AllADGroupsList[i].Item1.PadRight(80);
            Line = Line + (RefVect[i] * 100).ToString("F2") + "%";
            Line = Line.PadRight(88);
            Report.Append(Line);
            Report.Append($"(Weighted for rarity {RefTFIDF[i].ToString("F2")})\r\n");
        }
    }

    Report.Append("\r\n\r\n");

    bool IsDescriptionGrouping = GQR.GroupingType == "Description";

    foreach (string Name in GQR.Members.Split(','))
    {
        // Entries after the first are usually written as ", name"; drop that single leading space.
        string TrueName = (Name.Length > 0 && Name[0] == ' ') ? Name.Substring(1) : Name;

        foreach (UserQueryResult UQR in UsersList)
        {
            if (UQR.AccountName == TrueName)
            {
                Report.Append(ReturnFormattedPersonInfo(UQR, AllADGroupsList, IsDescriptionGrouping));
            }
        }
    }

    return Report.ToString();
}
/// <summary>
/// Builds a recommendation template from the average access vector of
/// <paramref name="AllInputs"/>, zeroing every position whose average falls below
/// <paramref name="Threshold"/>.
/// </summary>
/// <remarks>
/// The runtime type of the first element selects how every element is read. Only
/// UserQueryResult, GroupingQueryResult, UserClusteringResult and
/// GroupingClusteringResult are summed; for any other type the vector stays at zero.
/// Accumulation is sequential: the earlier version reassigned the shared running
/// total from inside <c>Parallel.ForEach</c>, so concurrent iterations could
/// overwrite each other's contribution and produce a wrong average.
/// A TF-IDF weighted variant existed here only as commented-out code and is not
/// implemented.
/// </remarks>
/// <param name="AllInputs">Query results to average; must contain at least one element.</param>
/// <param name="AllGroupNamesAndDescriptions">Group name/description tuples passed through to <c>RecommendationString</c>.</param>
/// <param name="Threshold">Minimum average value a position needs in order to be kept.</param>
/// <returns>The text produced by <c>RecommendationString</c> for the thresholded vector.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="AllInputs"/> is empty.</exception>
/// <exception cref="InvalidCastException">An element is not of the same type as the first element.</exception>
public static string ReturnRecommendationString(List<QueryResult> AllInputs, List<Tuple<string, string>> AllGroupNamesAndDescriptions, double Threshold)
{
    if (AllInputs.Count == 0)
    {
        // Same exception type the list indexer raised before, now with a usable message.
        throw new ArgumentOutOfRangeException(nameof(AllInputs), "At least one query result is required to build a recommendation.");
    }

    Vector CountVector = new DenseVector(AllInputs[0].ReturnAccessVector().Count);
    Type InputType = AllInputs[0].GetType();

    // The casts are kept so each element is read through the same static type as before.
    if (InputType == typeof(RBACS.UserQueryResult))
    {
        foreach (QueryResult QR in AllInputs)
        {
            CountVector = (DenseVector)(CountVector + QR.ReturnAccessVector());
        }
    }
    else if (InputType == typeof(GroupingQueryResult))
    {
        foreach (QueryResult QR in AllInputs)
        {
            GroupingQueryResult GQR = (GroupingQueryResult)QR;
            CountVector = (DenseVector)(CountVector + GQR.ReturnAccessVector());
        }
    }
    else if (InputType == typeof(UserClusteringResult))
    {
        foreach (QueryResult QR in AllInputs)
        {
            UserClusteringResult UCR = (UserClusteringResult)QR;
            CountVector = (DenseVector)(CountVector + UCR.ReturnAccessVector());
        }
    }
    else if (InputType == typeof(GroupingClusteringResult))
    {
        foreach (QueryResult QR in AllInputs)
        {
            GroupingClusteringResult GCR = (GroupingClusteringResult)QR;
            CountVector = (DenseVector)(CountVector + GCR.ReturnAccessVector());
        }
    }

    // Turn the sum into a per-position average, then drop positions below the threshold.
    CountVector = (DenseVector)(CountVector / AllInputs.Count);
    for (int i = 0; i < CountVector.Count; i++)
    {
        if (CountVector[i] < Threshold)
        {
            CountVector[i] = 0;
        }
    }

    string TitleString = $"Template Recommended By Relative Count with Threshold {Threshold.ToString()}";
    return RecommendationString(CountVector, AllGroupNamesAndDescriptions, TitleString);
}
/// <summary>
/// Creates one <c>GroupRepresentationTFIDFResult</c> per entry of
/// <paramref name="GroupNames"/>, pairing the grouping's access-vector value with its
/// TF-IDF weight, and returns the rows whose percentage is above zero, ordered by name.
/// </summary>
/// <param name="QR">The grouping whose representation is being listed.</param>
/// <param name="AllQueries">Corpus handed to <c>CalculateTFIDFVector</c>.</param>
/// <param name="GroupNames">
/// Tuples index-aligned with the vectors; Item1 and Item2 are passed to the result
/// constructor.
/// </param>
/// <returns>Distinct rows with a positive percentage, sorted by <c>Name</c>.</returns>
public static List<GroupRepresentationTFIDFResult> QueryListToGroupRepresentationTFIDFList(GroupingQueryResult QR, List<GroupingQueryResult> AllQueries, List<Tuple<string, string>> GroupNames)
{
    ConcurrentBag<GroupRepresentationTFIDFResult> Rows = new ConcurrentBag<GroupRepresentationTFIDFResult>();
    DenseVector Weights = (DenseVector)CalculateTFIDFVector(QR, AllQueries);

    Parallel.For(0, GroupNames.Count, Position =>
    {
        Tuple<string, string> Entry = GroupNames[Position];
        Rows.Add(new GroupRepresentationTFIDFResult(
            Entry.Item1,
            Entry.Item2,
            QR.ReturnAccessVector()[Position],
            Weights[Position]));
    });

    // Percent is text with a one-character suffix; strip it before the numeric comparison.
    return Rows
        .Where(Row => Convert.ToDouble(Row.Percent.Substring(0, Row.Percent.Length - 1)) > 0)
        .Distinct()
        .OrderBy(Row => Row.Name)
        .ToList();
}
/// <summary>
/// Computes the Euclidean distance from this form's result to every other user or
/// grouping held by the parent form, and binds the nearest K (or all, when K is not a
/// valid integer) to the grid in ascending distance order.
/// </summary>
/// <remarks>
/// Groupings are compared on their access vectors when
/// <c>parentReference.ClusterByRelativeCount</c> is set, otherwise on their TF-IDF
/// vectors. When the form's result type is neither UserQueryResult nor
/// GroupingQueryResult nothing is computed or bound.
/// NOTE(review): as before, <c>AllKNNResults</c> is only refreshed when the list is
/// not truncated to K, and a K of zero or less yields an empty grid — confirm both
/// are intended.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void nearestNeighboursButton_Click(object sender, EventArgs e)
{
    nNDataGridView.AutoGenerateColumns = true;

    // Anything that does not parse as an Int32 means "show all"; TryParse replaces
    // the previous catch-everything block that was used as control flow.
    int K;
    bool KSpecified = int.TryParse(kTextBox.Text, out K);
    if (!KSpecified)
    {
        kTextBox.Text = "All";
        Application.DoEvents();
        K = -1;
    }

    bool IsUserResult = thisFormResultType == typeof(UserQueryResult);
    bool IsGroupingResult = !IsUserResult && thisFormResultType == typeof(GroupingQueryResult);

    List<iKNNResult> AllResults = new List<iKNNResult>();
    ConcurrentBag<iKNNResult> ResultBag = new ConcurrentBag<iKNNResult>();

    if (IsUserResult)
    {
        UserQueryResult UQR = (UserQueryResult)thisFormResult;
        Parallel.ForEach<UserQueryResult>(parentReference.UserQueryResults, CurrentUQR =>
        {
            // Skip the user the form was opened for.
            if (CurrentUQR.AccountName != UQR.AccountName)
            {
                UserKNNResult UKR = new UserKNNResult(CurrentUQR);
                double CurrentDistance = HelperFunctions.GetEuclideanDistance(UQR.ReturnAccessVector(), CurrentUQR.ReturnAccessVector());
                UKR.AssignKNNDistanceFromX(CurrentDistance);
                ResultBag.Add(UKR);
            }
        });

        AllResults = ResultBag.Cast<UserKNNResult>()
                              .OrderBy(o => o.Distance)
                              .Cast<iKNNResult>()
                              .ToList();
    }
    else if (IsGroupingResult)
    {
        GroupingQueryResult GQR = (GroupingQueryResult)thisFormResult;
        Parallel.ForEach<GroupingQueryResult>(parentReference.GroupingQueryResults, CurrentGQR =>
        {
            // Skip the grouping the form was opened for.
            if (CurrentGQR.GroupingName != GQR.GroupingName)
            {
                GroupingKNNResult GKR = new GroupingKNNResult(CurrentGQR);
                double CurrentDistance;
                if (parentReference.ClusterByRelativeCount)
                {
                    CurrentDistance = HelperFunctions.GetEuclideanDistance(GQR.ReturnAccessVector(), CurrentGQR.ReturnAccessVector());
                }
                else
                {
                    CurrentDistance = HelperFunctions.GetEuclideanDistance(GQR.ReturnTF_IDFVector(), CurrentGQR.ReturnTF_IDFVector());
                }

                GKR.AssignKNNDistanceFromX(CurrentDistance);
                ResultBag.Add(GKR);
            }
        });

        AllResults = ResultBag.Cast<GroupingKNNResult>()
                              .OrderBy(o => o.Distance)
                              .Cast<iKNNResult>()
                              .ToList();
    }

    // Decide which rows are shown: the first K when K is usable, otherwise everything.
    List<iKNNResult> ShownResults;
    if (KSpecified && K <= AllResults.Count)
    {
        ShownResults = AllResults.Take(K).ToList();
    }
    else
    {
        AllKNNResults = AllResults;
        ShownResults = AllResults;
    }

    // The report is built through its concrete type so the grid can see the
    // type-specific properties of the rows.
    if (IsUserResult)
    {
        UserKNNReport Report = new UserKNNReport(ShownResults.Cast<UserKNNResult>().ToList(), Ordering.Ascending);
        thisQueryReport = Report;
        thisBindingSource.DataSource = Report.QRList;
        nNDataGridView.DataSource = thisBindingSource;
    }
    else if (IsGroupingResult)
    {
        GroupingKNNReport Report = new GroupingKNNReport(ShownResults.Cast<GroupingKNNResult>().ToList(), Ordering.Ascending);
        thisQueryReport = Report;
        thisBindingSource.DataSource = Report.QRList;
        nNDataGridView.DataSource = thisBindingSource;
    }
}
/// <summary>
/// Clusters the supplied query results with the algorithm configured on the parent
/// form, runs it until it reports that it has stopped, and binds the per-item cluster
/// assignments to the grid.
/// </summary>
/// <remarks>
/// The runtime type of the first element decides whether the list is treated as users
/// or groupings. Groupings are clustered on their access vectors when
/// <c>RBACRef.ClusterByRelativeCount</c> is set, otherwise on their TF-IDF vectors.
/// Cluster members are mapped back to their source results through a lookup built
/// once per call; the earlier code copied and scanned the whole input list for every
/// member. When several results share a name the first one in
/// <paramref name="QRList"/> is used, as before.
/// </remarks>
/// <param name="QRList">Results to cluster; must contain at least one element.</param>
/// <param name="RBACRef">Parent form supplying the clustering configuration and status label.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="QRList"/> is empty.</exception>
public ClusteringOutput(List<QueryResult> QRList, RBAC RBACRef)
{
    if (QRList.Count == 0)
    {
        // Same exception type the list indexer raised before, now with a usable message.
        throw new ArgumentOutOfRangeException(nameof(QRList), "At least one query result is required for clustering.");
    }

    thisFormResultType = QRList[0].GetType();
    parentReference = RBACRef;

    bool IsUserInput = thisFormResultType == typeof(UserQueryResult);
    bool IsGroupingInput = !IsUserInput && thisFormResultType == typeof(GroupingQueryResult);

    // Build the (name, vector) pairs the algorithms consume.
    ConcurrentBag<Tuple<string, DenseVector>> InputList = new ConcurrentBag<Tuple<string, DenseVector>>();
    if (IsUserInput)
    {
        Parallel.ForEach<QueryResult>(QRList, QR =>
        {
            UserQueryResult UQR = (UserQueryResult)QR;
            InputList.Add(new Tuple<string, DenseVector>(UQR.AccountName, (DenseVector)UQR.ReturnAccessVector()));
        });
    }
    else if (IsGroupingInput)
    {
        Parallel.ForEach<QueryResult>(QRList, QR =>
        {
            GroupingQueryResult GQR = (GroupingQueryResult)QR;
            DenseVector VectorA;
            if (parentReference.ClusterByRelativeCount)
            {
                VectorA = (DenseVector)GQR.ReturnAccessVector();
            }
            else
            {
                VectorA = (DenseVector)GQR.ReturnTF_IDFVector();
            }

            InputList.Add(new Tuple<string, DenseVector>(GQR.GroupingName, VectorA));
        });
    }

    // The bag has no defined order, so sort by name before handing it to the algorithm.
    //add options on algo configtab on main form later
    if (parentReference.ClusteringAlgoType == typeof(HACAlgo))
    {
        thisAlgo = new HACAlgo(InputList.OrderBy(o => o.Item1).ToList(), parentReference.PreferredDistanceStyle, parentReference.HACStoppingMetric, parentReference.PreferredStoppingStyle);
    }
    else
    {
        thisAlgo = new KMeansPlusPlus(InputList.OrderBy(o => o.Item1).ToList(), parentReference.KMeansValue, parentReference.PreferredKMeansStoppingStyle, parentReference.KMeansMaxIter);
    }

    // Type.Name gives the bare class name regardless of namespace depth; the previous
    // ToString().Split('.')[1] only worked for a type exactly one namespace deep.
    string AlgoName = thisAlgo.GetType().Name;

    parentReference.statusLabelChanger($"Initialising {AlgoName}, please be patient");
    thisAlgo.InitialiseClusters();
    while (!thisAlgo.Stopped)
    {
        thisAlgo.IterateOnce();
        if (!thisAlgo.Stopped)
        {
            parentReference.statusLabelChanger($"Running {AlgoName}, iteration {thisAlgo.Iterator}");
        }
    }

    //set all centroids as means in case mapping of clusters is required further
    //down the line:
    if (QRList[0].ReturnAccessVector().Count > 500)
    {
        thisAlgo.SetCentroidsAsMeansHighDimensionality();
    }
    else
    {
        thisAlgo.SetCentroidsAsMeans();
    }

    parentReference.statusLabelChanger("Creating Data View");

    // Map every cluster member back to the query result it came from.
    ConcurrentBag<QueryResult> ResultsBag = new ConcurrentBag<QueryResult>();
    if (IsUserInput)
    {
        ILookup<string, UserQueryResult> UsersByName = QRList.Cast<UserQueryResult>().ToLookup(U => U.AccountName);
        foreach (Cluster Clust in thisAlgo.Clusters)
        {
            Parallel.ForEach<Tuple<string, DenseVector>>(Clust.MemberList, Member =>
            {
                UserQueryResult Target = UsersByName[Member.Item1].First();
                ResultsBag.Add(new UserClusteringResult(Target, Clust.ClusterID, Clust.ListPosition));
            });
        }

        clusteringResultList = ResultsBag.Cast<UserClusteringResult>().OrderBy(o => o.ClusterIndex).ToList<QueryResult>();
    }
    else if (IsGroupingInput)
    {
        ILookup<string, GroupingQueryResult> GroupingsByName = QRList.Cast<GroupingQueryResult>().ToLookup(G => G.GroupingName);
        foreach (Cluster Clust in thisAlgo.Clusters)
        {
            Parallel.ForEach<Tuple<string, DenseVector>>(Clust.MemberList, Member =>
            {
                GroupingQueryResult Target = GroupingsByName[Member.Item1].First();
                ResultsBag.Add(new GroupingClusteringResult(Target, Clust.ClusterID, Clust.ListPosition));
            });
        }

        clusteringResultList = ResultsBag.Cast<GroupingClusteringResult>().OrderBy(o => o.ClusterIndex).ToList<QueryResult>();
    }

    parentReference.statusLabelChanger("Idle");

    InitializeComponent();
    this.Text = $"Clustering Results from {thisAlgo.Iterator} Iterations, using {AlgoName}, {thisAlgo.Clusters.Count} Clusters";
    thisBindingSource = new BindingSource();

    if (IsUserInput)
    {
        //needs a bit of casting to allow datagridview to access type-specific public properties
        UserClusteringReport ReportReference = new UserClusteringReport(clusteringResultList.Cast<UserClusteringResult>().ToList(), Ordering.Ascending);
        thisQR = ReportReference;
        thisBindingSource.DataSource = ReportReference.QRList;
        clustersDataGridView.DataSource = thisBindingSource;
    }
    else
    {
        GroupingClusteringReport ReportReference = new GroupingClusteringReport(clusteringResultList.Cast<GroupingClusteringResult>().ToList(), Ordering.Ascending);
        thisQR = ReportReference;
        thisBindingSource.DataSource = ReportReference.QRList;
        clustersDataGridView.DataSource = thisBindingSource;
    }
}