/// <summary>
/// Computes a TF-IDF style weighting of <paramref name="TargetGroup"/> against <paramref name="Corpus"/>.
/// </summary>
/// <remarks>
/// For each vector position the method counts the corpus entries, excluding any whose
/// GroupingName equals the target's, whose access vector is greater than zero at that position.
/// The count is turned into ln(Corpus.Count / (count + 0.1)) and the resulting vector is
/// multiplied pointwise with the target's access vector or raw-count vector.
/// </remarks>
/// <param name="TargetGroup">Grouping whose vector is being weighted.</param>
/// <param name="Corpus">All groupings used to derive the inverse-document-frequency term.</param>
/// <param name="ByRawCount">When true the target's raw-count vector is weighted; otherwise its access vector.</param>
/// <returns>The pointwise product of the IDF vector and the selected target vector.</returns>
public static Vector CalculateTFIDFVector(GroupingQueryResult TargetGroup, List <GroupingQueryResult> Corpus, bool ByRawCount = false)
        {
            Vector IDFVector = new DenseVector(TargetGroup.ReturnAccessVector().Count);

            // Fetch each comparison vector once instead of once per (index, corpus entry) pair.
            List <Vector> OtherVectors = new List <Vector>();
            foreach (GroupingQueryResult CurrentGQR in Corpus)
            {
                if (CurrentGQR.GroupingName != TargetGroup.GroupingName)
                {
                    OtherVectors.Add(CurrentGQR.ReturnAccessVector());
                }
            }

            // Each iteration writes only its own index, so the parallel loop needs no locking.
            Parallel.For(0, IDFVector.Count, i => {
                int DocumentCount = 0;
                foreach (Vector OtherVector in OtherVectors)
                {
                    if (OtherVector[i] > 0)
                    {
                        DocumentCount++;
                    }
                }
                //0.1 has been added to the denominator to prevent divide by zero issues
                IDFVector[i] = Math.Log(Corpus.Count / (DocumentCount + 0.1));
            });

            if (ByRawCount)
            {
                return((Vector)IDFVector.PointwiseMultiply(TargetGroup.ReturnRawCountVector()));
            }
            return((Vector)IDFVector.PointwiseMultiply(TargetGroup.ReturnAccessVector()));
        }
// Example #2
// 0
 /// <summary>
 /// Copy constructor: initialises this instance from the values exposed by <paramref name="GQR"/>.
 /// </summary>
 /// <remarks>
 /// Each field is assigned whatever the corresponding accessor on <paramref name="GQR"/> returns;
 /// no cloning is performed in this constructor.
 /// </remarks>
 /// <param name="GQR">The grouping result to copy from. Its access vector is also passed to the base constructor.</param>
 public GroupingQueryResult(GroupingQueryResult GQR) : base(GQR.ReturnAccessVector())
 {
     // Identity of the grouping.
     groupingName = GQR.GroupingName;
     groupingType = GQR.GroupingType;

     // Membership details.
     members             = GQR.Members;
     groupMemberCount    = GQR.MemberCount;
     aDGroupsRepresented = GQR.ADGroupsRepresented;

     // Vectors.
     accessSummaryVector = GQR.ReturnAccessVector();
     rawCountVector      = GQR.ReturnRawCountVector();
     tF_IDFVector        = GQR.ReturnTF_IDFVector();
 }
        /// <summary>
        /// Builds a plain-text summary of a grouping: a dated header followed by one line per AD group
        /// whose access-vector entry is greater than zero.
        /// </summary>
        /// <param name="GQR">The grouping to summarise.</param>
        /// <param name="AllADGroupsList">AD group tuples, index-aligned with the grouping's access vector; Item1 is printed as the row label.</param>
        /// <returns>The formatted summary, terminated by two blank lines.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown when <paramref name="AllADGroupsList"/> does not have the same number of entries as the access vector.
        /// </exception>
        public static string ReturnFormattedGroupSummary(GroupingQueryResult GQR, List <Tuple <string, string> > AllADGroupsList)
        {
            string Header    = DateTime.Now.ToLongDateString() + "\r\n" + GQR.GroupingType + " = " + GQR.GroupingName.ToUpper() + "\r\n" + $"{GQR.MemberCount} Members\r\n------------------------------------------------------------------------------------------------------------------------------\r\n\r\nGroup Representations:\r\n\r\n";
            Vector RefVect   = GQR.ReturnAccessVector();
            Vector RefTFIDF  = GQR.ReturnTF_IDFVector();

            if (RefVect.Count != AllADGroupsList.Count)
            {
                throw new ArgumentOutOfRangeException(nameof(AllADGroupsList), "The AD group list must contain exactly one entry per element of the grouping's access vector.");
            }

            // A builder avoids re-copying the whole report for every appended row.
            System.Text.StringBuilder Output = new System.Text.StringBuilder(Header);
            for (int i = 0; i < RefVect.Count; i++)
            {
                if (RefVect[i] > 0)
                {
                    // Column layout: name padded to 80 characters, percentage padded to 88, then the TF-IDF weight.
                    string Row = AllADGroupsList[i].Item1.PadRight(80) + (RefVect[i] * 100).ToString("F2") + "%";
                    Output.Append(Row.PadRight(88));
                    Output.Append($"(Weighted for rarity {RefTFIDF[i].ToString("F2")})\r\n");
                }
            }
            Output.Append("\r\n\r\n");
            return(Output.ToString());
        }
        /// <summary>
        /// Builds a plain-text report for a grouping: the group summary (header plus one line per
        /// represented AD group) followed by the formatted details of every listed member found in
        /// <paramref name="UsersList"/>.
        /// </summary>
        /// <param name="UsersList">Users to search for the grouping's members, matched on AccountName.</param>
        /// <param name="GQR">The grouping to report on; its Members value is split on commas to obtain the member names.</param>
        /// <param name="AllADGroupsList">AD group tuples, index-aligned with the grouping's access vector; Item1 is printed as the row label.</param>
        /// <returns>The formatted report.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown when <paramref name="AllADGroupsList"/> does not have the same number of entries as the access vector.
        /// </exception>
        public static string ReturnFormattedGroupInfo(List <UserQueryResult> UsersList, GroupingQueryResult GQR, List <Tuple <string, string> > AllADGroupsList)
        {
            string Header    = DateTime.Now.ToLongDateString() + "\r\n" + GQR.GroupingType + " = " + GQR.GroupingName.ToUpper() + "\r\n" + $"{GQR.MemberCount} Members\r\n------------------------------------------------------------------------------------------------------------------------------\r\n\r\nGroup Representations:\r\n\r\n";
            Vector RefVect   = GQR.ReturnAccessVector();
            Vector RefTFIDF  = GQR.ReturnTF_IDFVector();

            if (RefVect.Count != AllADGroupsList.Count)
            {
                throw new ArgumentOutOfRangeException(nameof(AllADGroupsList), "The AD group list must contain exactly one entry per element of the grouping's access vector.");
            }

            System.Text.StringBuilder Output = new System.Text.StringBuilder(Header);
            for (int i = 0; i < RefVect.Count; i++)
            {
                if (RefVect[i] > 0)
                {
                    // Column layout: name padded to 80 characters, percentage padded to 88, then the TF-IDF weight.
                    string Row = AllADGroupsList[i].Item1.PadRight(80) + (RefVect[i] * 100).ToString("F2") + "%";
                    Output.Append(Row.PadRight(88));
                    Output.Append($"(Weighted for rarity {RefTFIDF[i].ToString("F2")})\r\n");
                }
            }
            Output.Append("\r\n\r\n");

            bool IsDescriptionGrouping = GQR.GroupingType == "Description";
            foreach (string Name in GQR.Members.Split(','))
            {
                // Entries after the first carry a single leading space from the ", " separator.
                string TrueName = Name.StartsWith(" ", StringComparison.Ordinal) ? Name.Substring(1) : Name;

                // Sequential on purpose: appending to one shared buffer from parallel workers
                // loses updates and yields a non-deterministic section order.
                foreach (UserQueryResult UQR in UsersList)
                {
                    if (UQR.AccountName == TrueName)
                    {
                        Output.Append(ReturnFormattedPersonInfo(UQR, AllADGroupsList, IsDescriptionGrouping));
                    }
                }
            }
            return(Output.ToString());
        }
        /// <summary>
        /// Averages the access vectors of all inputs, zeroes every entry below <paramref name="Threshold"/>
        /// and formats the result as a recommendation via RecommendationString.
        /// </summary>
        /// <remarks>
        /// The concrete type of the first element decides how the list is interpreted; inputs whose first
        /// element is none of the four handled types contribute nothing to the sum.
        /// </remarks>
        /// <param name="AllInputs">Results to average. Must contain at least one element, because the first element is read unconditionally.</param>
        /// <param name="AllGroupNamesAndDescriptions">Group name/description tuples passed through to RecommendationString.</param>
        /// <param name="Threshold">Averaged entries strictly below this value are set to zero.</param>
        /// <returns>The string produced by RecommendationString for the thresholded average vector.</returns>
        public static string ReturnRecommendationString(List <QueryResult> AllInputs, List <Tuple <string, string> > AllGroupNamesAndDescriptions, double Threshold)
        {
            Vector CountVector = new DenseVector(AllInputs[0].ReturnAccessVector().Count);
            Type   InputType   = AllInputs[0].GetType();

            // The accumulation is sequential on purpose: "CountVector = CountVector + x" is an
            // unsynchronised read-modify-write, so running it in parallel silently drops contributions.
            if (InputType == typeof(RBACS.UserQueryResult))
            {
                foreach (QueryResult QR in AllInputs)
                {
                    CountVector = (DenseVector)(CountVector + QR.ReturnAccessVector());
                }
            }
            else if (InputType == typeof(GroupingQueryResult))
            {
                foreach (QueryResult QR in AllInputs)
                {
                    GroupingQueryResult GQR = (GroupingQueryResult)QR;
                    CountVector             = (DenseVector)(CountVector + GQR.ReturnAccessVector());
                }
            }
            else if (InputType == typeof(UserClusteringResult))
            {
                foreach (QueryResult QR in AllInputs)
                {
                    UserClusteringResult UCR = (UserClusteringResult)QR;
                    CountVector = (DenseVector)(CountVector + UCR.ReturnAccessVector());
                }
            }
            else if (InputType == typeof(GroupingClusteringResult))
            {
                foreach (QueryResult QR in AllInputs)
                {
                    GroupingClusteringResult GCR = (GroupingClusteringResult)QR;
                    CountVector = (DenseVector)(CountVector + GCR.ReturnAccessVector());
                }
            }

            CountVector = (DenseVector)(CountVector / AllInputs.Count);
            for (int i = 0; i < CountVector.Count; i++)
            {
                if (CountVector[i] < Threshold)
                {
                    CountVector[i] = 0;
                }
            }

            string TitleString = $"Template Recommended By Relative Count with Threshold {Threshold.ToString()}";

            return(RecommendationString(CountVector, AllGroupNamesAndDescriptions, TitleString));
        }
        /// <summary>
        /// Produces one <c>GroupRepresentationTFIDFResult</c> per entry of <paramref name="GroupNames"/>,
        /// pairing the grouping's access-vector value with its TF-IDF weight, and returns the entries
        /// with a positive percentage, de-duplicated and ordered by name.
        /// </summary>
        /// <param name="QR">The grouping being described.</param>
        /// <param name="AllQueries">Corpus passed to CalculateTFIDFVector.</param>
        /// <param name="GroupNames">Tuples whose Item1 and Item2 are handed to each result's constructor; index-aligned with the vectors.</param>
        /// <returns>The filtered results ordered by Name.</returns>
        public static List <GroupRepresentationTFIDFResult> QueryListToGroupRepresentationTFIDFList(GroupingQueryResult QR, List <GroupingQueryResult> AllQueries, List <Tuple <string, string> > GroupNames)
        {
            DenseVector Weights = (DenseVector)CalculateTFIDFVector(QR, AllQueries);
            ConcurrentBag <GroupRepresentationTFIDFResult> Collected = new ConcurrentBag <GroupRepresentationTFIDFResult>();

            // The bag is a concurrent collection, so entries can be added from parallel iterations.
            Parallel.For(0, GroupNames.Count, Position =>
            {
                Tuple <string, string> NameAndDescription = GroupNames[Position];
                Collected.Add(new GroupRepresentationTFIDFResult(NameAndDescription.Item1, NameAndDescription.Item2, QR.ReturnAccessVector()[Position], Weights[Position]));
            });

            // Percent is a string; its last character is stripped before numeric conversion
            // (presumably a trailing '%' sign - confirm against GroupRepresentationTFIDFResult).
            return(Collected
                   .Where(Entry => Convert.ToDouble(Entry.Percent.Substring(0, Entry.Percent.Length - 1)) > 0)
                   .Distinct()
                   .OrderBy(Entry => Entry.Name)
                   .ToList());
        }
        /// <summary>
        /// Click handler: ranks every other user or grouping by Euclidean distance from the result shown
        /// on this form and binds the nearest ones to the grid.
        /// </summary>
        /// <remarks>
        /// The number of neighbours is read from kTextBox. If the text is not a valid integer the box is
        /// reset to "All" and every result is shown. Groupings are compared on their access vectors when
        /// parentReference.ClusterByRelativeCount is set, otherwise on their TF-IDF vectors.
        /// </remarks>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void nearestNeighboursButton_Click(object sender, EventArgs e)
        {
            nNDataGridView.AutoGenerateColumns = true;

            // Parse without using exceptions for control flow; any non-integer text means "show all".
            int  K;
            bool KSpecified = int.TryParse(kTextBox.Text, out K);
            if (!KSpecified)
            {
                kTextBox.Text = "All";
                Application.DoEvents();
                K = -1;
            }

            List <iKNNResult>          AllResults = new List <iKNNResult>();
            ConcurrentBag <iKNNResult> ResultBag  = new ConcurrentBag <iKNNResult>();

            if (thisFormResultType == typeof(UserQueryResult))
            {
                UserQueryResult UQR = (UserQueryResult)thisFormResult;
                Parallel.ForEach <UserQueryResult>(parentReference.UserQueryResults, CurrentUQR => {
                    // Skip the user the form is showing.
                    if (CurrentUQR.AccountName != UQR.AccountName)
                    {
                        UserKNNResult UKR      = new UserKNNResult(CurrentUQR);
                        double CurrentDistance = HelperFunctions.GetEuclideanDistance(UQR.ReturnAccessVector(), CurrentUQR.ReturnAccessVector());
                        UKR.AssignKNNDistanceFromX(CurrentDistance);
                        ResultBag.Add(UKR);
                    }
                });
                AllResults      = ResultBag.Cast <UserKNNResult>().OrderBy(o => o.Distance).Cast <iKNNResult>().ToList();
                thisQueryReport = new UserKNNReport(AllResults.Cast <UserKNNResult>().ToList(), Ordering.Ascending);
            }
            else if (thisFormResultType == typeof(GroupingQueryResult))
            {
                GroupingQueryResult GQR = (GroupingQueryResult)thisFormResult;
                Parallel.ForEach <GroupingQueryResult>(parentReference.GroupingQueryResults, CurrentGQR => {
                    // Skip the grouping the form is showing.
                    if (CurrentGQR.GroupingName != GQR.GroupingName)
                    {
                        GroupingKNNResult GKR = new GroupingKNNResult(CurrentGQR);
                        double CurrentDistance = parentReference.ClusterByRelativeCount
                            ? HelperFunctions.GetEuclideanDistance(GQR.ReturnAccessVector(), CurrentGQR.ReturnAccessVector())
                            : HelperFunctions.GetEuclideanDistance(GQR.ReturnTF_IDFVector(), CurrentGQR.ReturnTF_IDFVector());
                        GKR.AssignKNNDistanceFromX(CurrentDistance);
                        ResultBag.Add(GKR);
                    }
                });
                AllResults      = ResultBag.Cast <GroupingKNNResult>().OrderBy(o => o.Distance).Cast <iKNNResult>().ToList();
                thisQueryReport = new GroupingKNNReport(AllResults.Cast <GroupingKNNResult>().ToList(), Ordering.Ascending);
            }

            if (KSpecified && K <= AllResults.Count)
            {
                // Keep only the K nearest; Take yields nothing for a zero or negative K.
                List <iKNNResult> Outlist = AllResults.Take(K).ToList();
                if (thisFormResultType == typeof(UserQueryResult))
                {
                    thisQueryReport = new UserKNNReport(Outlist.Cast <UserKNNResult>().ToList(), Ordering.Ascending);
                }
                else if (thisFormResultType == typeof(GroupingQueryResult))
                {
                    thisQueryReport = new GroupingKNNReport(Outlist.Cast <GroupingKNNResult>().ToList(), Ordering.Ascending);
                }
            }
            else
            {
                // NOTE(review): AllKNNResults is only refreshed when the list is not truncated - confirm this is intended.
                AllKNNResults = AllResults;
            }

            // The report is cast to its concrete type so the grid binds to the type-specific list.
            if (thisFormResultType == typeof(UserQueryResult))
            {
                UserKNNReport ReportPointer = (UserKNNReport)thisQueryReport;
                thisBindingSource.DataSource = ReportPointer.QRList;
                nNDataGridView.DataSource    = thisBindingSource;
            }
            else if (thisFormResultType == typeof(GroupingQueryResult))
            {
                GroupingKNNReport ReportPointer = (GroupingKNNReport)thisQueryReport;
                thisBindingSource.DataSource = ReportPointer.QRList;
                nNDataGridView.DataSource    = thisBindingSource;
            }
        }
        /// <summary>
        /// Runs the clustering algorithm selected on <paramref name="RBACRef"/> over <paramref name="QRList"/>,
        /// wraps each clustered item in a clustering result, then initialises the form and binds the results to the grid.
        /// </summary>
        /// <remarks>
        /// The first element of <paramref name="QRList"/> is read unconditionally to determine the result type,
        /// so the list must contain at least one element.
        /// </remarks>
        /// <param name="QRList">Query results to cluster; the type of the first element decides how the whole list is treated.</param>
        /// <param name="RBACRef">Parent object supplying the algorithm settings and receiving status-label updates.</param>
        public ClusteringOutput(List <QueryResult> QRList, RBAC RBACRef)
        {
            thisFormResultType = QRList[0].GetType();
            parentReference    = RBACRef;
            // Collects (name, vector) pairs; a concurrent bag is used because the pairs are added from parallel loops.
            ConcurrentBag <Tuple <string, DenseVector> > InputList = new ConcurrentBag <Tuple <string, DenseVector> >();

            if (QRList[0].GetType() == typeof(UserQueryResult))
            {
                // Users are keyed by account name and clustered on their access vector.
                Parallel.ForEach <QueryResult>(QRList, QR =>
                {
                    UserQueryResult UQR = (UserQueryResult)QR;
                    Tuple <string, DenseVector> TupleIn = new Tuple <string, DenseVector>(UQR.AccountName, (DenseVector)UQR.ReturnAccessVector());
                    InputList.Add(TupleIn);
                });
            }
            else if (QRList[0].GetType() == typeof(GroupingQueryResult))
            {
                // Groupings are keyed by grouping name and clustered on either the access vector
                // or the TF-IDF vector, depending on parentReference.ClusterByRelativeCount.
                Parallel.ForEach <QueryResult>(QRList, QR =>
                {
                    GroupingQueryResult GQR = (GroupingQueryResult)QR;
                    DenseVector VectorA;
                    if (parentReference.ClusterByRelativeCount)
                    {
                        VectorA = (DenseVector)GQR.ReturnAccessVector();
                    }
                    else
                    {
                        VectorA = (DenseVector)GQR.ReturnTF_IDFVector();
                    }
                    Tuple <string, DenseVector> TupleIn = new Tuple <string, DenseVector>(GQR.GroupingName, VectorA);
                    InputList.Add(TupleIn);
                });
            }
            else
            {
            }
            //add options on algo configtab on main form later
            // The input is sorted by name before being handed to the algorithm, so the bag's
            // arbitrary insertion order does not reach it.
            if (parentReference.ClusteringAlgoType == typeof(HACAlgo))
            {
                thisAlgo = new HACAlgo(InputList.OrderBy(o => o.Item1).ToList(), parentReference.PreferredDistanceStyle, parentReference.HACStoppingMetric, parentReference.PreferredStoppingStyle);
            }
            else
            {
                thisAlgo = new KMeansPlusPlus(InputList.OrderBy(o => o.Item1).ToList(), parentReference.KMeansValue, parentReference.PreferredKMeansStoppingStyle, parentReference.KMeansMaxIter);
            }
            // The status text uses the second dot-separated segment of the algorithm's full type name.
            // NOTE(review): this assumes a "Namespace.Type" shaped name - confirm the namespace is single-level.
            parentReference.statusLabelChanger($"Initialising {thisAlgo.GetType().ToString().Split('.')[1]}, please be patient");
            thisAlgo.InitialiseClusters();

            // Iterate until the algorithm reports that it has stopped, updating the status label between iterations.
            while (!thisAlgo.Stopped)
            {
                thisAlgo.IterateOnce();
                if (!thisAlgo.Stopped)
                {
                    parentReference.statusLabelChanger($"Running {thisAlgo.GetType().ToString().Split('.')[1]}, iteration {thisAlgo.Iterator}");
                }
            }
            //set all centroids as means in case mapping of clusters is required further
            //down the line:

            // Vectors with more than 500 elements go through the high-dimensionality variant.
            if (QRList[0].ReturnAccessVector().Count > 500)
            {
                thisAlgo.SetCentroidsAsMeansHighDimensionality();
            }
            else
            {
                thisAlgo.SetCentroidsAsMeans();
            }
            parentReference.statusLabelChanger("Creating Data View");

            ConcurrentBag <QueryResult> ResultsBag = new ConcurrentBag <QueryResult>();

            if (thisFormResultType == typeof(UserQueryResult))
            {
                foreach (Cluster Clust in thisAlgo.Clusters)
                {
                    // Map each cluster member back to its query result by account name, taking the first match.
                    // The [0] index throws if no result carries that name.
                    Parallel.ForEach <Tuple <string, DenseVector> >(Clust.MemberList, Member => {
                        UserQueryResult Target = (UserQueryResult)(from UQR in QRList.Cast <UserQueryResult>() where UQR.AccountName == Member.Item1 select UQR).ToList()[0];
                        ResultsBag.Add(new UserClusteringResult(Target, Clust.ClusterID, Clust.ListPosition));
                    });
                }
                clusteringResultList = ResultsBag.Cast <UserClusteringResult>().OrderBy(o => o.ClusterIndex).ToList <QueryResult>();
            }
            else if (thisFormResultType == typeof(GroupingQueryResult))
            {
                foreach (Cluster Clust in thisAlgo.Clusters)
                {
                    // Same lookup as the user branch, keyed on grouping name.
                    Parallel.ForEach <Tuple <string, DenseVector> >(Clust.MemberList, Member => {
                        GroupingQueryResult Target = (GroupingQueryResult)(from GQR in QRList.Cast <GroupingQueryResult>().ToList() where GQR.GroupingName == Member.Item1 select GQR).ToList <GroupingQueryResult>()[0];
                        ResultsBag.Add(new GroupingClusteringResult(Target, Clust.ClusterID, Clust.ListPosition));
                    });
                }
                clusteringResultList = ResultsBag.Cast <GroupingClusteringResult>().OrderBy(o => o.ClusterIndex).ToList <QueryResult>();
            }
            else
            {
            }
            parentReference.statusLabelChanger("Idle");

            // The form's controls are only created here, after clustering has finished.
            InitializeComponent();
            this.Text = $"Clustering Results from {thisAlgo.Iterator} Iterations, using {thisAlgo.GetType().ToString().Split('.')[1]}, {thisAlgo.Clusters.Count} Clusters";


            thisBindingSource = new BindingSource();
            if (thisFormResultType == typeof(UserQueryResult))
            {
                //needs a bit of casting to allow datagridview to access type-specific public properties
                thisQR = new UserClusteringReport(clusteringResultList.Cast <UserClusteringResult>().ToList(), Ordering.Ascending);
                UserClusteringReport ReportReference = (UserClusteringReport)thisQR;
                thisBindingSource.DataSource    = ReportReference.QRList;
                clustersDataGridView.DataSource = thisBindingSource;
            }
            else
            {
                // Every non-user result type is treated as a grouping result here.
                thisQR = new GroupingClusteringReport(clusteringResultList.Cast <GroupingClusteringResult>().ToList(), Ordering.Ascending);
                GroupingClusteringReport ReportReference = (GroupingClusteringReport)thisQR;
                thisBindingSource.DataSource    = ReportReference.QRList;
                clustersDataGridView.DataSource = thisBindingSource;
            }
        }