Пример #1
0
        /// <summary>
        /// Prepares the model: ensures a <c>CWPAnalysis</c> instance exists (reusing the one already set,
        /// or creating and preparing a new one), runs the analysis and stores the score of every token
        /// of the space in <c>index</c>.
        /// </summary>
        /// <param name="space">The space model whose tokens are scored.</param>
        /// <param name="log">Log builder receiving the progress messages.</param>
        public override void PrepareTheModel(SpaceModel space, ILogBuilder log)
        {
            FeatureCWPAnalysisSettings CWPSettings = new FeatureCWPAnalysisSettings(computation, FeatureCWPAnalysisSettings.AnalysisPurpose.application);

            if (CWPAnalysis != null)
            {
                // An analysis instance is already set: keep it and only push the current settings into it.
                log.log("Shared CWPAnalysis in use at " + shortName);
                CWPAnalysis.settings.DeployUpdate(CWPSettings);
            }
            else
            {
                log.log("New CWPAnalysis will be used at " + shortName);
                CWPAnalysis = new FeatureCWPAnalysis(CWPSettings);
                CWPAnalysis.Prepare(space, null);
            }

            CWPAnalysis.Analysis(null, log);

            // Drop the entries of any previous preparation; without this a second call would throw
            // on the first token that is already present in the index.
            index.Clear();

            foreach (String term in space.GetTokens(true, false))
            {
                index.Add(term, GetScore(term));
            }
        }
Пример #2
0
        /// <summary>
        /// Prepares the model: every token of the space gets the weight <c>log(C / CS)</c>, where C is the
        /// number of labels and CS is the sum, over all labels, of the share of the label's documents that
        /// contain the token. Tokens found in no labeled document keep the weight 0.
        /// Does nothing when the model is not enabled.
        /// </summary>
        /// <param name="space">The space model providing labels, tokens and documents.</param>
        /// <param name="log">Log builder (not used by this implementation).</param>
        public override void PrepareTheModel(SpaceModel space, ILogBuilder log)
        {
            if (!IsEnabled)
            {
                return;
            }

            index.Clear();

            var labels = space.labels;
            var terms = space.GetTokens(true, false);

            // term -> (label -> share of the label's documents containing the term)
            var densityByTerm = new Dictionary<string, Dictionary<SpaceLabel, double>>();

            // Start every term with a zero density for each label and a zero weight.
            foreach (string term in terms)
            {
                var perLabel = new Dictionary<SpaceLabel, double>();
                foreach (SpaceLabel label in labels)
                {
                    perLabel.Add(label, 0);
                }

                densityByTerm.Add(term, perLabel);
                index.Add(term, 0);
            }

            // Fill in the densities label by label.
            foreach (SpaceLabel label in labels)
            {
                List<SpaceDocumentModel> labelDocuments = space.GetDocumentsOfLabel(label.name);
                int labelDocumentCount = labelDocuments.Count;

                foreach (string term in terms)
                {
                    int containing = labelDocuments.Count(x => x.Contains(term));
                    if (containing <= 0)
                    {
                        continue;
                    }

                    double density = (double)containing / labelDocumentCount;
                    if (density > 0)
                    {
                        densityByTerm[term][label] = density;
                    }
                }
            }

            double labelCount = labels.Count;

            // Turn the summed densities into the final weight.
            foreach (string term in terms)
            {
                Dictionary<SpaceLabel, double> perLabel;
                if (!densityByTerm.TryGetValue(term, out perLabel))
                {
                    continue;
                }

                double densitySum = 0;
                foreach (SpaceLabel label in labels)
                {
                    double density = perLabel[label];
                    if (density > 0)
                    {
                        densitySum += density;
                    }
                }

                if (densitySum > 0 && index.ContainsKey(term))
                {
                    index[term] = Math.Log(labelCount / densitySum);
                }
            }
        }
Пример #3
0
        /// <summary>
        /// Prepares the model: counts, for every token of the space, the documents that contain it (DF)
        /// and the number of visited documents (N), then stores a weight per token according to
        /// <c>Computation</c>:
        /// <c>logPlus</c> = ln(N / DF) + 1, <c>modified</c> = ln(N*N - (N - DF) + N), <c>DF</c> = DF / N.
        /// Tokens with DF = 0, and any other computation mode, get the weight 0.
        /// Does nothing when the model is not enabled.
        /// </summary>
        /// <param name="space">The space model providing labels, tokens and documents.</param>
        /// <param name="log">Log builder (not used by this implementation).</param>
        public override void PrepareTheModel(SpaceModel space, ILogBuilder log)
        {
            if (!IsEnabled)
            {
                return;
            }

            index.Clear();

            // DocumentN is a member that is only ever incremented below. Reset it together with the index,
            // otherwise preparing the model a second time would keep adding to the old count and inflate N.
            DocumentN = 0;

            if (Computation == IDFComputation.DF)
            {
                shortName = "DF";
            }

            Dictionary<String, List<SpaceDocumentModel>> TermToDocumentIndex = new Dictionary<String, List<SpaceDocumentModel>>();

            List<SpaceLabel> labels = space.labels.ToList();

            var terms = space.GetTokens(true, true);

            foreach (String term in terms)
            {
                TermToDocumentIndex.Add(term, new List<SpaceDocumentModel>());
            }

            foreach (SpaceLabel label in labels)
            {
                // NOTE(review): a document linked to several labels is visited (and counted) once per label.
                foreach (SpaceDocumentModel document in space.LabelToDocumentLinks.GetAllLinked(label))
                {
                    var termsInDocument = document.GetTokens(terms);

                    for (int i = 0; i < termsInDocument.Count; i++)
                    {
                        List<SpaceDocumentModel> documentsWithTerm;
                        if (TermToDocumentIndex.TryGetValue(termsInDocument[i], out documentsWithTerm))
                        {
                            documentsWithTerm.Add(document);
                        }
                    }

                    DocumentN++;
                }
            }

            Double N = DocumentN;

            foreach (String term in terms)
            {
                Double DF_t  = TermToDocumentIndex[term].Count;
                Double IDF_t = 0;

                // DF_t == 0 would divide by zero below; such terms simply keep the weight 0.
                if (DF_t != 0)
                {
                    switch (Computation)
                    {
                    case IDFComputation.logPlus:
                        IDF_t = Math.Log(N / DF_t) + 1;
                        break;

                    case IDFComputation.modified:
                        IDF_t = Math.Log((N * N) - (N - DF_t) + N);
                        break;

                    case IDFComputation.DF:
                        IDF_t = DF_t / N;
                        break;
                    }
                }

                index.Add(term, IDF_t);
            }
        }
Пример #4
0
        /// <summary>
        /// Prepares the model: for every token of the space counts the labels (CF) that have at least one
        /// document containing the token and stores the weight <c>ln(1 + N / CF)</c>, where N is the number
        /// of labels excluding <c>SpaceLabel.UNKNOWN</c>. Tokens with CF = 0 get the weight 0.
        /// Does nothing when the model is not enabled.
        /// </summary>
        /// <param name="space">The space model providing labels, tokens and documents. Its label list is not modified.</param>
        /// <param name="log">Log builder; receives a message when the UNKNOWN label is present.</param>
        public override void PrepareTheModel(SpaceModel space, ILogBuilder log)
        {
            if (!IsEnabled)
            {
                return;
            }

            index.Clear();

            // Work on a copy: removing the UNKNOWN label from space.labels itself would permanently
            // change the shared space model for every other consumer.
            List<SpaceLabel> labels = space.labels.ToList();

            if (labels.Any(x => x.name == SpaceLabel.UNKNOWN))
            {
                log.log("Space labels include the UNKNOWN label!");
                labels.RemoveAll(x => x.name == SpaceLabel.UNKNOWN);
            }

            Dictionary<String, List<SpaceLabel>> TermToLabelIndex = new Dictionary<String, List<SpaceLabel>>();

            var terms = space.GetTokens(true, false);

            foreach (String term in terms)
            {
                TermToLabelIndex.Add(term, new List<SpaceLabel>());
            }

            // The UNKNOWN label was filtered out above, so every remaining label is processed.
            foreach (SpaceLabel label in labels)
            {
                List<SpaceDocumentModel> documents = space.GetDocumentsOfLabel(label.name);
                foreach (SpaceDocumentModel document in documents)
                {
                    var termsInDocument = document.GetTokens(terms);
                    for (int i = 0; i < termsInDocument.Count; i++)
                    {
                        List<SpaceLabel> labelsWithTerm;
                        if (TermToLabelIndex.TryGetValue(termsInDocument[i], out labelsWithTerm))
                        {
                            // Each label is recorded once per term, however many documents contain it.
                            if (!labelsWithTerm.Contains(label))
                            {
                                labelsWithTerm.Add(label);
                            }
                        }
                    }
                }
            }

            Double N = labels.Count;

            foreach (String term in terms)
            {
                Double ICF_t = 0;

                List<SpaceLabel> labelsWithTerm;
                if (TermToLabelIndex.TryGetValue(term, out labelsWithTerm) && labelsWithTerm.Count > 0)
                {
                    Double CF_t = labelsWithTerm.Count;
                    ICF_t = Math.Log(1 + (N / CF_t));
                }

                index.Add(term, ICF_t);
            }
        }
Пример #5
0
        /// <summary>
        /// Prepares the model: for every label/term pair fills the term discriminating power counts
        /// (a/b = documents of the label with/without the term, c/d = documents of all other labels
        /// with/without the term), computes the model via <c>GetComputedModel(factor, N)</c> and stores,
        /// for every term, the per-label factors compressed into a single value with <c>defaultOperation</c>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the check that rejected documents assigned to more than one label is not active
        /// in this implementation, so no <see cref="System.ArgumentException"/> is thrown here; a document
        /// linked to several labels is counted for each of them. Labels are used exactly as returned by
        /// <c>space.labels</c> (no UNKNOWN-label filtering) - confirm this is intended.
        /// </remarks>
        /// <param name="space">The space.</param>
        /// <param name="log">The log builder (not used by this implementation).</param>
        public override void PrepareTheModel(SpaceModel space, ILogBuilder log)
        {
            List<String>     terms  = space.GetTokens(true, false);
            List<SpaceLabel> labels = space.labels;

            // Materialized once: the names are enumerated again for every term further below.
            var labelNames = labels.Select(x => x.name).ToList();

            TermDiscriminatingPowerModel model = new TermDiscriminatingPowerModel();

            model.PrepareBlank(labelNames, terms);

            // label name -> documents linked to the label / documents linked to any other label
            Dictionary<String, List<SpaceDocumentModel>> documentDict         = new Dictionary<String, List<SpaceDocumentModel>>();
            Dictionary<String, List<SpaceDocumentModel>> documentNegativeDict = new Dictionary<String, List<SpaceDocumentModel>>();

            foreach (SpaceLabel label in labels)
            {
                documentDict.Add(label.name, space.LabelToDocumentLinks.GetAllLinked(label));
                documentNegativeDict.Add(label.name, new List<SpaceDocumentModel>());
            }

            // N is a member that is accumulated with += below; reset it so that preparing the model
            // again does not add the new document count on top of the previous one.
            N = 0;

            foreach (KeyValuePair<String, List<SpaceDocumentModel>> pair in documentDict)
            {
                N += pair.Value.Count;
                foreach (KeyValuePair<String, List<SpaceDocumentModel>> pairSub in documentDict)
                {
                    if (pair.Key != pairSub.Key)
                    {
                        documentNegativeDict[pair.Key].AddRange(pairSub.Value);
                    }
                }
            }

            // Each label only writes to its own entries of the model; the dictionaries are only read.
            Parallel.ForEach(labels, label =>
            {
                List<SpaceDocumentModel> positiveDocuments = documentDict[label.name];
                List<SpaceDocumentModel> negativeDocuments = documentNegativeDict[label.name];

                foreach (String term in terms)
                {
                    TermDiscriminatingPower TDP = model[label.name][term];
                    TDP.a = positiveDocuments.Count(x => x.Contains(term));
                    TDP.b = positiveDocuments.Count - TDP.a;

                    TDP.c = negativeDocuments.Count(x => x.Contains(term));
                    TDP.d = negativeDocuments.Count - TDP.c;
                }
            });

            computedModel = model.GetComputedModel(factor, N);

            // Built aside and assigned at the end, so the index is replaced in one step.
            Dictionary<String, Double> tempIndex = new Dictionary<String, Double>();

            foreach (String term in terms)
            {
                List<Double> scores = new List<Double>();

                foreach (String ln in labelNames)
                {
                    scores.Add(GetElementFactor(term, ln));
                }

                tempIndex.Add(term, operationExtensions.CompressNumericVector(scores.ToArray(), defaultOperation));
            }

            index = tempIndex;
        }
Пример #6
0
        /// <summary>
        /// Prepares the model - computes the IGM based weight for each term of the space:
        /// the per-label frequencies of the term are ranked in descending order and the weight is
        /// <c>1 + l * (1 / sum(f_r / f_1 * r))</c>, with f_1 the highest frequency and r the 1-based rank.
        /// Terms that never occur get the weight 1. Does nothing when the model is not enabled.
        /// </summary>
        /// <param name="space">The space.</param>
        /// <param name="log">The log builder (not used by this implementation).</param>
        /// <exception cref="ArgumentException">A document is already assigned to a label! This model is not applicable for multi-label problem.</exception>
        public override void PrepareTheModel(SpaceModel space, ILogBuilder log)
        {
            if (!IsEnabled)
            {
                return;
            }

            index.Clear();

            var labels = space.labels.ToList();
            var terms = space.GetTokens(true, false);

            // term -> (label -> summed frequency of the term in the label's documents)
            var frequencyByTerm = new Dictionary<string, Dictionary<SpaceLabel, int>>();

            // Used only to detect a document that is linked to more than one label.
            var labelOfDocument = new Dictionary<SpaceDocumentModel, SpaceLabel>();

            foreach (string term in terms)
            {
                var perLabel = new Dictionary<SpaceLabel, int>();
                foreach (SpaceLabel label in labels)
                {
                    perLabel.Add(label, 0);
                }

                frequencyByTerm.Add(term, perLabel);
                index.Add(term, 0);
            }

            foreach (SpaceLabel label in labels)
            {
                List<SpaceDocumentModel> documents = space.LabelToDocumentLinks.GetAllLinked(label);

                // First pass: reject multi-label assignments before any frequency of this label is counted.
                foreach (SpaceDocumentModel document in documents)
                {
                    if (labelOfDocument.ContainsKey(document))
                    {
                        throw new ArgumentException("A document [" + document.name + "] is already assigned to a label! This model is not applicable for multi-label problem.");
                    }

                    labelOfDocument.Add(document, label);
                }

                // Second pass: accumulate the term frequencies of the label's documents.
                foreach (SpaceDocumentModel document in documents)
                {
                    var documentTerms = document.GetTerms(true, true, true);

                    foreach (String term in documentTerms.GetTokens())
                    {
                        Dictionary<SpaceLabel, int> perLabel;
                        if (frequencyByTerm.TryGetValue(term, out perLabel))
                        {
                            perLabel[label] += documentTerms.GetTokenFrequency(term);
                        }
                    }
                }
            }

            foreach (string term in terms)
            {
                List<KeyValuePair<SpaceLabel, int>> ranked = frequencyByTerm[term].OrderByDescending(x => x.Value).ToList();

                double topFrequency = ranked.Max(x => x.Value);

                // Sum of (frequency / top frequency) * rank over all labels where the term occurs.
                double weightedSum = 0;
                for (int position = 0; position < ranked.Count; position++)
                {
                    int frequency = ranked[position].Value;
                    if (frequency > 0)
                    {
                        weightedSum += (Convert.ToDouble(frequency) / topFrequency) * (position + 1);
                    }
                }

                // A term that never occurs has no ranks to weigh; its inverse stays 0.
                double inverse = weightedSum == 0 ? 0 : 1 / weightedSum;

                index[term] = 1 + (l * inverse);
            }
        }