/// <summary>
        /// Rebuilds <c>index</c> with one weight per token returned by <c>space.GetTokens(true, false)</c>.
        /// For every label, the class density of a token is the fraction of that label's documents
        /// for which <c>Contains(token)</c> is true. The stored weight is <c>Math.Log(C / CS)</c>, where
        /// <c>C</c> is the number of labels and <c>CS</c> is the sum of the token's class densities over
        /// all labels. A token that occurs in no labelled document gets weight 0.
        /// Returns immediately, leaving <c>index</c> untouched, when <c>IsEnabled</c> is false.
        /// </summary>
        /// <param name="space">Model that supplies the labels, the tokens and the documents of each label.</param>
        /// <param name="log">Not used by this implementation.</param>
        public override void PrepareTheModel(SpaceModel space, ILogBuilder log)
        {
            if (!IsEnabled)
            {
                return;
            }

            index.Clear();

            var labels = space.labels;
            var terms = space.GetTokens(true, false);

            // Fetch the documents of each label once, in label order, so the per-term loop below
            // can sum the class densities directly instead of materialising a terms x labels table.
            var documentsByLabel = new List<List<SpaceDocumentModel>>();
            foreach (SpaceLabel label in labels)
            {
                documentsByLabel.Add(space.GetDocumentsOfLabel(label.name));
            }

            Double classCount = labels.Count;

            foreach (String term in terms)
            {
                Double densitySum = 0;

                foreach (List<SpaceDocumentModel> documents in documentsByLabel)
                {
                    Int32 matching = documents.Count(x => x.Contains(term));

                    // matching > 0 implies documents.Count > 0, so the division below is safe.
                    if (matching > 0)
                    {
                        densitySum += Convert.ToDouble(matching) / Convert.ToDouble(documents.Count);
                    }
                }

                // Every class density is at most 1, so densitySum <= classCount and the logarithm is >= 0.
                index.Add(term, densitySum > 0 ? Math.Log(classCount / densitySum) : 0);
            }
        }
// Example #2
// 0
        /// <summary>
        /// Rebuilds <c>index</c> with one weight per token returned by <c>space.GetTokens(true, false)</c>.
        /// The weight of a token is <c>Math.Log(1 + N / CF)</c>, where <c>N</c> is the number of labels
        /// and <c>CF</c> is the number of distinct labels having at least one document whose
        /// <c>GetTokens(terms)</c> result contains the token. Tokens with <c>CF == 0</c> get weight 0.
        /// Returns immediately, leaving <c>index</c> untouched, when <c>IsEnabled</c> is false.
        /// </summary>
        /// <remarks>
        /// If the collection returned by <c>space.labels</c> holds labels named
        /// <c>SpaceLabel.UNKNOWN</c>, a message is written to <paramref name="log"/> and those labels
        /// are removed from that collection before anything is computed.
        /// </remarks>
        /// <param name="space">Model that supplies the labels, the tokens and the documents of each label.</param>
        /// <param name="log">Receives the notice about the UNKNOWN label.</param>
        public override void PrepareTheModel(SpaceModel space, ILogBuilder log)
        {
            if (!IsEnabled)
            {
                return;
            }

            index.Clear();

            var labels = space.labels;

            if (labels.Any(x => x.name == SpaceLabel.UNKNOWN))
            {
                log.log("Space labels include the UNKNOWN label!");
                labels.RemoveAll(x => x.name == SpaceLabel.UNKNOWN);
            }

            var terms = space.GetTokens(true, false);

            // One (initially empty) list of owning labels per known term.
            var labelsByTerm = new Dictionary<String, List<SpaceLabel>>();
            foreach (String term in terms)
            {
                labelsByTerm.Add(term, new List<SpaceLabel>());
            }

            foreach (SpaceLabel label in labels)
            {
                if (label.name == SpaceLabel.UNKNOWN)
                {
                    continue;
                }

                List<SpaceDocumentModel> labelDocuments = space.GetDocumentsOfLabel(label.name);
                foreach (SpaceDocumentModel document in labelDocuments)
                {
                    var documentTerms = document.GetTokens(terms);
                    for (int position = 0; position < documentTerms.Count; position++)
                    {
                        // Record the label once per term; tokens that are not known terms are skipped.
                        List<SpaceLabel> owners;
                        if (labelsByTerm.TryGetValue(documentTerms[position], out owners) && !owners.Contains(label))
                        {
                            owners.Add(label);
                        }
                    }
                }
            }

            Double labelCount = labels.Count;

            foreach (String term in terms)
            {
                // The weight stays 0 when the term has no owning label.
                Double weight = 0;

                List<SpaceLabel> owners;
                if (labelsByTerm.TryGetValue(term, out owners) && owners.Count != 0)
                {
                    Double classFrequency = owners.Count;
                    weight = Math.Log(1 + (labelCount / classFrequency));
                }

                index.Add(term, weight);
            }
        }