/// <summary>
/// Prepares the model: makes sure a CWP analysis instance is available (a shared one is
/// re-configured, otherwise a new one is created and prepared on the space), runs the
/// analysis and stores the score of every token of the space in the index.
/// </summary>
/// <param name="space">The space model providing the tokens to be scored.</param>
/// <param name="log">Log builder receiving the progress messages.</param>
public override void PrepareTheModel(SpaceModel space, ILogBuilder log)
{
    var analysisSettings = new FeatureCWPAnalysisSettings(computation, FeatureCWPAnalysisSettings.AnalysisPurpose.application);

    if (CWPAnalysis == null)
    {
        // No analysis instance was supplied: create one and prepare it on this space.
        log.log("New CWPAnalysis will be used at " + shortName);
        CWPAnalysis = new FeatureCWPAnalysis(analysisSettings);
        CWPAnalysis.Prepare(space, null);
    }
    else
    {
        // An analysis instance already exists: only push the settings of this element into it.
        log.log("Shared CWPAnalysis in use at " + shortName);
        CWPAnalysis.settings.DeployUpdate(analysisSettings);
    }

    CWPAnalysis.Analysis(null, log);

    var tokens = space.GetTokens(true, false);
    foreach (String token in tokens)
    {
        var score = GetScore(token);
        index.Add(token, score);
    }
}
/// <summary>
/// Prepares the model: for every term computes, per label, the fraction of the label's
/// documents that contain the term, sums these fractions over all labels (CS) and stores
/// log(C / CS) in the index, where C is the number of labels. Terms with CS = 0 keep weight 0.
/// </summary>
/// <param name="space">The space model providing labels, documents and tokens.</param>
/// <param name="log">Log builder; not used by this implementation.</param>
public override void PrepareTheModel(SpaceModel space, ILogBuilder log)
{
    if (!IsEnabled)
    {
        return;
    }

    index.Clear();

    var labels = space.labels;
    var terms = space.GetTokens(true, false);

    // term -> (label -> share of the label's documents containing the term)
    var densityByTerm = new Dictionary<String, Dictionary<SpaceLabel, Double>>();

    foreach (String term in terms)
    {
        densityByTerm.Add(term, labels.ToDictionary(label => label, label => 0.0));
        index.Add(term, 0);
    }

    foreach (SpaceLabel label in labels)
    {
        List<SpaceDocumentModel> labelDocuments = space.GetDocumentsOfLabel(label.name);
        Double totalDocuments = labelDocuments.Count;

        foreach (String term in terms)
        {
            Int32 matchingDocuments = labelDocuments.Count(x => x.Contains(term));
            if (matchingDocuments <= 0)
            {
                continue;
            }

            Double density = (Double)matchingDocuments / totalDocuments;
            if (density > 0)
            {
                densityByTerm[term][label] = density;
            }
        }
    }

    Double labelCount = labels.Count;

    foreach (String term in terms)
    {
        Dictionary<SpaceLabel, Double> densities;
        if (!densityByTerm.TryGetValue(term, out densities))
        {
            continue;
        }

        // Summed in label order so the floating-point result is reproducible.
        Double densitySum = 0;
        foreach (SpaceLabel label in labels)
        {
            Double density = densities[label];
            if (density > 0)
            {
                densitySum = densitySum + density;
            }
        }

        if (densitySum > 0 && index.ContainsKey(term))
        {
            index[term] = Math.Log(labelCount / densitySum);
        }
    }
}
/// <summary>
/// Prepares the model: counts, for every term of the space, the number of label-linked
/// documents that report the term and stores the resulting weight in the index.
/// </summary>
/// <remarks>
/// With N = number of visited documents and DF = document count of the term:
/// <c>logPlus</c> stores log(N / DF) + 1, <c>modified</c> stores log(N * N - (N - DF) + N)
/// and <c>DF</c> stores DF / N. Terms with DF = 0, and any other computation mode,
/// receive weight 0. A document is visited once per label it is linked to.
/// </remarks>
/// <param name="space">The space model providing labels, documents and tokens.</param>
/// <param name="log">Log builder; not used by this implementation.</param>
public override void PrepareTheModel(SpaceModel space, ILogBuilder log)
{
    if (!IsEnabled)
    {
        return;
    }

    index.Clear();

    // The document counter is a member that is incremented below. Without this reset a
    // repeated call would keep the count of the previous run and distort N.
    DocumentN = 0;

    if (Computation == IDFComputation.DF)
    {
        shortName = "DF";
    }

    List<SpaceLabel> labels = space.labels.ToList();
    var terms = space.GetTokens(true, true);

    // Only the number of documents per term is needed, so a counter replaces a document list.
    Dictionary<String, Int32> documentFrequency = new Dictionary<String, Int32>();
    foreach (String term in terms)
    {
        documentFrequency.Add(term, 0);
    }

    foreach (SpaceLabel label in labels)
    {
        foreach (SpaceDocumentModel document in space.LabelToDocumentLinks.GetAllLinked(label))
        {
            var termsInDocument = document.GetTokens(terms);

            for (int i = 0; i < termsInDocument.Count; i++)
            {
                String token = termsInDocument[i];
                Int32 seen;
                if (documentFrequency.TryGetValue(token, out seen))
                {
                    documentFrequency[token] = seen + 1;
                }
            }

            DocumentN++;
        }
    }

    Double N = DocumentN;

    foreach (String term in terms)
    {
        Double DF_t = documentFrequency[term];
        Double IDF_t = 0;

        // DF_t != 0 implies at least one document was visited, so N is non-zero here.
        if (DF_t != 0)
        {
            switch (Computation)
            {
                case IDFComputation.logPlus:
                    IDF_t = Math.Log(N / DF_t) + 1;
                    break;

                case IDFComputation.modified:
                    IDF_t = Math.Log((N * N) - (N - DF_t) + N);
                    break;

                case IDFComputation.DF:
                    IDF_t = DF_t / N;
                    break;
            }
        }

        index.Add(term, IDF_t);
    }
}
/// <summary>
/// Prepares the model: for every term counts the labels (CF) that have at least one
/// document reporting the term and stores log(1 + N / CF) in the index, where N is the
/// number of labels. Terms not found under any label receive weight 0.
/// </summary>
/// <remarks>
/// Labels named <c>SpaceLabel.UNKNOWN</c> are removed from <c>space.labels</c> itself
/// (the list is modified in place), after a message is written to the log.
/// </remarks>
/// <param name="space">The space model providing labels, documents and tokens.</param>
/// <param name="log">Log builder receiving the UNKNOWN-label notice.</param>
public override void PrepareTheModel(SpaceModel space, ILogBuilder log)
{
    if (!IsEnabled)
    {
        return;
    }

    index.Clear();

    var classes = space.labels;

    Boolean containsUnknown = classes.Any(x => x.name == SpaceLabel.UNKNOWN);
    if (containsUnknown)
    {
        log.log("Space labels include the UNKNOWN label!");
        classes.RemoveAll(x => x.name == SpaceLabel.UNKNOWN);
    }

    var terms = space.GetTokens(true, false);

    // term -> distinct labels having a document that reports the term
    var labelsByTerm = new Dictionary<String, List<SpaceLabel>>();
    foreach (String term in terms)
    {
        labelsByTerm.Add(term, new List<SpaceLabel>());
    }

    foreach (SpaceLabel label in classes)
    {
        if (label.name == SpaceLabel.UNKNOWN)
        {
            continue;
        }

        List<SpaceDocumentModel> labelDocuments = space.GetDocumentsOfLabel(label.name);

        foreach (SpaceDocumentModel document in labelDocuments)
        {
            var termsInDocument = document.GetTokens(terms);

            for (int i = 0; i < termsInDocument.Count; i++)
            {
                List<SpaceLabel> owners;
                if (labelsByTerm.TryGetValue(termsInDocument[i], out owners) && !owners.Contains(label))
                {
                    owners.Add(label);
                }
            }
        }
    }

    Double N = classes.Count;

    foreach (String term in terms)
    {
        Double ICF_t = 0;

        List<SpaceLabel> owners;
        if (labelsByTerm.TryGetValue(term, out owners))
        {
            Double CF_t = owners.Count;
            if (CF_t != 0)
            {
                ICF_t = Math.Log(1 + (N / CF_t));
            }
        }

        index.Add(term, ICF_t);
    }
}
/// <summary>
/// Prepares the model: for every (label, term) pair fills the contingency counts
/// a (documents of the label containing the term), b (documents of the label without it),
/// c (documents of the other labels containing the term) and d (documents of the other
/// labels without it), computes the model and compresses the per-label factors of each
/// term into a single index value.
/// </summary>
/// <remarks>
/// The documents of all other labels form the "negative" set of a label. A document linked
/// to several labels is therefore counted once per label; no multi-label check is performed.
/// </remarks>
/// <param name="space">The space model providing labels, documents and tokens.</param>
/// <param name="log">Log builder; not used by this implementation.</param>
public override void PrepareTheModel(SpaceModel space, ILogBuilder log)
{
    List<String> terms = space.GetTokens(true, false);
    List<SpaceLabel> labels = space.labels;

    // Materialized once: the names are enumerated again for every term further below.
    IEnumerable<String> labelNames = labels.Select(x => x.name).ToList();

    TermDiscriminatingPowerModel model = new TermDiscriminatingPowerModel();
    model.PrepareBlank(labelNames, terms);

    Dictionary<String, List<SpaceDocumentModel>> documentDict = new Dictionary<String, List<SpaceDocumentModel>>();
    Dictionary<String, List<SpaceDocumentModel>> documentNegativeDict = new Dictionary<String, List<SpaceDocumentModel>>();

    foreach (SpaceLabel label in labels)
    {
        documentDict.Add(label.name, space.LabelToDocumentLinks.GetAllLinked(label));
        documentNegativeDict.Add(label.name, new List<SpaceDocumentModel>());
    }

    // N is a member accumulated below. Reset it so that a repeated call does not add the
    // document total of the previous run on top of the current one.
    N = 0;

    foreach (KeyValuePair<String, List<SpaceDocumentModel>> pair in documentDict)
    {
        N += pair.Value.Count;

        foreach (KeyValuePair<String, List<SpaceDocumentModel>> pairSub in documentDict)
        {
            if (pair.Key != pairSub.Key)
            {
                documentNegativeDict[pair.Key].AddRange(pairSub.Value);
            }
        }
    }

    Parallel.ForEach(labels, label =>
    {
        // Each label is handled by a single iteration and only touches model[label.name].
        List<SpaceDocumentModel> positives = documentDict[label.name];
        List<SpaceDocumentModel> negatives = documentNegativeDict[label.name];

        foreach (String term in terms)
        {
            TermDiscriminatingPower TDP = model[label.name][term];

            TDP.a = positives.Count(x => x.Contains(term));
            TDP.b = positives.Count - TDP.a;

            TDP.c = negatives.Count(x => x.Contains(term));
            TDP.d = negatives.Count - TDP.c;
        }
    });

    computedModel = model.GetComputedModel(factor, N);

    Dictionary<String, Double> tempIndex = new Dictionary<String, Double>();

    foreach (String term in terms)
    {
        List<Double> scores = new List<Double>();

        foreach (String ln in labelNames)
        {
            scores.Add(GetElementFactor(term, ln));
        }

        tempIndex.Add(term, operationExtensions.CompressNumericVector(scores.ToArray(), defaultOperation));
    }

    index = tempIndex;
}
/// <summary>
/// Prepares the model - computes the IGM based weight of each term: the per-label
/// frequencies of the term are ranked in descending order, each non-zero frequency is
/// divided by the highest one and multiplied by its rank, and the index receives
/// 1 + l * (1 / sum), or 1 when the sum is zero.
/// </summary>
/// <param name="space">The space.</param>
/// <param name="log">Log builder; not used by this implementation.</param>
/// <exception cref="ArgumentException">A document is already assigned to a label! This model is not applicable for multi-label problem.</exception>
public override void PrepareTheModel(SpaceModel space, ILogBuilder log)
{
    if (!IsEnabled)
    {
        return;
    }

    index.Clear();

    var labels = space.labels.ToList();
    var terms = space.GetTokens(true, false);

    // term -> (label -> accumulated frequency of the term in the documents of the label)
    var frequencyByTerm = new Dictionary<String, Dictionary<SpaceLabel, Int32>>();
    var labelOfDocument = new Dictionary<SpaceDocumentModel, SpaceLabel>();

    foreach (String term in terms)
    {
        var perLabel = new Dictionary<SpaceLabel, Int32>();
        foreach (SpaceLabel label in labels)
        {
            perLabel.Add(label, 0);
        }

        frequencyByTerm.Add(term, perLabel);
        index.Add(term, 0);
    }

    foreach (SpaceLabel label in labels)
    {
        List<SpaceDocumentModel> labelDocuments = space.LabelToDocumentLinks.GetAllLinked(label);

        // First pass: reject any document that was already seen under another label.
        foreach (SpaceDocumentModel document in labelDocuments)
        {
            if (labelOfDocument.ContainsKey(document))
            {
                throw new ArgumentException("A document [" + document.name + "] is already assigned to a label! This model is not applicable for multi-label problem.");
            }

            labelOfDocument.Add(document, label);
        }

        // Second pass: accumulate the term frequencies of the label.
        foreach (SpaceDocumentModel document in labelDocuments)
        {
            var documentTerms = document.GetTerms(true, true, true);

            foreach (String token in documentTerms.GetTokens())
            {
                Dictionary<SpaceLabel, Int32> perLabel;
                if (frequencyByTerm.TryGetValue(token, out perLabel))
                {
                    perLabel[label] += documentTerms.GetTokenFrequency(token);
                }
            }
        }
    }

    foreach (String term in terms)
    {
        List<KeyValuePair<SpaceLabel, Int32>> ranking = frequencyByTerm[term].OrderByDescending(x => x.Value).ToList();

        Double topFrequency = ranking.Max(x => x.Value);
        Double weightedSum = 0;

        // The rank is the 1-based position in the ranking; zero frequencies keep their rank slot.
        for (int position = 0; position < ranking.Count; position++)
        {
            Int32 frequency = ranking[position].Value;
            if (frequency > 0)
            {
                Double rank = position + 1;
                weightedSum += (Convert.ToDouble(frequency) / topFrequency) * rank;
            }
        }

        Double t = (weightedSum == 0) ? 0 : 1 / weightedSum;

        index[term] = 1 + (l * t);
    }
}