// NOTE(review): removed a fully commented-out recursive GetChildrenWT draft; its role is covered by GetLeafs() + per-leaf weighting below.

/// <summary>
/// Blends term weights of all leaf documents of <paramref name="model"/> into a single vector document of type <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Vector document type to construct</typeparam>
/// <param name="model">Document model whose leafs are weighted and merged</param>
/// <param name="weightModel">Weight model used to compute term weights</param>
/// <param name="space">Space model providing corpus statistics</param>
/// <param name="FV">Feature (term) list to compute weights for</param>
/// <returns>New vector document named after <paramref name="model"/>, with merged leaf weights</returns>
public static T BlendToVector<T>(this SpaceDocumentModel model, FeatureWeightModel weightModel, SpaceModel space, List<string> FV) where T : VectorDocument, new()
{
    T output = new T();
    output.name = model.name;

    var leafs = model.GetLeafs();
    foreach (var leaf in leafs)
    {
        // FIX: weights must be computed for each leaf — the original passed the root
        // model on every iteration, merging identical weights once per leaf
        var wd = weightModel.GetWeights(FV, leaf, space);
        output.terms.Merge(wd);
    }

    return output;
}
/// <summary>
/// Prepares the specified context: loads and deploys the term weight model,
/// restores stored model data when available, and stems the query terms.
/// </summary>
/// <param name="context">The context.</param>
/// <param name="log">The log.</param>
/// <exception cref="ArgumentException">context</exception>
public override void Prepare(DocumentSelectResult context, ILogBuilder log)
{
    String p_m = FeatureWeightModel.GetModelDefinitionFilename(modelDefinitionFile, context.folder);
    String p_d = FeatureWeightModel.GetModelDataFilename(modelDefinitionFile, context.folder);

    TermWeightModel = FeatureWeightModel.LoadModel(p_m, log);
    TermWeightModel.Deploy(log);

    if (context.spaceModel == null)
    {
        String msg = "Error: TermWeight factor requires SpaceModel declared in the context for operation";
        throw new ArgumentException(msg, nameof(context));
    }

    if (File.Exists(p_d) && useStoredData)
    {
        // stored model data found: restore it instead of recomputing
        WeightingModelDataSet data = objectSerialization.loadObjectFromXML<WeightingModelDataSet>(p_d, log);
        TermWeightModel.LoadModelDataSet(data, log);

        if (useSelectedFeatures)
        {
            SelectedTerms = WeightDictionary.LoadFile(WeightDictionary.GetDictionaryFilename(modelDefinitionFile + "_sf", context.folder), log);
        }
    }
    else
    {
        TermWeightModel.PrepareTheModel(context.spaceModel, log);
    }

    // FIX: the original condition was inverted — it tokenized the query only when
    // the query was null/empty, then immediately dereferenced it. Query terms are
    // stemmed only when a query is actually present.
    if (!context.query.isNullOrEmpty())
    {
        context.query.QueryTerms = context.query.QueryTerms.Trim();
        List<String> tkns = context.query.QueryTerms.getTokens(true, true, true, false, 4);
        foreach (String tkn in tkns)
        {
            queryTerms.Add(context.stemmingContext.Stem(tkn));
        }
    }
}
/// <summary>
/// Records the signatures of the classifier, the feature selection filter and the
/// feature weighting model into the report's data fields.
/// </summary>
/// <param name="report">Report receiving the data fields</param>
/// <param name="classifier">Classifier whose signature is recorded</param>
/// <param name="filter">Feature selection filter whose signature is recorded</param>
/// <param name="featureWeight">Feature weight model whose signature is recorded</param>
public static void SetReportDataFields(this classificationReport report, IClassifier classifier, FeatureFilter filter, FeatureWeightModel featureWeight)
{
    var fields = report.data;
    fields.Add(nameof(ReportDataFieldEnum.Classifier), classifier.GetSignature(), "Signature of the classification algorithm");
    fields.Add(nameof(ReportDataFieldEnum.FeatureSelection), filter.GetSignature(), "Signature of feature selection filter model");
    fields.Add(nameof(ReportDataFieldEnum.FeatureWeighting), featureWeight.GetSignature(), "Signature of feature weight model");
}
/// <summary>
/// Prepares the context: loads (or builds) the term weight model and precomputes the
/// feature-vector dictionary according to the configured computation mode.
/// </summary>
/// <param name="context">The context.</param>
/// <param name="log">The log.</param>
/// <exception cref="ArgumentException">Thrown when no weight model is available</exception>
public override void Prepare(DocumentSelectResult context, ILogBuilder log)
{
    String p_m = "";
    String p_d = "";
    modelDefinitionFile = modelDefinitionFile.Replace("*", "");
    if (!modelDefinitionFile.isNullOrEmpty())
    {
        p_m = FeatureWeightModel.GetModelDefinitionFilename(modelDefinitionFile, context.folder);
        p_d = FeatureWeightModel.GetModelDataFilename(modelDefinitionFile, context.folder);
    }

    if (TermWeightModel == null)
    {
        log.log("Loading model from [" + p_m + "]");
        if (File.Exists(p_m))
        {
            TermWeightModel = FeatureWeightModel.LoadModel(p_m, log);
        }
    }

    // FIX: fail with a descriptive exception instead of a NullReferenceException on
    // Deploy() when no model instance was supplied and no definition file exists
    if (TermWeightModel == null)
    {
        throw new ArgumentException("TermWeightModel is not set and model definition was not found at [" + p_m + "]", nameof(context));
    }

    TermWeightModel.Deploy(log);

    if (File.Exists(p_d) && UseModelData)
    {
        log.log("Loading model data from [" + p_d + "]");
        var dataset = objectSerialization.loadObjectFromXML<WeightingModelDataSet>(p_d, log);
        TermWeightModel.LoadModelDataSet(dataset, log);
    }
    else
    {
        log.log("Preparing model ...");
        TermWeightModel.PrepareTheModel(context.spaceModel, log);
    }

    // choose the vector dictionary construction strategy by computation mode
    if (computation.HasFlag(ScoreComputationModeEnum.category))
    {
        vectorDictionary = context.TransformToFVDictionaryAsCategorySimilarity(TermWeightModel, function, log);
    }
    else if (computation.HasFlag(ScoreComputationModeEnum.site))
    {
        vectorDictionary = context.TransformToFVDictionaryAsSiteSimilarity(TermWeightModel, function, log);
    }
    else if (computation.HasFlag(ScoreComputationModeEnum.pageDivergence))
    {
        vectorDictionary = context.TransformToFVDictionaryAsPageSimilarity(TermWeightModel, function, ScoreComputationModeEnum.site, log);
    }
    else if (computation.HasFlag(ScoreComputationModeEnum.pagesOfCategory))
    {
        vectorDictionary = context.TransformToFVDictionaryAsPageSimilarity(TermWeightModel, function, ScoreComputationModeEnum.category, log);
    }
    else if (computation.HasFlag(ScoreComputationModeEnum.pagesOfDataset))
    {
        vectorDictionary = context.TransformToFVDictionaryAsPageSimilarity(TermWeightModel, function, ScoreComputationModeEnum.dataset, log);
    }

    log.log("Category similarity ready ... [" + computation.ToString() + "]");
}
/// <summary>
/// Gets default configuration for the planes method pipeline.
/// </summary>
/// <returns>Freshly constructed default settings</returns>
public static PlanesMethodSettings GetDefaultSettings()
{
    var output = new PlanesMethodSettings();

    // entity plane: render description, title and body text; no entity-level filter function
    output.entityMethod.instructions.Add(DocumentRenderInstruction.GetDescriptionInstruction());
    output.entityMethod.instructions.Add(DocumentRenderInstruction.GetTitleInstruction());
    output.entityMethod.instructions.Add(DocumentRenderInstruction.GetBodyTextInstruction());
    output.entityMethod.filterFunctionName = "";
    output.entityMethod.filterLimit = 5;

    // corpus plane: English stemming, basic tokenizer, no transliteration
    output.corpusMethod.stemmer = nameof(EnglishStemmer);
    output.corpusMethod.tokenizer = nameof(TokenizerBasic);
    output.corpusMethod.transliterationRuleSetId = "";

    // weighting: term frequency local function combined with an IDF global factor
    var weightModel = new FeatureWeightModel();
    weightModel.LocalFunction = new Weighting.Local.TermFrequencyFunction();
    var globalFactor = new FeatureWeightFactor();
    globalFactor.Settings.functionName = nameof(IDFElement);
    weightModel.GlobalFactors.Add(globalFactor);
    output.corpusMethod.WeightModel = weightModel;

    // feature selection: keep up to 8000 features
    var featureFilter = new FeatureFilter();
    featureFilter.limit = 8000;
    output.corpusMethod.filter = featureFilter;

    // classification: multi-class SVM trained with L2 loss
    output.featureMethod.classifierSettings.type = Classifiers.ClassifierType.multiClassSVM;
    output.featureMethod.classifierSettings.lossFunctionForTraining = Accord.MachineLearning.VectorMachines.Learning.Loss.L2;

    return output;
}
/// <summary>
/// Prepares the specified context: loads the score dictionary and, when machine
/// learning is enabled, prepares the term weight model and trains the classifier.
/// </summary>
/// <param name="context">The context.</param>
/// <param name="log">The log.</param>
/// <exception cref="ArgumentException">context</exception>
public override void Prepare(DocumentSelectResult context, ILogBuilder log)
{
    //context.folder.GetOrFindFiles("*", dictionaryFile + "*.xml");

    // the score dictionary is mandatory — abort with a descriptive message if missing
    scoreDictionary = FeatureVectorDictionaryWithDimensions.LoadFile(context.folder, dictionaryFile, log); // WeightDictionary.LoadFile(WeightDictionary.GetDictionaryFilename(dictionaryFile, context.folder), log);
    if (scoreDictionary == null)
    {
        String msg = "Error: Failed to find score dictionary [" + dictionaryFile + "] in " + context.folder.path;
        throw new ArgumentException(msg, nameof(context));
    }

    if (useMachineLearning)
    {
        #region --------------- PREPARING TERM WEIGHT MODEL

        String p_m = FeatureWeightModel.GetModelDefinitionFilename(modelDefinitionFile, context.folder);
        String p_d = FeatureWeightModel.GetModelDataFilename(modelDefinitionFile, context.folder);

        // load the model definition only when no model instance was supplied
        if (TermWeightModel == null)
        {
            TermWeightModel = FeatureWeightModel.LoadModel(p_m, log);
        }

        TermWeightModel.Deploy(log);

        if (context.spaceModel == null)
        {
            String msg = "Error: TermWeight factor requires SpaceModel declared in the context for operation";
            throw new ArgumentException(msg, nameof(context));
        }

        if (File.Exists(p_d) && useStoredData)
        {
            // restore precomputed model data instead of rebuilding it
            WeightingModelDataSet data = objectSerialization.loadObjectFromXML<WeightingModelDataSet>(p_d, log);
            TermWeightModel.LoadModelDataSet(data, log);

            if (useSelectedFeatures)
            {
                SelectedTerms = WeightDictionary.LoadFile(WeightDictionary.GetDictionaryFilename(modelDefinitionFile + "_sf", context.folder), log);
            }
        }
        else
        {
            TermWeightModel.PrepareTheModel(context.spaceModel, log);
        }

        // fallback chain for the term list: stored selection -> context selection -> known-label terms
        // NOTE(review): assumes SelectedTerms is non-null here (field-initialized elsewhere) — confirm
        if (SelectedTerms.Count == 0)
        {
            SelectedTerms = context.selectedFeatures;
        }

        List<String> sel_tkns = new List<String>();
        sel_tkns.AddRange(SelectedTerms.index.Values.Select(x => x.name));

        if (!sel_tkns.Any())
        {
            sel_tkns.AddRange(context.spaceModel.terms_known_label.GetTokens());
        }
        #endregion

        fvConstructor.Deploy(featureMethod.constructor, sel_tkns);

        classifier = featureMethod.classifierSettings.GetClassifier();

        // map AssignedID -> label ID from the score dictionary
        sc_id = scoreDictionary.GetVectorsWithLabelID(null, criterion).ToNameVsLabelID();

        // build the training set: one labeled feature vector per context item with a known label
        List<FeatureVectorWithLabelID> trainingSet = new List<FeatureVectorWithLabelID>();
        foreach (var item in context.items)
        {
            if (sc_id.ContainsKey(item.AssignedID))
            {
                WeightDictionary dc_vec = TermWeightModel.GetWeights(sel_tkns, item.spaceDocument, context.spaceModel);
                var n_vec = fvConstructor.ConstructFeatureVector(dc_vec, item.AssignedID);
                FeatureVectorWithLabelID id_vec = new FeatureVectorWithLabelID(n_vec, sc_id[item.AssignedID]);
                trainingSet.Add(id_vec);
            }
        }

        log.log("Training [" + classifier.name + "] with [" + sc_id.Count + "] feature vectors.");

        classifier.DoTraining(trainingSet, log);
    }
}
/// <summary>
/// Transforms to fv dictionary: builds, for each document, a feature vector whose
/// dimensions are its similarities to each category's aggregated weight vector.
/// </summary>
/// <param name="context">The context.</param>
/// <param name="TermWeightModel">The term weight model.</param>
/// <param name="function">The function.</param>
/// <param name="log">Log builder receiving progress output</param>
/// <returns>Feature vector set dictionary, grouped by domain ID</returns>
public static FeatureVectorSetDictionary TransformToFVDictionaryAsCategorySimilarity(this DocumentSelectResult context, FeatureWeightModel TermWeightModel, IVectorSimilarityFunction function, ILogBuilder log)
{
    log.log("... Category Similarity ...");

    List<string> selectedTerms = context.selectedFeatures.GetKeys();

    // one aggregated weight dictionary per category label
    Dictionary<String, WeightDictionary> categoryDictionarties = new Dictionary<string, WeightDictionary>();
    foreach (SpaceLabel label in context.spaceModel.labels)
    {
        Relationship<SpaceLabel, SpaceCategoryModel> categoryModel = context.spaceModel.LabelToCategoryLinks.GetAllRelationships(label).FirstOrDefault();
        var categoryWeights = TermWeightModel.GetWeights(selectedTerms, categoryModel.NodeB, context.spaceModel, label);
        categoryDictionarties.Add(label.name, categoryWeights);
    }

    FeatureVectorSetDictionary dict = new FeatureVectorSetDictionary();

    Int32 i = 0;
    // FIX: progress step must never be zero — integer division made p == 0 for
    // contexts with fewer than 20 items, so "i % p" threw DivideByZeroException
    Int32 p = Math.Max(1, context.Count / 20);

    foreach (var entry in context.items)
    {
        i++;
        WeightDictionary documentWeights = TermWeightModel.GetWeights(selectedTerms, entry.spaceDocument, context.spaceModel);

        FeatureVector fv = new FeatureVector(entry.AssignedID);
        fv.dimensions = new double[context.spaceModel.labels.Count];

        // similarity to each category computed in parallel; each lambda writes a distinct index
        Parallel.ForEach(context.spaceModel.labels, (label) =>
        {
            var docToClassSimilarity = function.ComputeSimilarity(categoryDictionarties[label.name], documentWeights);
            fv.dimensions[context.spaceModel.labels.IndexOf(label)] = docToClassSimilarity;
        });

        if (i % p == 0)
        {
            log.Append(" [" + i.GetRatio(context.Count).ToString("P2") + "] ");
        }

        dict.GetOrAdd(entry.DomainID).Add(fv, -1);
    }

    foreach (KeyValuePair<string, FeatureVectorWithLabelIDSet> pair in dict)
    {
        pair.Value.CloseDeploy();
    }

    log.log("... Preparation done...");
    return dict;
}
// NOTE(review): a fully commented-out draft of TransformToFVDictionaryAsPageInCategorySimilarity
// was removed here; equivalent behavior is available via
// TransformToFVDictionaryAsPageSimilarity(..., ScoreComputationModeEnum.category, ...).

/// <summary>
/// Transforms to fv dictionary: scores each page by its similarity to the aggregated
/// model of the site (domain) it belongs to.
/// </summary>
/// <param name="context">The context.</param>
/// <param name="TermWeightModel">The term weight model.</param>
/// <param name="function">The function.</param>
/// <param name="log">Log builder receiving progress output</param>
/// <returns>Feature vector set dictionary, grouped by domain</returns>
public static FeatureVectorSetDictionary TransformToFVDictionaryAsSiteSimilarity(this DocumentSelectResult context, FeatureWeightModel TermWeightModel, IVectorSimilarityFunction function, ILogBuilder log)
{
    log.log("... Site Similarity ...");

    List<string> selectedTerms = context.selectedFeatures.GetKeys();

    Dictionary<String, WeightDictionary> categoryDictionarties = new Dictionary<string, WeightDictionary>();
    Dictionary<String, WeightDictionary> documentDictionarties = new Dictionary<string, WeightDictionary>();

    var byDomain = context.GetByDomain(log);

    FeatureVectorSetDictionary dict = new FeatureVectorSetDictionary();

    Int32 i = 0;
    // FIX: progress step must never be zero — "i % p" threw DivideByZeroException
    // when the context held fewer than 10 items
    Int32 p = Math.Max(1, context.Count / 10);

    foreach (var pair in byDomain)
    {
        i++;

        // aggregate all documents of this domain into a single site-level model
        SpaceDocumentModel siteModel = new SpaceDocumentModel();
        foreach (var ent in pair.Value)
        {
            WeightDictionary documentWeights = TermWeightModel.GetWeights(selectedTerms, ent.spaceDocument, context.spaceModel);
            documentDictionarties.Add(ent.AssignedID, documentWeights);
            siteModel.Children.Add(ent.spaceDocument);
        }
        siteModel.Flatten(false);

        categoryDictionarties.Add(pair.Key, TermWeightModel.GetWeights(selectedTerms, siteModel, context.spaceModel));

        // score each page of the domain against the site-level weight vector
        foreach (var ent in pair.Value)
        {
            FeatureVector fv = new FeatureVector(ent.AssignedID);
            fv.dimensions = new double[context.spaceModel.labels.Count];

            var docToClassSimilarity = function.ComputeSimilarity(categoryDictionarties[pair.Key], documentDictionarties[ent.AssignedID]);
            fv.dimensions[0] = docToClassSimilarity;

            dict.GetOrAdd(pair.Key).Add(fv, -1);
        }

        if (i % p == 0)
        {
            log.Append(" [" + i.GetRatio(context.Count).ToString("P2") + "] ");
        }
    }

    foreach (KeyValuePair<string, FeatureVectorWithLabelIDSet> pair in dict)
    {
        pair.Value.CloseDeploy();
    }

    log.log("... Preparation finished ...");
    return dict;
}
/// <summary>
/// Transforms to fv dictionary: for each document, builds a feature vector of pairwise
/// similarities against the other documents of its group (category, site, or whole dataset).
/// </summary>
/// <param name="context">The context.</param>
/// <param name="TermWeightModel">The term weight model.</param>
/// <param name="function">The function.</param>
/// <param name="groupmode">How documents are grouped into "relatives"</param>
/// <param name="log">Log builder receiving progress output</param>
/// <returns>Feature vector set dictionary, grouped by group key</returns>
/// <exception cref="ArgumentOutOfRangeException">Thrown for an unsupported group mode</exception>
public static FeatureVectorSetDictionary TransformToFVDictionaryAsPageSimilarity(this DocumentSelectResult context, FeatureWeightModel TermWeightModel, IVectorSimilarityFunction function, ScoreComputationModeEnum groupmode, ILogBuilder log)
{
    List<string> selectedTerms = context.selectedFeatures.GetKeys();

    // per-document weight vectors, keyed by AssignedID
    Dictionary<String, WeightDictionary> documentDictionarties = new Dictionary<string, WeightDictionary>();
    foreach (var entry in context.items)
    {
        WeightDictionary documentWeights = TermWeightModel.GetWeights(selectedTerms, entry.spaceDocument, context.spaceModel);
        documentDictionarties.Add(entry.AssignedID, documentWeights);
    }

    FeatureVectorSetDictionary dict = new FeatureVectorSetDictionary();

    Int32 i = 0;
    // FIX: progress step must never be zero — "i % p" threw DivideByZeroException
    // when the context held fewer than 10 items
    Int32 p = Math.Max(1, context.Count / 10);

    Dictionary<string, List<DocumentSelectResultEntry>> relative_groups = null;
    if (groupmode == ScoreComputationModeEnum.category)
    {
        Dictionary<string, List<string>> assignIDByLabel = context.spaceModel.LabelToDocumentLinks.GetAllRelationShipByName(true);
        relative_groups = context.GetByAssignIDCategory(assignIDByLabel, log);
        if (assignIDByLabel.ContainsKey(SpaceLabel.UNKNOWN))
        {
            assignIDByLabel.Remove(SpaceLabel.UNKNOWN);
        }
        log.log("... Page Similarity ... Groups by category");
    }
    else if (groupmode == ScoreComputationModeEnum.site)
    {
        relative_groups = context.GetByDomain(log);
        log.log("... Page Similarity ... Groups by site");
    }
    else if (groupmode == ScoreComputationModeEnum.dataset)
    {
        relative_groups = new Dictionary<string, List<DocumentSelectResultEntry>>();
        relative_groups.Add("dataset", context.items);
        log.log("... Page Similarity ... dataset");
    }
    else
    {
        // FIX: fail explicitly instead of NullReferenceException further below
        throw new ArgumentOutOfRangeException(nameof(groupmode), groupmode, "Unsupported group mode for page similarity");
    }

    // cache of already-computed pair similarities (keyed by concatenated AssignedIDs)
    ConcurrentDictionary<String, Double> computedPairs = new ConcurrentDictionary<string, double>();

    foreach (var domainPair in relative_groups)
    {
        List<DocumentSelectResultEntry> relatives = domainPair.Value;

        foreach (var entry in relatives)
        {
            i++;

            FeatureVector fv = new FeatureVector(entry.AssignedID);
            fv.dimensions = new double[relatives.Count - 1];

            Int32 hostInd = relatives.IndexOf(entry);

            Parallel.ForEach(relatives, (pair) =>
            {
                Int32 ind = relatives.IndexOf(pair);
                // dimensions skip the host document itself, so indices above it shift down by one
                if (ind >= hostInd)
                {
                    ind = ind - 1;
                }

                if (pair.AssignedID != entry.AssignedID)
                {
                    Double docToClassSimilarity = 0;
                    if (computedPairs.ContainsKey(entry.AssignedID + pair.AssignedID))
                    {
                        docToClassSimilarity = computedPairs[entry.AssignedID + pair.AssignedID];
                    }
                    else if (computedPairs.ContainsKey(pair.AssignedID + entry.AssignedID))
                    {
                        docToClassSimilarity = computedPairs[pair.AssignedID + entry.AssignedID];
                    }
                    else
                    {
                        var vecA = documentDictionarties[pair.AssignedID];
                        var vecB = documentDictionarties[entry.AssignedID];
                        docToClassSimilarity = function.ComputeSimilarity(vecA, vecB);
                        // GetOrAdd is atomic; the original's redundant ContainsKey pre-checks are unnecessary
                        computedPairs.GetOrAdd(entry.AssignedID + pair.AssignedID, docToClassSimilarity);
                    }
                    fv.dimensions[ind] = docToClassSimilarity;
                }
            });

            if (i % p == 0)
            {
                log.Append(" [" + i.GetRatio(context.Count).ToString("P2") + "] ");
            }

            dict.GetOrAdd(domainPair.Key).Add(fv, -1);
        }
    }

    log.log("... Preparation finished ...");
    return dict;
}
/// <summary>
/// Executes the weight-table construction operation for one dataset fold: renders text,
/// populates the space model, selects features, prepares and persists the weight model.
/// </summary>
/// <param name="logger">Logger receiving progress output</param>
/// <param name="executionContextMain">Operation context carried across folds (may be null)</param>
/// <param name="executionContextExtra">Extra experiment-level context (unused here)</param>
/// <returns>Fold/context pair holding the (possibly updated) operation context</returns>
public override ExperimentDataSetFoldContextPair<OperationContext> Execute(ILogBuilder logger, OperationContext executionContextMain = null, ExperimentModelExecutionContext executionContextExtra = null)
{
    ExperimentDataSetFoldContextPair<OperationContext> output = new ExperimentDataSetFoldContextPair<OperationContext>(fold, executionContextMain);

    Open();

    // output artifact paths for this fold: model definition, model data, weight table
    String p_m = FeatureWeightModel.GetModelDefinitionFilename(setup.OutputFilename, fold_notes.folder);
    String p_d = FeatureWeightModel.GetModelDataFilename(setup.OutputFilename, fold_notes.folder);
    String w_t = WeightDictionary.GetDictionaryFilename(setup.OutputFilename, fold_notes.folder);

    Boolean skip = false;
    if (setup.skipIfExisting)
    {
        // skip only when ALL three artifacts already exist
        if (File.Exists(p_m) && File.Exists(p_d) && File.Exists(w_t))
        {
            logger.log("WeightTable [" + p_d + "] found, skipping the operation");
            skip = true;
        }
    }

    if (!skip)
    {
        output.context.DeployDataSet(fold, logger);

        entityOperation.TextRendering(output.context, notes);

        /*
         * entityOperation.TextPreblendFilter(output.context, notes);
         *
         * entityOperation.TextBlending(output.context, notes);
         */

        corpusOperation.SpaceModelPopulation(output.context, notes);

        corpusOperation.SpaceModelCategories(output.context, notes);

        corpusOperation.FeatureSelection(output.context, notes, requirements.MayUseSelectedFeatures);

        // persist the selected features next to the weight table ("_fs" suffix)
        output.context.SelectedFeatures.Save(fold_notes.folder, notes, setup.OutputFilename + "_fs");

        //corpusOperation.weightModel.
        corpusOperation.weightModel.PrepareTheModel(output.context.spaceModel, logger);

        var wt_s = corpusOperation.weightModel.GetElementFactors(output.context.SelectedFeatures.GetKeys(), output.context.spaceModel);

        wt_s.Save(fold_notes.folder, notes, setup.OutputFilename);

        corpusOperation.weightModel.Save(setup.OutputFilename, fold_notes.folder, notes);

        OperationContextReport reportOperation = new OperationContextReport();
        reportOperation.DeploySettingsBase(notes);

        reportOperation.GenerateReports(output.context, setup.reportOptions, notes);
    }

    Close();

    return (output);
}
/// <summary>
/// Picks specified number of sample documents and constructs a demo table, showing all term weight components
/// </summary>
/// <param name="space">The space.</param>
/// <param name="weightModel">The weight model.</param>
/// <param name="selectedFeatures">Dictionary limiting which terms appear as table rows</param>
/// <param name="sampleDocuments">The sample documents.</param>
/// <param name="name">The name.</param>
/// <param name="description">The description.</param>
/// <returns></returns>
public static DataTable MakeWeightModelDemoTable(this SpaceModel space, FeatureWeightModel weightModel, WeightDictionary selectedFeatures, Int32 sampleDocuments, String name, String description)
{
    DataTable table = new DataTable();
    table.SetTitle(name);
    table.SetDescription(description);

    table.SetAdditionalInfoEntry("Documents", space.documents.Count, "Total count of document vectors");
    table.SetAdditionalInfoEntry("Local function", weightModel.LocalFunction.GetSignature(), weightModel.LocalFunction.description);

    // take at most sampleDocuments documents as table columns
    var sampleIn = space.documents.Take(Math.Min(sampleDocuments, space.documents.Count)).ToList();

    List<SpaceDocumentModel> sample = new List<SpaceDocumentModel>();
    foreach (var s in sampleIn)
    {
        sample.Add(s);
    }

    // rows: terms of the first sampled document that are also in selectedFeatures, capped around 500
    List<String> terms = new List<String>();

    var terms_in = sample.First().GetTerms(true, true).GetTokens();
    foreach (var t in terms_in)
    {
        if (selectedFeatures.ContainsKey(t))
        {
            terms.Add(t);
        }
        if (terms.Count > 500)
        {
            break;
        }
    }

    DataColumn column_token = table.Add("Name", "Name of the document vector", "Name", typeof(String), imbSCI.Core.enums.dataPointImportance.normal).SetWidth(50);

    List<DataColumn> dimensions = new List<DataColumn>();

    DataColumn loc = null;

    // one "Local" column per sampled document, aligned by index with `sample`
    List<DataColumn> localColumns = new List<DataColumn>();
    for (int i = 0; i < sample.Count; i++)
    {
        var doc = sample[i];
        localColumns.Add(
            table.Add(weightModel.LocalFunction.shortName + i.ToString(), weightModel.LocalFunction.GetSignature() + " for document: " + doc.name, weightModel.LocalFunction.shortName, typeof(Double), imbSCI.Core.enums.dataPointImportance.normal, "F5",
                      weightModel.LocalFunction.GetSignature() + "[" + i.ToString("D2") + "]").SetGroup("Local"));
    }

    // one "Global" column per global weighting factor, aligned by index with GlobalFactors
    Int32 c = 0;
    List<DataColumn> globalColumns = new List<DataColumn>();
    foreach (FeatureWeightFactor gl in weightModel.GlobalFactors)
    {
        globalColumns.Add(
            table.Add(gl.GlobalFunction.shortName + c.ToString(), gl.GlobalFunction.shortName + " at w= " + gl.weight, gl.GlobalFunction.shortName, typeof(Double), imbSCI.Core.enums.dataPointImportance.important, "F5",
                      gl.Settings.GetSignature() + "[" + c.ToString("D2") + "]").SetGroup("Global"));
        c++;
    }

    // one "Total" column per sampled document, aligned by index with `sample`
    Int32 ct = 0;
    List<DataColumn> totalColumns = new List<DataColumn>();
    foreach (var doc in sample)
    {
        totalColumns.Add(
            table.Add("TotalScore" + ct.ToString(), weightModel.LocalFunction.GetSignature() + " for document: " + doc.name, weightModel.LocalFunction.shortName, typeof(Double), imbSCI.Core.enums.dataPointImportance.normal, "F5",
                      weightModel.LocalFunction.GetSignature() + "[" + ct.ToString("D2") + "]").SetGroup("Total"));
        ct++;
    }

    /*
     * for (int i = 0; i < sample.Count; i++)
     * {
     * var doc = sample[i];
     *
     * foreach (String term in terms)
     * {
     * weightModel.GetCompositeEntry(term, doc, space);
     * }
     *
     * }*/

    // fill one row per term; `li` walks the column lists, which are index-aligned
    // with `sample` (local/total) and GlobalFactors (global)
    foreach (String term in terms)
    {
        var dr = table.NewRow();
        dr[column_token] = term;

        Int32 li = 0;
        foreach (DataColumn local in localColumns)
        {
            dr[local] = weightModel.LocalFunction.GetElementFactor(term, sample[li]);
            li++;
        }

        li = 0;
        foreach (DataColumn local in globalColumns)
        {
            dr[local] = weightModel.GlobalFactors[li].GlobalFunction.GetElementFactor(term, space);
            li++;
        }

        li = 0;
        foreach (DataColumn local in totalColumns)
        {
            dr[local] = weightModel.GetWeight(term, sample[li], space); //. //GetElementFactor(term, sample[li]);
            li++;
        }

        table.Rows.Add(dr);
    }

    return (table);
}