/// <summary>
        /// Loads the weight dictionary used by this factor and collects stemmed query terms from the context.
        /// </summary>
        /// <param name="context">Selection context; supplies the folder, the space model, the query and the stemming context.</param>
        /// <param name="log">Log builder handed to the dictionary loader.</param>
        /// <exception cref="ArgumentException">Thrown when <paramref name="context"/> has no space model.</exception>
        public override void Prepare(DocumentSelectResult context, ILogBuilder log)
        {
            String dictionaryPath = WeightDictionary.GetDictionaryFilename(dictionaryFile, context.folder);
            weightDictionary = WeightDictionary.LoadFile(dictionaryPath, log);

            if (context.spaceModel == null)
            {
                throw new ArgumentException("Error: TermWeight factor requires SpaceModel declared in the context for operation", nameof(context));
            }

            // NOTE(review): the tokenization below runs only when the query IS reported as null-or-empty,
            // yet it immediately reads QueryTerms. The condition looks inverted - confirm the semantics
            // of isNullOrEmpty() before changing it.
            if (!context.query.isNullOrEmpty())
            {
                return;
            }

            context.query.QueryTerms = context.query.QueryTerms.Trim();

            foreach (String token in context.query.QueryTerms.getTokens(true, true, true, false, 4))
            {
                queryTerms.Add(context.stemmingContext.Stem(token));
            }
        }
        /// <summary>
        /// Scores an entry, either through the classifier or from the precomputed score dictionary.
        /// </summary>
        /// <param name="entry">Entry to score; its <c>AssignedID</c> is used as the lookup key.</param>
        /// <param name="context">Selection context; its space model is used on the machine-learning path.</param>
        /// <param name="log">Log builder handed to the classifier.</param>
        /// <returns>
        /// The classifier score when <c>useMachineLearning</c> is set; otherwise the compressed stored vector
        /// for the entry, or 0 when the score dictionary has no vector for it.
        /// </returns>
        public override double Score(DocumentSelectResultEntry entry, DocumentSelectResult context, ILogBuilder log)
        {
            if (!useMachineLearning)
            {
                if (!scoreDictionary.ContainsKey(entry.AssignedID))
                {
                    return 0;
                }

                return scoreDictionary[entry.AssignedID].CompressNumericVector(vectorCompression);
            }

            WeightDictionary documentWeights = TermWeightModel.GetWeights(SelectedTerms.GetKeys(), entry.spaceDocument, context.spaceModel);
            var featureVector = fvConstructor.ConstructFeatureVector(documentWeights, entry.AssignedID);

            // -1 is handed to the classifier when no id is registered for this entry.
            Int32 mappedId = -1;
            if (sc_id.ContainsKey(entry.AssignedID))
            {
                mappedId = sc_id[entry.AssignedID];
            }

            return classifier.DoScore(featureVector, log, mappedId);
        }
        /// <summary>
        /// Removes from the space model every term that is not among the selected features.
        /// </summary>
        /// <param name="spaceModel">Space model whose documents and label-level term sets are filtered.</param>
        /// <param name="selectedFeatures">Dictionary whose keys are the terms to keep.</param>
        /// <param name="log">Log builder receiving periodic progress messages.</param>
        /// <returns>Sum of the values returned by <c>FilterSelectedFeatures</c> over all documents.</returns>
        public static Int32 FilterSpaceModelFeatures(this SpaceModel spaceModel, WeightDictionary selectedFeatures, ILogBuilder log)
        {
            // A progress line is written roughly every fifth of the document set.
            Int32 reportStep = spaceModel.documents.Count / 5;

            List <String> termsToRemove = spaceModel.terms.GetTokensOtherThan(selectedFeatures.GetKeys());

            Int32 removedTotal    = 0;
            Int32 sinceLastReport = 0;

            for (int index = 0; index < spaceModel.documents.Count; index++)
            {
                removedTotal += spaceModel.documents[index].FilterSelectedFeatures(termsToRemove, false);

                if (sinceLastReport > reportStep)
                {
                    Double ratio = index.GetRatio(spaceModel.documents.Count);
                    log.log("Filter SelectedFeatures [" + ratio.ToString("P2") + "]");
                    sinceLastReport = 0;
                }

                sinceLastReport++;
            }

            spaceModel.terms_known_label.FilterTokens(termsToRemove, false);
            spaceModel.terms_unknown_label.FilterTokens(termsToRemove, false);

            return removedTotal;
        }
        /// <summary>
        /// Builds, for every entry in the context, a feature vector whose dimensions are the similarities
        /// between the entry's document weights and the category weights of each label in the space model.
        /// </summary>
        /// <param name="context">Selection context supplying the selected features, the space model and the entries.</param>
        /// <param name="TermWeightModel">Model used to compute term weights for categories and documents.</param>
        /// <param name="function">Similarity function applied to (category weights, document weights).</param>
        /// <param name="log">Log builder receiving progress messages.</param>
        /// <returns>Feature vectors grouped by the <c>DomainID</c> of their entries.</returns>
        public static FeatureVectorSetDictionary TransformToFVDictionaryAsCategorySimilarity(this DocumentSelectResult context, FeatureWeightModel TermWeightModel, IVectorSimilarityFunction function, ILogBuilder log)
        {
            log.log("... Category Similarity ...");

            List <string> selectedTerms = context.selectedFeatures.GetKeys();

            Dictionary <String, WeightDictionary> categoryDictionarties = new Dictionary <string, WeightDictionary>();

            foreach (SpaceLabel label in context.spaceModel.labels)
            {
                // NOTE(review): FirstOrDefault() may yield no relationship for a label; NodeB is then
                // dereferenced without a check - confirm every label is always linked to a category.
                Relationship <SpaceLabel, SpaceCategoryModel> categoryModel = context.spaceModel.LabelToCategoryLinks.GetAllRelationships(label).FirstOrDefault();

                var categoryWeights = TermWeightModel.GetWeights(selectedTerms, categoryModel.NodeB, context.spaceModel, label);
                categoryDictionarties.Add(label.name, categoryWeights);
            }

            FeatureVectorSetDictionary dict = new FeatureVectorSetDictionary();

            Int32 i = 0;

            // Progress is reported about every 5% of entries. The step is clamped to 1 because
            // Count / 20 is 0 for fewer than 20 entries, which made "i % p" throw DivideByZeroException.
            Int32 p = Math.Max(1, context.Count / 20);

            foreach (var entry in context.items)
            {
                i++;

                WeightDictionary documentWeights = TermWeightModel.GetWeights(selectedTerms, entry.spaceDocument, context.spaceModel);

                FeatureVector fv = new FeatureVector(entry.AssignedID);
                fv.dimensions = new double[context.spaceModel.labels.Count];

                // Each label writes only its own slot of the dimensions array.
                Parallel.ForEach(context.spaceModel.labels, (label) =>
                {
                    var docToClassSimilarity = function.ComputeSimilarity(categoryDictionarties[label.name], documentWeights);
                    fv.dimensions[context.spaceModel.labels.IndexOf(label)] = docToClassSimilarity;
                });

                if (i % p == 0)
                {
                    log.Append(" [" + i.GetRatio(context.Count).ToString("P2") + "] ");
                }

                dict.GetOrAdd(entry.DomainID).Add(fv, -1);
            }

            foreach (KeyValuePair <string, FeatureVectorWithLabelIDSet> pair in dict)
            {
                pair.Value.CloseDeploy();
            }

            log.log("... Preparation done...");
            return(dict);
        }
Exemple #5
0
        /// <summary>
        /// Pairs up the entries of the two dictionaries and computes their similarity.
        /// </summary>
        /// <param name="vectorA">The first weight dictionary.</param>
        /// <param name="vectorB">The second weight dictionary.</param>
        /// <returns>0 when the pairing contains no entries; otherwise the result of <c>Compute</c> on the pairs.</returns>
        public override double ComputeSimilarity(WeightDictionary vectorA, WeightDictionary vectorB)
        {
            var pairs = new WeightDictionaryEntryPairs(vectorA, vectorB);

            if (pairs.Count != 0)
            {
                return Compute(pairs);
            }

            return 0;
        }
        /// <summary>
        /// Builds the weight dictionary of a document by combining the local function with all global factors.
        /// </summary>
        /// <param name="termWhiteList">White-listed terms; only consulted when <c>KERNELOPTION_USE_WHITELISTTERMS</c> is set.</param>
        /// <param name="document">Document whose terms are weighted.</param>
        /// <param name="space">Space model handed to the global functions.</param>
        /// <param name="label">Optional label handed to the global functions.</param>
        /// <returns>Dictionary holding one merged entry per document term.</returns>
        /// <exception cref="NotImplementedException">
        /// Thrown on the white-list path as soon as the document contains a white-listed term.
        /// </exception>
        public WeightDictionary GetWeights(List <String> termWhiteList, SpaceDocumentModel document, SpaceModel space, SpaceLabel label = null)
        {
            var output = new WeightDictionary();

            output.name        = GetSignature() + "_" + document.name;
            output.description = "Feature weight table constructed by [" + GetSignature() + "] for features [" + termWhiteList.Count + "] in document [" + document.name + "]";
            output.nDimensions = nDimensions;

            if (KERNELOPTION_USE_WHITELISTTERMS)
            {
                // The white-list path is not implemented: the first white-listed term found in the document aborts the call.
                foreach (String whiteListed in termWhiteList)
                {
                    if (document.terms.Contains(whiteListed))
                    {
                        throw new NotImplementedException();
                    }
                }
            }
            else
            {
                List <String> documentTerms = document.terms.GetTokens();

                for (int index = 0; index < document.terms.Count; index++)
                {
                    String term = documentTerms[index];

                    // Without a local function the entry starts from a zero-valued placeholder.
                    WeightDictionaryEntry entry = new WeightDictionaryEntry(term, 0);
                    if (DoUseLocalFunction)
                    {
                        entry = LocalFunction.GetElementFactorEntry(term, document);
                    }

                    foreach (FeatureWeightFactor factor in GlobalFactors)
                    {
                        entry = entry * (factor.GlobalFunction.GetElementFactorEntry(term, space, label) * factor.weight);
                    }

                    // The document-level weight is applied only when it differs from 1.
                    if (document.weight != 1)
                    {
                        entry = entry * document.weight;
                    }

                    output.Merge(entry);
                }
            }

            return output;
        }
        /// <summary>
        /// Creates the report entry; the entry dictionary is allocated unless the purpose is <c>application</c>.
        /// </summary>
        /// <param name="__name">Name of the entry, also used in the dictionary name.</param>
        /// <param name="description">Description given to the entry dictionary.</param>
        /// <param name="_folder">Folder assigned to the entry.</param>
        /// <param name="_purpose">Analysis purpose; decides whether the entry dictionary is created.</param>
        public FeatureCWPAnalysisEntryReport(String __name, String description, folderNode _folder, FeatureCWPAnalysisSettings.AnalysisPurpose _purpose)
        {
            purpose = _purpose;
            name    = __name;

            bool needsDictionary = purpose != FeatureCWPAnalysisSettings.AnalysisPurpose.application;

            if (needsDictionary)
            {
                EntryDictionary = new WeightDictionary("DictionaryFor" + name, description);

                // One dimension per reported field.
                EntryDictionary.nDimensions = fields().Count;
            }

            folder = _folder;
        }
        /// <summary>
        /// Loads and deploys the term weight model, restores or computes its data, and collects stemmed query terms.
        /// </summary>
        /// <param name="context">Selection context; supplies the folder, the space model, the query and the stemming context.</param>
        /// <param name="log">Log builder handed to the loading, deployment and preparation calls.</param>
        /// <exception cref="ArgumentException">Thrown when <paramref name="context"/> has no space model.</exception>
        public override void Prepare(DocumentSelectResult context, ILogBuilder log)
        {
            String definitionPath = FeatureWeightModel.GetModelDefinitionFilename(modelDefinitionFile, context.folder);
            String dataPath       = FeatureWeightModel.GetModelDataFilename(modelDefinitionFile, context.folder);

            TermWeightModel = FeatureWeightModel.LoadModel(definitionPath, log);
            TermWeightModel.Deploy(log);

            if (context.spaceModel == null)
            {
                throw new ArgumentException("Error: TermWeight factor requires SpaceModel declared in the context for operation", nameof(context));
            }

            // Stored model data is used only when the file exists and stored data is enabled;
            // otherwise the model is prepared from the space model.
            bool restoreStoredData = File.Exists(dataPath) && useStoredData;

            if (!restoreStoredData)
            {
                TermWeightModel.PrepareTheModel(context.spaceModel, log);
            }
            else
            {
                WeightingModelDataSet storedData = objectSerialization.loadObjectFromXML <WeightingModelDataSet>(dataPath, log);
                TermWeightModel.LoadModelDataSet(storedData, log);

                if (useSelectedFeatures)
                {
                    String selectedPath = WeightDictionary.GetDictionaryFilename(modelDefinitionFile + "_sf", context.folder);
                    SelectedTerms = WeightDictionary.LoadFile(selectedPath, log);
                }
            }

            // NOTE(review): the tokenization below runs only when the query IS reported as null-or-empty,
            // yet it immediately reads QueryTerms. The condition looks inverted - confirm the semantics
            // of isNullOrEmpty() before changing it.
            if (!context.query.isNullOrEmpty())
            {
                return;
            }

            context.query.QueryTerms = context.query.QueryTerms.Trim();

            foreach (String token in context.query.QueryTerms.getTokens(true, true, true, false, 4))
            {
                queryTerms.Add(context.stemmingContext.Stem(token));
            }
        }
        /// <summary>
        /// Deploys the filter, runs feature selection on the space model and saves both resulting dictionaries.
        /// </summary>
        /// <param name="log">Log builder handed to the filter and to the save calls.</param>
        /// <param name="_space">Space model the features are selected from.</param>
        /// <param name="folder">Folder used for filter deployment and for saving the dictionaries.</param>
        public void DeployAndRun(ILogBuilder log, SpaceModel _space, folderNode folder)
        {
            filter.Deploy(log, folder);

            weightedFeatures = new WeightDictionary(name + "_weg" + filter.limit.ToString(), "weighted features, before filter");
            selectedFeatures = new WeightDictionary(name + "_sel" + filter.limit.ToString(), "selected weighted featyres");

            // The filter receives weightedFeatures as an argument and returns the selected pairs.
            foreach (var selection in filter.SelectFeatures(_space, log, folder, weightedFeatures))
            {
                selectedFeatures.AddEntry(selection.Key, selection.Value);
            }

            String weightedPath = WeightDictionary.GetDictionaryFilename(weightedFeatures.name, folder);
            weightedFeatures.Save(folder, log, weightedPath);

            String selectedPath = WeightDictionary.GetDictionaryFilename(selectedFeatures.name, folder);
            selectedFeatures.Save(folder, log, selectedPath);
        }
        /// <summary>
        /// Constructs a weight dictionary holding the element factor of each given term.
        /// </summary>
        /// <param name="terms">Terms to compute factors for.</param>
        /// <param name="space">Space model handed to <c>GetElementFactor</c>.</param>
        /// <param name="label">Optional label handed to <c>GetElementFactor</c>.</param>
        /// <returns>Dictionary named after this function's signature, with one entry added per term.</returns>
        public WeightDictionary GetElementFactors(IEnumerable <string> terms, SpaceModel space, SpaceLabel label = null)
        {
            WeightDictionary factors = new WeightDictionary();
            factors.name = GetSignature() + "_globalOnly";

            foreach (String term in terms)
            {
                Double score = GetElementFactor(term, space, label);
                factors.AddEntry(new WeightDictionaryEntry(term, score), true);
            }

            // The description is written last so it reports the final entry count.
            factors.description = "Global weights for [" + factors.Count + "] terms.";

            return factors;
        }
Exemple #11
0
        /// <summary>
        /// Rebuilds <c>allStringWeightList</c> from the selected-column grid: one <c>WeightDictionary</c>
        /// per (weight value column, weight name column) pair, then prints the result.
        /// </summary>
        /// <remarks>
        /// Reading stops for a column pair at the first row with neither a name nor a weight.
        /// If a row has a name but no weight, an error message is shown and the handler returns early,
        /// without printing.
        /// </remarks>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void SetWeightButton_Click(object sender, EventArgs e)
        {
            this.allStringWeightList = new List <WeightDictionary>();
            Dictionary <int, int> weightRelation = convertColumnIndex(this.weightRelation);

            if (!weightAndColumnNameRelationIndexConverted)
            {
                convertWeightAndColumnNameRelationIndex();
                this.weightAndColumnNameRelationIndexConverted = true;
            }

            foreach (KeyValuePair <int, int> kvp in weightRelation)
            {
                WeightDictionary weightDict = new WeightDictionary();
                int weightValueColumn       = kvp.Key;
                int weightNameColumn        = kvp.Value;

                weightDict.columnName   = weightAndColumnNameRelation[weightValueColumn];
                weightDict.variableName = selectedColumnVariableDict[weightAndColumnNameRelation[weightValueColumn]].ToString();

                for (int i = 0; i < this.selectedColumnDataGridView.Rows.Count; i++)
                {
                    var row = selectedColumnDataGridView.Rows[i];

                    // A cell that was never filled has a null Value. Convert.ToString maps null to
                    // an empty string, so such a name counts as "empty" instead of throwing.
                    string name        = Convert.ToString(row.Cells[weightNameColumn].Value);
                    object weightValue = row.Cells[weightValueColumn].Value;
                    bool   hasName     = !string.IsNullOrEmpty(name);

                    if (weightValue == null)
                    {
                        if (!hasName)
                        {
                            // Neither name nor weight: end of the data for this column pair.
                            break;
                        }

                        // A name without a weight is an input error; report the 1-based row and abort.
                        MessageBox.Show(loadGlobalChineseCharacters.GlobalChineseCharactersDict["error_1"] + "," + weightAndColumnNameRelation[weightValueColumn] + loadGlobalChineseCharacters.GlobalChineseCharactersDict["weight"] + ":" + (i + 1) + loadGlobalChineseCharacters.GlobalChineseCharactersDict["row"], "message");
                        return;
                    }

                    if (hasName)
                    {
                        string valueStr = weightValue.ToString();
                        float  value    = Convert.ToSingle(valueStr);
                        weightDict.Add(name, value);
                    }

                    // A weight without a name is skipped.
                }
                allStringWeightList.Add(weightDict);
            }
            printAllWeightList();
        }
Exemple #12
0
        /// <summary>
        /// Builds a dictionary with one element factor entry per given term.
        /// </summary>
        /// <param name="terms">Terms to compute entries for.</param>
        /// <param name="space">Space model handed to <c>GetElementFactorEntry</c>; its label count sizes multi-class results.</param>
        /// <param name="label">Optional label handed to <c>GetElementFactorEntry</c>.</param>
        /// <returns>
        /// Dictionary whose <c>nDimensions</c> is 1 for numeric results and the label count for multi-class
        /// vector results; for any other result type it is left as constructed.
        /// </returns>
        public WeightDictionary GetElementFactors(IEnumerable <String> terms, SpaceModel space, SpaceLabel label = null)
        {
            var factors = new WeightDictionary();

            var kind = resultType;

            if (kind == FunctionResultTypeEnum.numeric)
            {
                factors.nDimensions = 1;
            }
            else if (kind == FunctionResultTypeEnum.numericVectorForMultiClass)
            {
                factors.nDimensions = space.labels.Count;
            }

            foreach (String term in terms)
            {
                factors.AddEntry(GetElementFactorEntry(term, space, label));
            }

            return factors;
        }
        /// <summary>
        /// Loads the precompiled selection (when an output filename and a folder are available)
        /// and deploys the weight model with its local function switched off.
        /// </summary>
        /// <param name="logger">Log builder handed to the loader and to the weight model.</param>
        /// <param name="folder">Optional folder used to resolve the output filename; nothing is loaded when it is null.</param>
        public void Deploy(ILogBuilder logger, folderNode folder = null)
        {
            if (!outputFilename.isNullOrEmpty() && folder != null)
            {
                String selectionPath = folder.pathFor(outputFilename, imbSCI.Data.enums.getWritableFileMode.none, "", false);
                precompiledSelection = WeightDictionary.LoadFile(selectionPath, logger);
            }

            if (WeightModel == null)
            {
                return;
            }

            WeightModel.DoUseLocalFunction = false;
            WeightModel.Deploy(logger);
        }
        /// <summary>
        /// Constructs a feature vector - having dimension values set by <see cref="dimensionFunctionSet"/>
        /// </summary>
        /// <remarks>
        /// The vector has <c>dimensionFunctionSet.Count * terms.nDimensions</c> slots, laid out function by function:
        /// each function contributes one consecutive run of <c>terms.nDimensions</c> values.
        /// </remarks>
        /// <param name="terms">Weight dictionary every dimension function is evaluated on.</param>
        /// <param name="name">Name given to the constructed feature vector.</param>
        /// <returns>The populated feature vector.</returns>
        public FeatureVector ConstructFeatureVector(WeightDictionary terms, String name)
        {
            FeatureVector vector = new FeatureVector(name);

            Int32 width = terms.nDimensions;
            vector.dimensions = new double[dimensionFunctionSet.Count * width];

            Int32 slot = 0;

            foreach (FeatureSpaceDimensionBase dimension in dimensionFunctionSet)
            {
                for (int d = 0; d < width; d++, slot++)
                {
                    vector.dimensions[slot] = dimension.ComputeDimension(terms, d);
                }
            }

            return vector;
        }
        ///// <summary>
        ///// Transforms to fv dictionary.
        ///// </summary>
        ///// <param name="context">The context.</param>
        ///// <param name="TermWeightModel">The term weight model.</param>
        ///// <param name="function">The function.</param>
        ///// <returns></returns>
        //public static FeatureVectorSetDictionary TransformToFVDictionaryAsPageInCategorySimilarity(this DocumentSelectResult context, FeatureWeightModel TermWeightModel, IVectorSimilarityFunction function, ILogBuilder log)
        //{
        //    log.log("... Page Similarity ...");

        //    List<string> selectedTerms = context.selectedFeatures.GetKeys();



        //    var ByDomain = context.GetByDomain(log);

        //    Dictionary<string, List<string>> assignIDByLabel = context.featureSpace.labelToDocumentAssociations.GetAllRelationShipByName(true);

        //    var ByCategory = context.GetByAssignIDCategory(assignIDByLabel,log);

        //    Dictionary<String, List<DocumentSelectResultEntry>> EntryByLabel = new Dictionary<string, List<DocumentSelectResultEntry>>();



        //    Dictionary<String, WeightDictionary> documentDictionarties = new Dictionary<string, WeightDictionary>();


        //    foreach (var entry in context.items)
        //    {

        //        WeightDictionary documentWeights = TermWeightModel.GetWeights(selectedTerms, entry.spaceDocument, context.spaceModel);
        //        documentDictionarties.Add(entry.AssignedID, documentWeights);
        //    }


        //    FeatureVectorSetDictionary dict = new FeatureVectorSetDictionary();



        //    Double total = context.Count;
        //    Int32 i = 0;
        //    Int32 p = (context.Count / 10);

        //    //List<List<Double>> matrix = new List<List<double>>();

        //    //foreach (var entry in context.items)
        //    //{
        //    //    matrix.Add(new List<double>());
        //    //}


        //    //for (int x = 0; x < context.items.Count; x++)
        //    //{

        //    //    for (int y = 0; y < context.items.Count; x++)
        //    //    {



        //    //    }

        //    //}

        //    ConcurrentDictionary<String, Double> computedPairs = new ConcurrentDictionary<string, double>();


        //    foreach (var domainPair in ByCategory)
        //    {
        //        List<DocumentSelectResultEntry> relatives = ByCategory[domainPair.Key].ToList();


        //        foreach (var entry in relatives)
        //        {

        //            i++;
        //            FeatureVector fv = new FeatureVector(entry.AssignedID);

        //            // List<Double> d = new List<>();

        //            fv.dimensions = new double[relatives.Count - 1];


        //            // List<String> keys = documentDictionarties.Keys.ToList();

        //            Int32 hostInd = relatives.IndexOf(entry);

        //            Int32 c = 0;


        //            //foreach (var pair in documentDictionarties)
        //            //{

        //            Parallel.ForEach(relatives, (pair) =>
        //            {

        //                Int32 ind = relatives.IndexOf(pair); // keys.IndexOf(pair.AssignedID);
        //                if (ind >= hostInd)
        //                {
        //                    ind = ind - 1;
        //                }

        //                if (pair.AssignedID != entry.AssignedID)
        //                {
        //                    Double docToClassSimilarity = 0;

        //                    if (computedPairs.ContainsKey(entry.AssignedID + pair.AssignedID))
        //                    {
        //                        docToClassSimilarity = computedPairs[entry.AssignedID + pair.AssignedID];
        //                    }
        //                    else if (computedPairs.ContainsKey(pair.AssignedID + entry.AssignedID))
        //                    {
        //                        docToClassSimilarity = computedPairs[pair.AssignedID + entry.AssignedID];
        //                    }
        //                    else
        //                    {
        //                        var vecA = documentDictionarties[pair.AssignedID];
        //                        var vecB = documentDictionarties[entry.AssignedID];
        //                        docToClassSimilarity = function.ComputeSimilarity(vecA, vecB);
        //                        if (docToClassSimilarity > 0)
        //                        {

        //                        }
        //                        if (!computedPairs.ContainsKey(entry.AssignedID + pair.AssignedID))
        //                        {
        //                            computedPairs.GetOrAdd(entry.AssignedID + pair.AssignedID, docToClassSimilarity);
        //                            //computedPairs.AddOrUpdate(entry.AssignedID + pair.Key, docToClassSimilarity);
        //                        }
        //                        else if (!computedPairs.ContainsKey(pair.AssignedID + entry.AssignedID))
        //                        {
        //                            computedPairs.GetOrAdd(pair.AssignedID + entry.AssignedID, docToClassSimilarity);
        //                        }

        //                    }

        //                    fv.dimensions[ind] = docToClassSimilarity;

        //                }
        //            });



        //            Int32 r = i % p;
        //            if (r == 0)
        //            {
        //                log.Append(" [" + i.GetRatio(context.Count).ToString("P2") + "] ");
        //            }


        //            dict.GetOrAdd(entry.DomainID).Add(fv, -1);
        //        }



        //    }


        //    //foreach (KeyValuePair<string, FeatureVectorWithLabelIDSet> pair in dict)
        //    //{
        //    //    pair.Value.CloseDeploy();
        //    //}

        //    log.log("... Preparation finished ...");

        //    return dict;


        //}



        /// <summary>
        /// Builds, for every entry in the context, a feature vector holding the similarity between
        /// the entry's document weights and the weights of its whole site (all documents of the same domain).
        /// </summary>
        /// <remarks>
        /// Each vector is allocated with one slot per label of the space model, but only slot 0 is written.
        /// </remarks>
        /// <param name="context">Selection context supplying the selected features, the space model and the entries.</param>
        /// <param name="TermWeightModel">Model used to compute term weights for documents and sites.</param>
        /// <param name="function">Similarity function applied to (site weights, document weights).</param>
        /// <param name="log">Log builder receiving progress messages.</param>
        /// <returns>Feature vectors grouped by domain key.</returns>
        public static FeatureVectorSetDictionary TransformToFVDictionaryAsSiteSimilarity(this DocumentSelectResult context, FeatureWeightModel TermWeightModel, IVectorSimilarityFunction function, ILogBuilder log)
        {
            log.log("... Site Similarity ...");

            List <string> selectedTerms = context.selectedFeatures.GetKeys();

            Dictionary <String, WeightDictionary> categoryDictionarties = new Dictionary <string, WeightDictionary>();
            Dictionary <String, WeightDictionary> documentDictionarties = new Dictionary <string, WeightDictionary>();

            var byDomain = context.GetByDomain(log);

            FeatureVectorSetDictionary dict = new FeatureVectorSetDictionary();

            Int32 i = 0;

            // The progress step is clamped to 1 because Count / 10 is 0 for fewer than 10 entries,
            // which made "i % p" throw DivideByZeroException.
            Int32 p = Math.Max(1, context.Count / 10);

            foreach (var pair in byDomain)
            {
                i++;
                SpaceDocumentModel siteModel = new SpaceDocumentModel();

                // Weight every document of the domain and collect the documents into one site model.
                foreach (var ent in pair.Value)
                {
                    WeightDictionary documentWeights = TermWeightModel.GetWeights(selectedTerms, ent.spaceDocument, context.spaceModel);
                    documentDictionarties.Add(ent.AssignedID, documentWeights);
                    siteModel.Children.Add(ent.spaceDocument);
                }

                siteModel.Flatten(false);

                categoryDictionarties.Add(pair.Key, TermWeightModel.GetWeights(selectedTerms, siteModel, context.spaceModel));

                foreach (var ent in pair.Value)
                {
                    FeatureVector fv = new FeatureVector(ent.AssignedID);
                    fv.dimensions = new double[context.spaceModel.labels.Count];

                    var docToClassSimilarity = function.ComputeSimilarity(categoryDictionarties[pair.Key], documentDictionarties[ent.AssignedID]);

                    fv.dimensions[0] = docToClassSimilarity;

                    dict.GetOrAdd(pair.Key).Add(fv, -1);
                }

                // NOTE(review): i counts domains while the ratio is taken against the entry count,
                // so the reported percentage may never reach 100% - confirm the intended denominator.
                if (i % p == 0)
                {
                    log.Append(" [" + i.GetRatio(context.Count).ToString("P2") + "] ");
                }
            }

            foreach (KeyValuePair <string, FeatureVectorWithLabelIDSet> pair in dict)
            {
                pair.Value.CloseDeploy();
            }

            log.log("... Preparation finished ...");

            return(dict);
        }
Exemple #16
0
 /// <summary>
 /// Computes the value of this dimension from the given weight dictionary.
 /// </summary>
 /// <param name="vector">Weight dictionary the value is computed from.</param>
 /// <param name="d">Index supplied by the caller, 0 by default. Presumably selects one of the dictionary's dimensions - confirm against the implementations.</param>
 /// <returns>The computed dimension value.</returns>
 public abstract double ComputeDimension(WeightDictionary vector, Int32 d = 0);
 /// <summary>
 /// Computes the similarity between two weight dictionaries.
 /// </summary>
 /// <param name="vectorA">The first weight dictionary.</param>
 /// <param name="vectorB">The second weight dictionary.</param>
 /// <returns>The similarity value; its range and scale are defined by the implementing class.</returns>
 //public abstract Double ComputeSimilarity(IEnumerable<WeightDictionaryEntry> vectorA, IEnumerable<WeightDictionaryEntry> vectorB);
 public abstract double ComputeSimilarity(WeightDictionary vectorA, WeightDictionary vectorB);
Exemple #18
0
        /// <summary>
        /// Picks specified number of sample documents and constructs a demo table, showing all term weight components
        /// </summary>
        /// <param name="space">The space.</param>
        /// <param name="weightModel">The weight model.</param>
        /// <param name="sampleDocuments">The sample documents.</param>
        /// <param name="name">The name.</param>
        /// <param name="description">The description.</param>
        /// <returns></returns>
        public static DataTable MakeWeightModelDemoTable(this SpaceModel space, FeatureWeightModel weightModel, WeightDictionary selectedFeatures, Int32 sampleDocuments, String name, String description)
        {
            DataTable table = new DataTable();

            table.SetTitle(name);
            table.SetDescription(description);

            table.SetAdditionalInfoEntry("Documents", space.documents.Count, "Total count of document vectors");
            table.SetAdditionalInfoEntry("Local function", weightModel.LocalFunction.GetSignature(), weightModel.LocalFunction.description);

            var sampleIn = space.documents.Take(Math.Min(sampleDocuments, space.documents.Count)).ToList();

            List <SpaceDocumentModel> sample = new List <SpaceDocumentModel>();

            foreach (var s in sampleIn)
            {
                sample.Add(s);
            }

            List <String> terms = new List <String>();

            var terms_in = sample.First().GetTerms(true, true).GetTokens();

            foreach (var t in terms_in)
            {
                if (selectedFeatures.ContainsKey(t))
                {
                    terms.Add(t);
                }
                if (terms.Count > 500)
                {
                    break;
                }
            }



            DataColumn        column_token = table.Add("Name", "Name of the document vector", "Name", typeof(String), imbSCI.Core.enums.dataPointImportance.normal).SetWidth(50);
            List <DataColumn> dimensions   = new List <DataColumn>();


            DataColumn loc = null;

            List <DataColumn> localColumns = new List <DataColumn>();

            for (int i = 0; i < sample.Count; i++)
            {
                var doc = sample[i];
                localColumns.Add(
                    table.Add(weightModel.LocalFunction.shortName + i.ToString(),
                              weightModel.LocalFunction.GetSignature() + " for document: " + doc.name,
                              weightModel.LocalFunction.shortName, typeof(Double), imbSCI.Core.enums.dataPointImportance.normal, "F5",
                              weightModel.LocalFunction.GetSignature() + "[" + i.ToString("D2") + "]").SetGroup("Local"));
            }

            Int32             c             = 0;
            List <DataColumn> globalColumns = new List <DataColumn>();

            foreach (FeatureWeightFactor gl in weightModel.GlobalFactors)
            {
                globalColumns.Add(
                    table.Add(gl.GlobalFunction.shortName + c.ToString(),
                              gl.GlobalFunction.shortName + " at w= " + gl.weight,
                              gl.GlobalFunction.shortName, typeof(Double), imbSCI.Core.enums.dataPointImportance.important, "F5",
                              gl.Settings.GetSignature() + "[" + c.ToString("D2") + "]").SetGroup("Global"));

                c++;
            }

            Int32             ct           = 0;
            List <DataColumn> totalColumns = new List <DataColumn>();

            foreach (var doc in sample)
            {
                totalColumns.Add(
                    table.Add("TotalScore" + ct.ToString(),
                              weightModel.LocalFunction.GetSignature() + " for document: " + doc.name,
                              weightModel.LocalFunction.shortName, typeof(Double), imbSCI.Core.enums.dataPointImportance.normal, "F5",
                              weightModel.LocalFunction.GetSignature() + "[" + ct.ToString("D2") + "]").SetGroup("Total"));

                ct++;
            }



            /*
             * for (int i = 0; i < sample.Count; i++)
             * {
             *  var doc = sample[i];
             *
             *  foreach (String term in terms)
             *  {
             *      weightModel.GetCompositeEntry(term, doc, space);
             *  }
             *
             * }*/



            foreach (String term in terms)
            {
                var dr = table.NewRow();

                dr[column_token] = term;
                Int32 li = 0;
                foreach (DataColumn local in localColumns)
                {
                    dr[local] = weightModel.LocalFunction.GetElementFactor(term, sample[li]);
                    li++;
                }

                li = 0;
                foreach (DataColumn local in globalColumns)
                {
                    dr[local] = weightModel.GlobalFactors[li].GlobalFunction.GetElementFactor(term, space);
                    li++;
                }

                li = 0;
                foreach (DataColumn local in totalColumns)
                {
                    dr[local] = weightModel.GetWeight(term, sample[li], space); //. //GetElementFactor(term, sample[li]);
                    li++;
                }

                table.Rows.Add(dr);
            }


            return(table);
        }
Exemple #19
0
        /// <summary>
        /// Makes ranked weight table
        /// </summary>
        /// <param name="terms">Weight dictionary whose entries are ranked and written as table rows.</param>
        /// <param name="name">Title of the table. When <paramref name="sortByDimension"/> is greater than zero, a suffix identifying the sort dimension is appended.</param>
        /// <param name="description">Description assigned to the table.</param>
        /// <param name="dimension">Custom names of dimensions - for case of vector collection. When <c>null</c>, a single dimension named "Weight" is used.</param>
        /// <param name="limit">When greater than zero, only the first <paramref name="limit"/> ranked entries are considered.</param>
        /// <param name="sortByDimension">Zero-based index of the entry dimension used for descending ordering.</param>
        /// <param name="distinctBlockSize">
        /// Entries whose sort-dimension value has already been seen are counted consecutively (the counter resets on a new value);
        /// an entry is written as a row only while that counter is below this size.
        /// </param>
        /// <returns>The populated table.</returns>
        public static DataTable MakeTable(this WeightDictionary terms, String name, String description, List <String> dimension = null, Int32 limit = 0, Int32 sortByDimension = 0, Int32 distinctBlockSize = 25)
        {
            DataTable table = new DataTable();

            // The default must be resolved before anything indexes into the list; previously the
            // list was dereferenced first, so calling with the default (null) argument threw.
            if (dimension == null)
            {
                dimension = new List <string> { "Weight" };
            }

            // The sort dimension has a custom name only if its index falls inside the name list.
            Boolean sortDimensionIsNamed = sortByDimension >= 0 && sortByDimension < dimension.Count;
            String  sortedByDimension    = sortDimensionIsNamed ? dimension[sortByDimension] : null;

            if (sortByDimension > 0)
            {
                if (sortDimensionIsNamed)
                {
                    name = name + "_" + sortedByDimension;
                }
                else
                {
                    name = name + "_" + sortByDimension.ToString("D3");
                }
            }
            table.SetTitle(name);
            table.SetDescription(description);

            List <WeightDictionaryEntry> ranking = terms.index.Values.OrderByDescending(x => x.dimensions[sortByDimension]).ToList();

            table.SetAdditionalInfoEntry("Count", terms.Count, "Total weighted features in the dictionary");
            table.SetAdditionalInfoEntry("Dimensions", dimension.Count, "Number of dimensions");

            DataColumn column_rank  = table.Add("Rank", "Rank by frequency", "R", typeof(Int32), imbSCI.Core.enums.dataPointImportance.normal).SetWidth(6);
            DataColumn column_token = table.Add("Token", "Token", "t", typeof(String), imbSCI.Core.enums.dataPointImportance.normal).SetWidth(20);

            List <DataColumn> dimensions = new List <DataColumn>();

            // Distinct values written per dimension column; sets give constant-time membership checks.
            Dictionary <String, HashSet <Double> > distinctValues = new Dictionary <string, HashSet <Double> >();

            Int32 cd = 1;

            foreach (String dim in dimension)
            {
                var cn = table.Add(dim, "Associated dimension [" + cd.ToString() + "] " + dim, dim, typeof(Double), imbSCI.Core.enums.dataPointImportance.normal, "F5", dim);
                cn.SetWidth(10);
                distinctValues.Add(dim, new HashSet <Double>());
                dimensions.Add(cn);
                cd++;
            }

            var list = ranking;

            if (limit > 0)
            {
                list = ranking.Take(Math.Min(limit, ranking.Count)).ToList();
                if (list.Count < terms.Count)
                {
                    table.AddExtra("Table contains top [" + list.Count + "] entries, out of [" + terms.Count + "] enumerated in the feature weighted dictionary");
                }
            }

            // Sort-dimension values are tracked on their own, so the repeat detection also works
            // when the sort dimension has no named column.
            HashSet <Double> seenSortValues             = new HashSet <Double>();
            Int32            sortByDimensionNonDistinct = 0;

            Int32 c = 1;

            foreach (var pair in list)
            {
                Double sortValue = pair.dimensions[sortByDimension];

                // HashSet.Add returns false when the value was already present.
                if (seenSortValues.Add(sortValue))
                {
                    sortByDimensionNonDistinct = 0;
                }
                else
                {
                    sortByDimensionNonDistinct++;
                }

                if (sortByDimensionNonDistinct < distinctBlockSize)
                {
                    var dr = table.NewRow();

                    dr[column_rank]  = c;
                    dr[column_token] = pair.name;

                    // An entry may carry fewer dimensions than there are columns; extra columns stay unset.
                    for (Int32 ci = 0; ci < dimensions.Count && ci < pair.dimensions.Length; ci++)
                    {
                        DataColumn dimCol = dimensions[ci];
                        var        v      = pair.dimensions[ci];

                        distinctValues[dimCol.ColumnName].Add(v);
                        dr[dimCol] = v;
                    }

                    c++;
                    table.Rows.Add(dr);
                }
            }

            // Drop every column (other than the sort column) that ended up with fewer than two distinct values.
            foreach (String dim in dimension)
            {
                if (dim != sortedByDimension)
                {
                    if (distinctValues[dim].Count < 2)
                    {
                        table.Columns.Remove(dim);
                        table.SetAdditionalInfoEntry(dim + " removed", "Removed as having no distinct values", "Automatically removed");
                    }
                }
            }

            return(table);
        }
        //public Boolean ComputeFeatureScores(WeightDictionary featureScores, SpaceModel space, ILogBuilder log, folderNode folder = null)
        //{



        //    return doAll;

        //}


        /// <summary>
        /// Selects the top <see cref="limit"/> terms, ranked by their feature scores
        /// </summary>
        /// <param name="space">The space; its <c>terms_known_label</c> tokens are the selection candidates.</param>
        /// <param name="log">The log.</param>
        /// <param name="folder">Optional folder; when given and <c>outputFilename</c> is set, the feature scores are saved there.</param>
        /// <param name="featureScores">Optional score dictionary; a new one is created when <c>null</c>.</param>
        /// <returns>
        /// Every candidate token paired with 1 when <see cref="limit"/> is -1 or is not smaller than the candidate count;
        /// otherwise the highest scored name/score pairs, at most <see cref="limit"/> of them.
        /// </returns>
        public List <KeyValuePair <string, double> > SelectFeatures(SpaceModel space, ILogBuilder log, folderNode folder = null, WeightDictionary featureScores = null)
        {
            // A limit of -1 disables the selection.
            Boolean selectEverything = (limit == -1);

            if (featureScores == null)
            {
                featureScores = new WeightDictionary();
            }

            var candidateTokens = space.terms_known_label.GetTokens();

            Boolean hasPrecompiledSelection = precompiledSelection != null && precompiledSelection.Count > 0;

            if (!hasPrecompiledSelection)
            {
                WeightModel.PrepareTheModel(space, log);

                featureScores = WeightModel.GetElementFactors(candidateTokens, space);
            }
            else
            {
                log.log("Using precompiled selection filter from [" + outputFilename + "]");
                featureScores.Merge(precompiledSelection);
            }

            // Nothing to cut when the limit already covers every candidate.
            if (candidateTokens.Count() <= limit)
            {
                selectEverything = true;
            }

            if (selectEverything)
            {
                var everything = new List <KeyValuePair <string, double> >();

                foreach (String token in candidateTokens)
                {
                    everything.Add(new KeyValuePair <string, double>(token, 1));
                }

                return everything;
            }

            if (!outputFilename.isNullOrEmpty() && folder != null)
            {
                // Result is unused; the call is retained in case pathFor has side effects - TODO confirm.
                String p_m = folder.pathFor(outputFilename, imbSCI.Data.enums.getWritableFileMode.none, "", false);
                featureScores.Save(folder, log, outputFilename);
            }

            var scoreByName = new Dictionary <string, double>();

            foreach (WeightDictionaryEntry scored in featureScores.index.Values)
            {
                // Multi-dimensional entries are compressed into a single number; otherwise the plain weight is used.
                Double score = featureScores.nDimensions > 1
                    ? scored.CompressNumericVector(nVectorValueSelectionOperation)
                    : scored.weight;

                if (RemoveZero && score == 0)
                {
                    continue;
                }

                scoreByName.Add(scored.name, score);
            }

            return scoreByName
                   .OrderByDescending(x => x.Value)
                   .Take(Math.Min(limit, scoreByName.Count))
                   .ToList();
        }
Exemple #21
0
 /// <summary>
 /// Adds the number of selected features to the data fields of the report.
 /// </summary>
 /// <param name="report">The report receiving the data field.</param>
 /// <param name="selected">Dictionary of selected features; only its <c>Count</c> is read.</param>
 public static void SetReportDataFields(this classificationReport report, WeightDictionary selected)
 {
     String fieldName     = nameof(ReportDataFieldEnum.SelectedFeatures);
     String selectedCount = selected.Count.ToString();

     report.data.Add(fieldName, selectedCount, "Number of selected features");
 }
        /// <summary>
        /// Returns what <c>vector.GetValue</c> yields for this instance's <c>term</c> and dimension <paramref name="d"/>.
        /// </summary>
        /// <param name="vector">The vector that is queried.</param>
        /// <param name="d">Dimension index passed on to <c>GetValue</c>.</param>
        /// <returns>The value reported by the vector.</returns>
        public override double ComputeDimension(WeightDictionary vector, Int32 d = 0)
        {
            return vector.GetValue(term, d);
        }
        /// <summary>
        /// Builds and stores the feature weight model and its weight table for the current fold.
        /// </summary>
        /// <param name="logger">The logger.</param>
        /// <param name="executionContextMain">Context wrapped into the returned pair.</param>
        /// <param name="executionContextExtra">Not used by this method.</param>
        /// <returns>Pair of the fold and the operation context the pipeline ran on.</returns>
        public override ExperimentDataSetFoldContextPair <OperationContext> Execute(ILogBuilder logger, OperationContext executionContextMain = null, ExperimentModelExecutionContext executionContextExtra = null)
        {
            var output = new ExperimentDataSetFoldContextPair <OperationContext>(fold, executionContextMain);

            Open();

            String definitionPath = FeatureWeightModel.GetModelDefinitionFilename(setup.OutputFilename, fold_notes.folder);
            String dataPath       = FeatureWeightModel.GetModelDataFilename(setup.OutputFilename, fold_notes.folder);
            String dictionaryPath = WeightDictionary.GetDictionaryFilename(setup.OutputFilename, fold_notes.folder);

            // The operation is skipped only when skipping is enabled and all three output files are already present.
            Boolean alreadyBuilt = setup.skipIfExisting
                                   && File.Exists(definitionPath)
                                   && File.Exists(dataPath)
                                   && File.Exists(dictionaryPath);

            if (alreadyBuilt)
            {
                logger.log("WeightTable [" + dataPath + "] found, skipping the operation");
            }
            else
            {
                // Dataset -> rendered text -> space model -> categories -> feature selection.
                output.context.DeployDataSet(fold, logger);
                entityOperation.TextRendering(output.context, notes);
                corpusOperation.SpaceModelPopulation(output.context, notes);
                corpusOperation.SpaceModelCategories(output.context, notes);
                corpusOperation.FeatureSelection(output.context, notes, requirements.MayUseSelectedFeatures);

                // Selected features are stored under the output name with the "_fs" suffix.
                output.context.SelectedFeatures.Save(fold_notes.folder, notes, setup.OutputFilename + "_fs");

                // Weight table of the selected features, followed by the model itself.
                corpusOperation.weightModel.PrepareTheModel(output.context.spaceModel, logger);

                var selectedFeatureWeights = corpusOperation.weightModel.GetElementFactors(output.context.SelectedFeatures.GetKeys(), output.context.spaceModel);
                selectedFeatureWeights.Save(fold_notes.folder, notes, setup.OutputFilename);

                corpusOperation.weightModel.Save(setup.OutputFilename, fold_notes.folder, notes);

                var reporter = new OperationContextReport();
                reporter.DeploySettingsBase(notes);
                reporter.GenerateReports(output.context, setup.reportOptions, notes);
            }

            Close();

            return output;
        }
        /// <summary>
        /// Prepares the specified context: loads the score dictionary and, when machine learning is enabled,
        /// prepares the term weight model, the feature vector constructor and trains the classifier.
        /// </summary>
        /// <param name="context">The context.</param>
        /// <param name="log">The log.</param>
        /// <exception cref="ArgumentException">
        /// The score dictionary could not be loaded, or machine learning is enabled and the context has no space model.
        /// </exception>
        public override void Prepare(DocumentSelectResult context, ILogBuilder log)
        {
            scoreDictionary = FeatureVectorDictionaryWithDimensions.LoadFile(context.folder, dictionaryFile, log);

            if (scoreDictionary == null)
            {
                String msg = "Error: Failed to find score dictionary [" + dictionaryFile + "] in " + context.folder.path;
                throw new ArgumentException(msg, nameof(context));
            }

            // Everything below is needed only for the machine learning mode.
            if (!useMachineLearning)
            {
                return;
            }

            String p_m = FeatureWeightModel.GetModelDefinitionFilename(modelDefinitionFile, context.folder);
            String p_d = FeatureWeightModel.GetModelDataFilename(modelDefinitionFile, context.folder);

            if (TermWeightModel == null)
            {
                TermWeightModel = FeatureWeightModel.LoadModel(p_m, log);
            }

            TermWeightModel.Deploy(log);

            if (context.spaceModel == null)
            {
                String msg = "Error: TermWeight factor requires SpaceModel declared in the context for operation";
                throw new ArgumentException(msg, nameof(context));
            }

            if (useStoredData && File.Exists(p_d))
            {
                // Stored model data is available: load it instead of recomputing from the space model.
                WeightingModelDataSet data = objectSerialization.loadObjectFromXML <WeightingModelDataSet>(p_d, log);
                TermWeightModel.LoadModelDataSet(data, log);

                if (useSelectedFeatures)
                {
                    // NOTE(review): the dictionary is read with the "_sf" suffix, while a writer elsewhere in this
                    // file saves selected features with "_fs" - confirm the two are meant to match.
                    SelectedTerms = WeightDictionary.LoadFile(WeightDictionary.GetDictionaryFilename(modelDefinitionFile + "_sf", context.folder), log);
                }
            }
            else
            {
                TermWeightModel.PrepareTheModel(context.spaceModel, log);
            }

            // Fall back to the features selected in the context when no (or an empty) selection is loaded.
            if (SelectedTerms == null || SelectedTerms.Count == 0)
            {
                SelectedTerms = context.selectedFeatures;
            }

            List <String> sel_tkns = SelectedTerms.index.Values.Select(x => x.name).ToList();

            // Last resort: use every known term of the space model.
            if (!sel_tkns.Any())
            {
                sel_tkns.AddRange(context.spaceModel.terms_known_label.GetTokens());
            }

            fvConstructor.Deploy(featureMethod.constructor, sel_tkns);

            classifier = featureMethod.classifierSettings.GetClassifier();

            sc_id = scoreDictionary.GetVectorsWithLabelID(null, criterion).ToNameVsLabelID();

            // Only items that have a label in the score dictionary take part in the training.
            List <FeatureVectorWithLabelID> trainingSet = new List <FeatureVectorWithLabelID>();
            foreach (var item in context.items)
            {
                if (sc_id.ContainsKey(item.AssignedID))
                {
                    WeightDictionary dc_vec = TermWeightModel.GetWeights(sel_tkns, item.spaceDocument, context.spaceModel);

                    var n_vec = fvConstructor.ConstructFeatureVector(dc_vec, item.AssignedID);

                    trainingSet.Add(new FeatureVectorWithLabelID(n_vec, sc_id[item.AssignedID]));
                }
            }

            // Report the number of vectors actually handed to the classifier; the label map may hold
            // identifiers that have no matching item in the context.
            log.log("Training [" + classifier.name + "] with [" + trainingSet.Count + "] feature vectors.");
            classifier.DoTraining(trainingSet, log);
        }
Exemple #25
0
        /// <summary>
        /// Returns the similarity between <paramref name="vector"/> and the terms of <c>classVector</c>,
        /// as computed by <c>similarityFunction</c>.
        /// </summary>
        /// <param name="vector">The vector that is compared.</param>
        /// <param name="d">Not used by this implementation.</param>
        /// <returns>The computed similarity.</returns>
        public override double ComputeDimension(WeightDictionary vector, Int32 d = 0)
        {
            return similarityFunction.ComputeSimilarity(vector, classVector.terms);
        }
Exemple #26
0
        /// <summary>
        /// Renders a primary and a secondary view of the fold, ranks the documents of the secondary view and
        /// saves a weight table built by projecting primary terms to the document scores.
        /// </summary>
        /// <param name="logger">The logger.</param>
        /// <param name="executionContextMain">Context wrapped into the returned pair.</param>
        /// <param name="executionContextExtra">Not used by this method.</param>
        /// <returns>
        /// Pair of the fold and an operation context; when the operation is not skipped, the context is the
        /// one created for the secondary view.
        /// </returns>
        public override ExperimentDataSetFoldContextPair <OperationContext> Execute(ILogBuilder logger, OperationContext executionContextMain = null, ExperimentModelExecutionContext executionContextExtra = null)
        {
            var output = new ExperimentDataSetFoldContextPair <OperationContext>(fold, executionContextMain);

            Open();

            String weightTablePath = WeightDictionary.GetDictionaryFilename(setup.OutputFilename, fold_notes.folder);

            // Skip only when skipping is enabled and the weight table file already exists.
            Boolean alreadyBuilt = setup.skipIfExisting && File.Exists(weightTablePath);

            if (alreadyBuilt)
            {
                logger.log("WeightTable [" + weightTablePath + "] found, skipping the operation");
            }
            else
            {
                // ------------------- PRIMARY CONTEXT
                notes.log("Rendering primary view");

                output.context.DeployDataSet(fold, logger);
                primaryEntityOperation.TextRendering(output.context, notes);
                corpusOperation.SpaceModelPopulation(output.context, notes);
                corpusOperation.SpaceModelCategories(output.context, notes);
                corpusOperation.FeatureSelection(output.context, notes, requirements.MayUseSelectedFeatures);

                OperationContext primaryContext = output.context;

                // ------------------- SECONDARY CONTEXT
                // A fresh context replaces the primary one in the output pair.
                output.context = new OperationContext();

                notes.log("Rendering secondary view");

                output.context.DeployDataSet(fold, logger);
                secondaryEntityOperation.TextRendering(output.context, notes);
                corpusOperation.SpaceModelPopulation(output.context, notes);
                corpusOperation.SpaceModelCategories(output.context, notes);
                corpusOperation.FeatureSelection(output.context, notes, requirements.MayUseSelectedFeatures);

                OperationContext secondaryContext = output.context;

                // ------------------- PROJECTION
                ProjectionDictionary renderPairs = DocumentRankingTools.ConstructPairDictionary(primaryContext.spaceModel.documents, secondaryContext.spaceModel.documents);

                DocumentSelectResult ranked = output.context.PrepareContext(rankingOperation, fold_notes.folder, logger);
                ranked             = rankingOperation.ExecuteEvaluation(ranked, logger);
                ranked.description = "Document score assigned to the primary text render" + name;

                String scoreFilePath = fold_notes.folder.pathFor("DS_" + name + "_projection_score.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Projection within [" + name + "] operation");
                ranked.saveObjectToXML(scoreFilePath);

                TokenFrequencyAndScoreDictionary projectedScores = ProjectionTools.ProjectPrimaryTermsToScores(renderPairs, ranked, logger);

                WeightDictionary weightTable = projectedScores.ConstructWeightDictionary();
                weightTable.name        = setup.OutputFilename;
                weightTable.description = "Projected PrimaryView to ScoreTable - WeightTable, constructed from [" + renderPairs.Count + "] render pairs. Document ranking: " + ranked.description;

                weightTable.Save(fold_notes.folder, logger, setup.OutputFilename);
            }

            Close();

            return output;
        }
        /// <summary>
        /// Transforms to fv dictionary: for every document, builds a feature vector holding its similarity to
        /// each other document of the same group.
        /// </summary>
        /// <param name="context">The context; supplies the items, the selected features and the space model.</param>
        /// <param name="TermWeightModel">The term weight model used to compute the weight vector of each document.</param>
        /// <param name="function">The similarity function applied to pairs of document weight vectors.</param>
        /// <param name="groupmode">How documents are grouped: by category, by site, or the whole dataset as one group.</param>
        /// <param name="log">The log.</param>
        /// <returns>Dictionary of feature vector sets, keyed by group name.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="groupmode"/> is not one of the supported modes.</exception>
        public static FeatureVectorSetDictionary TransformToFVDictionaryAsPageSimilarity(this DocumentSelectResult context, FeatureWeightModel TermWeightModel, IVectorSimilarityFunction function, ScoreComputationModeEnum groupmode, ILogBuilder log)
        {
            List <string> selectedTerms = context.selectedFeatures.GetKeys();

            // Weight vector of every document, keyed by its assigned ID.
            Dictionary <String, WeightDictionary> documentDictionarties = new Dictionary <string, WeightDictionary>();

            foreach (var entry in context.items)
            {
                WeightDictionary documentWeights = TermWeightModel.GetWeights(selectedTerms, entry.spaceDocument, context.spaceModel);
                documentDictionarties.Add(entry.AssignedID, documentWeights);
            }

            FeatureVectorSetDictionary dict = new FeatureVectorSetDictionary();

            Int32 i = 0;

            // Progress is reported roughly every tenth of the items; at least 1, so that the modulo
            // below cannot divide by zero when there are fewer than ten items.
            Int32 p = Math.Max(1, context.Count / 10);

            Dictionary <string, List <DocumentSelectResultEntry> > relative_groups = null;

            if (groupmode == ScoreComputationModeEnum.category)
            {
                Dictionary <string, List <string> > assignIDByLabel = context.spaceModel.LabelToDocumentLinks.GetAllRelationShipByName(true);

                relative_groups = context.GetByAssignIDCategory(assignIDByLabel, log);

                // NOTE(review): the UNKNOWN label is removed only after the groups were built from this
                // dictionary; whether that still affects the groups depends on GetByAssignIDCategory - confirm the intended order.
                if (assignIDByLabel.ContainsKey(SpaceLabel.UNKNOWN))
                {
                    assignIDByLabel.Remove(SpaceLabel.UNKNOWN);
                }
                log.log("... Page Similarity ... Groups by category");
            }
            else if (groupmode == ScoreComputationModeEnum.site)
            {
                relative_groups = context.GetByDomain(log);
                log.log("... Page Similarity ... Groups by site");
            }
            else if (groupmode == ScoreComputationModeEnum.dataset)
            {
                relative_groups = new Dictionary <string, List <DocumentSelectResultEntry> >();
                relative_groups.Add("dataset", context.items);
                log.log("... Page Similarity ... dataset");
            }
            else
            {
                // Without groups there is nothing to iterate; fail with a descriptive error instead of a null dereference.
                throw new ArgumentOutOfRangeException(nameof(groupmode), groupmode, "Group mode is not supported by the page similarity transformation");
            }

            // Cache of already computed similarities. The separator keeps the concatenated key unambiguous
            // ("ab" + "c" would otherwise equal "a" + "bc").
            const String keySeparator = "\n";
            ConcurrentDictionary <String, Double> computedPairs = new ConcurrentDictionary <string, double>();

            foreach (var domainPair in relative_groups)
            {
                List <DocumentSelectResultEntry> relatives = domainPair.Value;

                for (Int32 h = 0; h < relatives.Count; h++)
                {
                    Int32 hostInd = h;
                    DocumentSelectResultEntry entry = relatives[hostInd];

                    i++;
                    FeatureVector fv = new FeatureVector(entry.AssignedID);

                    // One dimension per other member of the group.
                    fv.dimensions = new double[relatives.Count - 1];

                    // Iterating by index avoids a linear IndexOf search for every pair.
                    Parallel.For(0, relatives.Count, (pairInd) =>
                    {
                        DocumentSelectResultEntry pair = relatives[pairInd];

                        if (pair.AssignedID == entry.AssignedID)
                        {
                            return;
                        }

                        // The host itself has no slot, so positions at or after it shift down by one.
                        Int32 ind = pairInd >= hostInd ? pairInd - 1 : pairInd;

                        String forwardKey = entry.AssignedID + keySeparator + pair.AssignedID;
                        String reverseKey = pair.AssignedID + keySeparator + entry.AssignedID;

                        Double docToClassSimilarity;

                        // The value cached for the reversed pair is reused, i.e. the similarity is treated as symmetric.
                        if (!computedPairs.TryGetValue(forwardKey, out docToClassSimilarity) &&
                            !computedPairs.TryGetValue(reverseKey, out docToClassSimilarity))
                        {
                            var vecA = documentDictionarties[pair.AssignedID];
                            var vecB = documentDictionarties[entry.AssignedID];
                            docToClassSimilarity = function.ComputeSimilarity(vecA, vecB);

                            computedPairs.TryAdd(forwardKey, docToClassSimilarity);
                        }

                        fv.dimensions[ind] = docToClassSimilarity;
                    });

                    if (i % p == 0)
                    {
                        log.Append(" [" + i.GetRatio(context.Count).ToString("P2") + "] ");
                    }

                    dict.GetOrAdd(domainPair.Key).Add(fv, -1);
                }
            }

            log.log("... Preparation finished ...");

            return(dict);
        }