Пример #1
0
        /// <summary>
        /// Loads the weight dictionary files found in the folder and merges them into a single
        /// feature vector dictionary with one dimension per loaded file.
        /// </summary>
        /// <remarks>
        /// File paths are resolved through <c>folder.GetOrFindFiles(inputNames, searchPattern)</c>.
        /// Before merging, the original name of each loaded dictionary is appended to its description and the
        /// dictionary is renamed to the file name (without extension) followed by a three-digit sequence number.
        /// Paths for which <c>WeightDictionary.LoadFile</c> returns <c>null</c> are skipped.
        /// </remarks>
        /// <param name="folder">The folder that is asked for the file paths.</param>
        /// <param name="inputNames">The input names, forwarded to <c>GetOrFindFiles</c>.</param>
        /// <param name="output">The log builder forwarded to <c>WeightDictionary.LoadFile</c>.</param>
        /// <param name="searchPattern">The search pattern, forwarded to <c>GetOrFindFiles</c>.</param>
        /// <returns>The merged feature vector dictionary.</returns>
        public static FeatureVectorDictionaryWithDimensions MergeWeightDictionaries(folderNode folder, String inputNames, ILogBuilder output, String searchPattern = "*_wt.xml")
        {
            List<string> filepaths = folder.GetOrFindFiles(inputNames, searchPattern);

            List<WeightDictionary> results = new List<WeightDictionary>();

            Int32 c = 0;

            foreach (var fp in filepaths)
            {
                var lr = WeightDictionary.LoadFile(fp, output);

                // LoadFile returns null when nothing could be loaded from the path;
                // skip such entries instead of failing on the first member access.
                if (lr == null)
                {
                    continue;
                }

                lr.description += "Source name: " + lr.name;

                // The sequence number keeps names distinct even when two files share a file name.
                String fn = Path.GetFileNameWithoutExtension(fp);
                lr.name = fn + c.ToString("D3");
                c++;

                results.Add(lr);
            }

            return MergeWeightDictionaries(results);
        }
 /// <summary>
 /// Builds the collection of entry pairs for every key of <paramref name="vectorA"/> that is also
 /// contained in <paramref name="vectorB"/>.
 /// </summary>
 /// <param name="vectorA">The first dictionary; its keys drive the iteration.</param>
 /// <param name="vectorB">The second dictionary; only keys it contains produce a pair.</param>
 /// <param name="vectorADimID">Index into the <c>dimensions</c> array of the entries of <paramref name="vectorA"/>.</param>
 /// <param name="vectorBDimID">Index into the <c>dimensions</c> array of the entries of <paramref name="vectorB"/>.</param>
 public WeightDictionaryEntryPairs(WeightDictionary vectorA, WeightDictionary vectorB, Int32 vectorADimID = 0, Int32 vectorBDimID = 0)
 {
     foreach (String k in vectorA.GetKeys())
     {
         if (vectorB.ContainsKey(k))
         {
             // The second value must come from vectorB; reading it from vectorA would pair
             // the first dictionary with itself and ignore vectorB's weights entirely.
             Add(new WeightDictionaryEntryPair(k, vectorA.index[k].dimensions[vectorADimID], vectorB.index[k].dimensions[vectorBDimID]));
         }
     }
 }
Пример #3
0
        /// <summary>
        /// Loads a weight dictionary from the XML file at the specified path.
        /// </summary>
        /// <param name="p_m">Path of the XML file to load.</param>
        /// <param name="log">The log builder forwarded to the XML deserialization call.</param>
        /// <returns>
        /// The loaded dictionary after <c>populateIndex()</c> was called on it, or <c>null</c> when the file
        /// does not exist or the deserialization call produced no object.
        /// </returns>
        public static WeightDictionary LoadFile(String p_m, ILogBuilder log)
        {
            if (!File.Exists(p_m))
            {
                return null;
            }

            WeightDictionary weightDictionary = objectSerialization.loadObjectFromXML<WeightDictionary>(p_m, log);

            // NOTE(review): presumably loadObjectFromXML can return null on a failed load -- confirm.
            // Returning null here matches the existing "nothing loaded" result for a missing file.
            if (weightDictionary == null)
            {
                return null;
            }

            weightDictionary.populateIndex();

            // Fall back to the file name when the stored dictionary carries no name.
            if (weightDictionary.name.isNullOrEmpty())
            {
                weightDictionary.name = Path.GetFileNameWithoutExtension(p_m);
            }

            return weightDictionary;
        }
Пример #4
0
        /// <summary>
        /// Merges the given weight dictionaries into a feature vector dictionary, registering one
        /// dimension per input dictionary.
        /// </summary>
        /// <remarks>
        /// Each dimension is named with the dictionary name followed by the position of the dictionary in
        /// <paramref name="results"/>. The vectors come from <c>MergeDimensions(results)</c>, converted with
        /// <c>ToFeatureVectors()</c>.
        /// </remarks>
        /// <param name="results">The dictionaries to merge.</param>
        /// <returns>The feature vector dictionary holding the merged vectors.</returns>
        public static FeatureVectorDictionaryWithDimensions MergeWeightDictionaries(this List<WeightDictionary> results)
        {
            FeatureVectorDictionaryWithDimensions featureDict = new FeatureVectorDictionaryWithDimensions();
            Int32 i = 0;

            foreach (var dict in results)
            {
                featureDict.dimensions.Add(dict.name + i.ToString(), "Weights from [" + dict.name + "]", Feature.Settings.FeatureVectorDimensionType.precompiledDocumentScore, "WeightDictionary");

                // Advance the position suffix; without this every dimension name ended in "0",
                // so dictionaries sharing a name produced identical dimension names.
                i++;
            }

            WeightDictionary output = MergeDimensions(results);

            var fvs = output.index.Values.ToFeatureVectors();

            foreach (var fv in fvs)
            {
                featureDict.Add(fv);
            }

            return featureDict;
        }
Пример #5
0
        /// <summary>
        /// Merges the entries of another dictionary into this one.
        /// </summary>
        /// <remarks>
        /// Entries whose name is not yet in <c>index</c> are added as-is (the same entry instance is stored,
        /// not a copy). For entries that already exist, the dimension values are summed element by element.
        /// </remarks>
        /// <param name="dictionary">The dictionary whose entries are merged into this instance.</param>
        /// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="dictionary"/> has a different <c>nDimensions</c> than this instance.
        /// </exception>
        public void Merge(WeightDictionary dictionary)
        {
            if (dictionary == null)
            {
                throw new ArgumentNullException(nameof(dictionary));
            }

            if (nDimensions != dictionary.nDimensions)
            {
                // The two-string constructor takes (paramName, message) -- in that order.
                throw new ArgumentOutOfRangeException(nameof(dictionary), "Dictionary sent for merge has different number of dimensions. Host dictionary [" + nDimensions + "] - merging with [" + dictionary.nDimensions + "]!");
            }

            foreach (WeightDictionaryEntry entry in dictionary.index.Values)
            {
                if (!index.ContainsKey(entry.name))
                {
                    index.Add(entry.name, entry);
                }
                else
                {
                    // Look the host entry up once instead of on every loop iteration.
                    var hostDimensions = index[entry.name].dimensions;

                    for (int i = 0; i < hostDimensions.Length; i++)
                    {
                        hostDimensions[i] += entry.dimensions[i];
                    }
                }
            }
        }
Пример #6
0
        /// <summary>
        /// Combines the <c>weight</c> of every entry of the given dictionaries into one dictionary that has
        /// one dimension per input dictionary.
        /// </summary>
        /// <remarks>
        /// The n-th input dictionary writes into the n-th dimension. Keys missing from an input dictionary
        /// keep the default value (0) in that dimension. The name of the result is the concatenation of the
        /// input dictionary names.
        /// </remarks>
        /// <param name="results">The dictionaries to combine.</param>
        /// <returns>The combined dictionary with <c>nDimensions</c> equal to <c>results.Count</c>.</returns>
        public static WeightDictionary MergeDimensions(this List <WeightDictionary> results)
        {
            WeightDictionary merged = new WeightDictionary();
            merged.nDimensions = results.Count;

            // key -> one weight slot per source dictionary
            var weightsByKey = new Dictionary<String, Double[]>();

            for (Int32 column = 0; column < results.Count; column++)
            {
                WeightDictionary source = results[column];
                merged.name += source.name;

                foreach (var item in source.index)
                {
                    Double[] row;
                    if (!weightsByKey.TryGetValue(item.Key, out row))
                    {
                        row = new Double[merged.nDimensions];
                        weightsByKey.Add(item.Key, row);
                    }

                    row[column] = item.Value.weight;
                }
            }

            foreach (var keyAndWeights in weightsByKey)
            {
                merged.AddEntry(keyAndWeights.Key, keyAndWeights.Value, false);
            }

            return merged;
        }
Пример #7
0
        /// <summary>
        /// Constructs the weight dictionary according to stored frequency and score information.
        /// </summary>
        /// <remarks>
        /// For every token of <c>frequencyIndex</c> the weight is computed as
        /// <c>scoreRatio * TF * IDF</c>, where <c>scoreRatio</c> is the token score relative to the maximum
        /// score, <c>TF</c> is the token frequency relative to the maximum frequency, and
        /// <c>IDF = ln(maxDF / documentFrequency) + 1</c>.
        /// </remarks>
        /// <returns>A new dictionary with one entry per token.</returns>
        /// <exception cref="DivideByZeroException">
        /// A token has a document frequency of zero, so its IDF cannot be computed.
        /// </exception>
        public WeightDictionary ConstructWeightDictionary()
        {
            WeightDictionary output = new WeightDictionary();

            var tkns = frequencyIndex.GetTokens();

            Int32  maxFrequency = frequencyIndex.GetMaxFrequency();
            Double maxWeight    = scoreIndex.Values.Max();
            Int32  maxDF        = documentIndex.GetMaxFrequency();

            foreach (String token in tkns)
            {
                Double finalWeight = scoreIndex[token].GetRatio(maxWeight);
                Double TF          = frequencyIndex.GetTokenFrequency(token).GetRatio(maxFrequency);

                var documentFrequency = documentIndex.GetTokenFrequency(token);

                // Keep failing fast on a zero document frequency: after the cast below the division
                // would otherwise silently produce an infinite weight.
                if (documentFrequency == 0)
                {
                    throw new DivideByZeroException("Token [" + token + "] has zero document frequency - IDF cannot be computed.");
                }

                // Divide in floating point. NOTE(review): GetTokenFrequency presumably returns an integer,
                // in which case the former maxDF / frequency truncated the ratio before the logarithm -- confirm.
                Double IDF = Math.Log((Double)maxDF / documentFrequency) + 1;

                finalWeight = finalWeight * (TF * IDF);

                output.AddEntry(token, finalWeight);
            }

            return output;
        }
Пример #8
0
        /// <summary>
        /// Builds a histogram model over the <c>weight</c> of all entries of the dictionary.
        /// </summary>
        /// <param name="dictionary">The dictionary whose indexed entries are used.</param>
        /// <param name="binCount">The bin count forwarded to <c>GetHistogramModel</c>.</param>
        /// <returns>The histogram model, created under the name of the dictionary.</returns>
        public static histogramModel GetHistogram(this WeightDictionary dictionary, Int32 binCount = 50)
        {
            // Snapshot the indexed entries into a list before handing them to the histogram builder.
            var entries = dictionary.index.Values.ToList();

            return entries.GetHistogramModel(dictionary.name, entry => entry.weight, binCount);
        }