/// <summary>
/// Loads the weight dictionaries found in <paramref name="folder"/> and merges them into a single
/// feature vector dictionary. Files are located with <c>folder.GetOrFindFiles</c>, using the
/// specified input names and search pattern.
/// </summary>
/// <param name="folder">The folder that is searched for dictionary files.</param>
/// <param name="inputNames">The input names, passed to <c>GetOrFindFiles</c>.</param>
/// <param name="output">The log builder handed to <c>WeightDictionary.LoadFile</c>.</param>
/// <param name="searchPattern">The search pattern, passed to <c>GetOrFindFiles</c>.</param>
/// <returns>The result of merging all successfully loaded dictionaries.</returns>
public static FeatureVectorDictionaryWithDimensions MergeWeightDictionaries(folderNode folder, String inputNames, ILogBuilder output, String searchPattern = "*_wt.xml")
{
    List<string> filepaths = folder.GetOrFindFiles(inputNames, searchPattern);
    List<WeightDictionary> results = new List<WeightDictionary>();

    Int32 c = 0;

    foreach (var fp in filepaths)
    {
        var lr = WeightDictionary.LoadFile(fp, output);

        // LoadFile returns null when the file does not exist; skip it instead of dereferencing null.
        if (lr == null)
        {
            continue;
        }

        // Keep the original name in the description, because the name is replaced below.
        lr.description += "Source name: " + lr.name;

        // File name plus a zero-padded counter, so that every loaded dictionary gets a distinct name.
        String fn = Path.GetFileNameWithoutExtension(fp);
        lr.name = fn + c.ToString("D3");
        c++;

        results.Add(lr);
    }

    FeatureVectorDictionaryWithDimensions featureDict = MergeWeightDictionaries(results);

    return featureDict;
}
/// <summary>
/// Builds the collection of entry pairs for all keys that exist in both dictionaries.
/// </summary>
/// <param name="vectorA">The first dictionary; its keys drive the iteration.</param>
/// <param name="vectorB">The second dictionary; only keys it also contains produce a pair.</param>
/// <param name="vectorADimID">Index of the dimension read from the entries of <paramref name="vectorA"/>.</param>
/// <param name="vectorBDimID">Index of the dimension read from the entries of <paramref name="vectorB"/>.</param>
public WeightDictionaryEntryPairs(WeightDictionary vectorA, WeightDictionary vectorB, Int32 vectorADimID = 0, Int32 vectorBDimID = 0)
{
    foreach (String k in vectorA.GetKeys())
    {
        if (vectorB.ContainsKey(k))
        {
            // The second value must come from vectorB; previously both values were read from vectorA.
            Add(new WeightDictionaryEntryPair(k, vectorA.index[k].dimensions[vectorADimID], vectorB.index[k].dimensions[vectorBDimID]));
        }
    }
}
/// <summary>
/// Deserializes a weight dictionary from the XML file at the given path.
/// </summary>
/// <param name="p_m">Path of the XML file to load.</param>
/// <param name="log">The log builder passed to the XML loader.</param>
/// <returns>The loaded dictionary, or <c>null</c> when the file does not exist.</returns>
public static WeightDictionary LoadFile(String p_m, ILogBuilder log)
{
    if (File.Exists(p_m))
    {
        WeightDictionary loaded = objectSerialization.loadObjectFromXML<WeightDictionary>(p_m, log);
        loaded.populateIndex();

        // A dictionary stored without a name takes the file name (without extension).
        if (loaded.name.isNullOrEmpty())
        {
            loaded.name = Path.GetFileNameWithoutExtension(p_m);
        }

        return loaded;
    }

    return null;
}
/// <summary>
/// Merges the specified weight dictionaries into a feature vector dictionary that has one
/// dimension per input dictionary.
/// </summary>
/// <param name="results">The dictionaries to merge; each one contributes one dimension.</param>
/// <returns>
/// Feature vector dictionary holding the feature vectors produced from the output of
/// <c>MergeDimensions</c>.
/// </returns>
public static FeatureVectorDictionaryWithDimensions MergeWeightDictionaries(this List<WeightDictionary> results)
{
    FeatureVectorDictionaryWithDimensions featureDict = new FeatureVectorDictionaryWithDimensions();

    Int32 i = 0;

    foreach (var dict in results)
    {
        featureDict.dimensions.Add(dict.name + i.ToString(), "Weights from [" + dict.name + "]", Feature.Settings.FeatureVectorDimensionType.precompiledDocumentScore, "WeightDictionary");

        // Advance the position suffix; it was never incremented before, so every name ended in "0".
        i++;
    }

    WeightDictionary output = MergeDimensions(results);

    var fvs = output.index.Values.ToFeatureVectors();

    foreach (var fv in fvs)
    {
        featureDict.Add(fv);
    }

    return featureDict;
}
/// <summary>
/// Merges the entries of another dictionary into this one. Entries with a name not yet in the
/// index are added; for names already present, the dimension values are summed element-wise.
/// </summary>
/// <param name="dictionary">The dictionary to merge in; must have the same number of dimensions.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <paramref name="dictionary"/> has a different number of dimensions than this dictionary.
/// </exception>
public void Merge(WeightDictionary dictionary)
{
    if (nDimensions != dictionary.nDimensions)
    {
        // The two-string constructor takes (paramName, message); the arguments were previously swapped.
        throw new ArgumentOutOfRangeException(nameof(dictionary), "Dictionary sent for merge has different number of dimensions. Host dictionary [" + nDimensions + "] - merging with [" + dictionary.nDimensions + "]!");
    }

    foreach (WeightDictionaryEntry entry in dictionary.index.Values)
    {
        if (!index.ContainsKey(entry.name))
        {
            // NOTE(review): the entry instance itself is stored, so it is shared with the source dictionary.
            index.Add(entry.name, entry);
        }
        else
        {
            // Look the host entry up once instead of on every loop iteration.
            var hostDimensions = index[entry.name].dimensions;

            for (int i = 0; i < hostDimensions.Length; i++)
            {
                hostDimensions[i] += entry.dimensions[i];
            }
        }
    }
}
/// <summary>
/// Combines the <c>weight</c> of every entry from each input dictionary into one dictionary,
/// where the n-th input dictionary fills the n-th dimension of the merged entries.
/// </summary>
/// <param name="results">The dictionaries to combine.</param>
/// <returns>
/// Dictionary with <c>results.Count</c> dimensions, named by concatenating the input names.
/// Dimensions for which an input dictionary has no entry keep the array default value.
/// </returns>
public static WeightDictionary MergeDimensions(this List<WeightDictionary> results)
{
    WeightDictionary merged = new WeightDictionary();
    merged.nDimensions = results.Count;

    // One row per entry key, one column per input dictionary.
    Dictionary<String, Double[]> rows = new Dictionary<string, double[]>();

    for (Int32 column = 0; column < results.Count; column++)
    {
        var source = results[column];
        merged.name += source.name;

        foreach (var entry in source.index)
        {
            Double[] row;
            if (!rows.TryGetValue(entry.Key, out row))
            {
                row = new Double[merged.nDimensions];
                rows.Add(entry.Key, row);
            }

            row[column] = entry.Value.weight;
        }
    }

    foreach (var row in rows)
    {
        merged.AddEntry(row.Key, row.Value, false);
    }

    return merged;
}
/// <summary>
/// Constructs the weight dictionary according to stored frequency and score information.
/// For each token from the frequency index, the weight is
/// <c>(score / maxScore) * (TF * IDF)</c>, where TF is the token frequency relative to the maximum
/// frequency and IDF is <c>ln(maxDF / documentFrequency) + 1</c>.
/// </summary>
/// <returns>A new dictionary with one weighted entry per token.</returns>
public WeightDictionary ConstructWeightDictionary()
{
    WeightDictionary output = new WeightDictionary();

    var tkns = frequencyIndex.GetTokens();

    Int32 maxFrequency = frequencyIndex.GetMaxFrequency();
    Double maxWeight = scoreIndex.Values.Max();
    Int32 maxDF = documentIndex.GetMaxFrequency();

    foreach (String token in tkns)
    {
        Double finalWeight = scoreIndex[token].GetRatio(maxWeight);

        Double TF = frequencyIndex.GetTokenFrequency(token).GetRatio(maxFrequency);

        // Convert to Double before dividing: with integer operands the ratio would be truncated
        // before the logarithm is taken.
        // NOTE(review): a document frequency of 0 now gives an infinite IDF instead of an
        // integer divide-by-zero exception - confirm every token is present in documentIndex.
        Double documentFrequency = documentIndex.GetTokenFrequency(token);
        Double IDF = Math.Log(maxDF / documentFrequency) + 1;

        finalWeight = finalWeight * (TF * IDF);

        output.AddEntry(token, finalWeight);
    }

    return output;
}
/// <summary>
/// Creates a histogram model over the <c>weight</c> values of the dictionary entries.
/// </summary>
/// <param name="dictionary">The dictionary whose entries are binned; its name is passed to the model.</param>
/// <param name="binCount">The bin count passed to <c>GetHistogramModel</c>.</param>
/// <returns>The histogram model returned by <c>GetHistogramModel</c>.</returns>
public static histogramModel GetHistogram(this WeightDictionary dictionary, Int32 binCount = 50)
{
    var entries = dictionary.index.Values.ToList();

    return entries.GetHistogramModel(dictionary.name, x => x.weight, binCount);
}