/// <summary>
/// Rebuilds <c>ColorSelectors</c>: every distinct item found in <paramref name="instances"/> receives a root color
/// taken from <c>rootGradient</c> and a circular selector over a gradient between a darker and a brighter variant of that color.
/// </summary>
/// <param name="instances">Items to count; one selector is created per distinct item, sized by how often the item occurs.</param>
/// <param name="minBrightness">HSV value (V) assigned to the start color of each per-item gradient.</param>
/// <param name="maxBrightness">HSV value (V) assigned to the end color of each per-item gradient.</param>
public void Prepare(IEnumerable<T> instances, float minBrightness = 0.6F, float maxBrightness = 0.9F)
{
    ColorSelectors = new Dictionary<T, circularSelector<String>>();

    frequencyCounter<T> counter = new frequencyCounter<T>();
    foreach (var inst in instances)
    {
        counter.Count(inst);
    }

    var types = counter.GetDistinctItems();
    var rootColors = rootGradient.GetColorHSVSteps(types.Count);

    // Iterate by index: the position of the item is also the index of its root color,
    // so there is no need to search the list again with IndexOf on every iteration.
    for (Int32 ti = 0; ti < types.Count; ti++)
    {
        var type = types[ti];
        var rootColor = rootColors[ti];

        var A = rootColor.Clone();
        A.V = minBrightness;

        var B = rootColor.Clone();
        B.V = maxBrightness;

        // The start color is used both as the registered root color and as the gradient origin.
        var startHex = A.GetHexColor(true);
        var endHex = B.GetHexColor(true);

        // NOTE(review): ColorSelectors is recreated above, but RootColors is only appended to in this method -
        // confirm it is reset elsewhere before Prepare is called a second time.
        RootColors.Add(type, startHex);

        ColorGradient typeGradient = new ColorGradient(startHex, endHex, ColorGradientFunction.AllAToB);

        // One gradient step per occurrence of the item.
        ColorSelectors.Add(type, new circularSelector<string>(typeGradient.GetColorSteps(counter.GetFrequencyForItem(type))));
    }
}
/// <summary>
/// Builds a finger print containing the XPaths whose total count over all <paramref name="input"/> finger prints
/// equals the number of finger prints supplied.
/// </summary>
/// <param name="input">The finger prints to compare.</param>
/// <returns>
/// A new finger print with the matching XPaths, or <c>null</c> when the frequency bins have no entry keyed by the number of inputs.
/// </returns>
public static StructureFingerPrint CommonFingerPrint(IEnumerable<StructureFingerPrint> input)
{
    StructureFingerPrint common = new StructureFingerPrint();
    frequencyCounter<String> pathCounter = new frequencyCounter<string>();

    // Count every XPath occurrence and, alongside, how many finger prints were seen.
    Int32 printCount = 0;
    foreach (StructureFingerPrint fingerPrint in input)
    {
        printCount++;

        foreach (String path in fingerPrint.XPathList)
        {
            pathCounter.Count(path);
        }
    }

    var binsByFrequency = pathCounter.GetFrequencyBins();

    // No bin for "counted once per finger print" means there is nothing to report.
    if (!binsByFrequency.ContainsKey(printCount))
    {
        return null;
    }

    foreach (String path in binsByFrequency[printCount])
    {
        common.XPathList.Add(path);
    }

    return common;
}
/// <summary>
/// Builds <see cref="LeafNodeDictionary"/> and <see cref="LeafNodeDictionaryEntryNGram"/>s for each document, to allow performance optimization
/// </summary>
/// <param name="documents">The documents. The sequence is enumerated exactly once.</param>
/// <param name="leafSelectXPath">The leaf select x path, leave blank to use from settings, <see cref="DocumentSimilaritySettings.XPathToSelectLeafs"/></param>
/// <param name="tagsToIgnore">The tags to ignore, leave unspecified to use from settings, <see cref="DocumentSimilaritySettings.TagsToIgnore"/>.</param>
/// <returns>
/// Result holding the leaf dictionary and n-gram set of every document; documents whose processing threw
/// are listed in <c>DocumentsWithExceptions</c> together with the exception.
/// </returns>
public DocumentSimilarityResult Prepare(IEnumerable<HtmlNode> documents, String leafSelectXPath = "", List<String> tagsToIgnore = null)
{
    leafSelectXPath = leafSelectXPath.or(settings.XPathToSelectLeafs, LeafNodeDictionary.DefaultNodeSelectionXPath);
    tagsToIgnore = tagsToIgnore.or(settings.TagsToIgnore, LeafNodeDictionary.DefaultTagsToIgnore);

    DocumentSimilarityResult result = new DocumentSimilarityResult();

    frequencyCounter<String> xpathCounter = new frequencyCounter<string>();
    Dictionary<HtmlNode, LeafNodeDictionary> leafDictionary = new Dictionary<HtmlNode, LeafNodeDictionary>();

    // Documents are remembered in the order they were read, so the second pass below works on the very same
    // node instances even when the caller passed a lazily evaluated sequence.
    List<HtmlNode> documentList = new List<HtmlNode>();

    foreach (HtmlNode documentA in documents)
    {
        LeafNodeDictionary leafNodeDictionaryA = new LeafNodeDictionary(documentA, leafSelectXPath, tagsToIgnore);

        foreach (var entry in leafNodeDictionaryA.items)
        {
            xpathCounter.Count(entry.XPath);
        }

        leafDictionary.Add(documentA, leafNodeDictionaryA);
        documentList.Add(documentA);
    }

    // XPaths reported with the top frequency are removed from every leaf dictionary before n-grams are built.
    var commonXPaths = xpathCounter.GetItemsWithTopFrequency();

    foreach (var pair in leafDictionary)
    {
        pair.Value.RemoveEntriesByXPath(commonXPaths);
    }

    foreach (HtmlNode documentA in documentList)
    {
        try
        {
            LeafNodeDictionary leafNodeDictionaryA = leafDictionary[documentA];

            List<LeafNodeDictionaryEntryNGram> nGrams_A = setAnalysisTools<LeafNodeDictionaryEntry>.getNGramSet<LeafNodeDictionaryEntryNGram>(leafNodeDictionaryA.items, settings.nGramWidth, settings.nGramMode);

            result.DocumentsByLeafDictionary.Add(leafNodeDictionaryA, documentA);
            result.DocumentsByNGrams.Add(nGrams_A, documentA);
            result.LeafDictionaryByDocuments.Add(documentA, leafNodeDictionaryA);
            result.NGramsByDocuments.Add(documentA, nGrams_A);
        }
        catch (Exception ex)
        {
            // A failure on one document must not abort the batch; it is reported through the result instead.
            result.DocumentsWithExceptions.Add(documentA, ex);
        }
    }

    return result;
}
/// <summary>
/// Resets the tag counters, rebuilds the index and writes four files into <paramref name="folder"/>:
/// the list of item XPaths, the tag frequency bins of the items, the frequency bins of <c>NodeTagCounter</c>
/// and the DGML graph produced by <c>BuildDGML</c>.
/// </summary>
/// <param name="folder">Folder that supplies the output paths.</param>
/// <param name="name">Name inserted into every output file name, after the "nd_" prefix.</param>
/// <returns>The graph that was built and saved.</returns>
public DirectedGraphWithSourceData Publish(folderNode folder, String name)
{
    TagCounter = new frequencyCounter<string>();
    NodeTagCounter = new frequencyCounter<string>();

    RebuildIndex();

    var overwrite = imbSCI.Data.enums.getWritableFileMode.overwrite;
    String filePrefix = "nd_" + name;

    // Paths are requested in a fixed order: list, stats, graph, graph stats.
    String nodeListFile = folder.pathFor(filePrefix + "_list.txt", overwrite, "List of selected nodes");
    String nodeStatsFile = folder.pathFor(filePrefix + "_stats.txt", overwrite, "Stats of selected nodes");
    String graphFile = folder.pathFor(filePrefix + "_graph.dgml", overwrite, "Structure graph");
    String graphStatsFile = folder.pathFor(filePrefix + "_graph_stats.txt", overwrite, "Stats of the complete graph");

    // One XPath per line; the tag name of every item is counted along the way.
    StringBuilder nodeListText = new StringBuilder();
    foreach (var item in items)
    {
        nodeListText.AppendLine(item.XPath);
        TagCounter.Count(item.node.Name);
    }

    // One line per frequency bin: the bin key followed by its members as inline CSV.
    StringBuilder nodeStatsText = new StringBuilder();
    foreach (var bin in TagCounter.GetFrequencyBins())
    {
        nodeStatsText.AppendLine($"{bin.Key} {bin.Value.toCsvInLine()}");
    }

    StringBuilder graphStatsText = new StringBuilder();
    foreach (var bin in NodeTagCounter.GetFrequencyBins())
    {
        graphStatsText.AppendLine($"{bin.Key} {bin.Value.toCsvInLine()}");
    }

    DirectedGraphWithSourceData graph = BuildDGML();
    graph.Save(graphFile, overwrite);

    File.WriteAllText(nodeListFile, nodeListText.ToString());
    File.WriteAllText(nodeStatsFile, nodeStatsText.ToString());
    File.WriteAllText(graphStatsFile, graphStatsText.ToString());

    return graph;
}
/// <summary>
/// Counts, over all columns of the table, how many times each <see cref="templateFieldDataTable"/> key
/// is present in a column's extended properties with a non-empty value.
/// </summary>
/// <param name="table">The table whose columns are inspected.</param>
/// <returns>Counter with one count per column in which the given key has a non-empty value.</returns>
public static frequencyCounter<templateFieldDataTable> GetColumnSetupStatistics(this DataTable table)
{
    var statistics = new frequencyCounter<templateFieldDataTable>();

    foreach (DataColumn column in table.Columns)
    {
        var properties = column.ExtendedProperties;

        foreach (var key in properties.Keys)
        {
            // Only keys of the templateFieldDataTable type are of interest.
            if (!(key is templateFieldDataTable field))
            {
                continue;
            }

            // Keys without a value do not count as a configured column setting.
            if (properties[key].isNullOrEmpty())
            {
                continue;
            }

            statistics.Count(field);
        }
    }

    return statistics;
}
/// <summary>
/// Runs the base computation and, when the dominant content type is numeric, derives the thousands delimiter,
/// the decimal delimiter, the number of decimal places and the value type name from the positions of the
/// last ',' and '.' in the cell contents.
/// </summary>
internal override void Compute()
{
    base.Compute();

    if (dominantType != CellContentType.numeric)
    {
        return;
    }

    // Both counters hold, per cell, the number of characters to the right of the last delimiter.
    frequencyCounter<Int32> RightPositionOfComma = new frequencyCounter<Int32>();
    frequencyCounter<Int32> RightPositionOfDot = new frequencyCounter<Int32>();

    foreach (CellContentInfo info in ContentInfos)
    {
        Int32 commaPos = info.content.LastIndexOf(',');
        if (commaPos > -1)
        {
            RightPositionOfComma.Count((info.content.Length - 1) - commaPos);
        }

        Int32 dotPos = info.content.LastIndexOf('.');
        if (dotPos > -1)
        {
            RightPositionOfDot.Count((info.content.Length - 1) - dotPos);
        }
    }

    Boolean hasComma = RightPositionOfComma.Any();
    Boolean hasDot = RightPositionOfDot.Any();

    // The most frequent position is only requested from a counter that actually has entries.
    Int32 comma_pos = hasComma ? RightPositionOfComma.GetMostFrequentItem() : 0;
    Int32 dot_pos = hasDot ? RightPositionOfDot.GetMostFrequentItem() : 0;

    if (hasComma && hasDot)
    {
        // The delimiter that sits further from the right end is the thousands delimiter.
        // The position of the other one already equals the count of decimal digits
        // (e.g. "1,234.56": dot position 2 means two decimal places), so it is used unchanged.
        if (comma_pos > dot_pos)
        {
            thousantDelimiter = ",";
            decimalDelimiter = ".";
            decimalPlaces = dot_pos;
        }
        else
        {
            thousantDelimiter = ".";
            decimalDelimiter = ",";
            decimalPlaces = comma_pos;
        }
    }
    else if (hasComma)
    {
        decimalDelimiter = ",";
        decimalPlaces = comma_pos;
    }
    else if (hasDot)
    {
        decimalDelimiter = ".";
        decimalPlaces = dot_pos;
    }
    else
    {
        thousantDelimiter = "";
        decimalDelimiter = "";
        decimalPlaces = 0;
    }

    Boolean hasDecimalDelimiter = !decimalDelimiter.isNullOrEmpty();
    Boolean hasThousandDelimiter = !thousantDelimiter.isNullOrEmpty();

    if (hasDecimalDelimiter && hasThousandDelimiter)
    {
        ValueTypeName = nameof(Decimal);
    }
    else if (hasThousandDelimiter)
    {
        ValueTypeName = nameof(Int32);
    }
    else if (hasDecimalDelimiter)
    {
        ValueTypeName = nameof(Double);
    }
    else
    {
        ValueTypeName = nameof(Int32);
    }
}