/// <summary>
/// Gathers every column value tagged with <c>UserTextMark</c> from the marked items of
/// <paramref name="resource"/>, joins the values into one space-separated string and
/// runs the keyword extractor over that text.
/// </summary>
/// <param name="resource">Result whose items marked with <c>UserTextMark</c> supply the text.</param>
/// <returns>
/// One <see cref="WordWeight"/> per entry returned by the extractor, copying its
/// <c>Word</c> and <c>Weight</c>. The projection is a deferred LINQ <c>Select</c>, so it is
/// re-evaluated on every enumeration of the returned sequence.
/// </returns>
public IEnumerable<WordWeight> ExtractTagsWithWeight(ForensicResult resource)
{
    // init() is defined elsewhere; presumably it prepares the extractor - confirm.
    init();

    // Collect the raw column values of every marked item, in iteration order.
    var columnValues = new List<object>();
    foreach (var markedItem in resource.GetItemWithMark(UserTextMark))
    {
        columnValues.AddRange(markedItem.GetColumnDataByMark(UserTextMark));
    }

    // string.Join converts each value to text and separates them with a single space.
    var joinedText = string.Join(" ", columnValues);

    // KeywordCount is forwarded as the extractor's second argument
    // (presumably the maximum number of keywords - confirm against Extractor).
    var extracted = Extractor.ExtractTagsWithWeight(joinedText, KeywordCount);

    return extracted.Select(tag => new WordWeight
    {
        Weight = tag.Weight,
        Word = tag.Word,
    });
}
/// <summary>
/// Builds one keyword-weight vector per marked item of <paramref name="resource"/> and
/// runs an <c>EngineModel</c> over those vectors.
/// </summary>
/// <remarks>
/// Every distinct keyword found across all items gets one vector position, assigned in
/// order of first occurrence. Each item that yields at least one keyword becomes a
/// <c>PointModel</c> whose <c>X</c> holds that item's weight at each keyword position and
/// 0 everywhere else. Items without keywords are skipped.
/// NOTE(review): the engine's outcome is not returned or stored by this method; the
/// original only read <c>Classes</c> into an unused local, which has been removed.
/// </remarks>
/// <param name="resource">Result whose items marked with <c>UserTextMark</c> are processed.</param>
/// <param name="Kcount">Value assigned to the engine's <c>ClassCount</c>; defaults to 3.</param>
public void Classify(ForensicResult resource, int Kcount = 3)
{
    // init() is defined elsewhere; presumably it prepares the extractor - confirm.
    init();

    // Keyword -> vector position, numbered in order of first occurrence.
    var wordIndex = new Dictionary<string, int>();

    // Keyword weights per item; only items with at least one keyword are kept.
    var weightsByItem = new Dictionary<ForensicResultItem, List<WordWeight>>();

    foreach (var item in resource.GetItemWithMark(UserTextMark))
    {
        // Materialize once: the extractor result may be a deferred query, and it is
        // needed both for the vocabulary here and for the sample vectors below.
        var tags = new List<WordWeight>(ExtractTagsWithWeight(item));
        if (tags.Count == 0)
        {
            continue;
        }

        weightsByItem.Add(item, tags);

        foreach (var tag in tags)
        {
            if (!wordIndex.ContainsKey(tag.Word))
            {
                wordIndex.Add(tag.Word, wordIndex.Count);
            }
        }
    }

    // One vector per retained item, with one dimension per distinct keyword.
    var dimension = wordIndex.Count;
    var samples = new List<PointModel>(weightsByItem.Count);
    foreach (var tags in weightsByItem.Values)
    {
        var vector = new double[dimension];
        foreach (var tag in tags)
        {
            // If a keyword repeats within one item, the last weight wins.
            vector[wordIndex[tag.Word]] = tag.Weight;
        }

        samples.Add(new PointModel { X = vector });
    }

    var engine = new EngineModel
    {
        ClassCount = Kcount,
        Samples = samples,
    };
    engine.Run();
}
/// <summary>
/// Serializes <paramref name="res"/> to indented JSON and writes it to
/// <paramref name="path"/>, creating the file or overwriting an existing one.
/// </summary>
/// <remarks>
/// Unless <paramref name="includeAll"/> is true, every child whose <c>Desc</c> equals
/// "数据集合" is removed from the <c>Children</c> of each entry in <c>res.Items</c> before
/// serialization. The removal is performed on the instance passed in, not on a copy, so
/// the caller's object is modified as well.
/// NOTE(review): the JSON text is converted to bytes with <c>Encoding.Default</c>, which
/// differs between platforms and runtimes - confirm that whatever reads the file uses the
/// same encoding.
/// </remarks>
/// <param name="res">Result to persist.</param>
/// <param name="path">Destination file path.</param>
/// <param name="includeAll">When false (the default), the children described above are stripped first.</param>
public static void SaveForensicResult(ForensicResult res, string path, bool includeAll = false)
{
    if (!includeAll)
    {
        foreach (var item in res.Items)
        {
            item.Children.RemoveAll(child => child.Desc == "数据集合");
        }
    }

    var settings = new JsonSerializerSettings
    {
        TypeNameHandling = TypeNameHandling.Auto,
        Formatting = Formatting.Indented,
        NullValueHandling = NullValueHandling.Ignore,
    };

    var json = JsonConvert.SerializeObject(res, settings);
    var payload = Encoding.Default.GetBytes(json);

    using (var output = File.Create(path))
    {
        output.Write(payload, 0, payload.Length);
    }
}