/// <summary>
/// Converts this item into a sparse vector classification named after <c>Title</c>.
/// Every entry of <c>Keyword</c> and then every entry of <c>Topic</c> is mapped to its
/// string table index with a weight of 1; entries that share an index are merged by
/// summing their weights.
/// </summary>
/// <param name="stringTable">String table used to map each keyword and topic to an index.</param>
/// <returns>A classification whose data holds one weighted index per distinct string index.</returns>
public SparseVectorClassification AsClassification(StringTableBuilder stringTable)
{
    // One unit-weight entry per keyword, then per topic (this order also fixes the
    // order in which the string table is asked for indices).
    var unitWeights = new List<WeightedIndex>();
    foreach (var keyword in Keyword)
    {
        unitWeights.Add(new WeightedIndex { Index = stringTable.GetIndex(keyword), Weight = 1f });
    }
    foreach (var topic in Topic)
    {
        unitWeights.Add(new WeightedIndex { Index = stringTable.GetIndex(topic), Weight = 1f });
    }

    // Collapse repeated indices into a single entry carrying the summed weight.
    var merged =
        from entry in unitWeights
        group entry by entry.Index into sameIndex
        select new WeightedIndex
        {
            Index = sameIndex.Key,
            Weight = sameIndex.Sum(member => member.Weight)
        };

    return new SparseVectorClassification
    {
        Name = Title,
        Data = merged.ToArray()
    };
}
/// <summary>
/// Converts this item into a (classification label, weighted index list) pair.
/// The label is <c>Title</c>. Every entry of <c>Keyword</c> and then every entry of
/// <c>Topic</c> is mapped to its string table index with a weight of 1; entries that
/// share an index are merged by summing their weights.
/// </summary>
/// <param name="stringTable">String table used to map each keyword and topic to an index.</param>
/// <returns>The title together with one weighted index per distinct string index.</returns>
public (string Classification, WeightedIndexList Data) AsClassification(StringTableBuilder stringTable)
{
    // One unit-weight entry per keyword, then per topic (this order also fixes the
    // order in which the string table is asked for indices).
    var unitWeights = new List<WeightedIndexList.WeightedIndex>();
    foreach (var keyword in Keyword)
    {
        unitWeights.Add(new WeightedIndexList.WeightedIndex { Index = stringTable.GetIndex(keyword), Weight = 1f });
    }
    foreach (var topic in Topic)
    {
        unitWeights.Add(new WeightedIndexList.WeightedIndex { Index = stringTable.GetIndex(topic), Weight = 1f });
    }

    // Collapse repeated indices into a single entry carrying the summed weight.
    var merged =
        from entry in unitWeights
        group entry by entry.Index into sameIndex
        select new WeightedIndexList.WeightedIndex
        {
            Index = sameIndex.Key,
            Weight = sameIndex.Sum(member => member.Weight)
        };

    return (Title, new WeightedIndexList { IndexList = merged.ToArray() });
}
/// <summary>
/// Converts (tokens, label) rows into (label, index list) pairs by replacing every
/// token with its index from the string table.
/// </summary>
/// <param name="data">Rows whose first item is the token array and whose second item is the classification label.</param>
/// <param name="stringTable">String table used to map each token to an index.</param>
/// <returns>One (label, index list) pair per input row, in input order.</returns>
static IReadOnlyList<(string Classification, IndexList Data)> _BuildIndexedClassifications(IReadOnlyList<Tuple<string[], string>> data, StringTableBuilder stringTable)
{
    // Maps a single row: tokens -> string indices, label passed through unchanged.
    (string, IndexList) ToIndexed(Tuple<string[], string> row)
    {
        var indices = row.Item1.Select(token => stringTable.GetIndex(token)).ToArray();
        return (row.Item2, IndexList.Create(indices));
    }

    return data
        .Select(row => ToIndexed(row))
        .ToList();
}
/// <summary>
/// Builds a four-document classification bag (three documents labelled "china", one
/// labelled "japan"), converts it to sparse vectors and then to TF-IDF, and checks the
/// number of classifications and the length of the first classification's data at
/// each stage.
/// </summary>
public void TestTFIDF()
{
    var stringTableBuilder = new StringTableBuilder();
    var bag = new ClassificationBag
    {
        Classification = new[]
        {
            Tuple.Create(new[] { "Chinese", "Beijing", "Chinese" }, true),
            Tuple.Create(new[] { "Chinese", "Chinese", "Shanghai" }, true),
            Tuple.Create(new[] { "Chinese", "Macao" }, true),
            Tuple.Create(new[] { "Tokyo", "Japan", "Chinese" }, false),
        }.Select(d => new IndexedClassification
        {
            Name = d.Item2 ? "china" : "japan",
            Data = d.Item1.Select(s => stringTableBuilder.GetIndex(s)).ToArray()
        }).ToArray()
    };

    // Assert.AreEqual takes (expected, actual); keeping that order makes a failure
    // message report the expected and actual values the right way round.
    Assert.AreEqual(4, bag.Classification.Length);
    Assert.AreEqual(3, bag.Classification[0].Data.Length);

    var set = bag.ConvertToSparseVectors(true);
    Assert.AreEqual(2, set.Classification.Length);
    Assert.AreEqual(4, set.Classification[0].Data.Length);

    var tfidf = set.TFIDF();
    Assert.AreEqual(2, tfidf.Classification.Length);
    Assert.AreEqual(4, tfidf.Classification[0].Data.Length);
}
/// <summary>
/// Wraps (tokens, label) rows in a <c>ClassificationBag</c>, replacing every token
/// with its index from the string table.
/// </summary>
/// <param name="data">Rows whose first item is the token array and whose second item is the classification name.</param>
/// <param name="stringTable">String table used to map each token to an index.</param>
/// <returns>A bag containing one indexed classification per input row, in input order.</returns>
static ClassificationBag _BuildClassificationBag(IReadOnlyList<Tuple<string[], string>> data, StringTableBuilder stringTable)
{
    return new ClassificationBag
    {
        Classification = (
            from row in data
            select new IndexedClassification
            {
                Name = row.Item2,
                Data = (from token in row.Item1 select stringTable.GetIndex(token)).ToArray()
            }
        ).ToArray()
    };
}
/// <summary>
/// Builds a small four-document classification bag: three documents named "china"
/// and one named "japan", with every word replaced by its string table index.
/// </summary>
/// <param name="stringTableBuilder">String table used to map each word to an index.</param>
/// <returns>A bag with one indexed classification per sample document.</returns>
public static ClassificationBag GetSimpleChineseSet(StringTableBuilder stringTableBuilder)
{
    // sample data from: http://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html
    // Each sample is (words, flag); a true flag is named "china", false is named "japan".
    var samples = new[]
    {
        Tuple.Create(new[] { "Chinese", "Beijing", "Chinese" }, true),
        Tuple.Create(new[] { "Chinese", "Chinese", "Shanghai" }, true),
        Tuple.Create(new[] { "Chinese", "Macao" }, true),
        Tuple.Create(new[] { "Tokyo", "Japan", "Chinese" }, false),
    };

    return new ClassificationBag
    {
        Classification = (
            from sample in samples
            select new IndexedClassification
            {
                Name = sample.Item2 ? "china" : "japan",
                Data = (from word in sample.Item1 select stringTableBuilder.GetIndex(word)).ToArray()
            }
        ).ToArray()
    };
}
/// <summary>
/// Returns the string table indices of the fixed word sequence
/// "Chinese", "Chinese", "Chinese", "Tokyo", "Japan".
/// </summary>
/// <param name="stringTableBuilder">String table used to map each word to an index.</param>
/// <returns>One index per word, in the order listed above.</returns>
public static IReadOnlyList<uint> GetTestRow(StringTableBuilder stringTableBuilder)
{
    var words = new[] { "Chinese", "Chinese", "Chinese", "Tokyo", "Japan" };

    return words
        .Select(word => stringTableBuilder.GetIndex(word))
        .ToArray();
}