Пример #1
0
            /// <summary>
            /// Converts this item's keywords and topics into a named sparse vector in which each
            /// distinct string-table index carries the sum of the unit weights of its occurrences.
            /// </summary>
            /// <param name="stringTable">Table used to resolve every keyword and topic to an index.</param>
            /// <returns>A classification named after <c>Title</c> with one weighted entry per distinct index.</returns>
            public SparseVectorClassification AsClassification(StringTableBuilder stringTable)
            {
                var occurrences = new List <WeightedIndex>();

                // Keywords are resolved before topics; each occurrence contributes a weight of one.
                foreach (var terms in new[] { Keyword, Topic })
                {
                    foreach (var term in terms)
                    {
                        occurrences.Add(new WeightedIndex {
                            Index  = stringTable.GetIndex(term),
                            Weight = 1f
                        });
                    }
                }

                // Collapse repeated indices into a single entry whose weight is the total.
                var merged =
                    from occurrence in occurrences
                    group occurrence by occurrence.Index into sameIndex
                    select new WeightedIndex {
                        Index  = sameIndex.Key,
                        Weight = sameIndex.Sum(o => o.Weight)
                    };

                return new SparseVectorClassification {
                    Name = Title,
                    Data = merged.ToArray()
                };
            }
Пример #2
0
            /// <summary>
            /// Converts this item's keywords and topics into a weighted index list in which each
            /// distinct string-table index carries the sum of the unit weights of its occurrences.
            /// </summary>
            /// <param name="stringTable">Table used to resolve every keyword and topic to an index.</param>
            /// <returns>A tuple of <c>Title</c> and the weighted index list built from the keywords and topics.</returns>
            public (string Classification, WeightedIndexList Data) AsClassification(StringTableBuilder stringTable)
            {
                var occurrences = new List <WeightedIndexList.WeightedIndex>();

                // Keywords are resolved before topics; each occurrence contributes a weight of one.
                foreach (var terms in new[] { Keyword, Topic })
                {
                    foreach (var term in terms)
                    {
                        occurrences.Add(new WeightedIndexList.WeightedIndex {
                            Index  = stringTable.GetIndex(term),
                            Weight = 1f
                        });
                    }
                }

                // Collapse repeated indices into a single entry whose weight is the total.
                var merged =
                    from occurrence in occurrences
                    group occurrence by occurrence.Index into sameIndex
                    select new WeightedIndexList.WeightedIndex {
                        Index  = sameIndex.Key,
                        Weight = sameIndex.Sum(o => o.Weight)
                    };

                return (Title, new WeightedIndexList {
                    IndexList = merged.ToArray()
                });
            }
Пример #3
0
 /// <summary>
 /// Pairs each input label with an index list built from that row's strings.
 /// </summary>
 /// <param name="data">Rows whose first item holds the strings to index and whose second item is the classification label.</param>
 /// <param name="stringTable">Table used to resolve each string to its index.</param>
 /// <returns>One (label, index list) tuple per input row, in input order.</returns>
 static IReadOnlyList <(string Classification, IndexList Data)> _BuildIndexedClassifications(IReadOnlyList <Tuple <string[], string> > data, StringTableBuilder stringTable)
 {
     return data.Select(row =>
     {
         // Resolve every string of the row to its table index before wrapping it.
         var termIndices = row.Item1
                           .Select(term => stringTable.GetIndex(term))
                           .ToArray();

         return (row.Item2, IndexList.Create(termIndices));
     }).ToList();
 }
Пример #4
0
        /// <summary>
        /// Builds a four-document classification bag (three labelled "china", one labelled
        /// "japan"), converts it to sparse vectors and then applies TF-IDF, checking the number
        /// of classifications and the size of the first classification's data at each stage.
        /// </summary>
        public void TestTFIDF()
        {
            var stringTableBuilder = new StringTableBuilder();
            var bag = new ClassificationBag {
                Classification = new[] {
                    Tuple.Create(new[] { "Chinese", "Beijing", "Chinese" }, true),
                    Tuple.Create(new[] { "Chinese", "Chinese", "Shanghai" }, true),
                    Tuple.Create(new[] { "Chinese", "Macao" }, true),
                    Tuple.Create(new[] { "Tokyo", "Japan", "Chinese" }, false),
                }.Select(d => new IndexedClassification {
                    Name = d.Item2 ? "china" : "japan",
                    Data = d.Item1.Select(s => stringTableBuilder.GetIndex(s)).ToArray()
                }).ToArray()
            };

            // Assert.AreEqual takes (expected, actual); the expected value goes first so that a
            // failure message labels the two values correctly.
            Assert.AreEqual(4, bag.Classification.Length);
            Assert.AreEqual(3, bag.Classification[0].Data.Length);
            var set = bag.ConvertToSparseVectors(true);

            // NOTE(review): presumably the conversion merges documents that share a label,
            // leaving one entry per label - confirm against ConvertToSparseVectors.
            Assert.AreEqual(2, set.Classification.Length);
            Assert.AreEqual(4, set.Classification[0].Data.Length);

            var tfidf = set.TFIDF();

            // The same counts are expected again after the TF-IDF step.
            Assert.AreEqual(2, tfidf.Classification.Length);
            Assert.AreEqual(4, tfidf.Classification[0].Data.Length);
        }
Пример #5
0
 /// <summary>
 /// Builds a classification bag containing one indexed classification per input row.
 /// </summary>
 /// <param name="data">Rows whose first item holds the strings to index and whose second item is the classification name.</param>
 /// <param name="stringTable">Table used to resolve each string to its index.</param>
 /// <returns>A bag whose classifications follow the order of <paramref name="data"/>.</returns>
 static ClassificationBag _BuildClassificationBag(IReadOnlyList <Tuple <string[], string> > data, StringTableBuilder stringTable)
 {
     var classifications =
         from row in data
         select new IndexedClassification {
             Name = row.Item2,
             Data = (from term in row.Item1
                     select stringTable.GetIndex(term)).ToArray()
         };

     return new ClassificationBag {
         Classification = classifications.ToArray()
     };
 }
Пример #6
0
 /// <summary>
 /// Creates a small four-document bag: three documents labelled "china" and one labelled "japan".
 /// </summary>
 /// <param name="stringTableBuilder">Table used to resolve each word to its index.</param>
 /// <returns>A bag with one indexed classification per sample document.</returns>
 public static ClassificationBag GetSimpleChineseSet(StringTableBuilder stringTableBuilder)
 {
     // sample data from: http://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html
     // Each entry pairs a document's words with a flag that selects its label below.
     var documents = new[] {
         Tuple.Create(new[] { "Chinese", "Beijing", "Chinese" }, true),
         Tuple.Create(new[] { "Chinese", "Chinese", "Shanghai" }, true),
         Tuple.Create(new[] { "Chinese", "Macao" }, true),
         Tuple.Create(new[] { "Tokyo", "Japan", "Chinese" }, false),
     };

     var classifications = documents.Select(document =>
     {
         string label;
         if (document.Item2)
         {
             label = "china";
         }
         else
         {
             label = "japan";
         }

         return new IndexedClassification {
             Name = label,
             Data = document.Item1.Select(word => stringTableBuilder.GetIndex(word)).ToArray()
         };
     });

     return new ClassificationBag {
         Classification = classifications.ToArray()
     };
 }
Пример #7
0
 /// <summary>
 /// Resolves a fixed five-word sample row to string-table indices.
 /// </summary>
 /// <param name="stringTableBuilder">Table used to resolve each word to its index.</param>
 /// <returns>The indices of the sample words, in the order the words are listed.</returns>
 public static IReadOnlyList <uint> GetTestRow(StringTableBuilder stringTableBuilder)
 {
     var words = new[] { "Chinese", "Chinese", "Chinese", "Tokyo", "Japan" };

     var indices =
         from word in words
         select stringTableBuilder.GetIndex(word);

     return indices.ToArray();
 }