Beispiel #1
0
        /// <summary>
        /// Calculates features for every answer id returned by the data reader and appends them,
        /// one post at a time, to <c>Test\Index.txt</c> through a <c>SparseFormatWriter</c>.
        /// </summary>
        /// <remarks>
        /// An index file left over from an earlier run is deleted first. The method performs no
        /// assertions; it only exercises the pipeline end to end.
        /// </remarks>
        public void TestCalculateIndexFeatures()
        {
            const string indexFilepath    = @"Test\Index.txt";
            const string featuresFilepath = @"Test\IndexFeatures.xml";

            // Remove the output of a previous run before the writer is opened on the same path.
            if (File.Exists(indexFilepath))
            {
                File.Delete(indexFilepath);
            }

            Program.PrepareData();

            using (var reader = new BuhonlineDataReader())
            using (var writer = new SparseFormatWriter(indexFilepath))
            {
                var features   = FeatureListBuilder.GetFeaturesList(featuresFilepath);
                var answerIds  = reader.ReadAnswerIds();
                var calculator = new FeatureCalculator(features);

                foreach (int answerId in answerIds)
                {
                    var parameters = new FeatureParameters { MessageId = answerId };
                    IEnumerable<Feature> calculated = calculator.Calculate(parameters);

                    // Each append carries exactly one post.
                    var singlePost = new List<PostData>
                    {
                        new PostData { PostId = answerId, Features = calculated }
                    };
                    writer.Append(singlePost);
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Reads posts through a <c>BuhonlineDataReader</c>, keeps those whose message does not start
        /// with "спасибо" or "благодар" (ignoring case), and writes a random sample of 2000 of them
        /// to <paramref name="fileToSave"/>, one post per line.
        /// </summary>
        /// <param name="fileToSave">Path of the text file that receives the sampled posts.</param>
        /// <remarks>
        /// Nothing is written when 2000 or fewer posts remain after filtering.
        /// NOTE(review): that may be unintended - confirm whether small data sets should be written in full.
        /// </remarks>
        public static void ImportThanksDataFromDB(string fileToSave)
        {
            // TODO: importing data is not directly related to preprocessing; it belongs elsewhere.
            const int batch = 2000;

            // The reader is disposable, so release it deterministically. The whole body stays inside
            // the using block in case the posts still depend on the reader when they are formatted.
            using (var reader = new BuhonlineDataReader())
            {
                // Case-insensitive prefix check without allocating lowered copies of every message.
                // NOTE(review): assumes Read() yields an in-memory sequence - confirm.
                List <IDataItem> posts = reader.Read()
                                         .Where(post => !(((yaf_Message)post).Message.StartsWith("спасибо", StringComparison.OrdinalIgnoreCase) ||
                                                          ((yaf_Message)post).Message.StartsWith("благодар", StringComparison.OrdinalIgnoreCase)))
                                         .ToList();

                if (posts.Count > batch)
                {
                    var randomPosts = new List <IDataItem>(batch);
                    var rand        = new Random();

                    // Partial shuffle: pick from the not-yet-chosen prefix, then move the pick
                    // to the tail so it cannot be selected again.
                    for (var i = 0; i < batch; i++)
                    {
                        var last = posts.Count - 1 - i;
                        var id   = rand.Next(posts.Count - i);
                        randomPosts.Add(posts[id]);

                        var post = posts[last];
                        posts[last] = posts[id];
                        posts[id]   = post;
                    }

                    File.WriteAllLines(fileToSave, randomPosts.Select(item => item.ToString()));
                }
            }
        }
Beispiel #3
0
 /// <summary>
 /// Static constructor: creates the <c>BuhonlineDataReader</c> assigned to <c>buhonlineDataReader</c>.
 /// </summary>
 static Preprocessing()
 {
     // TODO(galina): use the provider instead of creating the reader directly
     buhonlineDataReader = new BuhonlineDataReader();
 }
Beispiel #4
0
 /// <summary>
 /// Creates the provider with a new <c>BuhonlineDataReader</c> and a new <c>BuhOnlineDataCache</c>.
 /// </summary>
 public BuhOnlineDataProvider()
 {
     buhonlineDataReader = new BuhonlineDataReader();
     buhOnlineDataCache  = new BuhOnlineDataCache();
 }