Example #1
0
 /// <summary>
 /// Applies a fallback star rating to an item that does not have one yet.
 /// </summary>
 /// <param name="processingData">Item whose <c>Stars</c> value is filled in when it is null.</param>
 /// <param name="defaultStars">Rating assigned when <c>Stars</c> is null.</param>
 private static void SetStars(SingleProcessingData processingData, double defaultStars)
 {
     // A rating that is already present always wins over the default.
     if (processingData.Stars != null)
     {
         return;
     }

     processingData.Stars = defaultStars;
 }
Example #2
0
        /// <summary>
        /// Loads a single file into a <see cref="SingleProcessingData"/> item.
        /// </summary>
        /// <param name="file">Path of the file to read.</param>
        /// <returns>
        /// Item built from the sanitized file content. Its Id is the parent directory name and the
        /// file name without extension joined by an underscore; stars are applied through SetStars.
        /// </returns>
        private SingleProcessingData ReadFile(string file)
        {
            var info    = new FileInfo(file);
            var content = File.ReadAllText(file).SanitizeXmlString();

            var data = new SingleProcessingData(content)
            {
                Id = $"{info.Directory.Name}_{Path.GetFileNameWithoutExtension(info.Name)}"
            };

            SetStars(data);
            return data;
        }
Example #3
0
        /// <summary>
        /// Applies the default star rating that corresponds to this instance's sentiment class:
        /// Positive maps to 5, Negative to 1 and Neutral to 3. Any other value leaves the item untouched.
        /// </summary>
        /// <param name="processingData">Item passed on to the two-argument SetStars overload.</param>
        private void SetStars(SingleProcessingData processingData)
        {
            var sentiment = sentimentClass;

            if (sentiment == SentimentClass.Positive)
            {
                SetStars(processingData, 5);
            }
            else if (sentiment == SentimentClass.Negative)
            {
                SetStars(processingData, 1);
            }
            else if (sentiment == SentimentClass.Neutral)
            {
                SetStars(processingData, 3);
            }
        }
Example #4
0
        /// <summary>
        /// Builds a <see cref="SingleProcessingData"/> item from one XML record element.
        /// </summary>
        /// <param name="element">
        /// Record element. Its "Text" and "Stars" descendant elements, its own "Date" attribute and the
        /// "Id" attributes of its descendant elements are read.
        /// </param>
        /// <returns>Item populated with the first text, stars, date and id value found.</returns>
        /// <exception cref="InvalidOperationException">
        /// The element contains more than two "Text" or more than two "Stars" elements.
        /// </exception>
        /// <exception cref="FormatException">A "Stars" or "Date" value cannot be parsed.</exception>
        private SingleProcessingData GetRecord(XElement element)
        {
            // Numbers and dates in the file are machine-written data: parse them with the invariant
            // culture so the result does not depend on the regional settings of the current thread.
            var culture = System.Globalization.CultureInfo.InvariantCulture;

            var text  = element.Descendants("Text").Select(x => x.Value).ToArray();
            var stars = element.Descendants("Stars").Select(x => double.Parse(x.Value, culture)).ToArray();
            var date  = element.Attributes("Date").Select(x => DateTime.Parse(x.Value, culture)).ToArray();
            var id    = element.Descendants().Attributes("Id").Select(x => x.Value).ToArray();

            // Old XML files typically define the text twice, so up to two occurrences are tolerated
            // (only the first one is used). Anything beyond that is treated as unsupported data.
            if (text.Length > 2 || stars.Length > 2)
            {
                throw new InvalidOperationException("Can't handle this data: " + element);
            }

            var item = new SingleProcessingData(text.FirstOrDefault());

            // NOTE(review): when no "Stars" element or "Date" attribute exists, FirstOrDefault yields
            // 0 and DateTime.MinValue respectively rather than "missing" - confirm this is intended.
            item.Stars = stars.FirstOrDefault();
            item.Date  = date.FirstOrDefault();
            item.Id    = id.FirstOrDefault();
            return(item);
        }
        /// <summary>
        /// Converts an incoming review request into a <see cref="ParsingDocumentHolder"/>.
        /// </summary>
        /// <param name="review">
        /// Request to convert. Its Text property is written back by this method, holding the
        /// cleaned-up text when <paramref name="doCleanup"/> is true.
        /// </param>
        /// <param name="doCleanup">Whether the review text is passed through the cleanup component first.</param>
        /// <returns>A holder wrapping the processing data copied from <paramref name="review"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="review"/> is null.</exception>
        public ParsingDocumentHolder Convert(SingleRequestData review, bool doCleanup)
        {
            if (review == null)
            {
                throw new ArgumentNullException(nameof(review));
            }

            var text = review.Text;
            if (doCleanup)
            {
                text = cleanup.Cleanup(text);
            }

            review.Text = text;

            var data = new SingleProcessingData
            {
                Author = review.Author
            };

            // A known polarity maps to the extreme ratings; an unknown one leaves Stars unset.
            if (review.IsPositive.HasValue)
            {
                if (review.IsPositive.Value)
                {
                    data.Stars = 5;
                }
                else
                {
                    data.Stars = 1;
                }
            }

            data.Date = review.Date;
            data.Id   = review.Id;
            data.Text = review.Text;

            var holder = new ParsingDocumentHolder(splitter, wordFactory, sentenceRepair, data);
            return holder;
        }
Example #6
0
 /// <summary>
 /// Lazily produces one <see cref="DataPair"/> per review found at the configured path.
 /// </summary>
 /// <remarks>
 /// When the path is an existing file, every line becomes one item whose Id is its 1-based line number.
 /// Otherwise the path is enumerated as a directory (recursively) and every file is loaded through
 /// ReadFile on a task started with Task.Run.
 /// </remarks>
 /// <returns>Pairs of this instance's sentiment class and a task yielding the item.</returns>
 private IEnumerable<DataPair> GetReview()
 {
     if (!File.Exists(path))
     {
         // Not a file: walk the whole directory tree and load each file on its own task.
         foreach (var file in Directory.EnumerateFiles(path, "*.*", SearchOption.AllDirectories))
         {
             yield return new DataPair(sentimentClass, Task.Run(() => ReadFile(file)));
         }

         yield break;
     }

     var lineNumber = 0;
     foreach (var line in File.ReadLines(path))
     {
         lineNumber += 1;
         var data = new SingleProcessingData(line.SanitizeXmlString())
         {
             Id = lineNumber.ToString()
         };

         SetStars(data);
         yield return new DataPair(sentimentClass, Task.FromResult(data));
     }
 }
        /// <summary>
        /// Lazily reads the CSV file at the configured path and produces one <see cref="DataPair"/> per row.
        /// </summary>
        /// <remarks>
        /// Column names are matched case-insensitively. Recognised columns are "id", "sentiment",
        /// "userid", "author", "stars" and "text"; rows are only yielded when a "text" column exists.
        /// Without an "id" column the 1-based row counter is used as the Id. The "author" column is read
        /// after "userid" and overrides it. A non-null stars value overrides the sentiment column:
        /// above 3 is Positive, anything else Negative.
        /// </remarks>
        /// <returns>Pairs of the row's sentiment (possibly null) and a completed task holding the item.</returns>
        private IEnumerable<DataPair> GetReview()
        {
            logger.LogInformation("Loading {0}", path);
            var rowNumber = 0;

            using (var reader = new StreamReader(path))
            {
                using (var csv = new CsvReader(reader, CultureInfo.InvariantCulture))
                {
                    csv.Read();
                    csv.ReadHeader();
                    csv.Configuration.MissingFieldFound = null;
                    var columns = csv.Context.HeaderRecord.ToLookup(name => name, StringComparer.OrdinalIgnoreCase);

                    // Resolve the header's actual spelling of each known column once; null means "not present".
                    var idColumn        = columns.Contains("id") ? columns["id"].First() : null;
                    var sentimentColumn = columns.Contains("sentiment") ? columns["sentiment"].First() : null;
                    var userIdColumn    = columns.Contains("userid") ? columns["userid"].First() : null;
                    var authorColumn    = columns.Contains("author") ? columns["author"].First() : null;
                    var starsColumn     = columns.Contains("stars") ? columns["stars"].First() : null;
                    var textColumn      = columns.Contains("text") ? columns["text"].First() : null;

                    while (csv.Read())
                    {
                        rowNumber++;

                        var id = idColumn != null ? csv.GetField(idColumn) : rowNumber.ToString();

                        SentimentClass? sentiment = null;
                        if (sentimentColumn != null)
                        {
                            sentiment = csv.GetField<SentimentClass?>(sentimentColumn);
                        }

                        string author = null;
                        if (userIdColumn != null)
                        {
                            author = csv.GetField(userIdColumn);
                        }

                        if (authorColumn != null)
                        {
                            author = csv.GetField(authorColumn);
                        }

                        double? stars = null;
                        if (starsColumn != null)
                        {
                            stars = csv.GetField<double?>(starsColumn);
                        }

                        // Without a text column there is nothing to emit for this row.
                        if (textColumn == null)
                        {
                            continue;
                        }

                        var rawText = csv.GetField(textColumn);
                        var record  = new SingleProcessingData(rawText.SanitizeXmlString());
                        record.Id     = id;
                        record.Author = author;
                        record.Stars  = stars;

                        // An explicit rating takes precedence over the sentiment column.
                        if (stars.HasValue)
                        {
                            sentiment = stars.Value > 3 ? SentimentClass.Positive : SentimentClass.Negative;
                        }

                        yield return new DataPair(sentiment, Task.FromResult(record));
                    }
                }
            }
        }