/// <summary>
/// Assigns <paramref name="defaultStars"/> as the star rating of
/// <paramref name="processingData"/>, but only when no rating is set yet.
/// </summary>
/// <param name="processingData">Record whose <c>Stars</c> value may be filled in.</param>
/// <param name="defaultStars">Rating to use when the record has none.</param>
private static void SetStars(SingleProcessingData processingData, double defaultStars)
{
    // A rating that is already present always wins over the default.
    if (processingData.Stars != null)
    {
        return;
    }

    processingData.Stars = defaultStars;
}
/// <summary>
/// Reads the whole file as text, sanitizes it and wraps it in a
/// <see cref="SingleProcessingData"/> record.
/// </summary>
/// <param name="file">Path of the file to load.</param>
/// <returns>
/// The record, with its id set to "&lt;parent directory name&gt;_&lt;file name without extension&gt;"
/// and the star default applied through <c>SetStars</c>.
/// </returns>
private SingleProcessingData ReadFile(string file)
{
    var info = new FileInfo(file);
    var content = File.ReadAllText(file).SanitizeXmlString();

    var record = new SingleProcessingData(content)
    {
        // The id combines the containing folder with the bare file name.
        Id = info.Directory.Name + "_" + Path.GetFileNameWithoutExtension(info.Name)
    };

    SetStars(record);
    return record;
}
/// <summary>
/// Applies a default star rating to <paramref name="processingData"/> chosen from
/// the current <c>sentimentClass</c> value: Positive gives 5, Negative gives 1, Neutral gives 3.
/// Any other value leaves the record untouched.
/// </summary>
/// <param name="processingData">Record that receives the default rating.</param>
private void SetStars(SingleProcessingData processingData)
{
    // Read the sentiment once so every comparison sees the same value.
    var current = sentimentClass;

    if (current == SentimentClass.Positive)
    {
        SetStars(processingData, 5);
    }
    else if (current == SentimentClass.Negative)
    {
        SetStars(processingData, 1);
    }
    else if (current == SentimentClass.Neutral)
    {
        SetStars(processingData, 3);
    }
}
/// <summary>
/// Builds a <see cref="SingleProcessingData"/> record from one XML element.
/// </summary>
/// <remarks>
/// Text and star values are taken from descendant "Text" and "Stars" elements, the date
/// from the element's own "Date" attribute, and the id from the first "Id" attribute found
/// on any descendant. When several values exist, the first one is used.
/// </remarks>
/// <param name="element">XML element describing a single record.</param>
/// <returns>The populated record.</returns>
/// <exception cref="InvalidOperationException">
/// The element contains more than two "Text" or more than two "Stars" descendants.
/// </exception>
private SingleProcessingData GetRecord(XElement element)
{
    var text = element.Descendants("Text").Select(x => x.Value).ToArray();

    // Parse star values with the invariant culture: without an explicit provider
    // double.Parse follows the current thread culture, so "4.5" can be misread
    // (for example as 45) on machines whose culture treats '.' as a group separator.
    var stars = element.Descendants("Stars")
        .Select(x => double.Parse(x.Value, System.Globalization.CultureInfo.InvariantCulture))
        .ToArray();

    // NOTE(review): DateTime.Parse still uses the current culture; the date format of the
    // source data is not visible here, so it is left unchanged - confirm and pin a culture.
    var date = element.Attributes("Date").Select(x => DateTime.Parse(x.Value)).ToArray();
    var id = element.Descendants().Attributes("Id").Select(x => x.Value).ToArray();

    // typical old xml has twice defined text
    if (text.Length > 2 || stars.Length > 2)
    {
        throw new InvalidOperationException("Can't handle this data: " + element);
    }

    var item = new SingleProcessingData(text.FirstOrDefault());

    // FirstOrDefault on an empty array yields the type default (0 for stars), not "missing".
    item.Stars = stars.FirstOrDefault();
    item.Date = date.FirstOrDefault();
    item.Id = id.FirstOrDefault();
    return item;
}
/// <summary>
/// Converts an incoming review request into a <see cref="ParsingDocumentHolder"/>.
/// </summary>
/// <remarks>
/// The text of <paramref name="review"/> is written back to the request itself, after
/// cleanup when <paramref name="doCleanup"/> is set, so the caller's object is modified.
/// A known <c>IsPositive</c> flag is translated into a star rating of 5 (positive) or
/// 1 (negative); when the flag has no value the rating is not assigned.
/// </remarks>
/// <param name="review">Request to convert; must not be null.</param>
/// <param name="doCleanup">Whether to run the text through <c>cleanup.Cleanup</c> first.</param>
/// <returns>A holder wrapping the newly built processing record.</returns>
/// <exception cref="ArgumentNullException"><paramref name="review"/> is null.</exception>
public ParsingDocumentHolder Convert(SingleRequestData review, bool doCleanup)
{
    if (review == null)
    {
        throw new ArgumentNullException(nameof(review));
    }

    // Store the (optionally cleaned) text back on the request, as callers may read it afterwards.
    var text = review.Text;
    if (doCleanup)
    {
        text = cleanup.Cleanup(text);
    }

    review.Text = text;

    var data = new SingleProcessingData();
    data.Author = review.Author;

    var positive = review.IsPositive;
    if (positive.HasValue)
    {
        if (positive.Value)
        {
            data.Stars = 5;
        }
        else
        {
            data.Stars = 1;
        }
    }

    data.Date = review.Date;
    data.Id = review.Id;
    data.Text = review.Text;

    return new ParsingDocumentHolder(splitter, wordFactory, sentenceRepair, data);
}
/// <summary>
/// Lazily produces one <see cref="DataPair"/> per review found at <c>path</c>.
/// </summary>
/// <remarks>
/// When <c>path</c> is an existing file, every line becomes a record whose id is its
/// 1-based line number. Otherwise <c>path</c> is enumerated as a directory, including all
/// subdirectories, and each file is loaded through <c>ReadFile</c> on a <c>Task.Run</c> task.
/// Every pair carries the current <c>sentimentClass</c>.
/// </remarks>
/// <returns>A deferred sequence of data pairs.</returns>
private IEnumerable<DataPair> GetReview()
{
    if (!File.Exists(path))
    {
        // Not a file: treat the path as a directory tree and load each file in the background.
        foreach (var filePath in Directory.EnumerateFiles(path, "*.*", SearchOption.AllDirectories))
        {
            yield return new DataPair(sentimentClass, Task.Run(() => ReadFile(filePath)));
        }

        yield break;
    }

    var lineNumber = 0;
    foreach (var line in File.ReadLines(path))
    {
        lineNumber++;

        var record = new SingleProcessingData(line.SanitizeXmlString());
        record.Id = lineNumber.ToString();
        SetStars(record);

        // The record is already in memory, so hand it out as a completed task.
        yield return new DataPair(sentimentClass, Task.FromResult(record));
    }
}
/// <summary>
/// Lazily reads the CSV file at <c>path</c> and yields one <see cref="DataPair"/> per data row,
/// provided the file has a "text" column.
/// </summary>
/// <remarks>
/// Header names are matched case-insensitively. Optional columns are "id" (falls back to the
/// 1-based row number), "sentiment", "userid" and "author" (when both exist, "author" wins)
/// and "stars". When a star value is present it overrides the sentiment: more than 3 stars
/// is Positive, anything else is Negative. Without a "text" column the rows are still read
/// but nothing is yielded.
/// </remarks>
/// <returns>A deferred sequence of data pairs.</returns>
private IEnumerable<DataPair> GetReview()
{
    logger.LogInformation("Loading {0}", path);
    var rowNumber = 0;

    using (var fileReader = new StreamReader(path))
    using (var csv = new CsvReader(fileReader, CultureInfo.InvariantCulture))
    {
        csv.Read();
        csv.ReadHeader();

        // Clear the missing-field callback on the reader configuration.
        csv.Configuration.MissingFieldFound = null;

        var headers = csv.Context.HeaderRecord.ToLookup(name => name, StringComparer.OrdinalIgnoreCase);

        // Resolve the header spelling used in the file once; null means the column is absent.
        var idColumn = headers.Contains("id") ? headers["id"].First() : null;
        var sentimentColumn = headers.Contains("sentiment") ? headers["sentiment"].First() : null;
        var userIdColumn = headers.Contains("userid") ? headers["userid"].First() : null;
        var authorColumn = headers.Contains("author") ? headers["author"].First() : null;
        var starsColumn = headers.Contains("stars") ? headers["stars"].First() : null;
        var textColumn = headers.Contains("text") ? headers["text"].First() : null;

        while (csv.Read())
        {
            rowNumber++;

            var id = idColumn != null ? csv.GetField(idColumn) : rowNumber.ToString();

            SentimentClass? sentiment = null;
            if (sentimentColumn != null)
            {
                sentiment = csv.GetField<SentimentClass?>(sentimentColumn);
            }

            // "author" is read after "userid" so it takes precedence when both columns exist.
            string author = null;
            if (userIdColumn != null)
            {
                author = csv.GetField(userIdColumn);
            }

            if (authorColumn != null)
            {
                author = csv.GetField(authorColumn);
            }

            double? stars = null;
            if (starsColumn != null)
            {
                stars = csv.GetField<double?>(starsColumn);
            }

            if (textColumn == null)
            {
                continue;
            }

            var text = csv.GetField(textColumn);
            var record = new SingleProcessingData(text.SanitizeXmlString());
            record.Id = id;
            record.Author = author;
            record.Stars = stars;

            // An explicit rating overrides whatever the sentiment column said.
            if (stars.HasValue)
            {
                if (stars.Value > 3)
                {
                    sentiment = SentimentClass.Positive;
                }
                else
                {
                    sentiment = SentimentClass.Negative;
                }
            }

            yield return new DataPair(sentiment, Task.FromResult(record));
        }
    }
}