/// <summary>
/// Creates or looks up a document row in <c>DataSet</c> and fills it with the cells
/// produced by <paramref name="extractTextVector"/>.
/// </summary>
/// <param name="id">Document id; when null or empty a brand-new document is added instead of being looked up by id.</param>
/// <param name="date">Date stored on the row; <see cref="DateTime.Today"/> is used when no date is supplied.</param>
/// <param name="extractTextVector">Vector extractor whose cells are written into the row.</param>
/// <param name="positivity">Class value assigned to the row.</param>
/// <returns>The populated row, or <c>null</c> when the extractor yields no cells.</returns>
private IArffDataRow AddData(string id, DateTime? date, ExtractTextVectorBase extractTextVector, PositivityType positivity)
{
    var vectorCells = extractTextVector.GetCells();
    if (vectorCells.Count == 0)
    {
        // Nothing to record, so the data set is left untouched.
        return null;
    }

    // All mutations of the data set happen while holding the lock on DataSet.
    lock (DataSet)
    {
        IArffDataRow row;
        if (string.IsNullOrEmpty(id))
        {
            row = DataSet.AddDocument();
        }
        else
        {
            row = DataSet.GetOrCreateDocument(id);
        }

        row.Class.Value = positivity;
        row.Date = date ?? DateTime.Today;

        foreach (var vectorCell in vectorCells)
        {
            var record = row.AddRecord(vectorCell.Name);
            if (record == null)
            {
                // AddRecord produced no record for this name; skip the cell.
                continue;
            }

            record.Header.Source = vectorCell.Item;
            record.Value = vectorCell.Value;
        }

        return row;
    }
}
/// <summary>
/// Copies the records of <paramref name="row"/> into <paramref name="line"/>, keyed by the
/// index of each record's header, in ascending index order.
/// </summary>
/// <param name="row">Row whose records are exported; records with a <c>DateHeader</c> are ignored.</param>
/// <param name="line">Destination line that receives one <c>SetValue</c> call per distinct header index.</param>
/// <remarks>
/// A record without a value contributes <c>1</c>. When several records resolve to the same
/// index, the last one enumerated wins.
/// </remarks>
public static void ProcessLine(this IArffDataRow row, DataLine line)
{
    // A sorted map gives ascending-key iteration, so no separate ordering pass is needed.
    var valuesByIndex = new SortedDictionary<int, double>();

    foreach (var record in row.GetRecords())
    {
        if (record.Header is DateHeader)
        {
            continue;
        }

        int headerIndex = row.Owner.Header.GetIndex(record.Header);

        // NOTE(review): Convert.ToDouble uses the current culture when the value is a string;
        // confirm stored values are numeric or formatted consistently with the running culture.
        double numeric = record.Value != null ? Convert.ToDouble(record.Value) : 1d;

        valuesByIndex[headerIndex] = numeric;
    }

    foreach (var entry in valuesByIndex)
    {
        line.SetValue(entry.Key, entry.Value);
    }
}
/// <summary>
/// Builds an ARFF data set from <c>Vectors</c> and writes it to a file named after <c>Word.Text</c>.
/// </summary>
/// <param name="path">
/// Directory that receives the file. The file name
/// (<c>Word.Text.CreatePureLetterText()</c> + <c>.arff</c>) is appended to it.
/// </param>
/// <exception cref="ArgumentException"><paramref name="path"/> is null, empty or whitespace.</exception>
/// <remarks>
/// Each vector becomes one document, classed positive when its <c>SentimentValue</c> is greater
/// than zero and negative otherwise. Words that are not aspects and have a zero value are left out.
/// </remarks>
public void Save(string path)
{
    if (string.IsNullOrWhiteSpace(path))
    {
        throw new ArgumentException("Value cannot be null or whitespace.", nameof(path));
    }

    log.LogInformation("Saving {0}...", path);
    var fileName = $"{Word.Text.CreatePureLetterText()}.arff";
    path = Path.Combine(path, fileName);

    IArffDataSet arff = ArffDataSet.Create<PositivityType>(Word.Text);
    arff.UseTotal = true;

    foreach (WordsContext vector in Vectors)
    {
        IArffDataRow review = arff.AddDocument();
        review.Class.Value = vector.SentimentValue > 0 ? PositivityType.Positive : PositivityType.Negative;

        foreach (WordEx wordItem in vector.Words)
        {
            if (!wordItem.IsAspect && wordItem.Value == 0)
            {
                continue;
            }

            DataRecord addedWord = review.AddRecord(wordItem.Text);
            if (addedWord == null)
            {
                // AddRecord may return null (other callers of this API guard against it);
                // skip the word instead of failing the whole save with a NullReferenceException.
                continue;
            }

            // The record's value is its running total.
            addedWord.Value = addedWord.Total;
        }
    }

    arff.Save(path);
    log.LogInformation("Saving {0} Completed.", path);
}