/// <summary>
/// Reads a SemEval dataset XML file and converts it into a <see cref="Product"/>
/// that holds a single review containing every sentence of the dataset.
/// </summary>
/// <remarks>
/// Every aspect term encountered is also recorded (lower-cased, without
/// duplicates) in the class-level <c>aspects</c> collection.
/// </remarks>
/// <param name="datasetFile">Path of the dataset XML file to parse.</param>
/// <returns>
/// The populated product, titled with the dataset file name; <c>null</c> when
/// the file does not exist or no dataset could be deserialized from it.
/// </returns>
public Product ParseDataset(string datasetFile)
{
    // A missing file is reported as "nothing parsed" rather than as an error.
    if (!File.Exists(datasetFile))
    {
        return null;
    }

    SemEvalDataset dataset = XmlSerializationExtensions.FromXmlFile<SemEvalDataset>(datasetFile);
    if (dataset == null)
    {
        // Nothing was deserialized; treat it like a missing file instead of
        // failing later with a NullReferenceException.
        return null;
    }

    FileInfo datasetFileInfo = new FileInfo(datasetFile);

    Product productSummary = new Product();
    productSummary.Title = datasetFileInfo.Name;

    // All sentences of the dataset are grouped under one synthetic review.
    Review defaultReview = new Review();
    defaultReview.Title = string.Format("Batch sentences for dataset: {0}", datasetFileInfo.Name);

    // The sentence collection may be absent when the file contains no sentences.
    if (dataset.Sentences != null)
    {
        foreach (SemEvalDatasetSentence datasetSentence in dataset.Sentences)
        {
            Sentence sentence = new Sentence();
            sentence.DatasetTypeId = (int)DatasetParserType.SemEval;
            sentence.Text = datasetSentence.Sentence;

            ManualResults calculatedResults = new ManualResults();

            // A sentence without annotated aspects may carry no aspect
            // collection at all, so guard before enumerating it.
            if (datasetSentence.Aspects != null)
            {
                foreach (SemEvalDatasetAspect datasetAspect in datasetSentence.Aspects)
                {
                    SemEvalResultAspect resultAspect = new SemEvalResultAspect();
                    resultAspect.AspectTermText = datasetAspect.Term;
                    resultAspect.Polarity = datasetAspect.Polarity;
                    resultAspect.OpinionStrengthFrom = datasetAspect.From;
                    resultAspect.OpinionStrengthTo = datasetAspect.To;
                    calculatedResults.SemEvalResultAspects.Add(resultAspect);

                    // Normalise once and reuse for both the lookup and the insert.
                    // NOTE(review): ToLower() is culture-sensitive; it is kept so the
                    // stored keys stay identical to what other users of `aspects`
                    // may expect - confirm before switching to ToLowerInvariant().
                    string normalizedTerm = datasetAspect.Term.ToLower();
                    if (!aspects.Contains(normalizedTerm))
                    {
                        aspects.Add(normalizedTerm);
                    }
                }
            }

            // Only attach serialized manual results when there is something to store.
            if (calculatedResults.SemEvalResultAspects.Count > 0)
            {
                sentence.ManualResults = XmlSerializationExtensions.ToXmlString(calculatedResults);
            }

            defaultReview.Sentence.Add(sentence);
        }
    }

    productSummary.Review.Add(defaultReview);
    return productSummary;
}
/// <summary>
/// Parses one review line into a <see cref="Sentence"/>, extracting the manually
/// annotated aspect results that may precede the sentence text.
/// </summary>
/// <param name="line">A single raw line of the review dataset.</param>
/// <returns>
/// The parsed sentence, with its manual results serialized to XML when at least
/// one aspect annotation was recognised; <c>null</c> when the line is null, empty
/// or yields no parts after splitting.
/// </returns>
private Sentence ParseSentence(string line)
{
    // An empty line never produced a sentence; a null line is treated the same
    // way instead of failing with a NullReferenceException on Split.
    if (string.IsNullOrEmpty(line))
    {
        return null;
    }

    /*
     * If we have 2 parts the first contains results information and the
     * second the actual line. If there is only one (the first is empty)
     * then we have only the review sentence available.
     * Note that every character of reviewLineTag acts as an individual
     * separator here.
     */
    String[] reviewLineParts = line.Split(reviewLineTag.ToCharArray(), 2, StringSplitOptions.RemoveEmptyEntries);
    if (reviewLineParts.Length == 0)
    {
        return null;
    }

    bool hasResultsPart = reviewLineParts.Length == 2;

    Sentence sentence = new Sentence();
    sentence.DatasetTypeId = (int)DatasetParserType.HuLiu;
    sentence.Text = hasResultsPart ? reviewLineParts[1] : reviewLineParts[0];

    // Split the results part (if any) on the aspect result delimiter.
    String[] aspectEntries = (hasResultsPart ? reviewLineParts[0] : String.Empty).Split(apectDelimiter.ToCharArray());

    ManualResults calculatedResults = new ManualResults();

    // The pattern does not change between entries, so build the matcher once
    // instead of re-parsing it on every iteration.
    Regex aspectInfo = new Regex(resultsRegexPattern);

    /*
     * Pass each aspect to the regular expression matcher to extract
     * information results from the matching groups.
     */
    foreach (String aspectEntry in aspectEntries)
    {
        Match aspectMatch = aspectInfo.Match(aspectEntry);
        if (!aspectMatch.Success)
        {
            continue;
        }

        HLResultAspect resultAspect = new HLResultAspect();
        resultAspect.AspectTermText = aspectMatch.Groups[1].Value.Trim();

        // Dataset annotations are machine-readable text, so parse them
        // independently of the culture the process happens to run under.
        resultAspect.OpinionStrength = int.Parse(
            aspectMatch.Groups[2].Value,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture);

        // Group 4 optionally carries an extra marker for the aspect.
        string aspectFlag = aspectMatch.Groups[4].Value;
        if (aspectFlag != String.Empty)
        {
            switch (aspectFlag)
            {
                case aspectFeatureNotAppeared:
                    resultAspect.NotAppeared = true;
                    break;
                case aspectFeatureNotAppearedPronounResolutionNeeded:
                    resultAspect.PronounResolutionNeeded = true;
                    break;
            }
        }

        calculatedResults.HLResultAspects.Add(resultAspect);
    }

    // Only attach serialized manual results when at least one aspect was found.
    if (calculatedResults.HLResultAspects.Count > 0)
    {
        sentence.ManualResults = XmlSerializationExtensions.ToXmlString(calculatedResults);
    }

    return sentence;
}