/// <summary>
/// Reads <paramref name="input"/> and writes one token per line to two outputs: a labelled
/// file at <paramref name="output"/> and an unlabelled file at <paramref name="output"/> + ".dev".
/// </summary>
/// <remarks>
/// Every line is passed through RemoveTags, ReplaceTags and RemoveAllTags (defined elsewhere;
/// presumably they turn the source markup into the "##LOCATIONSTARTTAG##" / "##ENDTAG##"
/// marker tokens - confirm against those helpers) and is then split on single spaces.
/// Tokens between the two markers are labelled "LOCATION", all other tokens "OTHER"; the
/// marker tokens themselves are never written. Outside a location span, an empty line is
/// written to both outputs after a token that ends with "." and is rejected by IsSalutationAbbr.
/// </remarks>
/// <param name="input">Value passed to the ReadModel constructor.</param>
/// <param name="output">Value passed to the WriteModel constructor; ".dev" is appended for the unlabelled output.</param>
public void Parse(string input, string output)
{
    var reader = new ReadModel(input);
    var labelledWriter = new WriteModel(output);
    var plainWriter = new WriteModel(output + ".dev");

    // First pass: clean every line and flatten the whole input into one token list.
    var tokens = new List<string>();
    foreach (var line in reader.GetNextLine())
    {
        var cleaned = RemoveTags(line);
        cleaned = ReplaceTags(cleaned);
        cleaned = RemoveAllTags(cleaned);
        if (string.IsNullOrEmpty(cleaned))
        {
            continue;
        }

        tokens.AddRange(cleaned.Split(' '));
    }

    // Second pass: label the tokens and insert sentence breaks.
    var insideLocation = false;
    var previous = string.Empty;
    foreach (var rawToken in tokens)
    {
        var token = rawToken.Trim();
        if (string.IsNullOrEmpty(token))
        {
            // Blank tokens (from consecutive spaces) reset the sentence-end tracking.
            previous = string.Empty;
            continue;
        }

        // Sentence boundary: the previous written token ended with "." and is not a
        // salutation abbreviation. The current token must still be processed below;
        // skipping it here would drop the first token of every sentence and could
        // swallow a "##LOCATIONSTARTTAG##" marker, mislabelling the span that follows.
        if (!insideLocation
            && previous.EndsWith(".", System.StringComparison.Ordinal)
            && !IsSalutationAbbr(previous))
        {
            labelledWriter.WriteLine("");
            plainWriter.WriteLine("");
        }

        // The end marker closes a location span if one is open and is never written.
        if (token.Equals("##ENDTAG##"))
        {
            insideLocation = false;
            previous = string.Empty;
            continue;
        }

        if (insideLocation)
        {
            labelledWriter.WriteLine(token + " " + "LOCATION");
            plainWriter.WriteLine(token);
            previous = token;
            continue;
        }

        if (token.Equals("##LOCATIONSTARTTAG##"))
        {
            previous = string.Empty;
            insideLocation = true;
            continue;
        }

        labelledWriter.WriteLine(token + " " + "OTHER");
        plainWriter.WriteLine(token);
        previous = token;
    }

    labelledWriter.Flush();
    plainWriter.Flush();
}
/// <summary>
/// Converts inline-annotated text from <paramref name="input"/> into two token-per-line
/// outputs: <paramref name="output"/> + ".key" (token followed by a label) and
/// <paramref name="output"/> + ".key.dev" (token only).
/// </summary>
/// <remarks>
/// Lines that split into fewer than four space-separated pieces are skipped entirely.
/// A token ending in "{LOCATION}" or "{LOCATION}." has that marker removed (the trailing
/// period is kept) and is labelled "LOCATION"; any other token is labelled "OTHER".
/// An empty line is written to both outputs after each processed input line.
/// </remarks>
/// <param name="input">Value passed to the ReadModel constructor.</param>
/// <param name="output">Prefix for the two WriteModel targets.</param>
internal static void CreateInputForCRF(string input, string output)
{
    var source = new ReadModel(input);
    var labelled = new WriteModel(output + ".key");
    var unlabelled = new WriteModel(output + ".key.dev");

    foreach (var sentence in source.GetNextLine())
    {
        var tokens = sentence.Split(' ');
        if (tokens.Length >= 4)
        {
            foreach (var token in tokens)
            {
                // Whitespace-only pieces carry no token.
                if (token.Trim().Length == 0)
                {
                    continue;
                }

                // Work out the text to emit and its label first, then write once.
                string text;
                string label;
                if (token.EndsWith("{LOCATION}"))
                {
                    text = token.Replace("{LOCATION}", "");
                    label = "LOCATION";
                }
                else if (token.EndsWith("{LOCATION}."))
                {
                    text = token.Replace("{LOCATION}.", ".");
                    label = "LOCATION";
                }
                else
                {
                    text = token;
                    label = "OTHER";
                }

                labelled.WriteLine(text + " " + label);
                unlabelled.WriteLine(text);
            }

            // Blank line separates one processed input line from the next.
            labelled.WriteLine("");
            unlabelled.WriteLine("");
        }
    }

    labelled.Flush();
    unlabelled.Flush();
}
/// <summary>
/// Builds every word set by reading its list through ReadModel. Entries are lower-cased
/// (invariant culture) and trimmed; entries that end up empty are skipped.
/// </summary>
private Config()
{
    BlackList = LoadWordSet(blackList);
    PronounSet = LoadWordSet(PronounList);
    ConjunctionSet = LoadWordSet(ConjuctionList);
    VerbSet = LoadWordSet(VerbList);
    ArticleSet = LoadWordSet(ArticleList);
    PrepositionSet = LoadWordSet(PrepositionList);
    SuffixSet = LoadWordSet(SuffixList);
    AdjectiveSet = LoadWordSet(AdjectiveList);
}

/// <summary>
/// Reads every line that ReadModel yields for <paramref name="listSource"/> and returns the
/// distinct, non-empty entries after lower-casing and trimming.
/// </summary>
/// <param name="listSource">Value passed to the ReadModel constructor (presumably a file path - confirm).</param>
/// <returns>A new set holding the normalised entries; empty when no usable lines were read.</returns>
private static HashSet<string> LoadWordSet(string listSource)
{
    var words = new HashSet<string>();
    var reader = new ReadModel(listSource);
    foreach (var line in reader.GetNextLine())
    {
        var word = line.ToLowerInvariant().Trim();
        if (string.IsNullOrEmpty(word))
        {
            continue;
        }

        words.Add(word);
    }

    return words;
}