/// <summary>
/// Appends one sentence pair, as a <c>sentence_pair</c> XML element, to the text file named
/// <c>info.FileName + ".txt"</c>.
/// </summary>
/// <remarks>
/// When the file does not exist yet, it is created and started with the XML declaration and the
/// opening <c>sentence_pairs</c> root tag. This method never writes the closing root tag, and
/// the file grows on every call unless it is deleted elsewhere.
/// </remarks>
/// <param name="pair">Pair whose sentences, edit distance and shared-word value are written.</param>
/// <param name="info">Supplies the target file name (without extension) and the pair number.</param>
static void Save(sentence_pair pair, SaveInfo info)
{
    string fileName = info.FileName + ".txt";

    // Must be decided before opening: File.AppendText creates a missing file itself.
    bool isNewFile = !File.Exists(fileName);

    // Sentence text is free-form, so characters such as '&' and '<' have to be escaped
    // or they would corrupt the surrounding markup.
    string firstSentence = System.Security.SecurityElement.Escape($"{pair.sentence_1}");
    string secondSentence = System.Security.SecurityElement.Escape($"{pair.sentence_2}");

    using (StreamWriter sw = File.AppendText(fileName))
    {
        if (isNewFile)
        {
            // The encoding name must not contain spaces ("utf-8", not "utf - 8").
            sw.WriteLine("<?xml version = \"1.0\" encoding = \"utf-8\" ?>");
            sw.WriteLine("<sentence_pairs>");
        }

        // The classification attribute is intentionally written empty here.
        sw.WriteLine($"<sentence_pair classification = \"\" edit_distance = \"{pair.edit_distance}\" shared_words = \"{pair.shared_words}\" pair_no = \"{info.Counter}\" >");
        sw.WriteLine($"<sentence_1>{firstSentence}</sentence_1>");
        sw.WriteLine($"<sentence_2>{secondSentence}</sentence_2>");
        sw.WriteLine("</sentence_pair>");
    }
}
/// <summary>
/// Reports whether a sentence pair passes filter 1, filter 2 and filter 3.
/// </summary>
/// <param name="pair">Pair to evaluate.</param>
/// <returns>
/// <c>true</c> only when all three filters accept the pair. The filters are evaluated in order
/// and evaluation stops at the first one that rejects it.
/// </returns>
public static bool IsFiltersSatisfied(sentence_pair pair)
{
    return IsFilter1Satisfied(pair)
        && IsFilter2Satisfied(pair)
        && IsFilter3Satisfied(pair);
}
/// <summary>
/// Filter 3: accepts a pair whose <c>shared_words</c> value is at least 5.
/// </summary>
/// <param name="pair">Pair whose <c>shared_words</c> value is tested.</param>
/// <returns><c>true</c> when <c>pair.shared_words</c> is 5 or more; otherwise <c>false</c>.</returns>
static bool IsFilter3Satisfied(sentence_pair pair)
{
    const int minimumSharedWords = 5;

    return pair.shared_words >= minimumSharedWords;
}
/// <summary>
/// Saves the pair through <c>Save</c> and then calls <c>info.Increment()</c>, as long as
/// <c>info.Counter</c> has not gone past <c>info.MaxSentencePairs</c>.
/// </summary>
/// <remarks>
/// The check, the save and the increment all run while holding <c>info.lock_obj</c>.
/// NOTE(review): the bound is inclusive (<c>Counter &lt;= MaxSentencePairs</c>); whether that
/// yields exactly <c>MaxSentencePairs</c> saved pairs depends on the counter's starting value,
/// which is not visible here - confirm.
/// </remarks>
/// <param name="pair">Pair to write.</param>
/// <param name="info">Destination state: lock object, counter, limit and file name.</param>
static void SavePairToFile(sentence_pair pair, SaveInfo info)
{
    lock (info.lock_obj)
    {
        bool withinLimit = info.Counter <= info.MaxSentencePairs;
        if (withinLimit)
        {
            Save(pair, info);
            info.Increment();
        }
    }
}
/// <summary>
/// Routes a pair to the filtered output that matches its <c>classification</c> code.
/// </summary>
/// <remarks>
/// "PD" goes to <c>FilteredPartiallyDerivedSentences</c>, "WD" to
/// <c>FilteredWhollyDerivedSentences</c> and "ND" to <c>FilteredNonDerviedSentences</c>.
/// Any other classification is ignored and nothing is saved.
/// </remarks>
/// <param name="pair">Pair to save; its <c>classification</c> selects the destination.</param>
public static void SaveToFilteredSentence(sentence_pair pair)
{
    SaveInfo destination;

    switch (pair.classification)
    {
        case "PD":
            destination = FilteredPartiallyDerivedSentences;
            break;
        case "WD":
            destination = FilteredWhollyDerivedSentences;
            break;
        case "ND":
            destination = FilteredNonDerviedSentences;
            break;
        default:
            // Unrecognized classification: nothing to save.
            return;
    }

    SavePairToFile(pair, destination);
}
/// <summary>
/// Filter 2: accepts a pair when both <c>sentence1_words</c> and <c>sentence2_words</c> lie in
/// the inclusive range 5 to 40.
/// </summary>
/// <param name="pair">Pair whose two word-count values are tested.</param>
/// <returns><c>true</c> when both values are within range; otherwise <c>false</c>.</returns>
static bool IsFilter2Satisfied(sentence_pair pair)
{
    const int lowerBound = 5;
    const int upperBound = 40;

    // The first sentence is checked first; the second is only examined when the first passes.
    if (!(pair.sentence1_words >= lowerBound && pair.sentence1_words <= upperBound))
    {
        return false;
    }

    return pair.sentence2_words >= lowerBound && pair.sentence2_words <= upperBound;
}
/// <summary>
/// Filter 1: accepts a pair through either of two alternatives.
/// </summary>
/// <remarks>
/// Alternative A: both <c>sentence_1_no</c> and <c>sentence_2_no</c> are at most 2.
/// Alternative B: <c>edit_distance_word_level</c> is between 1 and 20 inclusive, and the
/// strictly shorter sentence is at least half as long as the other one.
/// NOTE(review): both length comparisons are strict, so a pair whose two lengths are equal
/// never satisfies alternative B - confirm this is intended.
/// </remarks>
/// <param name="pair">Pair to evaluate.</param>
/// <returns><c>true</c> when either alternative holds; otherwise <c>false</c>.</returns>
static bool IsFilter1Satisfied(sentence_pair pair)
{
    // Alternative A: both sentence numbers are 2 or less.
    if (pair.sentence_1_no <= 2 && pair.sentence_2_no <= 2)
    {
        return true;
    }

    // Alternative B starts with the word-level edit distance window.
    if (!(pair.edit_distance_word_level >= 1 && pair.edit_distance_word_level <= 20))
    {
        return false;
    }

    // Sentence 1 is the shorter one, but at least half the length of sentence 2.
    if (pair.sentence_1_length < pair.sentence_2_length
        && pair.sentence_1_length * 2 >= pair.sentence_2_length)
    {
        return true;
    }

    // Sentence 2 is the shorter one, but at least half the length of sentence 1.
    return pair.sentence_2_length < pair.sentence_1_length
        && pair.sentence_2_length * 2 >= pair.sentence_1_length;
}