// Builds a minimal data set (one stem morpheme and one word record that refers to it),
// saves it through m_gd.SaveData, and checks that the text of the saved file equals
// m_dataBefore. The file is removed via DeleteFile whether or not the test passes.
public void SaveData()
{
    string savedPath = null;
    try
    {
        Assert.IsNotNull(m_gd.Classes);

        Morpheme stemMorpheme = new Morpheme(MorphemeType.stem, "M1");
        m_gd.Morphemes.Add(stemMorpheme);

        WordRecord record = new WordRecord();
        m_gd.WordRecords.Add(record);
        record.WRID = "WR1";
        record.Stem = new Stem();
        record.Stem.MIDREF = stemMorpheme.MID;

        savedPath = MakeFile();
        m_gd.SaveData(savedPath);

        // The reader is released at the end of the using block, i.e. before the
        // file is deleted in the finally clause below.
        using (StreamReader savedText = new StreamReader(savedPath))
        {
            string dataAfter = savedText.ReadToEnd();
            Assert.AreEqual(m_dataBefore, dataAfter, "Before and After");
        }
    }
    finally
    {
        DeleteFile(savedPath);
    }
}
// Writes m_dataBefore to a file (MakeFile), loads it with GAFAWSData.LoadData, and checks
// the loaded contents: exactly one morpheme, exactly one word record, and null prefix and
// suffix collections on that record. The file is removed via DeleteFile in all cases.
public void LoadGoodData()
{
    string dataPath = null;
    try
    {
        // Nothing has been loaded yet, so no morphemes are expected.
        Assert.AreEqual(0, m_gd.Morphemes.Count);

        dataPath = MakeFile(m_dataBefore);
        m_gd = GAFAWSData.LoadData(dataPath);

        // After loading there should be exactly one morpheme and one word record.
        Assert.AreEqual(1, m_gd.Morphemes.Count);
        Assert.AreEqual(1, m_gd.WordRecords.Count, "Wrong word record count.");

        WordRecord loadedRecord = m_gd.WordRecords[0];
        Assert.IsNull(loadedRecord.Prefixes, "Should have null preffix collection.");
        Assert.IsNull(loadedRecord.Suffixes, "Should have null suffix collection.");
    }
    finally
    {
        DeleteFile(dataPath);
    }
}
// Attaches m_otherTop to the stem of a new word record, runs AddOtherContents, re-reads
// the stem's Other from the first word record in m_gd, and runs CheckOtherContents on it.
// The file named by m_fileName is removed via DeleteFile in all cases.
public void AddOtherToStem()
{
    try
    {
        WordRecord record = new WordRecord();
        m_gd.WordRecords.Add(record);

        Stem recordStem = new Stem();
        record.Stem = recordStem;
        recordStem.Other = m_otherTop;

        AddOtherContents();

        // Pick the Other back up from the data object before checking it.
        WordRecord firstRecord = m_gd.WordRecords[0];
        m_otherTop = firstRecord.Stem.Other;
        CheckOtherContents();
    }
    finally
    {
        DeleteFile(m_fileName);
    }
}
// Attaches m_otherTop to the single prefix of a new word record, runs AddOtherContents,
// re-reads that prefix's Other from the first word record in m_gd, and runs
// CheckOtherContents on it. The file named by m_fileName is removed in all cases.
public void AddOtherToAffix()
{
    try
    {
        WordRecord record = new WordRecord();
        m_gd.WordRecords.Add(record);
        record.Prefixes = new List<Affix>();

        Affix prefix = new Affix();
        record.Prefixes.Add(prefix);
        prefix.Other = m_otherTop;

        AddOtherContents();

        // Pick the Other back up from the data object before checking it.
        WordRecord firstRecord = m_gd.WordRecords[0];
        m_otherTop = firstRecord.Prefixes[0].Other;
        CheckOtherContents();
    }
    finally
    {
        DeleteFile(m_fileName);
    }
}
// Converts the morph bundles of this analysis into one WordRecord and adds it to gData.
//
//   cmd      - handed to FwMorphBundle.GetMsaKey to obtain each bundle's key.
//   gData    - receives the new WordRecord and a Morpheme for every key not seen before.
//   prefixes - prefix keys registered so far; new ones are added here.
//   stems    - stem keys registered so far; new ones are added here.
//   suffixes - suffix keys registered so far; new ones are added here.
//
// Does nothing when CanConvert is false.
//
// The stem key is the space-separated list of the keys of all bundles from the first to
// the last "stem or derivational" bundle. No separator is written before the first part,
// so the same stem yields the same key whether or not prefixes precede it.
internal void Convert(SqlCommand cmd, GAFAWSData gData, Dictionary<string, FwMsa> prefixes, Dictionary<string, List<FwMsa>> stems, Dictionary<string, FwMsa> suffixes)
{
    if (!CanConvert)
        return;

    WordRecord wr = new WordRecord();

    // Deal with prefixes, if any.
    // Walk forward until the first stem or derivational bundle is found.
    int startStemOrd = 0;
    foreach (KeyValuePair<int, FwMorphBundle> kvp in m_morphBundles)
    {
        FwMorphBundle mb = kvp.Value;
        string msaKey = mb.GetMsaKey(cmd);
        if (IsStemOrDerivationalBundle(mb))
        {
            // Stem or derivational prefix, so bail out of this loop.
            startStemOrd = kvp.Key;
            break;
        }

        // Add prefix, if not already present.
        if (wr.Prefixes == null)
            wr.Prefixes = new List<Affix>();
        if (!prefixes.ContainsKey(msaKey))
        {
            prefixes.Add(msaKey, mb.MSA);
            gData.Morphemes.Add(new Morpheme(MorphemeType.prefix, msaKey));
        }
        Affix afx = new Affix();
        afx.MIDREF = msaKey;
        wr.Prefixes.Add(afx);
    }

    // Deal with suffixes, if any.
    // Work through the suffixes from the end of the word.
    // We stop when we hit the stem or a derivational suffix.
    // NOTE(review): indexing by i assumes the keys of m_morphBundles are exactly
    // 1..Count - confirm against the code that fills m_morphBundles.
    int endStemOrd = 0;
    for (int i = m_morphBundles.Count; i > 0; --i)
    {
        FwMorphBundle mb = m_morphBundles[i];
        string msaKey = mb.GetMsaKey(cmd);
        if (IsStemOrDerivationalBundle(mb))
        {
            // Stem or derivational suffix, so bail out of this loop.
            endStemOrd = i;
            break;
        }

        // Add suffix, if not already present.
        if (wr.Suffixes == null)
            wr.Suffixes = new List<Affix>();
        if (!suffixes.ContainsKey(msaKey))
        {
            suffixes.Add(msaKey, mb.MSA);
            gData.Morphemes.Add(new Morpheme(MorphemeType.suffix, msaKey));
        }
        Affix afx = new Affix();
        afx.MIDREF = msaKey;
        // Suffixes are visited last-to-first, so insert at the front to keep word order.
        wr.Suffixes.Insert(0, afx);
    }

    // Deal with stem.
    // NOTE(review): localStems is stored in 'stems' but nothing is ever added to it here.
    List<FwMsa> localStems = new List<FwMsa>();
    string sStem = "";
    bool isFirstStemPart = true;
    foreach (KeyValuePair<int, FwMorphBundle> kvp in m_morphBundles)
    {
        int currentOrd = kvp.Key;
        if (currentOrd < startStemOrd || currentOrd > endStemOrd)
            continue;

        string msaKey = kvp.Value.GetMsaKey(cmd);
        // Separate parts with a single space, but never start the key with one:
        // the stem does not begin at ord 1 when the word has prefixes.
        if (!isFirstStemPart)
            sStem += " ";
        sStem += msaKey;
        isFirstStemPart = false;
    }
    if (!stems.ContainsKey(sStem))
    {
        stems.Add(sStem, localStems);
        gData.Morphemes.Add(new Morpheme(MorphemeType.stem, sStem));
    }
    Stem stem = new Stem();
    stem.MIDREF = sStem;
    wr.Stem = stem;

    // Add wr.
    gData.WordRecords.Add(wr);
}

// True when the bundle's MSA class is one of the values at which affix collection stops
// (5001, 5031, 5032, 5117); the original comments describe these as "stem or derivational".
// NOTE(review): the original code questioned whether 5117 (MoUnclassifiedAffixMsa per its
// comment) belongs in this set - confirm.
private static bool IsStemOrDerivationalBundle(FwMorphBundle mb)
{
    return mb.MSA.Class == 5001
        || mb.MSA.Class == 5031
        || mb.MSA.Class == 5032
        || mb.MSA.Class == 5117;
}
/// -----------------------------------------------------------------------------------
/// <summary>
/// Convert the analysis and its morphemes.
/// </summary>
/// <remarks>
/// Nothing is converted when there is no stem, or when there are neither prefixes nor
/// suffixes. When PartsOfSpeech is non-empty the analysis is also filtered by category:
/// the word category is tested with ContainsCat if there is one, otherwise the
/// categories extracted from m_sstem are tested. A failed test skips the analysis.
/// The original form and word category are XML-escaped before being written into the
/// ANAInfo element, so values containing quotes, ampersands or angle brackets still
/// produce well-formed XML.
/// </remarks>
/// -----------------------------------------------------------------------------------
internal void Convert()
{
    if (m_stem == null || (m_prefixes == null && m_suffixes == null))
        return; // Don't convert a failure or no affixes.

    // Category Filter
    if ((PartsOfSpeech != null) && (PartsOfSpeech.Count != 0))
    {
        if (m_wordCategory != null)
        {
            // \cat? The word category alone decides.
            string[] catCats = { m_wordCategory };
            if (!ContainsCat(catCats))
                return;
        }
        else
        {
            // \a? Take every second element of the stem string, starting at index 1.
            // NOTE(review): presumably those are the category fields of m_sstem - confirm
            // against the code that builds m_sstem.
            string[] stemElements = m_sstem.Split(' ');
            string[] stemCats = new string[(stemElements.Length - 2) / 2];
            for (int catIdx = 0; catIdx < stemCats.Length; ++catIdx)
                stemCats[catIdx] = stemElements[(catIdx * 2) + 1];
            if (!ContainsCat(stemCats))
                return;
        }
    }

    WordRecord wr = new WordRecord();
    s_gd.WordRecords.Add(wr);
    wr.WRID = "WR" + s_idx++;
    if (m_prefixes != null)
        wr.Prefixes = new List<Affix>();
    if (m_suffixes != null)
        wr.Suffixes = new List<Affix>();

    if ((m_originalForm != null) || (m_wordCategory != null))
    {
        // The values go inside single-quoted attributes, so they must be escaped;
        // SecurityElement.Escape covers < > & " and '.
        string xml = "<ANAInfo";
        if (m_originalForm != null)
            xml += " form='" + System.Security.SecurityElement.Escape(m_originalForm) + "'";
        if (m_wordCategory != null)
            xml += " category='" + System.Security.SecurityElement.Escape(m_wordCategory) + "'";
        xml += " />";
        wr.Other = new Other(xml);
    }

    // Convert the morphemes in word order: prefixes, stem, suffixes.
    for (int i = 0; m_prefixes != null && i < m_prefixes.Count; ++i)
        m_prefixes[i].Convert();
    m_stem.Convert();
    for (int i = 0; m_suffixes != null && i < m_suffixes.Count; ++i)
        m_suffixes[i].Convert();
}
/// <summary>
/// Do whatever it takes to convert the input this processor knows about.
/// </summary>
/// <remarks>
/// Asks the user for an input file, reads it line by line, and builds a word record
/// from every line of the form <c>prefixes&lt;stem&gt;suffixes</c>, where prefixes
/// and suffixes are hyphen-separated. Lines without a '&lt;' followed by a '&gt;' are
/// skipped. The data is then run through PositionAnalyzer, saved, transformed with the
/// XSL file at XSLPathname into a temporary HTML file, and that file is opened.
/// The saved intermediate file is deleted once the transform step is over, and m_gd is
/// replaced by a fresh instance on every exit path so the next call starts clean.
/// </remarks>
public void Convert()
{
    try
    {
        string sourcePathname;
        using (OpenFileDialog openFileDlg = new OpenFileDialog())
        {
            openFileDlg.InitialDirectory = "c:\\";
            openFileDlg.Filter = "txt files (*.txt)|*.txt|All files (*.*)|*.*";
            openFileDlg.FilterIndex = 2;
            openFileDlg.Multiselect = false;
            if (openFileDlg.ShowDialog() != DialogResult.OK)
                return;
            sourcePathname = openFileDlg.FileName;
        }
        if (!File.Exists(sourcePathname))
            return;

        // Try to convert it.
        Dictionary<string, bool> dictPrefixes = new Dictionary<string, bool>();
        Dictionary<string, bool> dictStems = new Dictionary<string, bool>();
        Dictionary<string, bool> dictSuffixes = new Dictionary<string, bool>();
        using (StreamReader reader = new StreamReader(sourcePathname))
        {
            // Reading in the loop condition guarantees that 'continue' always
            // advances to the next line instead of retrying the current one.
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                line = line.Trim();
                if (line == String.Empty)
                    continue;

                int openAngleLocation = line.IndexOf("<", 0);
                if (openAngleLocation < 0)
                    continue;
                int closeAngleLocation = line.IndexOf(">", openAngleLocation + 1);
                if (closeAngleLocation < 0)
                    continue;

                WordRecord wrdRec = new WordRecord();
                m_gd.WordRecords.Add(wrdRec);

                // Handle prefixes, if any: everything before the '<'.
                if (openAngleLocation > 0)
                {
                    if (wrdRec.Prefixes == null)
                        wrdRec.Prefixes = new List<Affix>();
                    AddAffixes(line.Substring(0, openAngleLocation), wrdRec.Prefixes,
                        MorphemeType.prefix, dictPrefixes);
                }

                // Handle stem: the text between the angle brackets,
                // or the placeholder "stem" when there is none.
                string sStem = line.Substring(openAngleLocation + 1,
                    closeAngleLocation - openAngleLocation - 1);
                if (sStem.Length == 0)
                    sStem = "stem";
                Stem stem = new Stem();
                stem.MIDREF = sStem;
                wrdRec.Stem = stem;
                if (!dictStems.ContainsKey(sStem))
                {
                    m_gd.Morphemes.Add(new Morpheme(MorphemeType.stem, sStem));
                    dictStems.Add(sStem, true);
                }

                // Handle suffixes, if any: at least two characters must follow the '>'.
                if (line.Length > closeAngleLocation + 2)
                {
                    if (wrdRec.Suffixes == null)
                        wrdRec.Suffixes = new List<Affix>();
                    AddAffixes(line.Substring(closeAngleLocation + 1), wrdRec.Suffixes,
                        MorphemeType.suffix, dictSuffixes);
                }
            }
        } // end 'using'

        // Main processing.
        PositionAnalyzer anal = new PositionAnalyzer();
        anal.Process(m_gd);

        // Do any post-analysis processing here, if needed.
        // End of any optional post-processing.

        // Save, so it can be transformed.
        string outputPathname = GetOutputPathname(sourcePathname);
        m_gd.SaveData(outputPathname);

        // Transform.
        string htmlOutput;
        try
        {
            XslCompiledTransform trans = new XslCompiledTransform();
            try
            {
                trans.Load(XSLPathname);
            }
            catch
            {
                MessageBox.Show("Could not load the XSL file.", "Information");
                return;
            }

            htmlOutput = Path.GetTempFileName() + ".html";
            try
            {
                trans.Transform(outputPathname, htmlOutput);
            }
            catch
            {
                MessageBox.Show("Could not transform the input file.", "Information");
                return;
            }
        }
        finally
        {
            // The saved file is only an intermediate; remove it however the
            // transform step ended.
            if (outputPathname != null && File.Exists(outputPathname))
                File.Delete(outputPathname);
        }
        Process.Start(htmlOutput);
    }
    finally
    {
        // Reset m_gd, in case it gets called for another file.
        m_gd = GAFAWSData.Create();
    }
}

/// <summary>
/// Splits <paramref name="hyphenated"/> on '-' and, for every non-empty piece, appends an
/// Affix referring to it to <paramref name="target"/>. A piece not yet in
/// <paramref name="seen"/> is also added to m_gd.Morphemes with the given type and
/// recorded in <paramref name="seen"/>.
/// </summary>
private void AddAffixes(string hyphenated, List<Affix> target, MorphemeType type, Dictionary<string, bool> seen)
{
    foreach (string piece in hyphenated.Split('-'))
    {
        if (String.IsNullOrEmpty(piece))
            continue;

        Affix afx = new Affix();
        afx.MIDREF = piece;
        target.Add(afx);
        if (!seen.ContainsKey(piece))
        {
            m_gd.Morphemes.Add(new Morpheme(type, piece));
            seen.Add(piece, true);
        }
    }
}
// Round-trip check for saving: puts one stem morpheme and one word record that refers to
// it into m_gd, saves to a file obtained from MakeFile, and asserts that the file's text
// equals m_dataBefore. DeleteFile is called on the file name whatever happens.
public void SaveData()
{
    string outputFile = null;
    try
    {
        Classes classes = m_gd.Classes;
        Assert.IsNotNull(classes);

        // One stem morpheme ...
        Morpheme morpheme = new Morpheme(MorphemeType.stem, "M1");
        m_gd.Morphemes.Add(morpheme);

        // ... and one word record whose stem points at it.
        WordRecord wordRecord = new WordRecord();
        m_gd.WordRecords.Add(wordRecord);
        wordRecord.WRID = "WR1";
        wordRecord.Stem = new Stem();
        wordRecord.Stem.MIDREF = morpheme.MID;

        outputFile = MakeFile();
        m_gd.SaveData(outputFile);

        // Leaving the using block releases the reader before the file is deleted.
        using (StreamReader fileText = new StreamReader(outputFile))
        {
            Assert.AreEqual(m_dataBefore, fileText.ReadToEnd(), "Before and After");
        }
    }
    finally
    {
        DeleteFile(outputFile);
    }
}
// Gives a new word record a prefix list holding one affix whose Other is m_otherTop, runs
// AddOtherContents, then re-reads that Other from m_gd and runs CheckOtherContents.
// DeleteFile is called on m_fileName whatever happens.
public void AddOtherToAffix()
{
    try
    {
        WordRecord wordRecord = new WordRecord();
        m_gd.WordRecords.Add(wordRecord);
        wordRecord.Prefixes = new List<Affix>();

        Affix onlyPrefix = new Affix();
        wordRecord.Prefixes.Add(onlyPrefix);
        onlyPrefix.Other = m_otherTop;

        AddOtherContents();

        // Re-read the Other through the data object rather than the local reference.
        m_otherTop = m_gd.WordRecords[0].Prefixes[0].Other;

        CheckOtherContents();
    }
    finally
    {
        DeleteFile(m_fileName);
    }
}