/// <summary>
/// Builds a <c>TreeNode</c> hierarchy that mirrors the given word trees.
/// </summary>
/// <param name="lRoot">Root words; each word's children are read from its <c>lCh</c> list.</param>
/// <returns>One top-level node per root word, in the same order as <paramref name="lRoot"/>.</returns>
TreeNode[] TNodes(List<WordRu> lRoot)
{
    List<TreeNode> topLevel = new List<TreeNode>();

    // Each pending entry pairs a word with the tree node already created for it,
    // so the two always travel together through the traversal.
    Stack<KeyValuePair<WordRu, TreeNode>> pending = new Stack<KeyValuePair<WordRu, TreeNode>>();

    foreach (WordRu rootWord in lRoot)
    {
        TreeNode rootNode = new TreeNode(rootWord.ToString());
        topLevel.Add(rootNode);
        pending.Push(new KeyValuePair<WordRu, TreeNode>(rootWord, rootNode));

        while (pending.Count > 0)
        {
            KeyValuePair<WordRu, TreeNode> current = pending.Pop();
            WordRu word = current.Key;
            TreeNode node = current.Value;

            foreach (WordRu child in word.lCh)
            {
                // The node label is whatever WordRu.ToString() yields.
                TreeNode childNode = new TreeNode(child.ToString());
                node.Nodes.Add(childNode);
                pending.Push(new KeyValuePair<WordRu, TreeNode>(child, childNode));
            }
        }
    }

    return topLevel.ToArray();
}
/// <summary>
/// Creates a word node from its textual forms, link tags, attributes and tree links.
/// The <c>prep</c> field always starts out as an empty string.
/// </summary>
/// <param name="norm">Value stored in <c>norm</c> (presumably the normalized form of the word).</param>
/// <param name="form">Value stored in <c>form</c> (presumably the form as it appears in the text).</param>
/// <param name="lnkM">Link tag stored in <c>lnkM</c>.</param>
/// <param name="lnkD">Link tag stored in <c>lnkD</c>.</param>
/// <param name="lAt">Attribute list stored in <c>lAt</c> (kept by reference, not copied).</param>
/// <param name="lCh">Child list stored in <c>lCh</c> (kept by reference, not copied).</param>
/// <param name="par">Parent word stored in <c>par</c>; callers pass <c>null</c> for a root.</param>
public WordRu(
    string norm,
    string form,
    LinkTagRu lnkM,
    LinkTagRu lnkD,
    List<AttributeRu> lAt,
    List<WordRu> lCh,
    WordRu par)
{
    // Textual data.
    this.norm = norm;
    this.form = form;
    this.prep = "";

    // Tree structure.
    this.par = par;
    this.lCh = lCh;

    // Link tags and attributes.
    this.lnkM = lnkM;
    this.lnkD = lnkD;
    this.lAt = lAt;
}
/// <summary>
/// Reads a text file, analyzes it into word trees, counts how often each normalized
/// word occurs, and merges those counts into the frequency table through
/// <c>FreqTableAdapter</c>.
/// </summary>
/// <param name="file">Path of the text file; it is decoded with <c>System.Text.Encoding.Default</c>.</param>
/// <returns>The number of distinct normalized words that were inserted or updated.</returns>
static public int Freq(string file)
{
    // Read the whole file and release the handle immediately, even if reading throws.
    string text;
    using (StreamReader sr = new StreamReader(file, System.Text.Encoding.Default))
    {
        text = sr.ReadToEnd();
    }

    List<WordRu> lRoot = Analize(text);
    SyntaxRu.RemovePrepPunc(lRoot);

    // lNorm is kept sorted so BinarySearch can be used; lFreq is parallel to it.
    List<string> lNorm = new List<string>();
    List<int> lFreq = new List<int>();
    Stack<WordRu> st = new Stack<WordRu>();
    foreach (WordRu r in lRoot)
    {
        st.Push(r);
        while (st.Count > 0)
        {
            WordRu w = st.Pop();
            int index = lNorm.BinarySearch(w.norm);
            if (index < 0)
            {
                // A negative result is the bitwise complement of the insertion point.
                lNorm.Insert(~index, w.norm);
                lFreq.Insert(~index, 1);
            }
            else
            {
                lFreq[index]++;
            }

            foreach (WordRu c in w.lCh)
            {
                st.Push(c);
            }
        }
    }

    DSDict.FreqDataTable ft = new DSDict.FreqDataTable();
    DSDictTableAdapters.FreqTableAdapter fta = new Semantics.DSDictTableAdapters.FreqTableAdapter();
    for (int i = 0; i < lNorm.Count; i++)
    {
        fta.FillByWord(ft, lNorm[i]);
        if (ft.Rows.Count == 0)
        {
            // Word not stored yet: create it with the count from this file.
            fta.Insert(lNorm[i], lFreq[i]);
        }
        else
        {
            // Word already stored: add this file's count to the stored one.
            fta.UpdateByWord(lNorm[i], (int)ft[0]["freq"] + lFreq[i], lNorm[i]);
        }
    }

    // Every distinct word is written exactly once above.
    return lNorm.Count;
}
/// <summary>
/// Removes punctuation and preposition nodes from the word trees in place.
/// A removed non-root node hands its children over to its parent; the children inherit
/// the removed node's <c>lnkM</c>, and, when the removed node is a preposition, record
/// its <c>norm</c> in their <c>prep</c> field.
/// </summary>
/// <param name="lRoot">
/// Root words. Roots that are punctuation or prepositions are removed from this list;
/// their children are not re-attached anywhere.
/// </param>
static public void RemovePrepPunc(List<WordRu> lRoot)
{
    Stack<WordRu> st = new Stack<WordRu>();
    List<WordRu> lRemove = new List<WordRu>();

    foreach (WordRu r in lRoot)
    {
        st.Push(r);
        while (st.Count > 0)
        {
            WordRu w = st.Pop();

            // Strip a two-character prefix when norm starts with a comma (presumably ", ").
            // The length is tested first so that an empty norm does not throw on norm[0].
            if (w.norm.Length > 1 && w.norm[0] == ',')
            {
                w.norm = w.norm.Substring(2);
                w.form = w.form.Substring(2);
            }

            bool punc = w.lAt.Contains(AttributeRu.пунктуация);
            bool prep = w.lAt.Contains(AttributeRu.предлог);
            if (punc || prep)
            {
                if (w.par == null)
                {
                    // Removing from lRoot is deferred: lRoot is being enumerated right now.
                    lRemove.Add(w);
                }
                else
                {
                    w.par.lCh.Remove(w);
                    foreach (WordRu c in w.lCh)
                    {
                        w.par.lCh.Add(c);

                        // Keep the parent link in sync with the new position. Without this,
                        // a promoted child that is itself removed later would be detached
                        // from the already-removed node instead of from the live tree.
                        c.par = w.par;

                        if (prep)
                        {
                            c.prep = w.norm;
                        }
                        c.lnkM = w.lnkM;
                    }
                }
            }

            // Children are visited regardless of whether w itself was removed.
            foreach (WordRu c in w.lCh)
            {
                st.Push(c);
            }
        }
    }

    foreach (WordRu w in lRemove)
    {
        lRoot.Remove(w);
    }
}
/// <summary>
/// Measures how similar two sets of word trees are. Every node is reduced to a signature
/// (see <c>CollectNodeSignatures</c>); the result is the number of distinct signatures
/// found in both inputs divided by the number of distinct signatures found in either.
/// </summary>
/// <param name="lRoot1">Roots of the first set of trees.</param>
/// <param name="lRoot2">Roots of the second set of trees.</param>
/// <returns>
/// A ratio between 0 and 1. When neither input yields any signature the division is
/// 0.0 / 0 and the result is <c>double.NaN</c>.
/// </returns>
static public double Compare(List<WordRu> lRoot1, List<WordRu> lRoot2)
{
    List<string> lHash1 = CollectNodeSignatures(lRoot1);
    List<string> lHash2 = CollectNodeSignatures(lRoot2);

    int match = 0;
    foreach (string x in lHash1)
    {
        if (lHash2.BinarySearch(x) >= 0)
        {
            match++;
        }
    }

    // Shared signatures over the union: |A ∩ B| / (|A| + |B| - |A ∩ B|).
    return (double)match / (lHash1.Count + lHash2.Count - match);
}

/// <summary>
/// Walks the given trees and returns the sorted, duplicate-free list of node signatures.
/// A node's signature is the <c>norm</c> values of its direct children, sorted and
/// concatenated without separators, followed by the node's own <c>norm</c>.
/// </summary>
static private List<string> CollectNodeSignatures(List<WordRu> roots)
{
    List<string> signatures = new List<string>();
    Stack<WordRu> st = new Stack<WordRu>();

    foreach (WordRu r in roots)
    {
        st.Push(r);
        while (st.Count > 0)
        {
            WordRu w = st.Pop();

            List<string> childNorms = new List<string>();
            foreach (WordRu c in w.lCh)
            {
                childNorms.Add(c.norm);
                st.Push(c);
            }

            // Sorting makes the signature independent of child order.
            childNorms.Sort();
            string x = string.Concat(childNorms.ToArray()) + w.norm;

            // Keep the list sorted and unique so callers can use BinarySearch.
            int index = signatures.BinarySearch(x);
            if (index < 0)
            {
                signatures.Insert(~index, x);
            }
        }
    }

    return signatures;
}
/// <summary>
/// Runs the external analyzer <c>bin\wrf.exe</c> on the given text and turns its XML
/// output into word trees. The text is written to <c>in.txt</c> and the result is read
/// from <c>out.xml</c>; both paths are relative to the current directory.
/// </summary>
/// <param name="text">Text to analyze; written with <c>System.Text.Encoding.Default</c>.</param>
/// <returns>The words that had no enclosing <c>Node</c> element, each with its children attached.</returns>
static public List<WordRu> Analize(string text)
{
    // Hand the text to the analyzer through a file; dispose the writer even on failure.
    using (StreamWriter sw = new StreamWriter("in.txt", false, System.Text.Encoding.Default))
    {
        sw.Write(text);
    }

    using (Process pr = Process.Start("bin\\wrf.exe", "i:in.txt o:out.xml lc:log.txt xml n"))
    {
        pr.WaitForExit();
    }

    List<WordRu> lRoot = new List<WordRu>();

    // Words whose "Node" element is currently open; the top is the parent of the next word.
    Stack<WordRu> st = new Stack<WordRu>();

    // Both the stream and the reader are released even when parsing throws.
    using (FileStream fs = new FileStream("out.xml", FileMode.Open))
    using (XmlReader reader = XmlReader.Create(fs))
    {
        while (reader.Read())
        {
            switch (reader.NodeType)
            {
                case XmlNodeType.XmlDeclaration:
                    break;

                case XmlNodeType.Element:
                    switch (reader.Name)
                    {
                        case "Node":
                            LinkTagRu lnkM = LinkTagRu.none, lnkD = LinkTagRu.none;
                            string sign = reader.GetAttribute("Sign");
                            string lTag = reader.GetAttribute("LeftTag"), rTag = reader.GetAttribute("RightTag");

                            // "Sign" decides which tag goes to lnkM and which to lnkD.
                            // The last three characters of each tag are dropped before
                            // the rest is parsed as a LinkTagRu name.
                            if (sign == "<")
                            {
                                lnkM = (LinkTagRu)Enum.Parse(typeof(LinkTagRu), lTag.Substring(0, lTag.Length - 3));
                                lnkD = (LinkTagRu)Enum.Parse(typeof(LinkTagRu), rTag.Substring(0, rTag.Length - 3));
                            }
                            else if (sign == ">")
                            {
                                lnkD = (LinkTagRu)Enum.Parse(typeof(LinkTagRu), lTag.Substring(0, lTag.Length - 3));
                                lnkM = (LinkTagRu)Enum.Parse(typeof(LinkTagRu), rTag.Substring(0, rTag.Length - 3));
                            }

                            // Advance to the next element, which carries the Norm/Form attributes.
                            while (reader.Read() && reader.NodeType != XmlNodeType.Element)
                            {
                            }
                            string norm = reader.GetAttribute("Norm"), form = reader.GetAttribute("Form");

                            // Each following element contributes one integer text value,
                            // cast to AttributeRu, until an end element is reached
                            // (presumably the end of the element that carried Norm/Form).
                            List<AttributeRu> lAt = new List<AttributeRu>();
                            while (true)
                            {
                                while (reader.Read()
                                       && reader.NodeType != XmlNodeType.Element
                                       && reader.NodeType != XmlNodeType.EndElement)
                                {
                                }
                                if (reader.NodeType == XmlNodeType.EndElement)
                                {
                                    break;
                                }

                                while (reader.Read() && reader.NodeType != XmlNodeType.Text)
                                {
                                }
                                lAt.Add((AttributeRu)int.Parse(reader.Value));

                                // Skip to the end of this attribute element.
                                while (reader.Read() && reader.NodeType != XmlNodeType.EndElement)
                                {
                                }
                            }

                            WordRu par = null;
                            if (st.Count > 0)
                            {
                                par = st.Peek();
                            }

                            WordRu w = new WordRu(norm, form, lnkM, lnkD, lAt, new List<WordRu>(), par);
                            if (par != null)
                            {
                                par.lCh.Add(w);
                            }
                            else
                            {
                                lRoot.Add(w);
                            }
                            st.Push(w);
                            break;
                    }
                    break;

                case XmlNodeType.EndElement:
                    // Closing a "Node" finishes the word on top of the stack.
                    if (reader.Name == "Node")
                    {
                        st.Pop();
                    }
                    break;
            }
        }
    }

    return lRoot;
}
/// <summary>
/// Rewrites word norms in two sets of word trees so that synonyms converge on a shared
/// word. For every distinct norm the stored frequency and the synonym list are loaded
/// through the table adapters. A word is then replaced by its first synonym that is
/// known to both texts when the word itself is not known to both texts, or when that
/// synonym has a higher stored frequency than the word.
/// </summary>
/// <param name="lRoot1">Roots of the first text; <c>norm</c> fields may be overwritten.</param>
/// <param name="lRoot2">Roots of the second text; <c>norm</c> fields may be overwritten.</param>
/// <returns>The number of word nodes whose <c>norm</c> was replaced.</returns>
static public int Syn(List<WordRu> lRoot1, List<WordRu> lRoot2)
{
    SynTextIndex text1 = new SynTextIndex();
    SynTextIndex text2 = new SynTextIndex();

    // lNorm is the sorted union of all words and synonyms seen in either text.
    // lMatch1[i] / lMatch2[i] tell whether lNorm[i] was seen (as a word or as a
    // synonym of a word) in the first / second text.
    List<string> lNorm = new List<string>();
    List<bool> lMatch1 = new List<bool>(), lMatch2 = new List<bool>();

    DSDictTableAdapters.FreqTableAdapter fta = new Semantics.DSDictTableAdapters.FreqTableAdapter();
    DSDictTableAdapters.SynTableAdapter sta = new Semantics.DSDictTableAdapters.SynTableAdapter();
    DSDict.FreqDataTable fdt = new DSDict.FreqDataTable();
    DSDict.SynDataTable sdt = new DSDict.SynDataTable();

    SynBuildIndex(lRoot1, text1, lNorm, lMatch1, lMatch2, fta, sta, fdt, sdt);
    SynBuildIndex(lRoot2, text2, lNorm, lMatch2, lMatch1, fta, sta, fdt, sdt);

    int count = SynReplace(text1, lNorm, lMatch1, lMatch2);
    count += SynReplace(text2, lNorm, lMatch1, lMatch2);
    return count;
}

/// <summary>Per-text lookup tables; all lists are parallel and ordered like <c>lNorm</c>.</summary>
private sealed class SynTextIndex
{
    public readonly List<string> lNorm = new List<string>();               // distinct norms, sorted
    public readonly List<int> lFreq = new List<int>();                     // stored frequency of each norm (0 if none)
    public readonly List<List<WordRu>> llW = new List<List<WordRu>>();     // nodes carrying each norm
    public readonly List<List<string>> llSyn = new List<List<string>>();   // synonyms of each norm
    public readonly List<List<int>> llSFreq = new List<List<int>>();       // stored frequency of each synonym (0 if null)
}

/// <summary>
/// Records that <paramref name="word"/> was seen in the text owning <paramref name="own"/>,
/// adding it to the shared sorted list if it is new.
/// </summary>
static private void SynMark(string word, List<string> lNorm, List<bool> own, List<bool> other)
{
    int index = lNorm.BinarySearch(word);
    if (index < 0)
    {
        index = ~index;
        lNorm.Insert(index, word);
        // A new entry is known only to the text being indexed. The previous code
        // inserted the first text's flags during the second pass as well, which left
        // the first-text flag permanently true.
        own.Insert(index, true);
        other.Insert(index, false);
    }
    else
    {
        own[index] = true;
    }
}

/// <summary>
/// Walks every node of the given trees, fills <paramref name="text"/> with frequency and
/// synonym data for each distinct norm, and marks the norm and its synonyms as seen.
/// </summary>
static private void SynBuildIndex(
    List<WordRu> roots,
    SynTextIndex text,
    List<string> lNorm,
    List<bool> own,
    List<bool> other,
    DSDictTableAdapters.FreqTableAdapter fta,
    DSDictTableAdapters.SynTableAdapter sta,
    DSDict.FreqDataTable fdt,
    DSDict.SynDataTable sdt)
{
    Stack<WordRu> st = new Stack<WordRu>();
    foreach (WordRu r in roots)
    {
        st.Push(r);
        while (st.Count > 0)
        {
            WordRu w = st.Pop();

            // Queue the children before anything else: previously they were never
            // pushed, so only root words were processed. Doing it first also keeps
            // the early "continue" below from skipping a subtree.
            foreach (WordRu c in w.lCh)
            {
                st.Push(c);
            }

            int index = text.lNorm.BinarySearch(w.norm);
            if (index >= 0)
            {
                // Norm already indexed for this text: just remember the extra node.
                text.llW[index].Add(w);
                continue;
            }

            index = ~index;
            text.lNorm.Insert(index, w.norm);

            if (fta.FillByWord(fdt, w.norm) > 0)
            {
                text.lFreq.Insert(index, (int)fdt[0].freq);
            }
            else
            {
                text.lFreq.Insert(index, 0);
            }

            sta.Fill(sdt, w.norm);
            List<string> lSyn = new List<string>();
            List<int> lSFreq = new List<int>();
            foreach (DSDict.SynRow sr in sdt)
            {
                lSyn.Add((string)sr.word);
                if (sr.IsfreqNull())
                {
                    lSFreq.Add(0);
                }
                else
                {
                    lSFreq.Add((int)sr.freq);
                }
            }

            List<WordRu> nodes = new List<WordRu>();
            nodes.Add(w);
            text.llW.Insert(index, nodes);
            text.llSyn.Insert(index, lSyn);
            text.llSFreq.Insert(index, lSFreq);

            SynMark(w.norm, lNorm, own, other);
            foreach (string syn in lSyn)
            {
                SynMark(syn, lNorm, own, other);
            }
        }
    }
}

/// <summary>
/// Applies the replacement rule to one text and returns how many nodes were rewritten.
/// </summary>
static private int SynReplace(SynTextIndex text, List<string> lNorm, List<bool> lMatch1, List<bool> lMatch2)
{
    int count = 0;
    for (int i = 0; i < text.lNorm.Count; i++)
    {
        // Find the first synonym that is known to both texts.
        int j;
        for (j = 0; j < text.llSyn[i].Count; j++)
        {
            int k = lNorm.BinarySearch(text.llSyn[i][j]);
            if (lMatch1[k] && lMatch2[k])
            {
                break;
            }
        }
        if (j == text.llSyn[i].Count)
        {
            continue;
        }

        // Replace when the word itself is not shared, or the synonym is more frequent.
        int index = lNorm.BinarySearch(text.lNorm[i]);
        if (!lMatch1[index] || !lMatch2[index] || text.llSFreq[i][j] > text.lFreq[i])
        {
            count += text.llW[i].Count;
            foreach (WordRu w in text.llW[i])
            {
                w.norm = text.llSyn[i][j];
            }
        }
    }
    return count;
}