/// <summary>
/// Counts how often each normalized word occurs in a text file and merges those
/// counts into the frequency table accessed through <c>FreqTableAdapter</c>.
/// </summary>
/// <param name="file">Path of the text file; it is read with <c>System.Text.Encoding.Default</c>.</param>
/// <returns>The number of distinct normalized words found in the file (one table row is inserted or updated per word).</returns>
static public int Freq(string file)
{
    string s;
    // Dispose the reader deterministically so the file handle is released as soon
    // as the text has been loaded, even if ReadToEnd throws.
    using (StreamReader sr = new StreamReader(file, System.Text.Encoding.Default))
    {
        s = sr.ReadToEnd();
    }

    List<WordRu> lRoot = Analize(s);
    SyntaxRu.RemovePrepPunc(lRoot);

    // lNorm is kept sorted so BinarySearch can be used; lFreq[i] is the number of
    // occurrences of lNorm[i].
    List<string> lNorm = new List<string>();
    List<int> lFreq = new List<int>();

    // Depth-first walk over every root word and all of its descendants (lCh).
    Stack<WordRu> st = new Stack<WordRu>();
    foreach (WordRu r in lRoot)
    {
        st.Push(r);
        while (st.Count > 0)
        {
            WordRu w = st.Pop();
            int index = lNorm.BinarySearch(w.norm);
            if (index < 0)
            {
                // A negative result is the bitwise complement of the insertion point.
                lNorm.Insert(~index, w.norm);
                lFreq.Insert(~index, 1);
            }
            else
            {
                lFreq[index]++;
            }

            foreach (WordRu c in w.lCh)
            {
                st.Push(c);
            }
        }
    }

    DSDict.FreqDataTable ft = new DSDict.FreqDataTable();
    DSDictTableAdapters.FreqTableAdapter fta = new Semantics.DSDictTableAdapters.FreqTableAdapter();
    for (int i = 0; i < lNorm.Count; i++)
    {
        fta.FillByWord(ft, lNorm[i]);
        if (ft.Rows.Count == 0)
        {
            // Word not stored yet: create its row with the count from this file.
            fta.Insert(lNorm[i], lFreq[i]);
        }
        else
        {
            // Word already stored: add this file's count to the stored frequency.
            fta.UpdateByWord(lNorm[i], (int)ft[0]["freq"] + lFreq[i], lNorm[i]);
        }
    }

    return lNorm.Count;
}
/// <summary>
/// Replaces words of two texts with synonyms that occur in both texts, so that
/// the two texts share more of their vocabulary.
/// </summary>
/// <remarks>
/// For every distinct <c>norm</c> of each text, the first synonym (in the order
/// returned by <c>SynTableAdapter.Fill</c>) that is present in the vocabulary of
/// both texts is selected. The word is replaced by that synonym when the word
/// itself is not present in both texts, or when the synonym's stored frequency
/// is greater than the word's stored frequency.
/// NOTE(review): only the elements of <paramref name="lRoot1"/> and
/// <paramref name="lRoot2"/> themselves are visited; unlike <c>Freq</c>, their
/// children (<c>lCh</c>) are not traversed. This matches the previous behavior —
/// confirm that it is intended.
/// </remarks>
/// <param name="lRoot1">Words of the first text; their <c>norm</c> may be overwritten.</param>
/// <param name="lRoot2">Words of the second text; their <c>norm</c> may be overwritten.</param>
/// <returns>The number of word objects whose <c>norm</c> was replaced by a synonym.</returns>
static public int Syn(List<WordRu> lRoot1, List<WordRu> lRoot2)
{
    DSDictTableAdapters.FreqTableAdapter fta = new Semantics.DSDictTableAdapters.FreqTableAdapter();
    DSDictTableAdapters.SynTableAdapter sta = new Semantics.DSDictTableAdapters.SynTableAdapter();
    DSDict.FreqDataTable fdt = new DSDict.FreqDataTable();
    DSDict.SynDataTable sdt = new DSDict.SynDataTable();

    // Combined, sorted vocabulary (words and their synonyms) of both texts.
    // lMatch1[i] / lMatch2[i] tell whether lNorm[i] was met in text 1 / text 2.
    List<string> lNorm = new List<string>();
    List<bool> lMatch1 = new List<bool>();
    List<bool> lMatch2 = new List<bool>();

    SynTextIndex text1 = SynCollect(lRoot1, true, fta, sta, fdt, sdt, lNorm, lMatch1, lMatch2);
    SynTextIndex text2 = SynCollect(lRoot2, false, fta, sta, fdt, sdt, lNorm, lMatch1, lMatch2);

    int count = SynReplace(text1, lNorm, lMatch1, lMatch2);
    count += SynReplace(text2, lNorm, lMatch1, lMatch2);
    return count;
}

/// <summary>Per-text data gathered by Syn; all lists are parallel and ordered by <c>Norms</c>.</summary>
private sealed class SynTextIndex
{
    /// <summary>Distinct normalized words of the text, kept sorted for BinarySearch.</summary>
    public readonly List<string> Norms = new List<string>();

    /// <summary>Stored frequency of each word (0 when the frequency table has no row for it).</summary>
    public readonly List<int> Freqs = new List<int>();

    /// <summary>Word objects that share each normalized form.</summary>
    public readonly List<List<WordRu>> Words = new List<List<WordRu>>();

    /// <summary>Synonyms loaded for each word.</summary>
    public readonly List<List<string>> Syns = new List<List<string>>();

    /// <summary>Frequency of each synonym (0 when the synonym row has a null frequency).</summary>
    public readonly List<List<int>> SynFreqs = new List<List<int>>();
}

/// <summary>Builds the per-text index for one text and registers its words and synonyms in the shared vocabulary.</summary>
private static SynTextIndex SynCollect(
    List<WordRu> roots,
    bool fromFirst,
    DSDictTableAdapters.FreqTableAdapter fta,
    DSDictTableAdapters.SynTableAdapter sta,
    DSDict.FreqDataTable fdt,
    DSDict.SynDataTable sdt,
    List<string> vocab,
    List<bool> inFirst,
    List<bool> inSecond)
{
    SynTextIndex text = new SynTextIndex();
    foreach (WordRu w in roots)
    {
        int index = text.Norms.BinarySearch(w.norm);
        if (index >= 0)
        {
            // Normal form already indexed: only remember this additional occurrence.
            text.Words[index].Add(w);
            continue;
        }

        // A negative result is the bitwise complement of the insertion point.
        index = ~index;
        text.Norms.Insert(index, w.norm);

        int freq = 0;
        if (fta.FillByWord(fdt, w.norm) > 0)
        {
            freq = (int)fdt[0].freq;
        }
        text.Freqs.Insert(index, freq);

        sta.Fill(sdt, w.norm);
        List<string> lSyn = new List<string>();
        List<int> lSFreq = new List<int>();
        foreach (DSDict.SynRow sr in sdt)
        {
            lSyn.Add((string)sr.word);
            lSFreq.Add(sr.IsfreqNull() ? 0 : (int)sr.freq);
        }

        List<WordRu> occurrences = new List<WordRu>();
        occurrences.Add(w);
        text.Words.Insert(index, occurrences);
        text.Syns.Insert(index, lSyn);
        text.SynFreqs.Insert(index, lSFreq);

        // The word and every one of its synonyms count as present in this text.
        SynMark(vocab, inFirst, inSecond, w.norm, fromFirst);
        foreach (string syn in lSyn)
        {
            SynMark(vocab, inFirst, inSecond, syn, fromFirst);
        }
    }

    return text;
}

/// <summary>Records in the shared vocabulary that <paramref name="word"/> was met in the first or the second text.</summary>
private static void SynMark(List<string> vocab, List<bool> inFirst, List<bool> inSecond, string word, bool fromFirst)
{
    int index = vocab.BinarySearch(word);
    if (index < 0)
    {
        index = ~index;
        vocab.Insert(index, word);
        // A new entry belongs only to the text being scanned. Entries first met in
        // the second text used to be inserted as "first text only", so an entry met
        // twice in the second text was wrongly treated as shared by both texts.
        inFirst.Insert(index, fromFirst);
        inSecond.Insert(index, !fromFirst);
    }
    else if (fromFirst)
    {
        inFirst[index] = true;
    }
    else
    {
        inSecond[index] = true;
    }
}

/// <summary>Replaces the words of one text with shared synonyms and returns how many word objects were changed.</summary>
private static int SynReplace(SynTextIndex text, List<string> vocab, List<bool> inFirst, List<bool> inSecond)
{
    int count = 0;
    for (int i = 0; i < text.Norms.Count; i++)
    {
        List<string> syns = text.Syns[i];

        // Find the first synonym that is present in both texts.
        int j;
        for (j = 0; j < syns.Count; j++)
        {
            // Every synonym was registered by SynMark, so the search always succeeds.
            int k = vocab.BinarySearch(syns[j]);
            if (inFirst[k] && inSecond[k])
            {
                break;
            }
        }

        if (j == syns.Count)
        {
            continue;
        }

        int index = vocab.BinarySearch(text.Norms[i]);
        if (!inFirst[index] || !inSecond[index] || text.SynFreqs[i][j] > text.Freqs[i])
        {
            count += text.Words[i].Count;
            foreach (WordRu w in text.Words[i])
            {
                w.norm = syns[j];
            }
        }
    }

    return count;
}