コード例 #1
0
ファイル: Program.cs プロジェクト: koanse/Semantics
        /// <summary>
        /// Reads the text in <paramref name="file"/>, counts how many times each distinct
        /// <c>norm</c> string occurs among the analysed words, and adds those counts to the
        /// stored frequencies through <c>FreqTableAdapter</c>.
        /// </summary>
        /// <param name="file">Path of the text file; it is decoded with <c>System.Text.Encoding.Default</c>.</param>
        /// <returns>The number of distinct <c>norm</c> strings that were inserted or updated.</returns>
        static public int Freq(string file)
        {
            string s;

            // Release the file handle as soon as the text is in memory. The reader was
            // previously never closed, so the file stayed open until finalization.
            using (StreamReader sr = new StreamReader(file, System.Text.Encoding.Default))
            {
                s = sr.ReadToEnd();
            }

            List <WordRu> lRoot = Analize(s);

            SyntaxRu.RemovePrepPunc(lRoot);

            // lNorm is kept sorted so BinarySearch can be used; lFreq is parallel to it.
            List <string>  lNorm = new List <string>();
            List <int>     lFreq = new List <int>();
            Stack <WordRu> st    = new Stack <WordRu>();

            // Walk every root and everything reachable through lCh, using an explicit stack.
            foreach (WordRu r in lRoot)
            {
                st.Push(r);
                while (st.Count > 0)
                {
                    WordRu w     = st.Pop();
                    int    index = lNorm.BinarySearch(w.norm);
                    if (index < 0)
                    {
                        // A negative result is the bitwise complement of the insertion point.
                        lNorm.Insert(~index, w.norm);
                        lFreq.Insert(~index, 1);
                    }
                    else
                    {
                        lFreq[index]++;
                    }
                    foreach (WordRu c in w.lCh)
                    {
                        st.Push(c);
                    }
                }
            }
            DSDict.FreqDataTable ft = new DSDict.FreqDataTable();
            DSDictTableAdapters.FreqTableAdapter fta = new Semantics.DSDictTableAdapters.FreqTableAdapter();
            int count = 0;

            for (int i = 0; i < lNorm.Count; i++)
            {
                fta.FillByWord(ft, lNorm[i]);
                if (ft.Rows.Count == 0)
                {
                    // No stored row for this word yet: create one with the count from this file.
                    fta.Insert(lNorm[i], lFreq[i]);
                }
                else
                {
                    // Add this file's count to the stored frequency of the first returned row.
                    fta.UpdateByWord(lNorm[i], (int)ft[0]["freq"] + lFreq[i], lNorm[i]);
                }
                count++;
            }
            return(count);
        }
コード例 #2
0
ファイル: Program.cs プロジェクト: koanse/Semantics
        /// <summary>
        /// Rewrites the <c>norm</c> of words in the two lists to a synonym that is flagged as
        /// matched in the shared lookup, so that both lists end up using the same word form.
        /// </summary>
        /// <remarks>
        /// For every distinct <c>norm</c> in a list, the first synonym (in the order returned by
        /// <c>SynTableAdapter.Fill</c>) whose match flags are both set is chosen. The words are
        /// rewritten to it when the word's own <c>norm</c> is not flagged as matched, or when the
        /// synonym's stored frequency is greater than the word's stored frequency.
        ///
        /// NOTE(review): only the elements of <paramref name="lRoot1"/> and
        /// <paramref name="lRoot2"/> themselves are examined. The previous code pushed each element
        /// onto a stack and popped it straight away without ever pushing <c>w.lCh</c>, unlike
        /// <c>Freq</c>. Confirm whether child words were meant to be visited too.
        /// </remarks>
        /// <param name="lRoot1">Words of the first text; their <c>norm</c> may be modified.</param>
        /// <param name="lRoot2">Words of the second text; their <c>norm</c> may be modified.</param>
        /// <returns>The total number of words whose <c>norm</c> was replaced.</returns>
        static public int Syn(List <WordRu> lRoot1, List <WordRu> lRoot2)
        {
            SynWordIndex text1   = new SynWordIndex(), text2 = new SynWordIndex();
            SynMatcher   matcher = new SynMatcher();

            // Both lists must be fully collected before any replacement is decided,
            // because the match flags depend on what the second list contributes.
            matcher.Collect(lRoot1, text1, false);
            matcher.Collect(lRoot2, text2, true);

            int count = matcher.Replace(text1);

            count += matcher.Replace(text2);
            return(count);
        }

        // Per-list tables built by Syn. All five lists are parallel and ordered by lNorm,
        // which is kept sorted so that BinarySearch can be used on it.
        private sealed class SynWordIndex
        {
            public readonly List <string>         lNorm   = new List <string>();         // distinct norm strings
            public readonly List <int>            lFreq   = new List <int>();            // stored frequency of each norm (0 if none)
            public readonly List <List <WordRu> > llW     = new List <List <WordRu> >(); // words sharing each norm
            public readonly List <List <string> > llSyn   = new List <List <string> >(); // synonyms of each norm
            public readonly List <List <int> >    llSFreq = new List <List <int> >();    // frequency of each synonym (0 if null)
        }

        // State shared by both lists while Syn runs: the combined sorted lookup of norms and
        // synonyms with its match flags, plus the adapters and tables used to query the dictionary.
        private sealed class SynMatcher
        {
            private readonly List <string> lNorm   = new List <string>();
            private readonly List <bool>   lMatch1 = new List <bool>();
            private readonly List <bool>   lMatch2 = new List <bool>();

            private readonly DSDictTableAdapters.FreqTableAdapter fta = new Semantics.DSDictTableAdapters.FreqTableAdapter();
            private readonly DSDictTableAdapters.SynTableAdapter  sta = new Semantics.DSDictTableAdapters.SynTableAdapter();
            private readonly DSDict.FreqDataTable fdt = new DSDict.FreqDataTable();
            private readonly DSDict.SynDataTable  sdt = new DSDict.SynDataTable();

            // Groups the words of one list by norm, loads frequency and synonyms for every
            // new norm, and registers the norm and its synonyms in the shared lookup.
            public void Collect(List <WordRu> lRoot, SynWordIndex text, bool second)
            {
                foreach (WordRu w in lRoot)
                {
                    int index = text.lNorm.BinarySearch(w.norm);
                    if (index >= 0)
                    {
                        // Norm already known for this list: only remember the extra word.
                        text.llW[index].Add(w);
                        continue;
                    }
                    // A negative result is the bitwise complement of the insertion point.
                    index = ~index;
                    text.lNorm.Insert(index, w.norm);
                    if (fta.FillByWord(fdt, w.norm) > 0)
                    {
                        text.lFreq.Insert(index, (int)fdt[0].freq);
                    }
                    else
                    {
                        text.lFreq.Insert(index, 0);
                    }
                    sta.Fill(sdt, w.norm);
                    List <string> lSyn   = new List <string>();
                    List <int>    lSFreq = new List <int>();
                    foreach (DSDict.SynRow sr in sdt)
                    {
                        lSyn.Add((string)sr.word);
                        if (sr.IsfreqNull())
                        {
                            lSFreq.Add(0);
                        }
                        else
                        {
                            lSFreq.Add((int)sr.freq);
                        }
                    }
                    List <WordRu> lW = new List <WordRu>();
                    lW.Add(w);
                    text.llW.Insert(index, lW);
                    text.llSyn.Insert(index, lSyn);
                    text.llSFreq.Insert(index, lSFreq);

                    Register(w.norm, second);
                    foreach (string syn in lSyn)
                    {
                        Register(syn, second);
                    }
                }
            }

            // Adds a word to the shared lookup, or, for the second list only, flags an
            // already present word as matched.
            private void Register(string word, bool second)
            {
                int index = lNorm.BinarySearch(word);
                if (index < 0)
                {
                    index = ~index;
                    lNorm.Insert(index, word);
                    // NOTE(review): new entries get (true, false) even when they come from the
                    // second list, so lMatch1 is always true and "lMatch1 && lMatch2" reduces to
                    // lMatch2. This looks like a copy-paste of the first-list branch; kept as-is
                    // to preserve behaviour. Confirm whether (false, true) was intended here.
                    lMatch1.Insert(index, true);
                    lMatch2.Insert(index, false);
                }
                else if (second)
                {
                    lMatch2[index] = true;
                }
            }

            // Applies the replacement rule to one list and returns how many words were rewritten.
            public int Replace(SynWordIndex text)
            {
                int count = 0;

                for (int i = 0; i < text.lNorm.Count; i++)
                {
                    List <string> lSyn = text.llSyn[i];
                    int           j;

                    // Find the first synonym that is flagged as matched. Every synonym was
                    // registered by Collect, so the search result is a valid index.
                    for (j = 0; j < lSyn.Count; j++)
                    {
                        int k = lNorm.BinarySearch(lSyn[j]);
                        if (lMatch1[k] && lMatch2[k])
                        {
                            break;
                        }
                    }
                    if (j == lSyn.Count)
                    {
                        continue;
                    }
                    int index = lNorm.BinarySearch(text.lNorm[i]);
                    if (!lMatch1[index] || !lMatch2[index] || text.llSFreq[i][j] > text.lFreq[i])
                    {
                        count += text.llW[i].Count;
                        foreach (WordRu w in text.llW[i])
                        {
                            w.norm = lSyn[j];
                        }
                    }
                }
                return(count);
            }
        }