示例#1
0
文件: NewSplit.cs 项目: xy19xiaoyu/TG
        /// <summary>
        /// Writes the index records of one search entry for a single serial number,
        /// optionally splitting every value into English words first.
        /// </summary>
        /// <remarks>
        /// Word-split mode (<c>se.WordSplit == WordSplitType.English</c>): every word returned by
        /// <c>WordSplit.getEnglistWordList</c> becomes one record made of the 4-byte serial number,
        /// optionally two 2-byte positions (1-based index of the value, 1-based index of the word
        /// inside that value) when <c>se.WordLocation</c> is set, and the first 16 bytes of the
        /// UTF-8 word padded with spaces. With <c>se.SingleFile</c> all records go to the
        /// <c>filelist</c> entry keyed <c>Index + se.Name</c>; otherwise the key is extended with
        /// <c>"_" + first character</c> (every digit is mapped to <c>'0'</c>) and words starting
        /// with <c>'_'</c> are skipped.
        /// Plain mode: the values are sorted and written as serial number plus <c>se.Length</c>
        /// bytes; the same is then repeated for every sub-key using the value truncated to the
        /// sub-key length.
        /// </remarks>
        /// <param name="strData">Values of this search entry; duplicates are removed before processing.</param>
        /// <param name="se">Configuration of the search entry.</param>
        /// <param name="Index">Value whose string form prefixes the <c>filelist</c> key (the original comment described it as the year the data belongs to).</param>
        /// <param name="SerialNo">Publication number or other unique serial number, written in front of every record.</param>
        /// <returns>Always <c>true</c>.</returns>
        private bool EnExSearchEnter(List<string> strData, SearchEnter se, int Index, int SerialNo)
        {
            byte[] by;
            byte[] byserial = BitConverter.GetBytes(SerialNo);
            byte[] positionBytes;
            strData = strData.Distinct().ToList();

            if (se.WordSplit == WordSplitType.English)
            {
                // Word splitting required.
                string baseKey = Index.ToString() + se.Name;

                for (int i = 0; i < strData.Count; i++)
                {
                    string str = strData[i].Trim();
                    // Nothing to index for an empty value.
                    if (string.IsNullOrEmpty(str))
                    {
                        continue;
                    }

                    List<string> words = WordSplit.getEnglistWordList(str);
                    bool singleFile = se.SingleFile == true;

                    for (int j = 0; j < words.Count; j++)
                    {
                        string word = words[j];
                        string key = baseKey;

                        if (!singleFile)
                        {
                            // The target file is chosen by the first character, so an empty
                            // word cannot be routed; skip it instead of failing on word[0].
                            if (string.IsNullOrEmpty(word))
                            {
                                continue;
                            }

                            char fchar = word[0];
                            // All words starting with a digit share the "_0" file.
                            if (fchar >= '0' && fchar <= '9')
                            {
                                fchar = '0';
                            }
                            if (fchar == '_')
                            {
                                continue;
                            }
                            key = baseKey + "_" + fchar;
                        }

                        by = Encoding.UTF8.GetBytes(word.PadRight(16, ' '));
                        if (by.Length < 16)
                        {
                            Error(SerialNo + "\t" + word);
                            continue;
                        }

                        var output = filelist[key];
                        output.Write(byserial, 0, byserial.Length);
                        if (se.WordLocation == true)
                        {
                            // 1-based position of the value, then of the word inside the value.
                            positionBytes = BitConverter.GetBytes((short)(i + 1));
                            output.Write(positionBytes, 0, 2);
                            positionBytes = BitConverter.GetBytes((short)(j + 1));
                            output.Write(positionBytes, 0, 2);
                        }
                        // Only the first 16 bytes are stored; longer words are cut off.
                        output.Write(by, 0, 16);
                    }
                }
            }
            else
            {
                // Ordinal comparison keeps the check independent of the current culture.
                bool isInt = string.Equals(se.DataType, "INT", StringComparison.OrdinalIgnoreCase);
                string entryKey = Index.ToString() + se.Name;

                strData = strData.OrderBy(x => x).ToList();
                foreach (string value in strData)
                {
                    if (string.IsNullOrEmpty(value.Trim()))
                    {
                        continue;
                    }

                    if (isInt)
                    {
                        by = BitConverter.GetBytes(Convert.ToInt32(value));
                    }
                    else
                    {
                        by = Encoding.UTF8.GetBytes(value.ToUpper().PadRight(se.Length, ' '));
                    }

                    if (by.Length < se.Length)
                    {
                        Error(SerialNo + "\t" + value);
                        continue;
                    }

                    var output = filelist[entryKey];
                    output.Write(byserial, 0, byserial.Length);
                    output.Write(by, 0, se.Length);
                }

                if (se.SubKey.Count > 0)
                {
                    foreach (SubSearchEnter sbenter in se.SubKey)
                    {
                        string subKey = Index.ToString() + sbenter.Name;

                        // Truncate every value to the sub-key length. Truncation can make
                        // different values collide, so de-duplicate again (as CnExSearchEnter does).
                        List<string> prefixes = strData
                            .Select(s => s.Length >= sbenter.Length ? s.Substring(0, sbenter.Length) : s)
                            .Distinct()
                            .OrderBy(x => x)
                            .ToList();

                        foreach (string prefix in prefixes)
                        {
                            if (string.IsNullOrEmpty(prefix.Trim()))
                            {
                                continue;
                            }

                            if (isInt)
                            {
                                // Convert the current item, not the whole list.
                                by = BitConverter.GetBytes(Convert.ToInt32(prefix));
                            }
                            else
                            {
                                by = Encoding.UTF8.GetBytes(prefix.ToUpper().PadRight(sbenter.Length, ' '));
                            }

                            if (by.Length < sbenter.Length)
                            {
                                // Report the offending item rather than the list's type name.
                                Error(SerialNo + "\t" + prefix);
                                continue;
                            }

                            var output = filelist[subKey];
                            output.Write(byserial, 0, byserial.Length);
                            output.Write(by, 0, sbenter.Length);
                        }
                    }
                }
            }

            return true;
        }
示例#2
0
文件: NewSplit.cs 项目: xy19xiaoyu/TG
        /// <summary>
        /// Writes the index records of one search entry for a single serial number,
        /// optionally splitting every value into one-, two- and three-character words first.
        /// </summary>
        /// <remarks>
        /// Word-split mode (<c>se.WordSplit == WordSplitType.Cn</c>): every value is cut into
        /// sentences by <c>WordSplit.getSentenceList</c>; the words collected from all sentences are
        /// de-duplicated, sorted and encoded with the encoding named by <c>se.Encoding</c>.
        /// With <c>se.SingleFile</c> only the three-character words are written (6 bytes each) to the
        /// <c>filelist</c> entry keyed <c>Index + se.Name</c>; otherwise one-, two- and
        /// three-character words are written (2, 4 and 6 bytes) to the keys suffixed
        /// <c>"1"</c>, <c>"2"</c> and <c>"3"</c>.
        /// Plain mode: the values are sorted and written as serial number plus <c>se.Length</c>
        /// bytes; the same is then repeated for every sub-key using the value truncated to the
        /// sub-key length.
        /// </remarks>
        /// <param name="strData">Values of this search entry; duplicates are removed before processing.</param>
        /// <param name="se">Configuration of the search entry.</param>
        /// <param name="Index">Value whose string form prefixes the <c>filelist</c> key (the original comment described it as the year the data belongs to).</param>
        /// <param name="SerialNo">Publication number or other unique serial number, written in front of every record.</param>
        /// <returns>Always <c>true</c>.</returns>
        private bool CnExSearchEnter(List<string> strData, SearchEnter se, int Index, int SerialNo)
        {
            byte[] by;
            byte[] byserial = BitConverter.GetBytes(SerialNo);
            strData = strData.Distinct().ToList();
            Encoding cd = System.Text.Encoding.GetEncoding(se.Encoding);
            string entryKey = Index.ToString() + se.Name;

            if (se.WordSplit == WordSplitType.Cn)
            {
                // Word splitting required.
                List<char> OneWord = new List<char>();
                List<string> TwoWord = new List<string>();
                List<string> ThreeWord = new List<string>();

                foreach (var str in strData)
                {
                    // Cut the value into short sentences without punctuation.
                    foreach (string s in WordSplit.getSentenceList(str.Trim()))
                    {
                        if (string.IsNullOrEmpty(s))
                        {
                            continue;
                        }

                        // Single characters of the sentence; the two- and three-character
                        // words are derived from this list.
                        List<char> chars = WordSplit.getOneWordList(s);
                        OneWord.AddRange(chars);
                        TwoWord.AddRange(WordSplit.getTwoWordList(chars));
                        ThreeWord.AddRange(WordSplit.getThreeWordList(chars));
                    }
                }

                ThreeWord = ThreeWord.Distinct().OrderBy(x => x).ToList();

                if (se.SingleFile == true)
                {
                    // Single-file mode stores three-character words only.
                    foreach (string w in ThreeWord)
                    {
                        WriteCnWordRecord(entryKey, w, cd, byserial, 6, SerialNo);
                    }
                }
                else
                {
                    // The shorter word lists are only needed in this mode, so they are
                    // de-duplicated and sorted here rather than unconditionally.
                    OneWord = OneWord.Distinct().OrderBy(x => x.ToString()).ToList();
                    TwoWord = TwoWord.Distinct().OrderBy(x => x).ToList();

                    foreach (char w in OneWord)
                    {
                        WriteCnWordRecord(entryKey + "1", w.ToString(), cd, byserial, 2, SerialNo);
                    }
                    foreach (string w in TwoWord)
                    {
                        WriteCnWordRecord(entryKey + "2", w, cd, byserial, 4, SerialNo);
                    }
                    foreach (string w in ThreeWord)
                    {
                        WriteCnWordRecord(entryKey + "3", w, cd, byserial, 6, SerialNo);
                    }
                }
            }
            else
            {
                // NOTE(review): this branch encodes with config.Encoding while the word-split
                // branch uses se.Encoding - confirm that the difference is intended.
                strData = strData.OrderBy(x => x).ToList();
                foreach (string value in strData)
                {
                    if (string.IsNullOrEmpty(value.Trim()))
                    {
                        continue;
                    }

                    // Encode and validate before touching the file so that a rejected value
                    // never leaves a serial number without its payload.
                    by = System.Text.Encoding.GetEncoding(config.Encoding).GetBytes(value.PadRight(se.Length, ' '));
                    if (by.Length < se.Length)
                    {
                        Error(SerialNo + "\t" + value);
                        continue;
                    }

                    var output = filelist[entryKey];
                    output.Write(byserial, 0, byserial.Length);
                    output.Write(by, 0, se.Length);
                }

                if (se.SubKey.Count > 0)
                {
                    foreach (SubSearchEnter sbenter in se.SubKey)
                    {
                        string subKey = Index.ToString() + sbenter.Name;

                        // Truncate every value to the sub-key length; truncation can make
                        // different values collide, hence the second Distinct.
                        List<string> prefixes = strData
                            .Select(s => s.Length >= sbenter.Length ? s.Substring(0, sbenter.Length) : s)
                            .Distinct()
                            .OrderBy(x => x)
                            .ToList();

                        foreach (string prefix in prefixes)
                        {
                            if (string.IsNullOrEmpty(prefix.Trim()))
                            {
                                continue;
                            }

                            by = System.Text.Encoding.GetEncoding(config.Encoding).GetBytes(prefix.PadRight(sbenter.Length, ' '));
                            if (by.Length < sbenter.Length)
                            {
                                // Report the offending item rather than the list's type name.
                                Error(SerialNo + "\t" + prefix);
                                continue;
                            }

                            var output = filelist[subKey];
                            output.Write(byserial, 0, byserial.Length);
                            output.Write(by, 0, sbenter.Length);
                        }
                    }
                }
            }

            return true;
        }

        /// <summary>
        /// Writes one record (serial number followed by the first <paramref name="byteCount"/> bytes
        /// of the encoded word) to the <c>filelist</c> entry <paramref name="fileKey"/>, or reports
        /// the word through <c>Error</c> when its encoded form is shorter than <paramref name="byteCount"/>.
        /// </summary>
        private void WriteCnWordRecord(string fileKey, string word, Encoding encoding, byte[] byserial, int byteCount, int SerialNo)
        {
            byte[] encoded = encoding.GetBytes(word);
            if (encoded.Length < byteCount)
            {
                Error(SerialNo + "\t" + word);
                return;
            }

            var output = filelist[fileKey];
            output.Write(byserial, 0, byserial.Length);
            output.Write(encoded, 0, byteCount);
        }