コード例 #1
0
        /// <summary>
        /// Normalises <c>SearchedPattern</c> according to the selected options, fills
        /// <c>patterns</c> with the resulting search terms and tells <c>XMLParser</c>
        /// which algorithm to run.
        /// </summary>
        /// <remarks>
        /// The modes are tested in the order FlexibleWordsPosition, ExactMatch,
        /// MultiplePatterns and only the first enabled one is applied. When none is
        /// enabled, <c>patterns</c> and the selected algorithm are left untouched.
        /// </remarks>
        public void parseData()
        {
            StringBuilder sb = new StringBuilder();

            sb.Append(SearchedPattern);

            // NOTE(review): the two XMLParser flags below are only ever switched on here,
            // never back off; confirm they are reset elsewhere between searches.
            if (WithoutDiacritics)
            {
                string stripped = TransformImput.RemoveDiacritics(sb.ToString());
                sb.Clear();
                sb.Append(stripped);
                XMLParser.setWithoutDiacritics(true);
            }
            if (SmallLetters)
            {
                string lowered = TransformImput.ToLower(sb.ToString());
                sb.Clear();
                sb.Append(lowered);
                XMLParser.setToSmall(true);
            }

            if (FlexibleWordsPosition)
            {
                // Commas and full stops count as word separators, just like spaces.
                sb.Replace(',', ' ');
                sb.Replace('.', ' ');

                patterns = sb.ToString().Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries).ToList();

                XMLParser.setAlgorithm("AhoCorasick-All");
            }
            else if (ExactMatch)
            {
                // Replace the list instead of appending to it, as the other modes do:
                // a leftover entry from an earlier run would otherwise stay at index 0
                // and be the one a single-pattern search picks up.
                patterns = new[] { sb.ToString() }.ToList();
                XMLParser.setAlgorithm("KMP");
            }
            else if (MultiplePatterns)
            {
                XMLParser.setAlgorithm("AhoCorasick-MP");
                // One pattern per line of input; blank lines are dropped.
                patterns = sb.ToString().Split(new[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries).ToList();
            }
        }
コード例 #2
0
ファイル: XMLParser.cs プロジェクト: Tricerator/BibleProject
        /// <summary>
        /// Searches one verse for the given pattern(s) and, when something matches,
        /// stores a formatted result entry under the verse's key.
        /// </summary>
        /// <param name="algorithm">
        /// "KMP" searches for <c>pattern[0]</c> only; "AhoCorasick-MP" searches for every
        /// entry of <paramref name="pattern"/>. Any other value makes the call a no-op.
        /// </param>
        /// <param name="line">Verse text to search.</param>
        /// <param name="pattern">Search terms.</param>
        /// <param name="translation">Translation name written into the result entry.</param>
        /// <param name="bookName">Book name written into the result entry.</param>
        /// <param name="bookNumber">Book number, used to build the result key.</param>
        /// <param name="chapterNumber">Chapter number, used in the key and the entry.</param>
        /// <param name="verseNumber">Verse number, used in the key and the entry.</param>
        /// <param name="small">Lower-case <paramref name="line"/> before searching.</param>
        /// <param name="notDiacritics">Strip diacritics from <paramref name="line"/> before searching.</param>
        public static void callSearchingAlgorithm(string algorithm, string line, List <string> pattern,
                                                  string translation, string bookName, int bookNumber,
                                                  int chapterNumber, int verseNumber,
                                                  bool small, bool notDiacritics)
        {
            // The stored entry shows the untouched verse; matching runs on the normalised copy.
            string lineOrigin = line;

            if (small)
            {
                line = TransformImput.ToLower(line);
            }
            if (notDiacritics)
            {
                line = TransformImput.RemoveDiacritics(line);
            }

            if (algorithm == "KMP")
            {
                // Nothing to look for: avoid indexing into an empty list.
                if (pattern.Count == 0)
                {
                    return;
                }

                string     patt    = pattern[0];
                List <int> results = KMP.SearchKMP(line, patt);
                if (results.Count > 0)
                {
                    recordMatch(results, patt.Length.ToString(), translation, bookName,
                                bookNumber, chapterNumber, verseNumber, lineOrigin);
                }
            }
            else if (algorithm == "AhoCorasick-MP")
            {
                AhoCorasick ahoAlg = new AhoCorasick();

                for (int i = 0; i < pattern.Count; i++)
                {
                    ahoAlg.addString(pattern[i], i);
                }

                ahoAlg.BuildAC();

                List <int> matches = ahoAlg.ProcessLine(line);
                if (matches.Count > 0)
                {
                    // There is no single pattern length in this mode, so the length field is a blank.
                    recordMatch(matches, " ", translation, bookName,
                                bookNumber, chapterNumber, verseNumber, lineOrigin);
                }
            }
        }

        /// <summary>
        /// Builds the entry "positions/lengthField/translation | book chapter:verse | text"
        /// and stores it under the key derived from book, chapter and verse.
        /// </summary>
        private static void recordMatch(List <int> positions, string lengthField,
                                        string translation, string bookName, int bookNumber,
                                        int chapterNumber, int verseNumber, string lineOrigin)
        {
            int    keyDictionary = createKeyNumber(bookNumber, chapterNumber, verseNumber);
            string oneLine       = indexToString(positions) + "/" + lengthField + "/" +
                                   translation + " | " + bookName + " " + chapterNumber + ":" + verseNumber + " | " + lineOrigin;
            addToDictionary(keyDictionary, oneLine);
        }
コード例 #3
0
ファイル: XMLParser.cs プロジェクト: Tricerator/BibleProject
        /// <summary>
        /// Reads the Bible XML file at <paramref name="path"/> and runs the currently
        /// selected search algorithm on every text node that follows a CHAPTER element.
        /// </summary>
        /// <param name="input">
        /// Search patterns. They are lower-cased and/or stripped of diacritics first,
        /// depending on the <c>toSmall</c> and <c>withoutDiactritics</c> settings; the
        /// caller's list itself is not modified.
        /// </param>
        /// <param name="path">Location of the XML file, passed to <c>XmlReader.Create</c>.</param>
        public static void parseXML(List <string> input, string path)
        {
            if (toSmall)
            {
                input = input.ConvertAll(s => TransformImput.ToLower(s));
            }
            if (withoutDiactritics)
            {
                input = input.ConvertAll(s => TransformImput.RemoveDiacritics(s));
            }

            string translation = "";
            string bookName    = "";

            int bookNumber    = 0;
            int chapterNumber = 0;
            int verseNumber   = 0;

            // Dispose the reader so the underlying file is released even if parsing throws.
            using (XmlReader myReader = XmlReader.Create(path))
            {
                while (myReader.Read())
                {
                    if (myReader.NodeType == XmlNodeType.Element)
                    {
                        // Remember where we are; the values are attached to the text nodes that follow.
                        switch (myReader.Name)
                        {
                        case "XMLBIBLE":
                            translation = myReader.GetAttribute("biblename");
                            break;

                        case "BIBLEBOOK":
                            bookNumber = int.Parse(myReader.GetAttribute("bnumber"));
                            bookName   = myReader.GetAttribute("bsname");
                            break;

                        case "CHAPTER":
                            chapterNumber = int.Parse(myReader.GetAttribute("cnumber"));
                            break;

                        case "VERS":
                            verseNumber = int.Parse(myReader.GetAttribute("vnumber"));
                            break;

                        default:
                            continue;
                        }
                    }
                    else if ((myReader.NodeType == XmlNodeType.Text) && (chapterNumber > 0))
                    {
                        // Text before the first CHAPTER element is skipped (chapterNumber is still 0).
                        string line = myReader.ReadContentAsString();

                        callSearchingAlgorithm(algorithm, line, input, translation, bookName, bookNumber, chapterNumber, verseNumber, toSmall, withoutDiactritics);
                    }
                }
            }
        }