/// <summary>
/// Normalises the searched text according to the selected options, splits it
/// into the pattern list and tells <c>XMLParser</c> which algorithm to use.
/// </summary>
/// <remarks>
/// Search modes are checked in this order and only the first enabled one applies:
/// flexible word position ("AhoCorasick-All"), exact match ("KMP"),
/// multiple patterns ("AhoCorasick-MP"). When none is enabled, the pattern list
/// and the algorithm are left untouched.
/// </remarks>
public void parseData()
{
    // StringBuilder.Append tolerates a null value and yields an empty text.
    string text = new StringBuilder().Append(SearchedPattern).ToString();

    if (WithoutDiacritics)
    {
        string stripped = TransformImput.RemoveDiacritics(text);
        text = stripped ?? string.Empty;
    }
    // Always forward the current option value: the parser flags appear to be
    // static state, so only ever setting them to true would let a previous
    // search's choice leak into the next one.
    XMLParser.setWithoutDiacritics(WithoutDiacritics);

    if (SmallLetters)
    {
        string lowered = TransformImput.ToLower(text);
        text = lowered ?? string.Empty;
    }
    XMLParser.setToSmall(SmallLetters);

    if (FlexibleWordsPosition)
    {
        // Commas and full stops count as word separators, just like spaces.
        string spaced = text.Replace(',', ' ').Replace('.', ' ');
        patterns = spaced.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries).ToList();
        XMLParser.setAlgorithm("AhoCorasick-All");
    }
    else if (ExactMatch)
    {
        // Replace the list instead of appending to it, as the other modes do;
        // otherwise an entry left over from an earlier call stays first in the list.
        patterns = new List<string> { text };
        XMLParser.setAlgorithm("KMP");
    }
    else if (MultiplePatterns)
    {
        XMLParser.setAlgorithm("AhoCorasick-MP");
        // One pattern per line of the searched text.
        patterns = text.Split(new[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries).ToList();
    }
}
/// <summary>
/// Searches a single line of text for the given patterns with the selected
/// algorithm and, when something is found, stores a formatted result line.
/// </summary>
/// <param name="algorithm">"KMP" or "AhoCorasick-MP"; any other value is ignored here.</param>
/// <param name="line">Text to search. The unmodified text is what ends up in the result line.</param>
/// <param name="pattern">Patterns to look for. KMP uses only the first entry.</param>
/// <param name="translation">Translation name, copied into the result line.</param>
/// <param name="bookName">Book name, copied into the result line.</param>
/// <param name="bookNumber">Book number, used for the dictionary key.</param>
/// <param name="chapterNumber">Chapter number, used for the key and the result line.</param>
/// <param name="verseNumber">Verse number, used for the key and the result line.</param>
/// <param name="small">When true, the line is passed through <c>TransformImput.ToLower</c> before searching.</param>
/// <param name="notDiacritics">When true, the line is passed through <c>TransformImput.RemoveDiacritics</c> before searching.</param>
public static void callSearchingAlgorithm(string algorithm, string line, List<string> pattern, string translation, string bookName, int bookNumber, int chapterNumber, int verseNumber, bool small, bool notDiacritics)
{
    // Nothing to search for: without this guard the KMP branch would throw on pattern[0].
    if (pattern == null || pattern.Count == 0)
    {
        return;
    }

    // Search on a normalised copy; the original line is kept for the output.
    string searchText = line;
    if (small)
    {
        searchText = TransformImput.ToLower(searchText);
    }
    if (notDiacritics)
    {
        searchText = TransformImput.RemoveDiacritics(searchText);
    }

    if (algorithm == "KMP")
    {
        string patt = pattern[0];
        List<int> hits = KMP.SearchKMP(searchText, patt);
        if (hits.Count > 0)
        {
            storeMatch(hits, patt.Length.ToString(), translation, bookName, bookNumber, chapterNumber, verseNumber, line);
        }
    }
    else if (algorithm == "AhoCorasick-MP")
    {
        AhoCorasick automaton = new AhoCorasick();
        for (int i = 0; i < pattern.Count; i++)
        {
            automaton.addString(pattern[i], i);
        }
        automaton.BuildAC();

        List<int> hits = automaton.ProcessLine(searchText);
        if (hits.Count > 0)
        {
            // Several patterns have no single length, so that field is a lone space.
            storeMatch(hits, " ", translation, bookName, bookNumber, chapterNumber, verseNumber, line);
        }
    }
}

/// <summary>
/// Builds the result line "positions/length/translation | book chapter:verse | text"
/// and stores it under the key derived from book, chapter and verse.
/// </summary>
private static void storeMatch(List<int> positions, string lengthField, string translation, string bookName, int bookNumber, int chapterNumber, int verseNumber, string originalLine)
{
    int keyDictionary = createKeyNumber(bookNumber, chapterNumber, verseNumber);
    string oneLine = indexToString(positions) + "/" + lengthField + "/" + translation + " | " + bookName + " " + chapterNumber + ":" + verseNumber + " | " + originalLine;
    addToDictionary(keyDictionary, oneLine);
}
/// <summary>
/// Streams the XML file at <paramref name="path"/> and runs the configured
/// search algorithm on every text node found after a CHAPTER element was seen.
/// </summary>
/// <param name="input">
/// Patterns to search for. They are normalised with the same lower-case and
/// diacritics settings that are applied to the searched text.
/// </param>
/// <param name="path">Location of the XML document, passed to <c>XmlReader.Create</c>.</param>
public static void parseXML(List<string> input, string path)
{
    // Work on normalised copies; the caller's list is never modified.
    if (toSmall)
    {
        input = input.ConvertAll(s => TransformImput.ToLower(s));
    }
    if (withoutDiactritics)
    {
        input = input.ConvertAll(s => TransformImput.RemoveDiacritics(s));
    }

    string translation = "";
    string bookName = "";
    int bookNumber = 0;
    int chapterNumber = 0;
    int verseNumber = 0;

    // Dispose the reader so the underlying file is released even when
    // parsing or searching throws.
    using (XmlReader reader = XmlReader.Create(path))
    {
        while (reader.Read())
        {
            if (reader.NodeType == XmlNodeType.Element)
            {
                // Remember where we are in the document; other elements are ignored.
                switch (reader.Name)
                {
                    case "XMLBIBLE":
                        translation = reader.GetAttribute("biblename");
                        break;
                    case "BIBLEBOOK":
                        bookNumber = int.Parse(reader.GetAttribute("bnumber"));
                        bookName = reader.GetAttribute("bsname");
                        break;
                    case "CHAPTER":
                        chapterNumber = int.Parse(reader.GetAttribute("cnumber"));
                        break;
                    case "VERS":
                        verseNumber = int.Parse(reader.GetAttribute("vnumber"));
                        break;
                }
            }
            else if (reader.NodeType == XmlNodeType.Text && chapterNumber > 0)
            {
                // Text that appears before the first chapter is skipped.
                // Note: ReadContentAsString also moves the reader past the text it returns.
                string line = reader.ReadContentAsString();
                callSearchingAlgorithm(algorithm, line, input, translation, bookName, bookNumber, chapterNumber, verseNumber, toSmall, withoutDiactritics);
            }
        }
    }
}