/// <summary>
/// Splits <paramref name="word"/> into consecutive runs of kanji and runs of kana.
/// </summary>
/// <param name="word">The text to split.</param>
/// <returns>
/// One <see cref="WordPart"/> per run, in order of appearance. Characters that are
/// neither kanji nor kana do not appear in the result.
/// </returns>
private static List<WordPart> partition(string word)
{
    var parts = new List<WordPart>();
    var pos = 0;

    while (pos < word.Length)
    {
        var runStart = pos;

        if (isKanji(word[pos]))
        {
            // Advance to the first character that is no longer kanji.
            while (pos < word.Length && isKanji(word[pos]))
            {
                pos++;
            }
            parts.Add(WordPart.Kanji(word.Substring(runStart, pos - runStart)));
        }
        else if (isKana(word[pos]))
        {
            // Advance to the first character that is no longer kana.
            while (pos < word.Length && isKana(word[pos]))
            {
                pos++;
            }
            parts.Add(WordPart.Kana(word.Substring(runStart, pos - runStart)));
        }
        else
        {
            // skip silently (may not be the best course of action)
            pos++;
        }
    }

    return parts;
}
/// <summary>
/// Console entry point: waits for console input, imports the corpus in batches of
/// 100 word parts until the import service returns null or throws, then requests
/// verse (1, 1) from the search service and waits for console input again.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    Console.Write("press any key to start");
    Console.Read();

    // The word part returned by one batch is the starting point of the next one.
    var cursor = new WordPart();
    do
    {
        // A new provider and service are resolved for every batch.
        var provider = GetServiceProvider();
        var importer = provider.GetService<IQuranService>();

        try
        {
            cursor = importer.LoadQuranPartsFromFile(100, cursor);
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
            Console.WriteLine(ex.StackTrace);
            break;
        }
    }
    while (cursor != null);

    var searcher = GetServiceProvider().GetService<IQuranSearchService>();
    var result = searcher.GetVerse(1, 1);

    Console.Read();
}
/// <summary>
/// Attaches a prefix usage or a root usage to <paramref name="wordPart"/>, depending on
/// the first field of <paramref name="features"/>, and then saves the pending changes.
/// </summary>
/// <param name="wordPart">The word part that receives the usage link.</param>
/// <param name="features">
/// Pipe-separated feature string. The first field selects the handling
/// ("PREFIX" or "STEM"); any other value only triggers the save.
/// </param>
private void AddFeatures(WordPart wordPart, string features)
{
    var fields = features.Split('|');
    var kind = fields[0];

    if (kind == "PREFIX")
    {
        // The second field names the prefix; an existing usage is never replaced.
        var prefix = EnsurePrefixExists(fields[1]);
        wordPart.PrefixUsage = wordPart.PrefixUsage ?? new PrefixUsage
        {
            Prefix = prefix,
            WordPart = wordPart
        };
    }
    else if (kind == "STEM")
    {
        var featureMap = GetFeatureDictionary(fields);

        // Handling of the "LEM" entry (UnmodifiedWordPartUsage) is currently disabled.

        if (featureMap.ContainsKey("ROOT"))
        {
            var root = EnsureRootExists(featureMap["ROOT"]);
            wordPart.RootUsage = wordPart.RootUsage ?? new RootUsage
            {
                Root = root,
                WordPart = wordPart
            };
        }
    }

    StorageService.SaveChanges();
}
/// <summary>
/// Imports word parts from the morphology corpus file, stopping as soon as
/// <paramref name="maxWordParts"/> new word parts have been produced.
/// </summary>
/// <param name="maxWordParts">Number of new word parts after which the call returns.</param>
/// <param name="startingPoint">
/// Forwarded to <c>ProcessWordPart</c> for every line; may be null.
/// </param>
/// <returns>
/// The last word part produced by this call, or null when no line produced one.
/// </returns>
public WordPart LoadQuranPartsFromFile(int maxWordParts, WordPart startingPoint)
{
    // NOTE(review): machine-specific absolute path; consider moving it to configuration.
    var fileName = @"C:\Users\lamaa\Documents\AAAWork\2015laptopdownloads\quranic-corpus-morphology-0.4 (1)\quranic-corpus-morphology-0.4.txt";

    var count = 0;
    WordPart lastWordPart = null;

    // File.ReadLines streams the file line by line, so returning early after
    // maxWordParts parts no longer requires loading the whole corpus into memory
    // on every call. foreach disposes the underlying reader on every exit path.
    foreach (var line in File.ReadLines(fileName))
    {
        var wordPart = ProcessWordPart(line, startingPoint);
        if (wordPart == null)
        {
            continue;
        }

        lastWordPart = wordPart;
        count++;
        if (count >= maxWordParts)
        {
            return wordPart;
        }
    }

    return lastWordPart;
}
/// <summary>
/// Parses one corpus line and stores the word part it describes if it is not stored yet.
/// </summary>
/// <param name="line">
/// A tab-separated corpus line whose first column is a parenthesised
/// "surah:verse:word:part" key, followed by form, tag and features columns.
/// </param>
/// <param name="startingPoint">
/// When not null, lines whose key sorts strictly before this word part are ignored.
/// </param>
/// <returns>
/// The newly stored word part, or null when the line is empty, does not start with '(',
/// lies before <paramref name="startingPoint"/>, or describes a part that already exists.
/// </returns>
private WordPart ProcessWordPart(string line, WordPart startingPoint)
{
    // Only lines that begin with '(' are processed.
    if (string.IsNullOrEmpty(line) || line[0] != '(')
    {
        return null;
    }

    var columns = line.Split('\t');
    var location = columns[0].Trim(')', '(');
    Console.WriteLine(location);

    var locationFields = location.Split(':');
    var surahNumber = int.Parse(locationFields[0]);
    var verseNumber = int.Parse(locationFields[1]);
    var wordNumber = int.Parse(locationFields[2]);
    var wordPartNumber = int.Parse(locationFields[3]);

    if (startingPoint != null)
    {
        // Lexicographic comparison on (surah, verse, word, part): true when the
        // starting point is strictly after the current line.
        var beforeStartingPoint =
            startingPoint.SurahNumber > surahNumber
            || (startingPoint.SurahNumber == surahNumber
                && (startingPoint.VerseNumber > verseNumber
                    || (startingPoint.VerseNumber == verseNumber
                        && (startingPoint.WordNumber > wordNumber
                            || (startingPoint.WordNumber == wordNumber
                                && startingPoint.WordPartNumber > wordPartNumber)))));
        if (beforeStartingPoint)
        {
            return null;
        }
    }

    // Called for their effect only; the returned values are not needed here.
    EnsureSurahExists(surahNumber);
    EnsureVerseExists(verseNumber, surahNumber);
    EnsureWordExists(wordNumber, verseNumber, surahNumber);

    var form = columns[1];
    var tag = columns[2];
    var features = columns[3];
    var position = features.Split('|')[0];

    var wordPartForm = EnsureWordPartFormExists(form);
    var wordPartType = EnsureWordPartTypeExists(tag);
    var wordPartPositionType = EnsureWordPartPositionTypeExists(position);

    var wordParts = StorageService.SetOf<WordPart>();
    var existing = wordParts.SingleOrDefault(w =>
        w.SurahNumber == surahNumber &&
        w.VerseNumber == verseNumber &&
        w.WordNumber == wordNumber &&
        w.WordPartNumber == wordPartNumber);
    if (existing != null)
    {
        // Already stored: report "nothing new".
        return null;
    }

    var created = new WordPart
    {
        SurahNumber = surahNumber,
        VerseNumber = verseNumber,
        WordNumber = wordNumber,
        WordPartNumber = wordPartNumber,
        Text = wordPartForm.Text,
        WordPartTypeCode = wordPartType.Code,
        WordPartPositionTypeCode = wordPartPositionType.Code
    };

    // Order matters: save the part, attach its features, then detach everything.
    wordParts.Add(created);
    StorageService.SaveChanges();
    AddFeatures(created, features);
    StorageService.DetachAllEntities();

    return created;
}
// corpus word parts from http://corpus.quran.com version 0.4 - modified by Ali Adams
/// <summary>
/// Reads "word-parts.txt" from <c>Globals.DATA_FOLDER</c> and creates a <c>WordPart</c>
/// on the matching word of <paramref name="book"/> for every data line.
/// Does nothing when <paramref name="book"/> is null or the file does not exist.
/// </summary>
/// <param name="book">The book whose chapters, verses and words receive the parts.</param>
/// <exception cref="Exception">
/// Any failure is rethrown as an <see cref="Exception"/> whose message starts with
/// "LoadWordParts: "; the original exception is available as <c>InnerException</c>.
/// </exception>
public static void LoadWordParts(Book book)
{
    if (book == null)
    {
        return;
    }

    try
    {
        string filename = Globals.DATA_FOLDER + "/" + "word-parts.txt";
        if (!File.Exists(filename))
        {
            return;
        }

        using (StreamReader reader = File.OpenText(filename))
        {
            // Added to the word number of later lines with the same verse number;
            // incremented each time the resolved word's Text is "و".
            int waw_count = 0;
            int previous_verse_number = 0;

            while (!reader.EndOfStream)
            {
                string line = reader.ReadLine();
                if ((line.Length == 0) || line.StartsWith("#") || line.StartsWith("LOCATION") || line.StartsWith("ADDRESS"))
                {
                    continue; // skip header info
                }

                string[] parts = line.Split('\t');
                if (parts.Length < 4)
                {
                    // Lines with fewer than four columns are skipped without an error.
                    continue;
                }

                string address = parts[0];
                if (address.StartsWith("(") && address.EndsWith(")"))
                {
                    address = parts[0].Substring(1, parts[0].Length - 2);
                }

                string[] address_parts = address.Split(':');
                if (address_parts.Length != 4)
                {
                    // NOTE(review): the wording of this message and of "Invalid Location
                    // Format" below suggests the two throws may have been meant for the
                    // column-count check and this address check respectively - confirm.
                    // Behavior is kept as found.
                    throw new Exception("Invalid File Format.\r\n" + filename);
                }

                int chapter_number = int.Parse(address_parts[0]);
                int verse_number = int.Parse(address_parts[1]);
                if (previous_verse_number != verse_number)
                {
                    waw_count = 0;
                    previous_verse_number = verse_number;
                }
                int word_number = int.Parse(address_parts[2]) + waw_count;
                int word_part_number = int.Parse(address_parts[3]);
                string buckwalter = parts[1];
                string tag = parts[2];

                if (book.Chapters == null)
                {
                    continue;
                }

                Chapter chapter = book.Chapters[chapter_number - 1];
                if (chapter == null)
                {
                    throw new Exception("Invalid Location Format.\r\n" + filename);
                }

                Verse verse = chapter.Verses[verse_number - 1];
                if (verse == null)
                {
                    continue;
                }

                if (book.WithBismAllah)
                {
                    // add bismAllah manually to each chapter except 1 and 9
                    if (
                        ((chapter_number != 1) && (chapter_number != 9))
                        &&
                        ((verse_number == 1) && (word_number == 1) && (word_part_number == 1))
                       )
                    {
                        Verse bismAllah_verse = book.Verses[0];

                        // if there is no bismAllah, add one
                        if (parts[1] != bismAllah_verse.Words[0].Parts[0].Buckwalter)
                        {
                            // insert 4 new words
                            // NOTE(review): new List<Word>(4) is an EMPTY list with capacity 4,
                            // so this InsertRange adds no words and the parts below are created
                            // on the verse's existing first four words. Verify the intent.
                            verse.Words.InsertRange(0, new List<Word>(4));

                            //(1:1:1:1)	bi	PP	PREFIX|bi+
                            WordPart word_part = new WordPart(verse.Words[0],
                                  bismAllah_verse.Words[0].Parts[0].NumberInWord,
                                  bismAllah_verse.Words[0].Parts[0].Buckwalter,
                                  bismAllah_verse.Words[0].Parts[0].Tag,
                                  new WordPartGrammar(bismAllah_verse.Words[0].Parts[0].Grammar)
                            );
                            if ((chapter_number == 95) || (chapter_number == 97))
                            {
                                // add shadda  { '~', 'ّ' } on B or bism
                                word_part.Buckwalter = word_part.Buckwalter.Insert(1, "~");
                            }

                            // The remaining parts are created only for the constructor's effect;
                            // presumably it registers the part with its word - confirm.

                            //(1:1:1:2)	somi	N	STEM|POS:N|LEM:{som|ROOT:smw|M|GEN
                            new WordPart(verse.Words[0],
                                  bismAllah_verse.Words[0].Parts[1].NumberInWord,
                                  bismAllah_verse.Words[0].Parts[1].Buckwalter,
                                  bismAllah_verse.Words[0].Parts[1].Tag,
                                  new WordPartGrammar(bismAllah_verse.Words[0].Parts[1].Grammar)
                            );

                            //(1:1:2:1)	{ll~ahi	PN	STEM|POS:PN|LEM:{ll~ah|ROOT:Alh|GEN
                            new WordPart(verse.Words[1],
                                  bismAllah_verse.Words[1].Parts[0].NumberInWord,
                                  bismAllah_verse.Words[1].Parts[0].Buckwalter,
                                  bismAllah_verse.Words[1].Parts[0].Tag,
                                  new WordPartGrammar(bismAllah_verse.Words[1].Parts[0].Grammar)
                            );

                            //(1:1:3:1)	{l	DET	PREFIX|Al+
                            new WordPart(verse.Words[2],
                                  bismAllah_verse.Words[2].Parts[0].NumberInWord,
                                  bismAllah_verse.Words[2].Parts[0].Buckwalter,
                                  bismAllah_verse.Words[2].Parts[0].Tag,
                                  new WordPartGrammar(bismAllah_verse.Words[2].Parts[0].Grammar)
                            );

                            //(1:1:3:2)	r~aHoma`ni	ADJ	STEM|POS:ADJ|LEM:r~aHoma`n|ROOT:rHm|MS|GEN
                            new WordPart(verse.Words[2],
                                  bismAllah_verse.Words[2].Parts[1].NumberInWord,
                                  bismAllah_verse.Words[2].Parts[1].Buckwalter,
                                  bismAllah_verse.Words[2].Parts[1].Tag,
                                  new WordPartGrammar(bismAllah_verse.Words[2].Parts[1].Grammar)
                            );

                            //(1:1:4:1)	{l	DET	PREFIX|Al+
                            new WordPart(verse.Words[3],
                                  bismAllah_verse.Words[3].Parts[0].NumberInWord,
                                  bismAllah_verse.Words[3].Parts[0].Buckwalter,
                                  bismAllah_verse.Words[3].Parts[0].Tag,
                                  new WordPartGrammar(bismAllah_verse.Words[3].Parts[0].Grammar)
                            );

                            //(1:1:4:2)	r~aHiymi	ADJ	STEM|POS:ADJ|LEM:r~aHiym|ROOT:rHm|MS|GEN
                            new WordPart(verse.Words[3],
                                  bismAllah_verse.Words[3].Parts[1].NumberInWord,
                                  bismAllah_verse.Words[3].Parts[1].Buckwalter,
                                  bismAllah_verse.Words[3].Parts[1].Tag,
                                  new WordPartGrammar(bismAllah_verse.Words[3].Parts[1].Grammar)
                            );
                        }
                    }

                    // correct word_number (if needed) for all subsequent chapter word_parts
                    if (
                        ((chapter_number != 1) && (chapter_number != 9))
                        &&
                        (verse_number == 1)
                       )
                    {
                        word_number += 4;
                    }
                }

                Word word = verse.Words[word_number - 1];
                if (word == null)
                {
                    continue;
                }

                List<string> grammar = new List<string>(parts[3].Split('|'));
                if (grammar.Count > 0)
                {
                    //(1:5:3:1)	wa	CONJ	PREFIX|w_CONJ+
                    //(1:5:3:2)	<iy~aAka	PRON	STEM|POS:PRON|LEM:<iy~aA|2MS
                    if (word.Text == "و")
                    {
                        waw_count++;
                    }

                    new WordPart(word, word_part_number, buckwalter, tag, grammar);
                }
                else
                {
                    throw new Exception("Grammar field is missing.\r\n" + filename);
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Keep the original exception as InnerException so its type and stack trace
        // are not lost; the outer type and message are unchanged for existing callers.
        throw new Exception("LoadWordParts: " + ex.Message, ex);
    }
}
/// <summary>
/// Console entry point. Reads the input text file, reads from the meaning database,
/// tags the text with the English NLP pipeline, prints every token as "(value|tag)",
/// and finally writes an empty generated C# class file.
/// Returns without doing anything further when the input file does not exist.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    #region READ INPUT FILE
    Console.WriteLine($"Reading the file {InputFileName}");
    var inputPath = BasePathInputFile + @"\" + InputFileName;
    if (!File.Exists(inputPath))
    {
        return;
    }

    var inputFileContent = File.ReadAllText(inputPath);
    Console.WriteLine("This is the file content: " + inputFileContent);
    #endregion

    #region ACCESS MEANING DATABASE
    // The connection is disposed as soon as the read is finished instead of being
    // left open until the process ends.
    using (SQLiteConnection sqlite_conn = CreateConnection())
    {
        //CreateTable(sqlite_conn);
        //InsertData(sqlite_conn);
        ReadData(sqlite_conn);
    }

    /* Perform small test with NoSQL Database */
    NoSQLTest();
    #endregion

    #region PARSE INPUT AND BUILD MEANING TREE
    Storage.Current = new OnlineRepositoryStorage(new DiskStorage("catalyst-models"));
    var nlp = Pipeline.For(Language.English);
    var doc = new Document(inputFileContent, Language.English);
    nlp.ProcessSingle(doc);

    // For every non-empty sentence two lists are appended, in this order:
    // its words (WordPart) and its commands (SingleCommandPart).
    var CompleteStructure = new List<object>();

    /* Go through the whole text */
    foreach (var singleSenctence in doc.TokensData)
    {
        if ((singleSenctence is null) || (singleSenctence.Count == 0))
        {
            continue;
        }

        var newWordList = new List<object>();
        var newCommandList = new List<object>();
        var currentCommand = new SingleCommandPart();

        /* Go through each sentence */
        foreach (var singleWord in singleSenctence)
        {
            var content = new WordPart();
            // The "+ 1" makes the substring include the character at UpperBound.
            content.Value = inputFileContent.Substring(singleWord.LowerBound, singleWord.UpperBound - singleWord.LowerBound + 1);
            content.PartOfSpeech = singleWord.Tag.ToString();
            newWordList.Add(content);

            switch (singleWord.Tag)
            {
                case PartOfSpeech.VERB:
                    /* This is the verb of the command. AXIOM: There can always only be one VERB per command */
                    currentCommand.Verb = content.Value;
                    break;

                case PartOfSpeech.NOUN:
                    /* This is a noun of the command. */
                    currentCommand.Noun.Add(content.Value);
                    break;

                /* Adposition */
                case PartOfSpeech.ADP:
                    /* This marks an object -> Find the full size of the object */
                    switch (content.Value)
                    {
                        case "of":
                            break;
                        case "on":
                            break;
                    }
                    break;

                case PartOfSpeech.ADV:
                    switch (content.Value)
                    {
                        case "then":
                            /* This means the first part of the sentence is finished. */
                            newCommandList.Add(currentCommand);
                            currentCommand = new SingleCommandPart();
                            break;
                    }
                    break;
            }
        }

        // NOTE(review): the command still being built when the sentence ends is never
        // added to newCommandList, so only commands followed by "then" are kept.
        // Looks unintended - confirm before changing.
        CompleteStructure.Add(newWordList);
        CompleteStructure.Add(newCommandList);
    }

    Console.WriteLine("Result:");
    foreach (var singleSentence in CompleteStructure)
    {
        if (singleSentence is List<object> aList)
        {
            foreach (var SingleWord in aList)
            {
                // Only WordPart entries are printed; command entries produce no text.
                if (SingleWord is WordPart singleCommand)
                {
                    Console.Write($"({singleCommand.Value}|{singleCommand.PartOfSpeech})");
                }
            }
        }

        Console.WriteLine("");
    }
    #endregion

    #region GENERATE CODE FILE
    Console.WriteLine($"Creating file {GenFileName}.cs");

    /* Make sure the directory exists */
    Directory.CreateDirectory(BasePathGenFile);

    // Create a file to write to; the using block flushes and closes it.
    using (var genFile = File.CreateText($@"{BasePathGenFile}\{GenFileName}.cs"))
    {
        genFile.WriteLine("using System;");
        genFile.WriteLine("using System.Collections.Generic;");
        genFile.WriteLine("using System.Text;");
        genFile.WriteLine("");
        genFile.WriteLine("namespace GeneratedProject.TmpFolder");
        genFile.WriteLine("{");
        genFile.WriteLine(" class GenOneClass");
        genFile.WriteLine(" {");
        genFile.WriteLine(" }");
        genFile.WriteLine("}");
    }
    #endregion

    Console.WriteLine("Finished");
}