Beispiel #1
0
        /// <summary>
        /// Splits <paramref name="word"/> into consecutive runs of kanji and kana characters.
        /// </summary>
        /// <param name="word">Text to split; dereferenced without a null check.</param>
        /// <returns>
        /// The runs in order of appearance, each wrapped via <c>WordPart.Kanji</c> or
        /// <c>WordPart.Kana</c>. Characters that are neither kanji nor kana are dropped.
        /// </returns>
        private static List <WordPart> partition(string word)
        {
            var parts  = new List <WordPart>();
            int cursor = 0;

            while (cursor < word.Length)
            {
                int runStart = cursor;

                if (isKanji(word[cursor]))
                {
                    // Advance to the first character that is no longer kanji.
                    while (cursor < word.Length && isKanji(word[cursor]))
                    {
                        cursor++;
                    }
                    parts.Add(WordPart.Kanji(word.Substring(runStart, cursor - runStart)));
                    continue;
                }

                if (isKana(word[cursor]))
                {
                    // Advance to the first character that is no longer kana.
                    while (cursor < word.Length && isKana(word[cursor]))
                    {
                        cursor++;
                    }
                    parts.Add(WordPart.Kana(word.Substring(runStart, cursor - runStart)));
                    continue;
                }

                // Neither kanji nor kana: skip silently (may not be the best course of action).
                cursor++;
            }

            return parts;
        }
        /// <summary>
        /// Console entry point: waits for input, imports word parts in batches of 100 until the
        /// service returns null or throws, then requests verse (1, 1) from the search service.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            Console.Write("press any key to start");
            Console.Read();

            // A fresh WordPart is the starting point handed to the first batch.
            WordPart batchCursor = new WordPart();

            do
            {
                // A new service provider is requested for every batch.
                var importer = GetServiceProvider().GetService <IQuranService>();

                try
                {
                    batchCursor = importer.LoadQuranPartsFromFile(100, batchCursor);
                }
                catch (Exception ex)
                {
                    // Report the failure and stop importing; the search below still runs.
                    Console.WriteLine(ex.Message);
                    Console.WriteLine(ex.StackTrace);
                    break;
                }
            }
            while (batchCursor != null);

            var verseLookup = GetServiceProvider().GetService <IQuranSearchService>();
            var firstVerse  = verseLookup.GetVerse(1, 1);

            Console.Read();
        }
Beispiel #3
0
        /// <summary>
        /// Attaches prefix or root usage information to <paramref name="wordPart"/> according to
        /// its pipe-separated feature string, then calls <c>StorageService.SaveChanges()</c>.
        /// </summary>
        /// <param name="wordPart">Word part that receives the usage object.</param>
        /// <param name="features">
        /// Pipe-separated feature string; the first segment ("PREFIX" or "STEM") selects the handling.
        /// Any other first segment leaves <paramref name="wordPart"/> untouched.
        /// </param>
        private void AddFeatures(WordPart wordPart, string features)
        {
            var segments = features.Split('|');
            var category = segments[0];

            if (category == "PREFIX")
            {
                // The second segment is handed to EnsurePrefixExists.
                var prefix = EnsurePrefixExists(segments[1]);

                // Keep an already assigned usage; otherwise create one linking prefix and word part.
                var prefixUsage = wordPart.PrefixUsage ?? new PrefixUsage
                {
                    Prefix   = prefix,
                    WordPart = wordPart
                };
                wordPart.PrefixUsage = prefixUsage;
            }
            else if (category == "STEM")
            {
                var featureMap = GetFeatureDictionary(segments);

                // Handling of the "LEM" feature (UnmodifiedWordPartUsage) is currently disabled.
                if (featureMap.ContainsKey("ROOT"))
                {
                    var root = EnsureRootExists(featureMap["ROOT"]);

                    // Keep an already assigned usage; otherwise create one linking root and word part.
                    var rootUsage = wordPart.RootUsage ?? new RootUsage
                    {
                        Root     = root,
                        WordPart = wordPart
                    };
                    wordPart.RootUsage = rootUsage;
                }
            }

            StorageService.SaveChanges();
        }
        /// <summary>
        /// Feeds the lines of the morphology file to <c>ProcessWordPart</c> until
        /// <paramref name="maxWordParts"/> lines have produced a word part or the file ends.
        /// </summary>
        /// <param name="maxWordParts">
        /// Number of non-null results from <c>ProcessWordPart</c> after which the method returns.
        /// </param>
        /// <param name="startingPoint">Passed unchanged to <c>ProcessWordPart</c> for every line.</param>
        /// <returns>
        /// The most recent non-null result of <c>ProcessWordPart</c>, or <c>null</c> when no line
        /// produced one.
        /// </returns>
        public WordPart LoadQuranPartsFromFile(int maxWordParts, WordPart startingPoint)
        {
            //StorageService.NoTracking();
            // NOTE(review): the input path is hard-coded to a developer machine.
            var fileName = @"C:\Users\lamaa\Documents\AAAWork\2015laptopdownloads\quranic-corpus-morphology-0.4 (1)\quranic-corpus-morphology-0.4.txt";
            //var fileName = @"C:\Users\Lamaan\Downloads\quranic-corpus-morphology-0.4 (1)\quranic-corpus-morphology-0.4.txt";
            var      count        = 0;
            WordPart lastWordPart = null;

            // File.ReadLines streams the file lazily, so returning early after maxWordParts
            // does not require reading the whole file into memory first.
            foreach (var line in File.ReadLines(fileName))
            {
                var wordPart = ProcessWordPart(line, startingPoint);
                if (wordPart == null)
                {
                    continue;
                }

                lastWordPart = wordPart;
                count++;
                if (count >= maxWordParts)
                {
                    return(wordPart);
                }
            }
            return(lastWordPart);
        }
        /// <summary>
        /// Parses one tab-separated morphology line of the form "(surah:verse:word:part)" and
        /// stores a new <c>WordPart</c> for it when none exists yet.
        /// </summary>
        /// <param name="line">Raw line from the file.</param>
        /// <param name="startingPoint">
        /// When not null, lines whose location lies strictly before this word part's location
        /// are skipped.
        /// </param>
        /// <returns>
        /// The newly stored word part; <c>null</c> when the line is empty, does not start with '(',
        /// lies before <paramref name="startingPoint"/>, or already has a stored word part.
        /// </returns>
        private WordPart ProcessWordPart(string line, WordPart startingPoint)
        {
            // Only non-empty lines that begin with '(' are processed.
            if (string.IsNullOrEmpty(line) || line[0] != '(')
            {
                return null;
            }

            var columns  = line.Split('\t');
            var location = columns[0].Trim(')', '(');

            Console.WriteLine(location);

            var locationParts  = location.Split(':');
            var surahNumber    = int.Parse(locationParts[0]);
            var verseNumber    = int.Parse(locationParts[1]);
            var wordNumber     = int.Parse(locationParts[2]);
            var wordPartNumber = int.Parse(locationParts[3]);

            if (startingPoint != null)
            {
                // Lexicographic comparison on (surah, verse, word, part): true when the
                // starting point lies strictly after this line's location.
                bool startsAfterThisLine =
                    startingPoint.SurahNumber > surahNumber ||
                    (startingPoint.SurahNumber == surahNumber &&
                     (startingPoint.VerseNumber > verseNumber ||
                      (startingPoint.VerseNumber == verseNumber &&
                       (startingPoint.WordNumber > wordNumber ||
                        (startingPoint.WordNumber == wordNumber &&
                         startingPoint.WordPartNumber > wordPartNumber)))));

                if (startsAfterThisLine)
                {
                    return null;
                }
            }

            // The results are not needed here; the calls are made for their effect.
            EnsureSurahExists(surahNumber);
            EnsureVerseExists(verseNumber, surahNumber);
            EnsureWordExists(wordNumber, verseNumber, surahNumber);

            var form     = columns[1];
            var tag      = columns[2];
            var features = columns[3];

            // The first feature segment is used as the position type.
            var position = features.Split('|')[0];

            var wordPartForm         = EnsureWordPartFormExists(form);
            var wordPartType         = EnsureWordPartTypeExists(tag);
            var wordPartPositionType = EnsureWordPartPositionTypeExists(position);

            var wordParts = StorageService.SetOf <WordPart>();
            var existing  = wordParts.SingleOrDefault(w => w.SurahNumber == surahNumber && w.VerseNumber == verseNumber && w.WordNumber == wordNumber && w.WordPartNumber == wordPartNumber);

            if (existing != null)
            {
                // Already stored: report "nothing new" to the caller.
                return null;
            }

            var created = new WordPart
            {
                SurahNumber              = surahNumber,
                VerseNumber              = verseNumber,
                WordNumber               = wordNumber,
                WordPartNumber           = wordPartNumber,
                Text                     = wordPartForm.Text,
                WordPartTypeCode         = wordPartType.Code,
                WordPartPositionTypeCode = wordPartPositionType.Code
            };

            wordParts.Add(created);
            StorageService.SaveChanges();
            AddFeatures(created, features);
            StorageService.DetachAllEntities();
            return created;
        }
Beispiel #6
0
    // corpus word parts from http://corpus.quran.com version 0.4 - modified by Ali Adams
    /// <summary>
    /// Reads "word-parts.txt" from <c>Globals.DATA_FOLDER</c> and creates a <c>WordPart</c> on the
    /// matching word of <paramref name="book"/> for every data row.
    /// </summary>
    /// <remarks>
    /// Each data row has at least four tab-separated fields: address "(chapter:verse:word:part)",
    /// Buckwalter text, tag, and a pipe-separated grammar list. Rows with fewer than four fields
    /// are ignored. Nothing happens when <paramref name="book"/> is null or the file is missing.
    /// </remarks>
    /// <param name="book">Book whose chapters, verses and words receive the word parts.</param>
    /// <exception cref="Exception">
    /// Any failure while reading or parsing, rethrown with the message prefixed by
    /// "LoadWordParts: " and the original exception attached as <c>InnerException</c>.
    /// </exception>
    public static void LoadWordParts(Book book)
    {
        if (book != null)
        {
            try
            {
                string filename = Globals.DATA_FOLDER + "/" + "word-parts.txt";
                if (File.Exists(filename))
                {
                    using (StreamReader reader = File.OpenText(filename))
                    {
                        // waw_count shifts the word numbers read from the file within the current
                        // verse; it grows by one for every processed row whose target word text
                        // is "و" and is reset whenever the verse number changes.
                        int waw_count             = 0;
                        int previous_verse_number = 0;
                        while (!reader.EndOfStream)
                        {
                            string line = reader.ReadLine();
                            if ((line.Length == 0) || line.StartsWith("#") || line.StartsWith("LOCATION") || line.StartsWith("ADDRESS"))
                            {
                                continue; // skip header info
                            }
                            else
                            {
                                string[] parts = line.Split('\t');
                                if (parts.Length >= 4)
                                {
                                    // Strip the surrounding parentheses from the address, if present.
                                    string address = parts[0];
                                    if (address.StartsWith("(") && address.EndsWith(")"))
                                    {
                                        address = parts[0].Substring(1, parts[0].Length - 2);
                                    }
                                    string[] address_parts = address.Split(':');
                                    if (address_parts.Length == 4)
                                    {
                                        int chapter_number = int.Parse(address_parts[0]);
                                        int verse_number   = int.Parse(address_parts[1]);
                                        if (previous_verse_number != verse_number)
                                        {
                                            waw_count             = 0;
                                            previous_verse_number = verse_number;
                                        }
                                        int word_number      = int.Parse(address_parts[2]) + waw_count;
                                        int word_part_number = int.Parse(address_parts[3]);

                                        string buckwalter = parts[1];
                                        string tag        = parts[2];

                                        if (book.Chapters != null)
                                        {
                                            // Numbers in the file are 1-based; the collections are 0-based.
                                            Chapter chapter = book.Chapters[chapter_number - 1];
                                            if (chapter != null)
                                            {
                                                Verse verse = chapter.Verses[verse_number - 1];
                                                if (verse != null)
                                                {
                                                    if (book.WithBismAllah)
                                                    {
                                                        // add bismAllah manually to each chapter except 1 and 9
                                                        if (
                                                            ((chapter_number != 1) && (chapter_number != 9))
                                                            &&
                                                            ((verse_number == 1) && (word_number == 1) && (word_part_number == 1))
                                                            )
                                                        {
                                                            // The first verse of the book serves as the template
                                                            // for the bismAllah word parts.
                                                            Verse bismAllah_verse = book.Verses[0];

                                                            // if there is no bismAllah, add one
                                                            if (parts[1] != bismAllah_verse.Words[0].Parts[0].Buckwalter)
                                                            {
                                                                // insert 4 new words
                                                                // NOTE(review): new List<Word>(4) is an EMPTY list with capacity 4,
                                                                // so if Words is a List<Word> this call inserts nothing and the
                                                                // parts below attach to the verse's existing first four words.
                                                                // Confirm this is intended before changing it.
                                                                verse.Words.InsertRange(0, new List <Word>(4));

                                                                //(1:1:1:1)	bi	PP	PREFIX|bi+
                                                                WordPart word_part = new WordPart(verse.Words[0],
                                                                                                  bismAllah_verse.Words[0].Parts[0].NumberInWord,
                                                                                                  bismAllah_verse.Words[0].Parts[0].Buckwalter,
                                                                                                  bismAllah_verse.Words[0].Parts[0].Tag,
                                                                                                  new WordPartGrammar(bismAllah_verse.Words[0].Parts[0].Grammar)
                                                                                                  );
                                                                if ((chapter_number == 95) || (chapter_number == 97))
                                                                {
                                                                    // add shadda  { '~', 'ّ' } on B or bism
                                                                    word_part.Buckwalter = word_part.Buckwalter.Insert(1, "~");
                                                                }

                                                                // The remaining parts are created only for the constructor's effect;
                                                                // presumably it registers the part with the given word — TODO confirm.

                                                                //(1:1:1:2)	somi	N	STEM|POS:N|LEM:{som|ROOT:smw|M|GEN
                                                                new WordPart(verse.Words[0],
                                                                             bismAllah_verse.Words[0].Parts[1].NumberInWord,
                                                                             bismAllah_verse.Words[0].Parts[1].Buckwalter,
                                                                             bismAllah_verse.Words[0].Parts[1].Tag,
                                                                             new WordPartGrammar(bismAllah_verse.Words[0].Parts[1].Grammar)
                                                                             );

                                                                //(1:1:2:1)	{ll~ahi	PN	STEM|POS:PN|LEM:{ll~ah|ROOT:Alh|GEN
                                                                new WordPart(verse.Words[1],
                                                                             bismAllah_verse.Words[1].Parts[0].NumberInWord,
                                                                             bismAllah_verse.Words[1].Parts[0].Buckwalter,
                                                                             bismAllah_verse.Words[1].Parts[0].Tag,
                                                                             new WordPartGrammar(bismAllah_verse.Words[1].Parts[0].Grammar)
                                                                             );

                                                                //(1:1:3:1)	{l	DET	PREFIX|Al+
                                                                new WordPart(verse.Words[2],
                                                                             bismAllah_verse.Words[2].Parts[0].NumberInWord,
                                                                             bismAllah_verse.Words[2].Parts[0].Buckwalter,
                                                                             bismAllah_verse.Words[2].Parts[0].Tag,
                                                                             new WordPartGrammar(bismAllah_verse.Words[2].Parts[0].Grammar)
                                                                             );

                                                                //(1:1:3:2)	r~aHoma`ni	ADJ	STEM|POS:ADJ|LEM:r~aHoma`n|ROOT:rHm|MS|GEN
                                                                new WordPart(verse.Words[2],
                                                                             bismAllah_verse.Words[2].Parts[1].NumberInWord,
                                                                             bismAllah_verse.Words[2].Parts[1].Buckwalter,
                                                                             bismAllah_verse.Words[2].Parts[1].Tag,
                                                                             new WordPartGrammar(bismAllah_verse.Words[2].Parts[1].Grammar)
                                                                             );

                                                                //(1:1:4:1)	{l	DET	PREFIX|Al+
                                                                new WordPart(verse.Words[3],
                                                                             bismAllah_verse.Words[3].Parts[0].NumberInWord,
                                                                             bismAllah_verse.Words[3].Parts[0].Buckwalter,
                                                                             bismAllah_verse.Words[3].Parts[0].Tag,
                                                                             new WordPartGrammar(bismAllah_verse.Words[3].Parts[0].Grammar)
                                                                             );

                                                                //(1:1:4:2)	r~aHiymi	ADJ	STEM|POS:ADJ|LEM:r~aHiym|ROOT:rHm|MS|GEN
                                                                new WordPart(verse.Words[3],
                                                                             bismAllah_verse.Words[3].Parts[1].NumberInWord,
                                                                             bismAllah_verse.Words[3].Parts[1].Buckwalter,
                                                                             bismAllah_verse.Words[3].Parts[1].Tag,
                                                                             new WordPartGrammar(bismAllah_verse.Words[3].Parts[1].Grammar)
                                                                             );
                                                            }
                                                        }
                                                        // correct word_number (if needed) for all subsequent chapter word_parts:
                                                        // in verse 1 of every chapter except 1 and 9 the file's word numbers
                                                        // are shifted past the four bismAllah words
                                                        if (
                                                            ((chapter_number != 1) && (chapter_number != 9)) && (verse_number == 1)
                                                            )
                                                        {
                                                            word_number += 4;
                                                        }
                                                    }

                                                    Word word = verse.Words[word_number - 1];
                                                    if (word != null)
                                                    {
                                                        List <string> grammar = new List <string>(parts[3].Split('|'));
                                                        // NOTE(review): string.Split always yields at least one element,
                                                        // so the else branch below cannot be reached.
                                                        if (grammar.Count > 0)
                                                        {
                                                            //(1:5:3:1)	wa	CONJ	PREFIX|w_CONJ+
                                                            //(1:5:3:2)	<iy~aAka	PRON	STEM|POS:PRON|LEM:<iy~aA|2MS
                                                            if (word.Text == "و")
                                                            {
                                                                waw_count++;
                                                            }
                                                            new WordPart(word, word_part_number, buckwalter, tag, grammar);
                                                        }
                                                        else
                                                        {
                                                            throw new Exception("Grammar field is missing.\r\n" + filename);
                                                        }
                                                    }
                                                }
                                            }
                                            else
                                            {
                                                // Raised when the addressed chapter entry is null.
                                                throw new Exception("Invalid Location Format.\r\n" + filename);
                                            }
                                        }
                                    }
                                    else
                                    {
                                        // Raised when the address does not consist of exactly four ':'-separated parts.
                                        throw new Exception("Invalid File Format.\r\n" + filename);
                                    }
                                }
                            }
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                // Keep the original exception as InnerException so its type and stack trace survive.
                throw new Exception("LoadWordParts: " + ex.Message, ex);
            }
        }
    }
Beispiel #7
0
        /// <summary>
        /// Console entry point: reads the input text file, touches the meaning databases, tags the
        /// text with the English NLP pipeline, prints every token with its part of speech, and
        /// writes a skeleton C# file.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            #region READ INPUT FILE
            Console.WriteLine($"Reading the file {InputFileName}");
            var inputPath = BasePathInputFile + @"\" + InputFileName;
            if (!File.Exists(inputPath))
            {
                // Say why nothing happens instead of exiting silently.
                Console.WriteLine($"Input file not found: {inputPath}");
                return;
            }

            var inputFileContent = File.ReadAllText(inputPath);

            Console.WriteLine("This is the file content: " + inputFileContent);
            #endregion

            #region ACCESS MEANING DATABASE
            // The connection is disposed as soon as the read is done so it is not leaked.
            using (SQLiteConnection sqlite_conn = CreateConnection())
            {
                //CreateTable(sqlite_conn);
                //InsertData(sqlite_conn);
                ReadData(sqlite_conn);
            }
            /* Perform small test with NoSQL Database */
            NoSQLTest();
            #endregion

            #region PARSE INPUT AND BUILD MEANING TREE
            Storage.Current = new OnlineRepositoryStorage(new DiskStorage("catalyst-models"));
            var nlp = Pipeline.For(Language.English);

            var doc = new Document(inputFileContent, Language.English);
            nlp.ProcessSingle(doc);

            // Holds, per sentence, first the list of tagged words and then the list of commands.
            var completeStructure = new List <object>();

            /* Go through the whole text */
            foreach (var sentenceTokens in doc.TokensData)
            {
                if ((sentenceTokens is null) || (sentenceTokens.Count == 0))
                {
                    continue;
                }

                var newWordList    = new List <object>();
                var newCommandList = new List <object>();
                var currentCommand = new SingleCommandPart();

                // Tracks whether currentCommand received a verb or noun since it was created,
                // so that the trailing command of a sentence is kept and empty ones are not added.
                var currentCommandHasContent = false;

                /* Go through each sentence */
                foreach (var token in sentenceTokens)
                {
                    var content = new WordPart();
                    // The "+ 1" treats UpperBound as the index of the token's last character.
                    content.Value        = inputFileContent.Substring(token.LowerBound, token.UpperBound - token.LowerBound + 1);
                    content.PartOfSpeech = token.Tag.ToString();
                    newWordList.Add(content);

                    switch (token.Tag)
                    {
                    case PartOfSpeech.VERB:
                        /* This is the verb of the command. AXIOM: There can always only be one VERB per command */
                        currentCommand.Verb      = content.Value;
                        currentCommandHasContent = true;
                        break;

                    case PartOfSpeech.NOUN:
                        /* This is a noun of the command. */
                        currentCommand.Noun.Add(content.Value);
                        currentCommandHasContent = true;
                        break;

                    /* Adposition */
                    case PartOfSpeech.ADP:
                        /* This marks an object -> Find the full size of the object */
                        switch (content.Value)
                        {
                        case "of":
                            break;

                        case "on":
                            break;
                        }
                        break;

                    case PartOfSpeech.ADV:
                        switch (content.Value)
                        {
                        case "then":
                            /* This means the first part of the sentence is finished. */
                            newCommandList.Add(currentCommand);
                            currentCommand           = new SingleCommandPart();
                            currentCommandHasContent = false;
                            break;
                        }
                        break;
                    }
                }

                // Flush the command that was still being built when the sentence ended;
                // without this the last (or only) command of a sentence would be dropped.
                if (currentCommandHasContent)
                {
                    newCommandList.Add(currentCommand);
                }

                completeStructure.Add(newWordList);
                completeStructure.Add(newCommandList);
            }

            Console.WriteLine("Result:");
            foreach (var entry in completeStructure)
            {
                switch (entry)
                {
                case List <object> items:
                    foreach (var item in items)
                    {
                        switch (item)
                        {
                        // Only tagged words are printed; command entries produce no text.
                        case WordPart taggedWord:
                            Console.Write($"({taggedWord.Value}|{taggedWord.PartOfSpeech})");
                            break;
                        }
                    }
                    break;
                }
                Console.WriteLine("");
            }
            #endregion

            #region GENERATE CODE FILE
            Console.WriteLine($"Creating file {GenFileName}.cs");

            /* Make sure the directory exists */
            Directory.CreateDirectory(BasePathGenFile);

            // Create a file to write to; the using block flushes and closes it.
            using (var genFile = File.CreateText($@"{BasePathGenFile}\{GenFileName}.cs"))
            {
                genFile.WriteLine("using System;");
                genFile.WriteLine("using System.Collections.Generic;");
                genFile.WriteLine("using System.Text;");
                genFile.WriteLine("");
                genFile.WriteLine("namespace GeneratedProject.TmpFolder");
                genFile.WriteLine("{");
                genFile.WriteLine("    class GenOneClass");
                genFile.WriteLine("    {");
                genFile.WriteLine("    }");
                genFile.WriteLine("}");
            }
            #endregion

            Console.WriteLine("Finished");
        }