예제 #1
0
파일: EssayExercise.cs 프로젝트: TellOP/APP
        /// <summary>
        /// Resolves a single, already cleaned token to an <see cref="IWord"/>.
        /// </summary>
        /// <remarks>
        /// For English exercises the token is looked up through <c>OfflineWord.Search</c> and the most
        /// probable match is returned. For Spanish exercises the token is sent to the remote
        /// <c>SpanishPOSTagger</c> and the first tagged word is returned. Whenever no match is available,
        /// a placeholder word with an unclassified part of speech and an unknown level is returned instead.
        /// </remarks>
        /// <param name="cleanToken">The token to look up.</param>
        /// <returns>The word found for the token, or a placeholder word if nothing was found.</returns>
        private async Task <IWord> ProcessSingleWord(string cleanToken)
        {
            if (this.Language == SupportedLanguage.English)
            {
                string msg = "'" + cleanToken + "' ";
                IWord  w;

                IList <IWord> offlineWords = await OfflineWord.Search(cleanToken);

                if (offlineWords.Count > 0)
                {
                    w = WordSearchUtilities.GetMostProbable(offlineWords);
                }
                else
                {
                    // TODO: find a better way to add a word that is not found in the database.
                    msg += "SearchCount is zero, so I'm creating a new word with an unclassified part of speech and an unknown language level";
                    w    = CreatePlaceholderWord(cleanToken, SupportedLanguage.English);
                }

                Tools.Logger.Log("ProcessSingleWord", msg);
                return(w);
            }

            // This branch previously re-tested SupportedLanguage.English and was therefore unreachable;
            // everything it does (Spanish tagger, Spanish fallback word) targets Spanish.
            if (this.Language == SupportedLanguage.Spanish)
            {
                Tools.Logger.Log("ProcessSingleWord", "Call remote SpanishTagger");
                SpanishPOSTagger    es_tagger = new SpanishPOSTagger(App.OAuth2Account, cleanToken);
                IList <SpanishWord> results   = await es_tagger.CallEndpointAsObjectAsync();

                Tools.Logger.Log("ProcessSingleWord", "Got the results!");

                // Check for a missing or empty result explicitly instead of catching the exception
                // that First() would throw.
                if (results != null && results.Count > 0)
                {
                    return(results[0]);
                }

                return(CreatePlaceholderWord(cleanToken, SupportedLanguage.Spanish));
            }

            // Any other language: keep the historical fallback, which labels the placeholder as English.
            return(CreatePlaceholderWord(cleanToken, SupportedLanguage.English));
        }

        /// <summary>
        /// Builds the placeholder word used when a token cannot be resolved: an <see cref="OfflineWord"/>
        /// with an unknown level and an unclassified part of speech.
        /// </summary>
        /// <param name="term">The token the placeholder stands for.</param>
        /// <param name="language">The language whose converted string form is stored on the word.</param>
        /// <returns>The newly created placeholder word.</returns>
        private static IWord CreatePlaceholderWord(string term, SupportedLanguage language)
        {
            return(new OfflineWord()
            {
                Term         = term,
                JsonLevel    = LanguageLevelClassification.Unknown,
                PartOfSpeech = PartOfSpeech.Unclassified,
                Language     = (string)new SupportedLanguageToLcidConverter().Convert(language, typeof(string), null, CultureInfo.InvariantCulture)
            });
        }
예제 #2
0
파일: EssayExercise.cs 프로젝트: TellOP/APP
        /// <summary>
        /// Initializes or reinitializes all properties which can be extracted from an offline analysis of the text.
        /// </summary>
        /// <remarks>
        /// Every property is assigned a fresh <c>AsyncLazy</c> wrapping the delegate that computes it.
        /// The level classification, the level distribution and the word count all await
        /// <c>OfflineAnalysisResult</c>.
        /// </remarks>
        private void InitializeOfflineAnalysisProperties()
        {
            // TODO: catch errors in the UI!
            this.OfflineAnalysisResult = new AsyncLazy <List <IWord> >(async() =>
            {
                List <IWord> analysisCache = new List <IWord>();

                if (this.Language == SupportedLanguage.English)
                {
                    IList <Task <IWord> > searchTokenTasks = new List <Task <IWord> >();

                    // Raw ("dirty") tokens are preprocessed one at a time; the preprocessed text is
                    // tokenized again and a lookup task is queued for every resulting clean token.
                    foreach (string token in Regex.Matches(this._essayContents, "[\\w']+").Cast <Match>().Select(m => m.Value))
                    {
                        string preprocessed = await this.PreprocessSingleWord(token);

                        foreach (string cleanToken in Regex.Matches(preprocessed, "[\\w']+").Cast <Match>().Select(m => m.Value))
                        {
                            searchTokenTasks.Add(this.ProcessSingleWord(cleanToken));
                        }
                    }

                    IEnumerable <IWord> results = await Task.WhenAll(searchTokenTasks);

                    Tools.Logger.Log("EssayExercise", "I've waited all of them!");
                    foreach (IWord w in results)
                    {
                        // Functional words are dropped only when the exercise asks for it.
                        if (this.ExcludeFunctionalWords && (
                                w.PartOfSpeech == PartOfSpeech.ClauseOpener ||
                                w.PartOfSpeech == PartOfSpeech.Conjunction ||
                                w.PartOfSpeech == PartOfSpeech.Determiner ||
                                w.PartOfSpeech == PartOfSpeech.DeterminerPronoun ||
                                w.PartOfSpeech == PartOfSpeech.ExistentialParticle ||
                                w.PartOfSpeech == PartOfSpeech.Genitive ||
                                w.PartOfSpeech == PartOfSpeech.InfinitiveMarker ||
                                w.PartOfSpeech == PartOfSpeech.InterjectionOrDiscourseMarker ||
                                w.PartOfSpeech == PartOfSpeech.NegativeMarker ||
                                w.PartOfSpeech == PartOfSpeech.CardinalNumber ||
                                w.PartOfSpeech == PartOfSpeech.Ordinal ||
                                w.PartOfSpeech == PartOfSpeech.Pronoun ||
                                w.PartOfSpeech == PartOfSpeech.ModalVerb))
                        {
                            continue;
                        }
                        analysisCache.Add(w);
                    }
                }
                else if (this.Language == SupportedLanguage.Spanish)
                {
                    // The whole essay is sent to the remote tagger in a single call.
                    Tools.Logger.Log("EssayExerciseES", "Call remote SpanishTagger");
                    SpanishPOSTagger es_tagger  = new SpanishPOSTagger(App.OAuth2Account, this._essayContents);
                    IList <SpanishWord> results = await es_tagger.CallEndpointAsObjectAsync();
                    Tools.Logger.Log("EssayExerciseES", "Got the results!");
                    analysisCache.AddRange(results);
                }
                else if (this.Language == SupportedLanguage.German)
                {
                    // The whole essay is sent to the remote tagger in a single call.
                    Tools.Logger.Log("EssayExerciseDE", "Call remote GermanTagger");
                    GermanPOSTagger de_tagger  = new GermanPOSTagger(App.OAuth2Account, this._essayContents);
                    IList <GermanWord> results = await de_tagger.CallEndpointAsObjectAsync();
                    Tools.Logger.Log("EssayExerciseDE", "Got the results!");
                    analysisCache.AddRange(results);
                }

                return(analysisCache);
            });

            // Words grouped by language level; only computed for English, empty otherwise.
            this.LevelClassification = new AsyncLazy <Dictionary <LanguageLevelClassification, List <IWord> > >(async() =>
            {
                Dictionary <LanguageLevelClassification, List <IWord> > result = new Dictionary <LanguageLevelClassification, List <IWord> >();

                if (this.Language != SupportedLanguage.English)
                {
                    return(result);
                }

                List <IWord> offlineAnalysis = await this.OfflineAnalysisResult;

                // Every level gets a bucket, even if no word ends up in it.
                foreach (LanguageLevelClassification level in Enum.GetValues(typeof(LanguageLevelClassification)))
                {
                    result.Add(level, new List <IWord>());
                }
                foreach (IWord word in offlineAnalysis)
                {
                    LanguageLevelClassification level = await word.Level;
                    result[level].Add(word);
                }
                return(result);
            });

            // Fraction of analyzed words per language level; only computed for English, empty otherwise.
            this.LevelClassificationDistribution = new AsyncLazy <Dictionary <LanguageLevelClassification, float> >(async() =>
            {
                Dictionary <LanguageLevelClassification, float> result = new Dictionary <LanguageLevelClassification, float>();

                if (this.Language != SupportedLanguage.English)
                {
                    return(result);
                }

                // TODO: check for any possible loss of precision
                List <IWord> offlineAnalysis = await this.OfflineAnalysisResult;

                foreach (LanguageLevelClassification level in Enum.GetValues(typeof(LanguageLevelClassification)))
                {
                    result.Add(level, 0);
                }

                // With no words at all the normalization below would compute 0 / 0 and fill the
                // dictionary with NaN; report an all-zero distribution instead.
                if (offlineAnalysis.Count == 0)
                {
                    return(result);
                }

                foreach (IWord word in offlineAnalysis)
                {
                    LanguageLevelClassification level = await word.Level;
                    result[level] = result[level] + 1;
                }
                foreach (LanguageLevelClassification level in Enum.GetValues(typeof(LanguageLevelClassification)))
                {
                    result[level] = result[level] / (float)offlineAnalysis.Count;
                }
                return(result);
            });

            // Number of words kept by the offline analysis.
            this.NumWords = new AsyncLazy <int>(async() =>
            {
                List <IWord> offlineAnalysis = await this.OfflineAnalysisResult;
                return(offlineAnalysis.Count);
            });

            // One lazy per part of speech, each delegating to GetWordsByPartOfSpeech.
            this.Adjectives           = new AsyncLazy <IDictionary <IWord, int> >(async() => await this.GetWordsByPartOfSpeech(PartOfSpeech.Adjective));
            this.Adverbs              = new AsyncLazy <IDictionary <IWord, int> >(async() => await this.GetWordsByPartOfSpeech(PartOfSpeech.Adverb));
            this.ClauseOpeners        = new AsyncLazy <IDictionary <IWord, int> >(async() => await this.GetWordsByPartOfSpeech(PartOfSpeech.ClauseOpener));
            this.Conjunctions         = new AsyncLazy <IDictionary <IWord, int> >(async() => await this.GetWordsByPartOfSpeech(PartOfSpeech.Conjunction));
            this.Determiners          = new AsyncLazy <IDictionary <IWord, int> >(async() => await this.GetWordsByPartOfSpeech(PartOfSpeech.Determiner));
            this.DeterminerPronouns   = new AsyncLazy <IDictionary <IWord, int> >(async() => await this.GetWordsByPartOfSpeech(PartOfSpeech.DeterminerPronoun));
            this.ExistentialParticles = new AsyncLazy <IDictionary <IWord, int> >(async() => await this.GetWordsByPartOfSpeech(PartOfSpeech.ExistentialParticle));
            this.ForeignWords         = new AsyncLazy <IDictionary <IWord, int> >(async() => await this.GetWordsByPartOfSpeech(PartOfSpeech.ForeignWord));
            this.Genitives            = new AsyncLazy <IDictionary <IWord, int> >(async() => await this.GetWordsByPartOfSpeech(PartOfSpeech.Genitive));
            this.InfinitiveMarkers    = new AsyncLazy <IDictionary <IWord, int> >(async() => await this.GetWordsByPartOfSpeech(PartOfSpeech.InfinitiveMarker));
            this.Interjections        = new AsyncLazy <IDictionary <IWord, int> >(async() => await this.GetWordsByPartOfSpeech(PartOfSpeech.InterjectionOrDiscourseMarker));
            this.LettersOfAlphabet    = new AsyncLazy <IDictionary <IWord, int> >(async() => await this.GetWordsByPartOfSpeech(PartOfSpeech.LetterAsWord));
            this.NegativeMarkers      = new AsyncLazy <IDictionary <IWord, int> >(async() => await this.GetWordsByPartOfSpeech(PartOfSpeech.NegativeMarker));
            this.CommonNouns          = new AsyncLazy <IDictionary <IWord, int> >(async() => await this.GetWordsByPartOfSpeech(PartOfSpeech.CommonNoun));
            this.ProperNouns          = new AsyncLazy <IDictionary <IWord, int> >(async() => await this.GetWordsByPartOfSpeech(PartOfSpeech.ProperNoun));
            this.PartsOfProperNouns   = new AsyncLazy <IDictionary <IWord, int> >(async() => await this.GetWordsByPartOfSpeech(PartOfSpeech.PartOfProperNoun));
            this.CardinalNumbers      = new AsyncLazy <IDictionary <IWord, int> >(async() => await this.GetWordsByPartOfSpeech(PartOfSpeech.CardinalNumber));
            this.OrdinalNumbers       = new AsyncLazy <IDictionary <IWord, int> >(async() => await this.GetWordsByPartOfSpeech(PartOfSpeech.Ordinal));
            this.Prepositions         = new AsyncLazy <IDictionary <IWord, int> >(async() => await this.GetWordsByPartOfSpeech(PartOfSpeech.Preposition));
            this.Pronouns             = new AsyncLazy <IDictionary <IWord, int> >(async() => await this.GetWordsByPartOfSpeech(PartOfSpeech.Pronoun));
            this.UnclassifiedWords    = new AsyncLazy <IDictionary <IWord, int> >(async() => await this.GetWordsByPartOfSpeech(PartOfSpeech.Unclassified));
            this.Verbs                = new AsyncLazy <IDictionary <IWord, int> >(async() => await this.GetWordsByPartOfSpeech(PartOfSpeech.Verb));
            this.ModalVerbs           = new AsyncLazy <IDictionary <IWord, int> >(async() => await this.GetWordsByPartOfSpeech(PartOfSpeech.ModalVerb));
        }