Esempio n. 1
0
 /// <summary>
 /// Creates a rule with the given identifier and builds its structure from the given dependencies.
 /// </summary>
 /// <param name="id">Identifier stored on the rule.</param>
 /// <param name="dependencies">Dependencies stored on the rule and used to build its structure.</param>
 public NotenizerRule(String id, NotenizerDependencies dependencies)
 {
     // Read the clock once so that a freshly created rule has exactly
     // the same creation and update timestamps.
     DateTime now = DateTime.Now;

     _id = id;
     _structure = new NotenizerStructure(dependencies);
     _dependencies = dependencies;
     _createdAt = now;
     _updatedAt = now;
 }
Esempio n. 2
0
 /// <summary>
 /// Creates an empty note (no text, no note parts, empty structure) for the given original sentence.
 /// </summary>
 /// <param name="originalSentence">Sentence the note is created for.</param>
 public NotenizerNote(NotenizerSentence originalSentence)
 {
     // Read the clock once so that a freshly created note has exactly
     // the same creation and update timestamps.
     DateTime now = DateTime.Now;

     _text = String.Empty;
     _noteParts = new List<NotePart>();
     _originalSentence = originalSentence;
     _createdAt = now;
     _updatedAt = now;
     _structure = new NotenizerStructure();
 }
Esempio n. 3
0
        /// <summary>
        /// Gets the persisted structure with the highest match against the given structure.
        /// Candidates are ranked by structure score first; the content score breaks ties.
        /// When several candidates rank equally, the first one wins.
        /// </summary>
        /// <param name="structure">Structure every persisted candidate is compared with.</param>
        /// <param name="persistedStructures">Persisted structure documents to choose from; null is treated as an empty list.</param>
        /// <param name="m">Match of the winning candidate, or null when there is no candidate.</param>
        /// <returns>Parsed structure of the winning candidate, or null when there is no candidate.</returns>
        public static Structure GetHeighestMatch(NotenizerStructure structure, List<BsonDocument> persistedStructures, out Match m)
        {
            Match heighestMatch = null;
            BsonDocument heighestMatchDocument = null;

            if (persistedStructures != null)
            {
                foreach (BsonDocument persistedStructureLoop in persistedStructures)
                {
                    Match match = CalculateMatch(structure, persistedStructureLoop);

                    if (heighestMatch == null
                        || heighestMatch.Structure < match.Structure
                        || (heighestMatch.Structure == match.Structure
                            && heighestMatch.Content < match.Content))
                    {
                        // Only remember the document here; parsing is postponed until the
                        // winner is known so that superseded candidates are never parsed.
                        heighestMatch = match;
                        heighestMatchDocument = persistedStructureLoop;
                    }
                }
            }

            m = heighestMatch;

            return heighestMatchDocument == null ? null : ParseStructure(heighestMatchDocument);
        }
Esempio n. 4
0
 /// <summary>
 /// Creates a sentence wrapper around the given annotation: stores the annotation,
 /// creates the sentence object from this instance's string representation and the article,
 /// and builds the structure from the dependencies obtained from the annotation.
 /// </summary>
 /// <param name="annotation">Annotation stored on the instance and used to obtain the dependencies.</param>
 /// <param name="article">Article passed to the created sentence object.</param>
 public NotenizerSentence(Annotation annotation, Article article)
 {
     this._annotation = annotation;

     // ToString() is called only after the annotation has been stored.
     // NOTE(review): presumably ToString() reads _annotation - confirm before reordering.
     String sentenceText = ToString();
     this._sentence = new Sentence(sentenceText, article);

     var annotationDependencies = GetDepencencies(annotation);
     this._structure = new NotenizerStructure(annotationDependencies);
 }
Esempio n. 5
0
        /// <summary>
        /// Calculates the match between a persisted structure (from DB) and the structure of the sentence that is being parsed.
        /// </summary>
        /// <remarks>
        /// The structure score is split into five equal shares of <c>NotenizerConstants.MaxMatchValue</c>:
        /// one for relations with the same number of dependencies, three for the per-dependency
        /// POS type checks (dependent, governor, both) and one for (governor POS, dependent POS)
        /// pairs that occur equally often. The content score is split into eight equal shares,
        /// one for each per-dependency index / POS tag / named entity check.
        /// NOTE(review): <c>CompressedDependencies[relationName]</c> is read for every persisted relation;
        /// whether it throws for a relation missing from the parsed sentence depends on that indexer - confirm.
        /// </remarks>
        /// <param name="notenizerStructure">Structure of the sentence that is being parsed.</param>
        /// <param name="persistedStructure">Persisted structure document to compare with.</param>
        /// <returns>Match created from the structure score, the content score and a value score of 0.0.</returns>
        private static Match CalculateMatch(NotenizerStructure notenizerStructure, BsonDocument persistedStructure)
        {
            Double structureCompareCount = 5.0;
            Double contentCompareCount = 8.0;
            Double oneStructeCompareRating = NotenizerConstants.MaxMatchValue / structureCompareCount;
            Double oneContentCompareRating = NotenizerConstants.MaxMatchValue / contentCompareCount;
            Double oneStructureCompareTypeIterRating;
            Double oneContentComapareTypeIterRating;
            Double structureCounter = 0.0;
            Double contentCounter = 0.0;
            Double valueCounter = 0.0;
            Dictionary<String, Dictionary<Tuple<PartOfSpeechType, PartOfSpeechType>, int>> structureDic = new Dictionary<string, Dictionary<Tuple<PartOfSpeechType, PartOfSpeechType>, int>>();

            var persistedRelations = persistedStructure[DBConstants.StructureDataFieldName].AsBsonArray;

            // Total number of persisted dependencies over all relations.
            int c = 0;

            /* ================= Relation size match ================= */
            // For an empty array the rating is infinite, but then the loop body never runs and it is never added.
            oneStructureCompareTypeIterRating = oneStructeCompareRating / (double)persistedRelations.Count;
            foreach (BsonDocument origDepDocLoop in persistedRelations)
            {
                String relationName = origDepDocLoop[DBConstants.RelationNameFieldName].AsString;
                int persistedDependencyCount = origDepDocLoop[DBConstants.DependenciesFieldName].AsBsonArray.Count;

                if (notenizerStructure.CompressedDependencies[relationName].Count == persistedDependencyCount)
                    structureCounter += oneStructureCompareTypeIterRating;

                c += persistedDependencyCount;
            }

            // Goes over all dependencies of original sentence
            // and gets the name of dependency (for example: compound)
            // and checks, if there is, in sentence that is parsed right now,
            // the dependency with same POS tag or same index at governor or dependent.
            // (With c == 0 both ratings are infinite, but the inner loop never runs.)
            oneStructureCompareTypeIterRating = oneStructeCompareRating / (double)(c);
            oneContentComapareTypeIterRating = oneContentCompareRating / (double)(c);
            foreach (BsonDocument origDepDocLoop in persistedRelations)
            {
                String relationName = origDepDocLoop[DBConstants.RelationNameFieldName].AsString;

                foreach (BsonDocument depLoop in origDepDocLoop[DBConstants.DependenciesFieldName].AsBsonArray)
                {
                    /* ================= Structure match ================= */
                    Dictionary<Tuple<PartOfSpeechType, PartOfSpeechType>, int> posPairCounts;
                    if (!structureDic.TryGetValue(relationName, out posPairCounts))
                    {
                        posPairCounts = new Dictionary<Tuple<PartOfSpeechType, PartOfSpeechType>, int>();
                        structureDic.Add(relationName, posPairCounts);
                    }

                    BsonDocument governorDoc = depLoop[DBConstants.GovernorFieldName].AsBsonDocument;
                    BsonDocument dependentDoc = depLoop[DBConstants.DependentFieldName].AsBsonDocument;

                    // Item1 = governor POS type, Item2 = dependent POS type.
                    // Parsed once here and reused by the structure checks below.
                    Tuple<PartOfSpeechType, PartOfSpeechType> govPOSdepPOSKey = new Tuple<PartOfSpeechType, PartOfSpeechType>(
                        PartOfSpeech.GetTypeFromTag(governorDoc[DBConstants.POSFieldName].AsString),
                        PartOfSpeech.GetTypeFromTag(dependentDoc[DBConstants.POSFieldName].AsString));

                    // Count how often each (governor POS, dependent POS) pair occurs per relation.
                    int pairOccurrences;
                    posPairCounts.TryGetValue(govPOSdepPOSKey, out pairOccurrences);
                    posPairCounts[govPOSdepPOSKey] = pairOccurrences + 1;

                    // Dependencies of the parsed sentence that carry the same relation name.
                    var candidates = notenizerStructure.CompressedDependencies[relationName];

                    if (candidates.Any(x => x.Dependent.POS.Type == govPOSdepPOSKey.Item2))
                        structureCounter += oneStructureCompareTypeIterRating;

                    if (candidates.Any(x => x.Governor.POS.Type == govPOSdepPOSKey.Item1))
                        structureCounter += oneStructureCompareTypeIterRating;

                    if (candidates.Any(
                        x => x.Dependent.POS.Type == govPOSdepPOSKey.Item2
                        && x.Governor.POS.Type == govPOSdepPOSKey.Item1))
                    {
                        structureCounter += oneStructureCompareTypeIterRating;
                    }

                    /* ================= Content match ================= */
                    // The index and named entity fields are read inside the predicates on purpose:
                    // they are only touched when there is at least one candidate to compare with.

                    if (candidates.Any(x => x.Dependent.Index == dependentDoc[DBConstants.IndexFieldName]))
                        contentCounter += oneContentComapareTypeIterRating;

                    if (candidates.Any(x => x.Governor.Index == governorDoc[DBConstants.IndexFieldName]))
                        contentCounter += oneContentComapareTypeIterRating;

                    if (candidates.Any(
                        x => x.Governor.POS.Tag == governorDoc[DBConstants.POSFieldName]
                        && x.Governor.Index == governorDoc[DBConstants.IndexFieldName]))
                    {
                        contentCounter += oneContentComapareTypeIterRating;
                    }

                    if (candidates.Any(
                        x => x.Dependent.POS.Tag == dependentDoc[DBConstants.POSFieldName]
                        && x.Dependent.Index == dependentDoc[DBConstants.IndexFieldName]))
                    {
                        contentCounter += oneContentComapareTypeIterRating;
                    }

                    if (candidates.Any(
                        x => x.Dependent.POS.Tag == dependentDoc[DBConstants.POSFieldName]
                        && x.Dependent.Index == dependentDoc[DBConstants.IndexFieldName]
                        && x.Governor.POS.Tag == governorDoc[DBConstants.POSFieldName]
                        && x.Governor.Index == governorDoc[DBConstants.IndexFieldName]))
                    {
                        contentCounter += oneContentComapareTypeIterRating;
                    }

                    if (candidates.Any(x => x.Dependent.NamedEntity.Value == dependentDoc[DBConstants.NERFieldName]))
                        contentCounter += oneContentComapareTypeIterRating;

                    if (candidates.Any(x => x.Governor.NamedEntity.Value == governorDoc[DBConstants.NERFieldName]))
                        contentCounter += oneContentComapareTypeIterRating;

                    if (candidates.Any(
                        x => x.Dependent.NamedEntity.Value == dependentDoc[DBConstants.NERFieldName]
                        && x.Governor.NamedEntity.Value == governorDoc[DBConstants.NERFieldName]))
                    {
                        contentCounter += oneContentComapareTypeIterRating;
                    }
                }
            }

            /* ================= POS pair count match ================= */
            // z = number of distinct (relation, governor POS, dependent POS) combinations in the persisted structure,
            // y = how many of them occur exactly as often in the sentence that is being parsed.
            int z = 0;
            int y = 0;
            foreach (KeyValuePair<String, Dictionary<Tuple<PartOfSpeechType, PartOfSpeechType>, int>> structDicKVPLoop in structureDic)
            {
                z += structDicKVPLoop.Value.Keys.Count;

                foreach (KeyValuePair<Tuple<PartOfSpeechType, PartOfSpeechType>, int> govPOSdepGOVCountKVPLoop in structDicKVPLoop.Value)
                {
                    if (notenizerStructure.CompressedDependencies[structDicKVPLoop.Key].Where(
                        x => x.Governor.POS.Type == govPOSdepGOVCountKVPLoop.Key.Item1
                        && x.Dependent.POS.Type == govPOSdepGOVCountKVPLoop.Key.Item2).Count() == govPOSdepGOVCountKVPLoop.Value)
                    {
                        y++;
                    }
                }
            }

            // Without any persisted dependency z is 0; dividing by it would give
            // Infinity * 0 = NaN and poison the structure score, so add nothing in that case.
            if (z > 0)
                structureCounter += oneStructeCompareRating / z * y;

            return new Match(structureCounter, contentCounter, valueCounter);
        }
Esempio n. 6
0
        /// <summary>
        /// Builds a new structure from the current dependencies, stores it on this
        /// instance and on its rule, and hands it back to the caller.
        /// </summary>
        /// <returns>The newly created structure.</returns>
        public NotenizerStructure CreateStructure()
        {
            _structure = new NotenizerStructure(Dependencies);

            // The rule shares the very same structure instance.
            _rule.Structure = _structure;

            return _structure;
        }
Esempio n. 7
0
        /// <summary>
        /// Gets rule for sentence.
        /// Looks up the persisted structure with the highest match, picks a persisted sentence
        /// that references this structure (preferring one whose trimmed text equals the text of
        /// <paramref name="sentence"/>) and loads the note, rule and rule structure of that sentence.
        /// </summary>
        /// <remarks>
        /// NOTE(review): every DB call is waited for synchronously through <c>.Result</c>.
        /// </remarks>
        /// <param name="sentence">Sentence to get rule for</param>
        /// <param name="matchedNote">Out matched note of sentence; null when no rule is returned</param>
        /// <returns>Rule of the matched sentence, or null when no structure or no sentence was matched.</returns>
        private NotenizerNoteRule GetRuleForSentence(NotenizerSentence sentence, out Note matchedNote)
        {
            Match match;
            Article article;
            Structure structure;
            Note matchedSentenceNote;
            NotenizerNoteRule matchedSentenceRule;
            NotenizerStructure matchedSentenceStructure;
            List<BsonDocument> sentencesWithSameStructure;

            matchedNote = null;
            structure = DocumentParser.GetHeighestMatch(
                sentence.Structure,
                DB.GetAll(DBConstants.StructuresCollectionName, DocumentCreator.CreateFilterByStructure(sentence)).Result,
                out match);

            if (structure == null)
                return null;

            matchedSentenceStructure = new NotenizerStructure(structure);

            sentencesWithSameStructure = DB.GetAll(
                DBConstants.SentencesCollectionName,
                DocumentCreator.CreateFilterById(DBConstants.StructureRefIdFieldName, matchedSentenceStructure.Structure.ID)).Result;

            if (sentencesWithSameStructure.Count == 0)
                return null;

            // Falls back to the first sentence with the same structure
            // unless a sentence with exactly the same text is found.
            BsonDocument matchedSentenceDocument = sentencesWithSameStructure[0];

            foreach (BsonDocument sentenceWithSameStructureLoop in sentencesWithSameStructure)
            {
                if (sentenceWithSameStructureLoop[DBConstants.TextFieldName].AsString.Trim() == sentence.Sentence.Text)
                {
                    // Identical text: the value part of the match is set to 100.0.
                    match.Value = 100.0;
                    matchedSentenceDocument = sentenceWithSameStructureLoop;
                    break;
                }
            }

            // Parse only the document that was finally selected.
            nsNotenizerObjects.Sentence matchedSentence = DocumentParser.ParseSentence(matchedSentenceDocument);

            if (matchedSentence == null)
                return null;

            article = DocumentParser.ParseArticle(
                DB.GetFirst(
                    DBConstants.ArticlesCollectionName,
                    DocumentCreator.CreateFilterById(sentence.Sentence.Article.ID)).Result);

            matchedSentenceNote = DocumentParser.ParseNote(
                DB.GetFirst(
                    DBConstants.NotesCollectionName,
                    DocumentCreator.CreateFilterById(matchedSentence.NoteID)).Result);

            matchedSentenceRule = DocumentParser.ParseRule(
                DB.GetFirst(
                    DBConstants.RulesCollectionName,
                    DocumentCreator.CreateFilterById(matchedSentence.RuleID)).Result);

            matchedSentenceRule.Structure = new NotenizerStructure(
                DocumentParser.ParseStructure(
                    DB.GetFirst(
                        DBConstants.StructuresCollectionName,
                        DocumentCreator.CreateFilterById(matchedSentenceRule.StructureID)).Result));

            matchedSentenceRule.Sentence = matchedSentence;
            matchedSentenceRule.Sentence.Article = article;
            matchedSentenceRule.Match = match;
            matchedNote = matchedSentenceNote;

            return matchedSentenceRule;
        }