/// <summary>
/// Analyzes <paramref name="key"/> with the member analyzer and merges the text of every
/// resulting morpheme into a single <see cref="IGAnlys_Word"/>.
/// </summary>
/// <param name="key">Text handed to <c>analyzer.AnalyzeImmediate</c>.</param>
/// <param name="contentMorphemes">
/// Receives the text of each morpheme, appended in enumeration order.
/// </param>
/// <returns>
/// The assembled word, or <c>null</c> when the analysis yields no morphemes.
/// </returns>
public IGAnlys_Word ForceWordANlys(string key, List<string> contentMorphemes)
{
    IGAnlys_Word assembled = null;

    foreach (var piece in analyzer.AnalyzeImmediate(key))
    {
        contentMorphemes.Add(piece.word);

        // Created lazily so that an empty analysis result is reported as null.
        if (assembled == null)
        {
            assembled = new IGAnlys_Word();
        }

        assembled.AddMorphemeText(piece.word);
    }

    return assembled;
}
/// <summary>
/// Analyzes <paramref name="key"/> with the member analyzer and converts the resulting
/// morphemes into an <see cref="IncrementalGAnalyzer"/>.
/// </summary>
/// <remarks>
/// Morphemes are dispatched on their <c>id</c>:
/// 0 contributes text to a word, 1 is either a numeric quote ID or a key into
/// <c>dict.dict</c>, 2 raises the "ignore next" flag and 3 raises the "unreadable next" flag.
/// NOTE(review): these ids presumably correspond to the format IDs configured on the
/// analyzer that produced the morphemes - confirm against its setup.
/// </remarks>
/// <param name="key">Text handed to <c>analyzer.AnalyzeImmediate</c>.</param>
/// <returns>
/// <c>null</c> when nothing was produced; the analyzer itself when exactly one was produced;
/// otherwise an <see cref="IGAnlys_ResultClusterizer"/> wrapping an
/// <see cref="IGAnlys_Sequence"/> that holds all of them in order.
/// </returns>
public IncrementalGAnalyzer PickBestElement(string key)
{
    var collected = new List<IncrementalGAnalyzer>();
    bool ignoreNext = false;
    bool unreadableNext = false;
    IGAnlys_Word pendingWord = null;

    foreach (var token in analyzer.AnalyzeImmediate(key))
    {
        if (token.id == 0)
        {
            // Consecutive text morphemes are merged into one word analyzer.
            if (pendingWord == null)
            {
                pendingWord = new IGAnlys_Word();
            }

            pendingWord.AddMorphemeText(token.word);
            continue;
        }

        // Any non-text morpheme terminates the word collected so far. The flags are
        // passed by reference, so ModifyAnalyzer may read and update them.
        if (pendingWord != null)
        {
            collected.Add(ModifyAnalyzer(pendingWord, ref ignoreNext, ref unreadableNext));
            pendingWord = null;
        }

        switch (token.id)
        {
            case 1:
            {
                if (int.TryParse(token.word, out int quoteId))
                {
                    // Numeric content: a quote analyzer carrying that number as its morpheme ID.
                    collected.Add(ModifyAnalyzer(
                        new IGAnlys_Quote { morphemeID = quoteId },
                        ref ignoreNext,
                        ref unreadableNext));
                }
                else
                {
                    // Non-numeric content: a reference to an existing dictionary entry.
                    // Only the ignore flag is consulted (and cleared) on this path; the
                    // unreadable flag is left untouched.
                    var referenced = dict.dict[token.word];
                    if (ignoreNext)
                    {
                        collected.Add(new IGAnlys_IgnoreBlock { baseAnalyzer = referenced });
                    }
                    else
                    {
                        collected.Add(referenced);
                    }

                    ignoreNext = false;
                }

                break;
            }

            case 2:
                ignoreNext = true;
                break;

            case 3:
                unreadableNext = true;
                break;
        }
    }

    // Flush a word that runs up to the end of the input.
    if (pendingWord != null)
    {
        collected.Add(ModifyAnalyzer(pendingWord, ref ignoreNext, ref unreadableNext));
    }

    if (collected.Count == 0)
    {
        return null;
    }

    if (collected.Count == 1)
    {
        return collected[0];
    }

    var sequence = new IGAnlys_Sequence();
    foreach (var element in collected)
    {
        sequence.analyzers.Add(element);
    }

    return new IGAnlys_ResultClusterizer { baseAnalyzer = sequence };
}
/// <summary>
/// Wires up the example processor: configures two morphological analyzers, fills the static
/// <c>FormatReader</c> settings, loads the grammar dictionary from
/// <paramref name="dictionaryFilePath"/> and stores the resulting
/// <see cref="MGSyntacticProcessor"/> in <c>nlProcessor</c>.
/// </summary>
/// <param name="dictionaryFilePath">
/// Path of the dictionary file. Its whole content is read with <c>File.ReadAllText</c> and
/// passed to <c>RootReader.Read</c>. After reading, the dictionary is expected to contain an
/// entry named "RootUnit".
/// </param>
public ExampleNLangProcessor(string dictionaryFilePath)
{
    // --- Morphological analyzer handed to the processor as its mAnalyzer ---
    var textAnalyzer = new StdMAnalyzer();
    textAnalyzer.blockReaderes.Add(new IgnoreBlockReader
    {
        markers = new List<string> { " ", "\r", "\n", "\t" }
    });
    textAnalyzer.blockReaderes.Add(new MarkerBlockReader
    {
        markers = new List<MarkerAndFormatID>
        {
            new MarkerAndFormatID { marker = ".", formatID = 2 },
            new MarkerAndFormatID { marker = ",", formatID = 3 },
            new MarkerAndFormatID { marker = "###", formatID = 4 },
            new MarkerAndFormatID { marker = ":", formatID = 5 },
        }
    });
    textAnalyzer.blockReaderes.Add(new QuoteBlockReader
    {
        formatID = 1,
        leftMarker = "\"",
        rightMarker = "\""
    });
    textAnalyzer.generalReaders.Add(new NumberReader());
    textAnalyzer.generalReaders.Add(new WordReader());

    // --- Morphological analyzer used while the dictionary is being created ---
    var dictAnalyzer = new StdMAnalyzer();
    dictAnalyzer.blockReaderes.Add(new IgnoreBlockReader
    {
        markers = new List<string> { " " }
    });
    dictAnalyzer.blockReaderes.Add(new MarkerBlockReader
    {
        markers = new List<MarkerAndFormatID>
        {
            new MarkerAndFormatID { marker = "(IGNORE)", formatID = 2 },
            new MarkerAndFormatID { marker = "(UNREADABLE)", formatID = 3 }
        }
    });
    dictAnalyzer.blockReaderes.Add(new QuoteBlockReader
    {
        formatID = 1,
        leftMarker = "#",
        rightMarker = "#"
    });
    dictAnalyzer.generalReaders.Add(new WordReader());

    // --- Dictionary creation: static FormatReader configuration ---
    var grammarDict = new IncrGAnlysDictionary();

    // The instance is not referenced again in this constructor; the construction is kept
    // because the FormatReader constructor is not visible here.
    var formatReader = new FormatReader();

    FormatReader.fReader = new FormToGAnlys { analyzer = dictAnalyzer, dict = grammarDict };
    FormatReader.gAnlysDict = grammarDict;

    // Every standard meta info is registered under its own word.
    FormatReader.metaInfos = new Dictionary<string, GrammarBlock>
    {
        [StdMetaInfos.sentenceCluster.word] = StdMetaInfos.sentenceCluster,
        [StdMetaInfos.nominalBlock.word] = StdMetaInfos.nominalBlock,
        [StdMetaInfos.verbalBlock.word] = StdMetaInfos.verbalBlock,
        [StdMetaInfos.quoteBlock.word] = StdMetaInfos.quoteBlock,
        [StdMetaInfos.sv.word] = StdMetaInfos.sv,
        [StdMetaInfos.conditionSV.word] = StdMetaInfos.conditionSV,
        [StdMetaInfos.negated.word] = StdMetaInfos.negated,
        [StdMetaInfos.title.word] = StdMetaInfos.title,
        [StdMetaInfos.clusterExtractable.word] = StdMetaInfos.clusterExtractable,
        [StdMetaInfos.metaCluster.word] = StdMetaInfos.metaCluster,
        [StdMetaInfos.anonymousCommand.word] = StdMetaInfos.anonymousCommand,
        [StdMetaInfos.modifierCluster.word] = StdMetaInfos.modifierCluster,
        [StdMetaInfos.quoteSV.word] = StdMetaInfos.quoteSV,
        [StdMetaInfos.pronoun.word] = StdMetaInfos.pronoun,
        [StdMetaInfos.plural.word] = StdMetaInfos.plural,
        [StdMetaInfos.unreadable.word] = StdMetaInfos.unreadable,
    };

    // Conjunction analyzer: a repeatable block over the candidates "and", "or" and a quote
    // with morpheme ID 3.
    // NOTE(review): 3 is the format ID assigned to "," in the first analyzer above, so this
    // presumably lets a comma act as a conjunction - confirm.
    var conjunctionCandidates = new IGAnlys_Candidates();
    var andWord = new IGAnlys_Word();
    andWord.AddMorphemeText("and");
    var orWord = new IGAnlys_Word();
    orWord.AddMorphemeText("or");
    conjunctionCandidates.candidates.Add(andWord);
    conjunctionCandidates.candidates.Add(orWord);
    conjunctionCandidates.candidates.Add(new IGAnlys_Quote { morphemeID = 3 });
    FormatReader.cojunctionIGAnlys = new IGAnlys_RepeatableBlock
    {
        baseAnalyzer = conjunctionCandidates
    };
    FormatReader.preparer = new AnalyzePreparer();

    // --- Load the dictionary file into grammarDict ---
    var dictionaryText = File.ReadAllText(dictionaryFilePath);
    var rootReader = new RootReader();
    rootReader.subReader.Push(new GrammarDictRoot { gAnlysDict = grammarDict });
    rootReader.Read(dictionaryText);

    // Looked up before the processor is assembled, so a missing entry fails here.
    var rootAnalyzer = grammarDict.dict["RootUnit"];

    nlProcessor = new MGSyntacticProcessor
    {
        gAnalyzer = new StdGrammarAnalyzer
        {
            incrGAnalyzer = rootAnalyzer,
            analyzePreparer = FormatReader.preparer
        },
        mAnalyzer = textAnalyzer
    };
}