/// <summary>
/// Initializes a parser that works with separate build, attach and check models.
/// </summary>
/// <param name="buildModel">The model queried for the build outcomes and the <c>DONE</c> index.</param>
/// <param name="attachModel">The model queried for the sister/daughter attach indexes.</param>
/// <param name="checkModel">The model queried for the <c>COMPLETE</c> index.</param>
/// <param name="tagger">The pos-tagger handed to the base parser.</param>
/// <param name="chunker">The chunker handed to the base parser.</param>
/// <param name="headRules">The head rules handed to the base parser.</param>
/// <param name="beamSize">The beam size handed to the base parser.</param>
/// <param name="advancePercentage">The advance percentage handed to the base parser.</param>
private Parser(
    IMaxentModel buildModel,
    IMaxentModel attachModel,
    IMaxentModel checkModel,
    IPOSTagger tagger,
    IChunker chunker,
    AbstractHeadRules headRules,
    int beamSize,
    double advancePercentage)
    : base(tagger, chunker, headRules, beamSize, advancePercentage)
{
    // Build model: context generator, probability buffer and the "done" outcome.
    this.buildModel = buildModel;
    buildContextGenerator = new BuildContextGenerator();
    bProbs = new double[buildModel.GetNumOutcomes()];
    doneIndex = buildModel.GetIndex(DONE);

    // Attach model: context generator, probability buffer and the attachment outcomes.
    this.attachModel = attachModel;
    attachContextGenerator = new AttachContextGenerator(punctSet);
    aProbs = new double[attachModel.GetNumOutcomes()];
    sisterAttachIndex = attachModel.GetIndex(ATTACH_SISTER);
    daughterAttachIndex = attachModel.GetIndex(ATTACH_DAUGHTER);
    // nonAttachIndex = attachModel.GetIndex(NON_ATTACH);
    attachments = new[] { daughterAttachIndex, sisterAttachIndex };

    // Check model: context generator, probability buffer and the "complete" outcome.
    this.checkModel = checkModel;
    checkContextGenerator = new CheckContextGenerator(punctSet);
    cProbs = new double[checkModel.GetNumOutcomes()];
    completeIndex = checkModel.GetIndex(COMPLETE);
}
/// <summary>
/// Initializes a new <see cref="StyleFactory"/> from its four required services.
/// </summary>
/// <param name="tagger">The part-of-speech tagger.</param>
/// <param name="nrcDictionary">The NRC dictionary.</param>
/// <param name="frequency">The frequency list manager.</param>
/// <param name="inquirer">The inquirer manager.</param>
/// <exception cref="ArgumentNullException">Thrown when any argument is <c>null</c>.</exception>
public StyleFactory(IPOSTagger tagger, INRCDictionary nrcDictionary, IFrequencyListManager frequency, IInquirerManager inquirer)
{
    // Validation order matches the order in which the fields are populated.
    if (nrcDictionary is null)
    {
        throw new ArgumentNullException(nameof(nrcDictionary));
    }

    if (frequency is null)
    {
        throw new ArgumentNullException(nameof(frequency));
    }

    if (inquirer is null)
    {
        throw new ArgumentNullException(nameof(inquirer));
    }

    if (tagger is null)
    {
        throw new ArgumentNullException(nameof(tagger));
    }

    this.nrcDictionary = nrcDictionary;
    this.frequency = frequency;
    this.inquirer = inquirer;
    this.tagger = tagger;
}
/// <summary>
/// Initializes the corrector: pre-processes the corpora, round-trips the processed sentences
/// through <c>Sentence.xml</c>, optionally collects feature frequencies and then trains on
/// every confusion set in parallel.
/// </summary>
/// <param name="posTagger">The part-of-speech tagger stored on this instance.</param>
/// <param name="corpora">The corpora passed to <c>PreProcessCorpora</c>.</param>
/// <param name="confusionSets">The confusion sets; one training run is started per set.</param>
/// <param name="prune">When <c>true</c>, feature frequencies are computed up front and handed to training-data generation.</param>
public ContextSensitiveSpellingCorrection(IPOSTagger posTagger, IEnumerable<string> corpora, IEnumerable<string[]> confusionSets, bool prune)
{
    _posTagger = posTagger;
    _contextFeaturesExtractor = new ContextFeaturesExtractor(k);
    _collocationtFeaturesExtractor = new CollocationFeaturesExtractor(l);
    _statsHelper = new StatsHelper();
    _comparators = new List<Comparator>(confusionSets.Count());

    Sentence[] sentences = PreProcessCorpora(corpora).ToArray();

    /* processed corpus was serialized for faster results between trials */
    XmlSerializer x = new XmlSerializer(typeof(Sentence[]));

    // FileMode.Create (not Open): the file may not exist yet, and an existing, longer file
    // must be truncated so no stale bytes trail the freshly written document.
    using (FileStream writeStream = new FileStream(@"Sentence.xml", FileMode.Create))
    {
        x.Serialize(writeStream, sentences);
    }

    // Dispose the read handle as well; previously it was left open.
    using (FileStream readStream = new FileStream(@"Sentence.xml", FileMode.Open))
    {
        sentences = (Sentence[])x.Deserialize(readStream);
    }

    Console.WriteLine("Deserialize complete");

    var featureFrequencies = new Dictionary<string, Dictionary<string, int>>(StringComparer.OrdinalIgnoreCase);
    if (prune)
    {
        /* preprocess terms' frequencies */
        featureFrequencies = _statsHelper.GetFrequencies(sentences);
    }

    Parallel.ForEach(confusionSets, confusionSet =>
    {
        TrainingData output = GenerateTrainingData(sentences, prune, featureFrequencies, confusionSet);
        Train(confusionSet, output.Features.ToArray(), output.Samples);
    });
}
/// <summary>
/// Initializes a parser that works with a build model and a check model.
/// </summary>
/// <param name="buildModel">The model whose outcomes are scanned for start/continue labels.</param>
/// <param name="checkModel">The model queried for the complete/incomplete indexes.</param>
/// <param name="tagger">The pos-tagger handed to the base parser.</param>
/// <param name="chunker">The chunker handed to the base parser.</param>
/// <param name="headRules">The head rules handed to the base parser.</param>
/// <param name="beamSize">The beam size handed to the base parser.</param>
/// <param name="advancePercentage">The advance percentage handed to the base parser.</param>
private Parser(IMaxentModel buildModel, IMaxentModel checkModel, IPOSTagger tagger, IChunker chunker, AbstractHeadRules headRules, int beamSize, double advancePercentage)
    : base(tagger, chunker, headRules, beamSize, advancePercentage)
{
    this.buildModel = buildModel;
    this.checkModel = checkModel;

    bProbs = new double[buildModel.GetNumOutcomes()];
    cProbs = new double[checkModel.GetNumOutcomes()];

    buildContextGenerator = new BuildContextGenerator();
    checkContextGenerator = new CheckContextGenerator();

    startTypeMap = new Dictionary<string, string>();
    contTypeMap = new Dictionary<string, string>();

    // Map every start/continue outcome to the label that follows its prefix.
    for (int boi = 0, bon = buildModel.GetNumOutcomes(); boi < bon; boi++)
    {
        var outcome = buildModel.GetOutcome(boi);

        // Ordinal comparison: Substring(prefix.Length) below is only correct when the
        // prefix matched character-for-character, which a culture-sensitive StartsWith
        // does not guarantee.
        if (outcome.StartsWith(START, System.StringComparison.Ordinal))
        {
            startTypeMap[outcome] = outcome.Substring(START.Length);
        }
        else if (outcome.StartsWith(CONT, System.StringComparison.Ordinal))
        {
            contTypeMap[outcome] = outcome.Substring(CONT.Length);
        }
    }

    topStartIndex = buildModel.GetIndex(TOP_START);
    completeIndex = checkModel.GetIndex(COMPLETE);
    incompleteIndex = checkModel.GetIndex(INCOMPLETE);
}
/// <summary>
/// Creates a <see cref="POSEvaluator"/> bound to the given tagger.
/// </summary>
/// <param name="tagger">The tagger stored on this evaluator; must not be <c>null</c>.</param>
/// <param name="listeners">Evaluation monitors forwarded to the base constructor.</param>
/// <exception cref="System.ArgumentNullException">
/// Thrown when <paramref name="tagger"/> is <c>null</c>.
/// </exception>
public POSEvaluator(IPOSTagger tagger, params IEvaluationMonitor<POSSample>[] listeners)
    : base(listeners)
{
    if (ReferenceEquals(tagger, null))
    {
        throw new ArgumentNullException("tagger");
    }

    this.tagger = tagger;
}
/// <summary>
/// Tells whether a word has the requested word type, looking first at the tag already
/// attached to the word and then at the tag the tagger returns for the word's text.
/// </summary>
/// <param name="tagger">The tagger consulted when the word's own tag does not match.</param>
/// <param name="word">The word to inspect.</param>
/// <param name="type">The word type to look for.</param>
/// <returns><c>true</c> when either tag carries <paramref name="type"/>; otherwise <c>false</c>.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="tagger"/> or <paramref name="word"/> is <c>null</c>.</exception>
public static bool IsWordType(this IPOSTagger tagger, WordEx word, WordType type)
{
    if (tagger is null)
    {
        throw new ArgumentNullException(nameof(tagger));
    }

    if (word is null)
    {
        throw new ArgumentNullException(nameof(word));
    }

    // The tagger is only queried when the word's own tag is not a match.
    if (word.Tag.WordType == type)
    {
        return true;
    }

    return tagger.GetTag(word.Text).WordType == type;
}
/// <summary>
/// Sets up the shared state of an <see cref="AbstractBottomUpParser"/>.
/// </summary>
/// <param name="tagger">The pos-tagger the parser will use.</param>
/// <param name="chunker">The chunker the parser will use for non-recursive structures.</param>
/// <param name="headRules">The head rules; also the source of the punctuation tag set.</param>
/// <param name="beamSize">The beam size; stored in both <c>M</c> and <c>K</c>.</param>
/// <param name="advancePercentage">The advance percentage; stored in <c>Q</c>.</param>
protected AbstractBottomUpParser(IPOSTagger tagger, IChunker chunker, AbstractHeadRules headRules, int beamSize, double advancePercentage)
{
    // Collaborators.
    this.tagger = tagger;
    this.chunker = chunker;

    // Search parameters.
    M = beamSize;
    K = beamSize;
    Q = advancePercentage;
    ReportFailedParse = true;

    // Head rules and the punctuation tags they expose.
    this.headRules = headRules;
    punctSet = headRules.PunctuationTags;

    // All three heaps are bounded by K.
    odh = new ListHeap <Parse>(K);
    ndh = new ListHeap <Parse>(K);
    completeParses = new ListHeap <Parse>(K);
}
/// <summary>
/// Tells whether a word carries the requested part-of-speech type, looking first at the tag
/// already attached to the word and then at the tag the tagger returns for the word's text.
/// </summary>
/// <param name="tagger">The tagger consulted when the word's own tag does not match.</param>
/// <param name="word">The word to inspect.</param>
/// <param name="posType">The part-of-speech type to look for.</param>
/// <returns><c>true</c> when either tag equals <paramref name="posType"/>; otherwise <c>false</c>.</returns>
/// <exception cref="ArgumentNullException">Thrown when any argument is <c>null</c>.</exception>
public static bool IsWordType(this IPOSTagger tagger, WordEx word, BasePOSType posType)
{
    if (tagger is null)
    {
        throw new ArgumentNullException(nameof(tagger));
    }

    if (word is null)
    {
        throw new ArgumentNullException(nameof(word));
    }

    if (posType is null)
    {
        throw new ArgumentNullException(nameof(posType));
    }

    // The tagger is only queried when the word's own tag is not a match.
    if (word.Tag == posType)
    {
        return true;
    }

    return tagger.GetTag(word.Text) == posType;
}
/// <summary>
/// Creates a <see cref="Pipeline"/> from its processing components and selects English as the language.
/// </summary>
/// <param name="normalizerManager">The normalizer manager.</param>
/// <param name="pOSTagger">The part-of-speech tagger.</param>
/// <param name="sentenceDetector">The sentence detector.</param>
/// <param name="stemmer">The stemmer.</param>
/// <param name="stopWordsManager">The stop words manager.</param>
/// <param name="tokenizer">The tokenizer.</param>
/// <param name="featureExtractor">The feature extractor.</param>
/// <param name="textSummarizer">The text summarizer.</param>
/// <param name="entityFinder">The entity finder.</param>
/// <exception cref="ArgumentNullException">
/// normalizerManager or pOSTagger or sentenceDetector or stemmer or stopWordsManager or
/// tokenizer or featureExtractor or textSummarizer or entityFinder
/// </exception>
public Pipeline(
    INormalizerManager normalizerManager,
    IPOSTagger pOSTagger,
    ISentenceDetector sentenceDetector,
    IStemmer stemmer,
    IStopWordsManager stopWordsManager,
    ITokenizer tokenizer,
    IFeatureExtractor featureExtractor,
    ITextSummarizer textSummarizer,
    IEntityFinder entityFinder)
{
    // Each component is validated immediately before it is stored, in parameter order.
    if (normalizerManager is null)
    {
        throw new ArgumentNullException(nameof(normalizerManager));
    }

    NormalizerManager = normalizerManager;

    if (pOSTagger is null)
    {
        throw new ArgumentNullException(nameof(pOSTagger));
    }

    POSTagger = pOSTagger;

    if (sentenceDetector is null)
    {
        throw new ArgumentNullException(nameof(sentenceDetector));
    }

    SentenceDetector = sentenceDetector;

    if (stemmer is null)
    {
        throw new ArgumentNullException(nameof(stemmer));
    }

    Stemmer = stemmer;

    if (stopWordsManager is null)
    {
        throw new ArgumentNullException(nameof(stopWordsManager));
    }

    StopWordsManager = stopWordsManager;

    if (tokenizer is null)
    {
        throw new ArgumentNullException(nameof(tokenizer));
    }

    Tokenizer = tokenizer;

    if (featureExtractor is null)
    {
        throw new ArgumentNullException(nameof(featureExtractor));
    }

    FeatureExtractor = featureExtractor;

    if (textSummarizer is null)
    {
        throw new ArgumentNullException(nameof(textSummarizer));
    }

    TextSummarizer = textSummarizer;

    if (entityFinder is null)
    {
        throw new ArgumentNullException(nameof(entityFinder));
    }

    EntityFinder = entityFinder;

    SetLanguage(Languages.English);
}
/// <summary>
/// Initializes a parser that works with a build model and a check model.
/// </summary>
/// <param name="buildModel">The model whose outcomes are scanned for start/continue labels.</param>
/// <param name="checkModel">The model queried for the complete/incomplete indexes.</param>
/// <param name="tagger">The pos-tagger handed to the base parser.</param>
/// <param name="chunker">The chunker handed to the base parser.</param>
/// <param name="headRules">The head rules handed to the base parser.</param>
/// <param name="beamSize">The beam size handed to the base parser.</param>
/// <param name="advancePercentage">The advance percentage handed to the base parser.</param>
private Parser(IMaxentModel buildModel, IMaxentModel checkModel, IPOSTagger tagger, IChunker chunker, AbstractHeadRules headRules, int beamSize, double advancePercentage)
    : base(tagger, chunker, headRules, beamSize, advancePercentage)
{
    this.buildModel = buildModel;
    this.checkModel = checkModel;

    bProbs = new double[buildModel.GetNumOutcomes()];
    cProbs = new double[checkModel.GetNumOutcomes()];

    buildContextGenerator = new BuildContextGenerator();
    checkContextGenerator = new CheckContextGenerator();

    startTypeMap = new Dictionary<string, string>();
    contTypeMap = new Dictionary<string, string>();

    // Map every start/continue outcome to the label that follows its prefix.
    for (int boi = 0, bon = buildModel.GetNumOutcomes(); boi < bon; boi++)
    {
        var outcome = buildModel.GetOutcome(boi);

        // Ordinal comparison: Substring(prefix.Length) below is only correct when the
        // prefix matched character-for-character, which a culture-sensitive StartsWith
        // does not guarantee.
        if (outcome.StartsWith(START, System.StringComparison.Ordinal))
        {
            startTypeMap[outcome] = outcome.Substring(START.Length);
        }
        else if (outcome.StartsWith(CONT, System.StringComparison.Ordinal))
        {
            contTypeMap[outcome] = outcome.Substring(CONT.Length);
        }
    }

    topStartIndex = buildModel.GetIndex(TOP_START);
    completeIndex = checkModel.GetIndex(COMPLETE);
    incompleteIndex = checkModel.GetIndex(INCOMPLETE);
}
/// <summary>
/// Sets up the shared state of an <see cref="AbstractBottomUpParser"/>.
/// </summary>
/// <param name="tagger">The pos-tagger the parser will use.</param>
/// <param name="chunker">The chunker the parser will use for non-recursive structures.</param>
/// <param name="headRules">The head rules; also the source of the punctuation tag set.</param>
/// <param name="beamSize">The beam size; stored in both <c>M</c> and <c>K</c>.</param>
/// <param name="advancePercentage">The advance percentage; stored in <c>Q</c>.</param>
protected AbstractBottomUpParser(IPOSTagger tagger, IChunker chunker, AbstractHeadRules headRules, int beamSize, double advancePercentage)
{
    // Collaborators.
    this.tagger = tagger;
    this.chunker = chunker;

    // Search parameters.
    M = beamSize;
    K = beamSize;
    Q = advancePercentage;
    ReportFailedParse = true;

    // Head rules and the punctuation tags they expose.
    this.headRules = headRules;
    punctSet = headRules.PunctuationTags;

    // All three heaps are bounded by K.
    odh = new ListHeap<Parse>(K);
    ndh = new ListHeap<Parse>(K);
    completeParses = new ListHeap<Parse>(K);
}
/// <summary>
/// Initializes a parser that works with separate build, attach and check models.
/// </summary>
/// <param name="buildModel">The model queried for the build outcomes and the <c>DONE</c> index.</param>
/// <param name="attachModel">The model queried for the sister/daughter attach indexes.</param>
/// <param name="checkModel">The model queried for the <c>COMPLETE</c> index.</param>
/// <param name="tagger">The pos-tagger handed to the base parser.</param>
/// <param name="chunker">The chunker handed to the base parser.</param>
/// <param name="headRules">The head rules handed to the base parser.</param>
/// <param name="beamSize">The beam size handed to the base parser.</param>
/// <param name="advancePercentage">The advance percentage handed to the base parser.</param>
private Parser(
    IMaxentModel buildModel,
    IMaxentModel attachModel,
    IMaxentModel checkModel,
    IPOSTagger tagger,
    IChunker chunker,
    AbstractHeadRules headRules,
    int beamSize,
    double advancePercentage)
    : base(tagger, chunker, headRules, beamSize, advancePercentage)
{
    // Build model: context generator, probability buffer and the "done" outcome.
    this.buildModel = buildModel;
    buildContextGenerator = new BuildContextGenerator();
    bProbs = new double[buildModel.GetNumOutcomes()];
    doneIndex = buildModel.GetIndex(DONE);

    // Attach model: context generator, probability buffer and the attachment outcomes.
    this.attachModel = attachModel;
    attachContextGenerator = new AttachContextGenerator(punctSet);
    aProbs = new double[attachModel.GetNumOutcomes()];
    sisterAttachIndex = attachModel.GetIndex(ATTACH_SISTER);
    daughterAttachIndex = attachModel.GetIndex(ATTACH_DAUGHTER);
    // nonAttachIndex = attachModel.GetIndex(NON_ATTACH);
    attachments = new[] {daughterAttachIndex, sisterAttachIndex};

    // Check model: context generator, probability buffer and the "complete" outcome.
    this.checkModel = checkModel;
    checkContextGenerator = new CheckContextGenerator(punctSet);
    cProbs = new double[checkModel.GetNumOutcomes()];
    completeIndex = checkModel.GetIndex(COMPLETE);
}
/// <summary>
/// Initializes a new <see cref="TextBlock"/> from a non-empty set of sentences: flattens the
/// sentences into words, indexes the words by raw form and by text, and creates the
/// feature objects attached to this block.
/// </summary>
/// <param name="tagger">The part-of-speech tagger passed to the syntax features.</param>
/// <param name="inquirer">The inquirer manager passed to the inquirer fingerprint.</param>
/// <param name="frequency">The frequency list manager passed to the vocabulary obscurity feature.</param>
/// <param name="sentences">The sentences that make up the block; must contain at least one item.</param>
/// <exception cref="ArgumentNullException">Thrown when any argument is <c>null</c>.</exception>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="sentences"/> is empty or when the sentences contain no words.
/// </exception>
public TextBlock(IPOSTagger tagger, IInquirerManager inquirer, IFrequencyListManager frequency, SentenceItem[] sentences)
{
    if (tagger is null)
    {
        throw new ArgumentNullException(nameof(tagger));
    }

    if (inquirer is null)
    {
        throw new ArgumentNullException(nameof(inquirer));
    }

    if (frequency is null)
    {
        throw new ArgumentNullException(nameof(frequency));
    }

    if (sentences is null)
    {
        throw new ArgumentNullException(nameof(sentences));
    }

    if (sentences.Length == 0)
    {
        throw new ArgumentException("Value cannot be an empty collection.", nameof(sentences));
    }

    Sentences = sentences;
    Surface = new SurfaceData(this);
    Readability = new ReadabilityDataSource(this);

    // Flatten every sentence's words into one array, preserving sentence order.
    Words = Sentences.SelectMany(sentence => sentence.Words).ToArray();
    if (Words.Length == 0)
    {
        // NOTE(review): the reported parameter name is the Words property, not a constructor
        // parameter; kept unchanged so callers inspecting ParamName are unaffected.
        throw new ArgumentException("Value cannot be an empty collection.", nameof(Words));
    }

    var pure = new List<WordEx>();
    foreach (var word in Words)
    {
        // "Pure" words either contain letters or start with a digit.
        if (word.Text.HasLetters() || (word.Text.Length > 0 && char.IsDigit(word.Text[0])))
        {
            pure.Add(word);
        }

        // Words without a raw form are left out of the lemma index.
        if (!string.IsNullOrEmpty(word.Raw))
        {
            lemmaDictionary.GetSafeCreate(word.Raw).Add(word);
        }

        wordDictionary.GetSafeCreate(word.Text).Add(word);
    }

    PureWords = pure.ToArray();
    VocabularyObscurity = new VocabularyObscurity(this, frequency);
    SyntaxFeatures = new SyntaxFeatures(this, tagger);
    InquirerFinger = new InquirerFingerPrint(this, inquirer);
    Sentiment = new SentimentFeatures(this);
}
/// <summary>
/// Initializes a new instance of the <see cref="Parser"/> by forwarding every argument
/// to the base parser constructor.
/// </summary>
/// <param name="tagger">The pos-tagger that the parser uses.</param>
/// <param name="chunker">The chunker that the parser uses to chunk non-recursive structures.</param>
/// <param name="headRules">The head rules for the parser.</param>
/// <param name="beamSize">Size of the beam.</param>
/// <param name="advancePercentage">The advance percentage.</param>
public Parser(
    IPOSTagger tagger,
    IChunker chunker,
    AbstractHeadRules headRules,
    int beamSize,
    double advancePercentage)
    : base(tagger, chunker, headRules, beamSize, advancePercentage)
{
}
/// <summary>
/// Initializes a new instance of the <see cref="Parser"/> by forwarding every argument
/// to the base parser constructor.
/// </summary>
/// <param name="tagger">The pos-tagger that the parser uses.</param>
/// <param name="chunker">The chunker that the parser uses to chunk non-recursive structures.</param>
/// <param name="headRules">The head rules for the parser.</param>
/// <param name="beamSize">Size of the beam.</param>
/// <param name="advancePercentage">The advance percentage.</param>
public Parser(
    IPOSTagger tagger,
    IChunker chunker,
    AbstractHeadRules headRules,
    int beamSize,
    double advancePercentage)
    : base(tagger, chunker, headRules, beamSize, advancePercentage)
{
}
/// <summary>
/// Initializes a new <see cref="SyntaxFeatures"/> for a text block and a tagger.
/// </summary>
/// <param name="text">The text block exposed through <c>Text</c>.</param>
/// <param name="tagger">The part-of-speech tagger stored on this instance.</param>
/// <exception cref="ArgumentNullException">Thrown when either argument is <c>null</c>.</exception>
public SyntaxFeatures(TextBlock text, IPOSTagger tagger)
{
    if (text is null)
    {
        throw new ArgumentNullException(nameof(text));
    }

    Text = text;

    if (tagger is null)
    {
        throw new ArgumentNullException(nameof(tagger));
    }

    this.tagger = tagger;
}
/// <summary>
/// Initializes a new <see cref="SimpleWordItemFactory"/> from a tagger and a raw text extractor.
/// </summary>
/// <param name="tagger">The part-of-speech tagger stored on this instance.</param>
/// <param name="raw">The raw text extractor stored on this instance.</param>
/// <exception cref="ArgumentNullException">Thrown when either argument is <c>null</c>.</exception>
public SimpleWordItemFactory(IPOSTagger tagger, IRawTextExtractor raw)
{
    if (tagger is null)
    {
        throw new ArgumentNullException(nameof(tagger));
    }

    if (raw is null)
    {
        throw new ArgumentNullException(nameof(raw));
    }

    this.tagger = tagger;
    this.raw = raw;
}
/// <summary>
/// Initializes a new <see cref="SentenceTokenizerFactory"/> from a tagger and a raw text extractor.
/// </summary>
/// <param name="tagger">The part-of-speech tagger stored on this instance.</param>
/// <param name="raw">The raw text extractor stored on this instance.</param>
/// <exception cref="ArgumentNullException">Thrown when either argument is <c>null</c>.</exception>
public SentenceTokenizerFactory(IPOSTagger tagger, IRawTextExtractor raw)
{
    if (tagger is null)
    {
        throw new ArgumentNullException(nameof(tagger));
    }

    if (raw is null)
    {
        throw new ArgumentNullException(nameof(raw));
    }

    this.tagger = tagger;
    this.raw = raw;
}
/// <summary>
/// Creates the POS tagger through the tagger factory and loads its model file.
/// </summary>
/// <returns>The value returned by the tagger's <c>LoadModel</c> call.</returns>
public bool CreatePOSTagger()
{
    Console.WriteLine("Loading POS Tagger Model. This may take few seconds.");

    // Create the tagger by name, then hand back the outcome of loading its model.
    _myposTagger = _myPOSTaggerFactory.Create(_posTaggerNameName);
    var modelLoaded = _myposTagger.LoadModel(_posTaggerModelFile);
    return modelLoaded;
}