예제 #1
0
        /// <summary>
        /// Initializes a new instance of the <see cref="Parser"/> class from the build, attach and check models.
        /// </summary>
        /// <param name="buildModel">The build model; sizes <c>bProbs</c> and supplies the <c>DONE</c> outcome index.</param>
        /// <param name="attachModel">The attach model; sizes <c>aProbs</c> and supplies the sister/daughter attach outcome indexes.</param>
        /// <param name="checkModel">The check model; sizes <c>cProbs</c> and supplies the <c>COMPLETE</c> outcome index.</param>
        /// <param name="tagger">The pos-tagger, forwarded to the base constructor.</param>
        /// <param name="chunker">The chunker, forwarded to the base constructor.</param>
        /// <param name="headRules">The head rules, forwarded to the base constructor.</param>
        /// <param name="beamSize">The beam size, forwarded to the base constructor.</param>
        /// <param name="advancePercentage">The advance percentage, forwarded to the base constructor.</param>
        private Parser(
            IMaxentModel buildModel,
            IMaxentModel attachModel,
            IMaxentModel checkModel,
            IPOSTagger tagger,
            IChunker chunker,
            AbstractHeadRules headRules,
            int beamSize,
            double advancePercentage)
            : base(tagger, chunker, headRules, beamSize, advancePercentage)
        {
            // Build model: context generator, probability buffer and the DONE outcome.
            this.buildModel = buildModel;
            buildContextGenerator = new BuildContextGenerator();
            bProbs = new double[buildModel.GetNumOutcomes()];
            doneIndex = buildModel.GetIndex(DONE);

            // Attach model: context generator, probability buffer and the attach outcomes.
            this.attachModel = attachModel;
            attachContextGenerator = new AttachContextGenerator(punctSet);
            aProbs = new double[attachModel.GetNumOutcomes()];
            sisterAttachIndex = attachModel.GetIndex(ATTACH_SISTER);
            daughterAttachIndex = attachModel.GetIndex(ATTACH_DAUGHTER);
            // nonAttachIndex = attachModel.GetIndex(NON_ATTACH);
            attachments = new[] { daughterAttachIndex, sisterAttachIndex };

            // Check model: context generator, probability buffer and the COMPLETE outcome.
            this.checkModel = checkModel;
            checkContextGenerator = new CheckContextGenerator(punctSet);
            cProbs = new double[checkModel.GetNumOutcomes()];
            completeIndex = checkModel.GetIndex(COMPLETE);
        }
예제 #2
0
 /// <summary>
 /// Initializes a new instance of the <see cref="StyleFactory"/> class.
 /// </summary>
 /// <param name="tagger">The POS tagger to store.</param>
 /// <param name="nrcDictionary">The NRC dictionary to store.</param>
 /// <param name="frequency">The frequency list manager to store.</param>
 /// <param name="inquirer">The inquirer manager to store.</param>
 /// <exception cref="ArgumentNullException">Any of the arguments is <c>null</c>.</exception>
 public StyleFactory(IPOSTagger tagger, INRCDictionary nrcDictionary, IFrequencyListManager frequency, IInquirerManager inquirer)
 {
     // Arguments are validated in this order so the first null one reported stays the same.
     if (nrcDictionary is null)
     {
         throw new ArgumentNullException(nameof(nrcDictionary));
     }

     if (frequency is null)
     {
         throw new ArgumentNullException(nameof(frequency));
     }

     if (inquirer is null)
     {
         throw new ArgumentNullException(nameof(inquirer));
     }

     if (tagger is null)
     {
         throw new ArgumentNullException(nameof(tagger));
     }

     this.nrcDictionary = nrcDictionary;
     this.frequency = frequency;
     this.inquirer = inquirer;
     this.tagger = tagger;
 }
        /// <summary>
        /// Initializes a new instance of the <see cref="ContextSensitiveSpellingCorrection"/> class:
        /// pre-processes the corpora, round-trips the resulting sentences through <c>Sentence.xml</c>,
        /// optionally collects feature frequencies, and runs training for every confusion set in parallel.
        /// </summary>
        /// <param name="posTagger">The POS tagger to store.</param>
        /// <param name="corpora">The corpora passed to <c>PreProcessCorpora</c>.</param>
        /// <param name="confusionSets">The confusion sets; training data is generated and <c>Train</c> is called once per set.</param>
        /// <param name="prune">When <c>true</c>, feature frequencies are computed from the sentences before training.</param>
        public ContextSensitiveSpellingCorrection(IPOSTagger posTagger, IEnumerable <string> corpora, IEnumerable <string[]> confusionSets, bool prune)
        {
            _posTagger = posTagger;
            _contextFeaturesExtractor      = new ContextFeaturesExtractor(k);
            _collocationtFeaturesExtractor = new CollocationFeaturesExtractor(l);
            _statsHelper = new StatsHelper();

            // Materialize once: the sequence is needed both for sizing and for the training loop,
            // and a lazy source must not be enumerated twice.
            string[][] confusionSetArray = confusionSets.ToArray();
            _comparators = new List <Comparator>(confusionSetArray.Length);

            Sentence[] sentences = PreProcessCorpora(corpora).ToArray();

            /* processed corpus is serialized for faster results between trials */
            const string sentenceCachePath = @"Sentence.xml";
            XmlSerializer serializer = new XmlSerializer(typeof(Sentence[]));

            // FileMode.Create creates the file when it is missing and truncates it when it exists,
            // so the serialized document is never followed by stale bytes from a previous run.
            using (FileStream writeStream = new FileStream(sentenceCachePath, FileMode.Create))
            {
                serializer.Serialize(writeStream, sentences);
            }

            // The read stream is disposed as well, so the file handle is released immediately.
            using (FileStream readStream = new FileStream(sentenceCachePath, FileMode.Open, FileAccess.Read))
            {
                sentences = (Sentence[])serializer.Deserialize(readStream);
            }

            Console.WriteLine("Deserialize complete");

            var featureFrequencies = new Dictionary <string, Dictionary <string, int> >(StringComparer.OrdinalIgnoreCase);

            if (prune)
            {
                /* preprocess terms' frequencies */
                featureFrequencies = _statsHelper.GetFrequencies(sentences);
            }

            Parallel.ForEach(confusionSetArray, confusionSet =>
            {
                TrainingData output = GenerateTrainingData(sentences, prune, featureFrequencies, confusionSet);

                Train(confusionSet, output.Features.ToArray(), output.Samples);
            });
        }
예제 #4
0
파일: Parser.cs 프로젝트: qooba/SharpNL
 /// <summary>
 /// Initializes a new instance of the <see cref="Parser"/> class from the build and check models.
 /// </summary>
 /// <param name="buildModel">The build model; sizes <c>bProbs</c>, supplies the start/continue outcome labels and the <c>TOP_START</c> index.</param>
 /// <param name="checkModel">The check model; sizes <c>cProbs</c> and supplies the <c>COMPLETE</c> and <c>INCOMPLETE</c> indexes.</param>
 /// <param name="tagger">The pos-tagger, forwarded to the base constructor.</param>
 /// <param name="chunker">The chunker, forwarded to the base constructor.</param>
 /// <param name="headRules">The head rules, forwarded to the base constructor.</param>
 /// <param name="beamSize">The beam size, forwarded to the base constructor.</param>
 /// <param name="advancePercentage">The advance percentage, forwarded to the base constructor.</param>
 private Parser(IMaxentModel buildModel, IMaxentModel checkModel, IPOSTagger tagger, IChunker chunker,
                AbstractHeadRules headRules, int beamSize, double advancePercentage) :
     base(tagger, chunker, headRules, beamSize, advancePercentage)
 {
     this.buildModel = buildModel;
     this.checkModel = checkModel;

     // The outcome count is read once and reused for the buffer and the loop below.
     var buildOutcomeCount = buildModel.GetNumOutcomes();

     bProbs                = new double[buildOutcomeCount];
     cProbs                = new double[checkModel.GetNumOutcomes()];
     buildContextGenerator = new BuildContextGenerator();
     checkContextGenerator = new CheckContextGenerator();
     startTypeMap          = new Dictionary <string, string>();
     contTypeMap           = new Dictionary <string, string>();

     for (var outcomeIndex = 0; outcomeIndex < buildOutcomeCount; outcomeIndex++)
     {
         var outcome = buildModel.GetOutcome(outcomeIndex);

         // Outcome labels are machine identifiers, so the prefix test is ordinal rather than
         // culture-sensitive; each label maps to itself with the prefix removed.
         if (outcome.StartsWith(START, System.StringComparison.Ordinal))
         {
             startTypeMap[outcome] = outcome.Substring(START.Length);
         }
         else if (outcome.StartsWith(CONT, System.StringComparison.Ordinal))
         {
             contTypeMap[outcome] = outcome.Substring(CONT.Length);
         }
     }

     topStartIndex   = buildModel.GetIndex(TOP_START);
     completeIndex   = checkModel.GetIndex(COMPLETE);
     incompleteIndex = checkModel.GetIndex(INCOMPLETE);
 }
예제 #5
0
        /// <summary>
        /// Creates a <see cref="POSEvaluator"/> that evaluates the given tagger.
        /// </summary>
        /// <param name="tagger">The tagger to store; must not be <c>null</c>.</param>
        /// <param name="listeners">The evaluation monitors, passed on to the base constructor.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="tagger"/> is <c>null</c>.
        /// </exception>
        public POSEvaluator(IPOSTagger tagger, params IEvaluationMonitor<POSSample>[] listeners)
            : base(listeners) {

            // Reject a missing tagger before storing anything.
            if (tagger == null) {
                throw new ArgumentNullException(nameof(tagger));
            }

            this.tagger = tagger;
        }
예제 #6
0
        /// <summary>
        /// Determines whether a word has the given word type, checking the word's own tag first
        /// and then the tag the tagger returns for the word's text.
        /// </summary>
        /// <param name="tagger">The tagger queried when the word's own tag does not match.</param>
        /// <param name="word">The word to inspect.</param>
        /// <param name="type">The word type to look for.</param>
        /// <returns><c>true</c> if either tag has the requested word type; otherwise <c>false</c>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="tagger"/> or <paramref name="word"/> is <c>null</c>.</exception>
        public static bool IsWordType(this IPOSTagger tagger, WordEx word, WordType type)
        {
            _ = tagger ?? throw new ArgumentNullException(nameof(tagger));
            _ = word ?? throw new ArgumentNullException(nameof(word));

            // The tagger is consulted only when the word's own tag does not already match.
            if (word.Tag.WordType == type)
            {
                return true;
            }

            return tagger.GetTag(word.Text).WordType == type;
        }
예제 #7
0
 /// <summary>
 /// Initializes a new instance of the <see cref="AbstractBottomUpParser"/> class.
 /// </summary>
 /// <param name="tagger">The pos-tagger stored for the parser.</param>
 /// <param name="chunker">The chunker stored for the parser.</param>
 /// <param name="headRules">The head rules; their punctuation tags are also stored in <c>punctSet</c>.</param>
 /// <param name="beamSize">The beam size; assigned to both <c>M</c> and <c>K</c>.</param>
 /// <param name="advancePercentage">The advance percentage; assigned to <c>Q</c>.</param>
 protected AbstractBottomUpParser(
     IPOSTagger tagger,
     IChunker chunker,
     AbstractHeadRules headRules,
     int beamSize,
     double advancePercentage)
 {
     // Collaborators supplied by the caller.
     this.tagger    = tagger;
     this.chunker   = chunker;
     this.headRules = headRules;
     punctSet       = headRules.PunctuationTags;

     // Search parameters.
     M = beamSize;
     K = beamSize;
     Q = advancePercentage;
     ReportFailedParse = true;

     // The three parse heaps are all constructed with K.
     odh            = new ListHeap <Parse>(K);
     ndh            = new ListHeap <Parse>(K);
     completeParses = new ListHeap <Parse>(K);
 }
예제 #8
0
        /// <summary>
        /// Determines whether a word carries the given POS type, comparing the word's own tag first
        /// and then the tag the tagger returns for the word's text.
        /// </summary>
        /// <param name="tagger">The tagger queried when the word's own tag does not match.</param>
        /// <param name="word">The word to inspect.</param>
        /// <param name="posType">The POS type to compare against.</param>
        /// <returns><c>true</c> if either tag equals <paramref name="posType"/>; otherwise <c>false</c>.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="tagger"/>, <paramref name="word"/> or <paramref name="posType"/> is <c>null</c>.
        /// </exception>
        public static bool IsWordType(this IPOSTagger tagger, WordEx word, BasePOSType posType)
        {
            _ = tagger ?? throw new ArgumentNullException(nameof(tagger));
            _ = word ?? throw new ArgumentNullException(nameof(word));
            _ = posType ?? throw new ArgumentNullException(nameof(posType));

            // The tagger is consulted only when the word's own tag does not already match.
            if (word.Tag == posType)
            {
                return true;
            }

            return tagger.GetTag(word.Text) == posType;
        }
예제 #9
0
 /// <summary>
 /// Initializes a new instance of the <see cref="Pipeline"/> class and sets its language to English.
 /// </summary>
 /// <param name="normalizerManager">The normalizer manager.</param>
 /// <param name="pOSTagger">The part-of-speech tagger.</param>
 /// <param name="sentenceDetector">The sentence detector.</param>
 /// <param name="stemmer">The stemmer.</param>
 /// <param name="stopWordsManager">The stop words manager.</param>
 /// <param name="tokenizer">The tokenizer.</param>
 /// <param name="featureExtractor">The feature extractor.</param>
 /// <param name="textSummarizer">The text summarizer.</param>
 /// <param name="entityFinder">The entity finder.</param>
 /// <exception cref="ArgumentNullException">
 /// normalizerManager or pOSTagger or sentenceDetector or stemmer or stopWordsManager or
 /// tokenizer or featureExtractor or textSummarizer or entityFinder
 /// </exception>
 public Pipeline(
     INormalizerManager normalizerManager,
     IPOSTagger pOSTagger,
     ISentenceDetector sentenceDetector,
     IStemmer stemmer,
     IStopWordsManager stopWordsManager,
     ITokenizer tokenizer,
     IFeatureExtractor featureExtractor,
     ITextSummarizer textSummarizer,
     IEntityFinder entityFinder)
 {
     // Each component is validated and stored in turn, in declaration order.
     NormalizerManager = Require(normalizerManager, nameof(normalizerManager));
     POSTagger = Require(pOSTagger, nameof(pOSTagger));
     SentenceDetector = Require(sentenceDetector, nameof(sentenceDetector));
     Stemmer = Require(stemmer, nameof(stemmer));
     StopWordsManager = Require(stopWordsManager, nameof(stopWordsManager));
     Tokenizer = Require(tokenizer, nameof(tokenizer));
     FeatureExtractor = Require(featureExtractor, nameof(featureExtractor));
     TextSummarizer = Require(textSummarizer, nameof(textSummarizer));
     EntityFinder = Require(entityFinder, nameof(entityFinder));

     SetLanguage(Languages.English);

     // Returns the value unchanged, or throws ArgumentNullException naming the parameter.
     T Require<T>(T value, string parameterName) where T : class
     {
         return value ?? throw new ArgumentNullException(parameterName);
     }
 }
예제 #10
0
 /// <summary>
 /// Initializes a new instance of the <see cref="Parser"/> class from the build and check models.
 /// </summary>
 /// <param name="buildModel">The build model; sizes <c>bProbs</c>, supplies the start/continue outcome labels and the <c>TOP_START</c> index.</param>
 /// <param name="checkModel">The check model; sizes <c>cProbs</c> and supplies the <c>COMPLETE</c> and <c>INCOMPLETE</c> indexes.</param>
 /// <param name="tagger">The pos-tagger, forwarded to the base constructor.</param>
 /// <param name="chunker">The chunker, forwarded to the base constructor.</param>
 /// <param name="headRules">The head rules, forwarded to the base constructor.</param>
 /// <param name="beamSize">The beam size, forwarded to the base constructor.</param>
 /// <param name="advancePercentage">The advance percentage, forwarded to the base constructor.</param>
 private Parser(IMaxentModel buildModel, IMaxentModel checkModel, IPOSTagger tagger, IChunker chunker,
     AbstractHeadRules headRules, int beamSize, double advancePercentage) :
         base(tagger, chunker, headRules, beamSize, advancePercentage) {
     this.buildModel = buildModel;
     this.checkModel = checkModel;

     // The outcome count is read once and reused for the buffer and the loop below.
     var buildOutcomeCount = buildModel.GetNumOutcomes();

     bProbs = new double[buildOutcomeCount];
     cProbs = new double[checkModel.GetNumOutcomes()];
     buildContextGenerator = new BuildContextGenerator();
     checkContextGenerator = new CheckContextGenerator();
     startTypeMap = new Dictionary<string, string>();
     contTypeMap = new Dictionary<string, string>();

     for (var outcomeIndex = 0; outcomeIndex < buildOutcomeCount; outcomeIndex++) {
         var outcome = buildModel.GetOutcome(outcomeIndex);

         // Outcome labels are machine identifiers, so the prefix test is ordinal rather than
         // culture-sensitive; each label maps to itself with the prefix removed.
         if (outcome.StartsWith(START, System.StringComparison.Ordinal)) {
             startTypeMap[outcome] = outcome.Substring(START.Length);
         } else if (outcome.StartsWith(CONT, System.StringComparison.Ordinal)) {
             contTypeMap[outcome] = outcome.Substring(CONT.Length);
         }
     }

     topStartIndex = buildModel.GetIndex(TOP_START);
     completeIndex = checkModel.GetIndex(COMPLETE);
     incompleteIndex = checkModel.GetIndex(INCOMPLETE);
 }
예제 #11
0
 /// <summary>
 /// Initializes a new instance of the <see cref="AbstractBottomUpParser"/> class.
 /// </summary>
 /// <param name="tagger">The pos-tagger stored for the parser.</param>
 /// <param name="chunker">The chunker stored for the parser.</param>
 /// <param name="headRules">The head rules; their punctuation tags are also stored in <c>punctSet</c>.</param>
 /// <param name="beamSize">The beam size; assigned to both <c>M</c> and <c>K</c>.</param>
 /// <param name="advancePercentage">The advance percentage; assigned to <c>Q</c>.</param>
 protected AbstractBottomUpParser(
     IPOSTagger tagger,
     IChunker chunker,
     AbstractHeadRules headRules,
     int beamSize,
     double advancePercentage) {

     // Collaborators supplied by the caller.
     this.tagger = tagger;
     this.chunker = chunker;
     this.headRules = headRules;
     punctSet = headRules.PunctuationTags;

     // Search parameters.
     M = beamSize;
     K = beamSize;
     Q = advancePercentage;
     ReportFailedParse = true;

     // The three parse heaps are all constructed with K.
     odh = new ListHeap<Parse>(K);
     ndh = new ListHeap<Parse>(K);
     completeParses = new ListHeap<Parse>(K);
 }
예제 #12
0
        /// <summary>
        /// Initializes a new instance of the <see cref="Parser"/> class from the build, attach and check models.
        /// </summary>
        /// <param name="buildModel">The build model; sizes <c>bProbs</c> and supplies the <c>DONE</c> outcome index.</param>
        /// <param name="attachModel">The attach model; sizes <c>aProbs</c> and supplies the sister/daughter attach outcome indexes.</param>
        /// <param name="checkModel">The check model; sizes <c>cProbs</c> and supplies the <c>COMPLETE</c> outcome index.</param>
        /// <param name="tagger">The pos-tagger, forwarded to the base constructor.</param>
        /// <param name="chunker">The chunker, forwarded to the base constructor.</param>
        /// <param name="headRules">The head rules, forwarded to the base constructor.</param>
        /// <param name="beamSize">The beam size, forwarded to the base constructor.</param>
        /// <param name="advancePercentage">The advance percentage, forwarded to the base constructor.</param>
        private Parser(
            IMaxentModel buildModel,
            IMaxentModel attachModel,
            IMaxentModel checkModel,
            IPOSTagger tagger,
            IChunker chunker,
            AbstractHeadRules headRules,
            int beamSize,
            double advancePercentage)
            : base(tagger, chunker, headRules, beamSize, advancePercentage) {

            // Build model: context generator, probability buffer and the DONE outcome.
            this.buildModel = buildModel;
            buildContextGenerator = new BuildContextGenerator();
            bProbs = new double[buildModel.GetNumOutcomes()];
            doneIndex = buildModel.GetIndex(DONE);

            // Attach model: context generator, probability buffer and the attach outcomes.
            this.attachModel = attachModel;
            attachContextGenerator = new AttachContextGenerator(punctSet);
            aProbs = new double[attachModel.GetNumOutcomes()];
            sisterAttachIndex = attachModel.GetIndex(ATTACH_SISTER);
            daughterAttachIndex = attachModel.GetIndex(ATTACH_DAUGHTER);
            // nonAttachIndex = attachModel.GetIndex(NON_ATTACH);
            attachments = new[] {daughterAttachIndex, sisterAttachIndex};

            // Check model: context generator, probability buffer and the COMPLETE outcome.
            this.checkModel = checkModel;
            checkContextGenerator = new CheckContextGenerator(punctSet);
            cProbs = new double[checkModel.GetNumOutcomes()];
            completeIndex = checkModel.GetIndex(COMPLETE);
        }
예제 #13
0
        /// <summary>
        /// Initializes a new instance of the <see cref="TextBlock"/> class from a non-empty set of sentences,
        /// collecting their words and building the derived feature objects.
        /// </summary>
        /// <param name="tagger">The POS tagger passed to <see cref="SyntaxFeatures"/>.</param>
        /// <param name="inquirer">The inquirer manager passed to <c>InquirerFingerPrint</c>.</param>
        /// <param name="frequency">The frequency list manager passed to <c>VocabularyObscurity</c>.</param>
        /// <param name="sentences">The sentences of the block; must contain at least one sentence and at least one word overall.</param>
        /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
        /// <exception cref="ArgumentException"><paramref name="sentences"/> is empty, or the sentences contain no words.</exception>
        public TextBlock(IPOSTagger tagger, IInquirerManager inquirer, IFrequencyListManager frequency, SentenceItem[] sentences)
        {
            if (tagger is null)
            {
                throw new ArgumentNullException(nameof(tagger));
            }

            if (inquirer is null)
            {
                throw new ArgumentNullException(nameof(inquirer));
            }

            if (frequency is null)
            {
                throw new ArgumentNullException(nameof(frequency));
            }

            if (sentences is null)
            {
                throw new ArgumentNullException(nameof(sentences));
            }

            if (sentences.Length == 0)
            {
                throw new ArgumentException("Value cannot be an empty collection.", nameof(sentences));
            }

            Sentences   = sentences;
            Surface     = new SurfaceData(this);
            Readability = new ReadabilityDataSource(this);
            Words       = (from sentence in Sentences from word in sentence.Words select word).ToArray();
            if (Words.Length == 0)
            {
                // NOTE(review): the reported parameter name is the Words property, not a constructor
                // parameter; kept as-is so the exception text seen by callers does not change.
                throw new ArgumentException("Value cannot be an empty collection.", nameof(Words));
            }

            var pure = new List <WordEx>();

            foreach (var word in Words)
            {
                // A word is kept as "pure" when its text has letters or starts with a digit.
                if (word.Text.HasLetters() ||
                    (word.Text.Length > 0 && char.IsDigit(word.Text[0])))
                {
                    pure.Add(word);
                }

                // Words with a non-empty raw form are also added under that raw form.
                if (!string.IsNullOrEmpty(word.Raw))
                {
                    lemmaDictionary.GetSafeCreate(word.Raw).Add(word);
                }

                wordDictionary.GetSafeCreate(word.Text).Add(word);
            }

            PureWords           = pure.ToArray();
            VocabularyObscurity = new VocabularyObscurity(this, frequency);
            SyntaxFeatures      = new SyntaxFeatures(this, tagger);
            InquirerFinger      = new InquirerFingerPrint(this, inquirer);
            Sentiment           = new SentimentFeatures(this);
        }
예제 #14
0
 /// <summary>
 /// Initializes a new instance of the <see cref="Parser"/> class, forwarding every argument
 /// unchanged to the base class constructor.
 /// </summary>
 /// <param name="tagger">The pos-tagger that the parser uses.</param>
 /// <param name="chunker">The chunker that the parser uses to chunk non-recursive structures.</param>
 /// <param name="headRules">The head rules for the parser.</param>
 /// <param name="beamSize">Size of the beam.</param>
 /// <param name="advancePercentage">The advance percentage.</param>
 public Parser(IPOSTagger tagger, IChunker chunker, AbstractHeadRules headRules, int beamSize,
     double advancePercentage)
     : base(tagger, chunker, headRules, beamSize, advancePercentage) {}
예제 #15
0
파일: Parser.cs 프로젝트: qooba/SharpNL
 /// <summary>
 /// Initializes a new instance of the <see cref="Parser"/> class, forwarding every argument
 /// unchanged to the base class constructor.
 /// </summary>
 /// <param name="tagger">The pos-tagger that the parser uses.</param>
 /// <param name="chunker">The chunker that the parser uses to chunk non-recursive structures.</param>
 /// <param name="headRules">The head rules for the parser.</param>
 /// <param name="beamSize">Size of the beam.</param>
 /// <param name="advancePercentage">The advance percentage.</param>
 public Parser(IPOSTagger tagger, IChunker chunker, AbstractHeadRules headRules, int beamSize,
               double advancePercentage)
     : base(tagger, chunker, headRules, beamSize, advancePercentage)
 {
 }
예제 #16
0
 /// <summary>
 /// Initializes a new instance of the <see cref="SyntaxFeatures"/> class.
 /// </summary>
 /// <param name="text">The text block to expose through <c>Text</c>.</param>
 /// <param name="tagger">The POS tagger to store.</param>
 /// <exception cref="ArgumentNullException"><paramref name="text"/> or <paramref name="tagger"/> is <c>null</c>.</exception>
 public SyntaxFeatures(TextBlock text, IPOSTagger tagger)
 {
     if (text is null)
     {
         throw new ArgumentNullException(nameof(text));
     }

     Text = text;

     if (tagger is null)
     {
         throw new ArgumentNullException(nameof(tagger));
     }

     this.tagger = tagger;
 }
 /// <summary>
 /// Initializes a new instance of the <see cref="SimpleWordItemFactory"/> class.
 /// </summary>
 /// <param name="tagger">The POS tagger to store.</param>
 /// <param name="raw">The raw text extractor to store.</param>
 /// <exception cref="ArgumentNullException"><paramref name="tagger"/> or <paramref name="raw"/> is <c>null</c>.</exception>
 public SimpleWordItemFactory(IPOSTagger tagger, IRawTextExtractor raw)
 {
     if (tagger is null)
     {
         throw new ArgumentNullException(nameof(tagger));
     }

     if (raw is null)
     {
         throw new ArgumentNullException(nameof(raw));
     }

     this.tagger = tagger;
     this.raw = raw;
 }
 /// <summary>
 /// Initializes a new instance of the <see cref="SentenceTokenizerFactory"/> class.
 /// </summary>
 /// <param name="tagger">The POS tagger to store.</param>
 /// <param name="raw">The raw text extractor to store.</param>
 /// <exception cref="ArgumentNullException"><paramref name="tagger"/> or <paramref name="raw"/> is <c>null</c>.</exception>
 public SentenceTokenizerFactory(IPOSTagger tagger, IRawTextExtractor raw)
 {
     if (tagger is null)
     {
         throw new ArgumentNullException(nameof(tagger));
     }

     if (raw is null)
     {
         throw new ArgumentNullException(nameof(raw));
     }

     this.tagger = tagger;
     this.raw = raw;
 }
 /// <summary>
 /// Creates the POS tagger through the tagger factory and loads its model file.
 /// </summary>
 /// <returns>The result of the tagger's <c>LoadModel</c> call.</returns>
 public bool CreatePOSTagger()
 {
     Console.WriteLine("Loading POS Tagger Model. This may take few seconds.");

     _myposTagger = _myPOSTaggerFactory.Create(_posTaggerNameName);

     bool modelLoaded = _myposTagger.LoadModel(_posTaggerModelFile);
     return modelLoaded;
 }