Beispiel #1
0
        /// <summary>
        /// Initializes a new instance of the <see cref="Dedup1024KHashAlgorithm"/> class.
        /// </summary>
        /// <param name="chunker">
        /// The chunker forwarded to the base constructor. Its configured average chunk size is compared
        /// with the value returned by <c>TargetHashType.GetAvgChunkSize()</c>.
        /// </param>
        public Dedup1024KHashAlgorithm(IChunker chunker)
            : base(chunker)
        {
            int expectedAvgChunkSize = TargetHashType.GetAvgChunkSize();

            // Because of the null-conditional call, Assert (and the formatting of its message) only runs
            // when Contract.Check returns a non-null value.
            Contract.Check(chunker.Configuration.AvgChunkSize == expectedAvgChunkSize)?.Assert($"Invalid average chunk size (in bytes) specified: {chunker.Configuration.AvgChunkSize} expected: {expectedAvgChunkSize}");
        }
Beispiel #2
0
        /// <summary>
        /// Initializes a new instance of the <see cref="Parser"/> class from the build, attach and check models.
        /// </summary>
        /// <param name="buildModel">The model used to size <c>bProbs</c> and to look up the <c>DONE</c> outcome index.</param>
        /// <param name="attachModel">The model used to size <c>aProbs</c> and to look up the attach outcome indexes.</param>
        /// <param name="checkModel">The model used to size <c>cProbs</c> and to look up the <c>COMPLETE</c> outcome index.</param>
        /// <param name="tagger">Forwarded to the base constructor.</param>
        /// <param name="chunker">Forwarded to the base constructor.</param>
        /// <param name="headRules">Forwarded to the base constructor.</param>
        /// <param name="beamSize">Forwarded to the base constructor.</param>
        /// <param name="advancePercentage">Forwarded to the base constructor.</param>
        private Parser(
            IMaxentModel buildModel,
            IMaxentModel attachModel,
            IMaxentModel checkModel,
            IPOSTagger tagger,
            IChunker chunker,
            AbstractHeadRules headRules,
            int beamSize,
            double advancePercentage)
            : base(tagger, chunker, headRules, beamSize, advancePercentage)
        {
            // State derived from the build model.
            this.buildModel = buildModel;
            buildContextGenerator = new BuildContextGenerator();
            bProbs = new double[buildModel.GetNumOutcomes()];
            doneIndex = buildModel.GetIndex(DONE);

            // State derived from the attach model.
            this.attachModel = attachModel;
            attachContextGenerator = new AttachContextGenerator(punctSet);
            aProbs = new double[attachModel.GetNumOutcomes()];
            sisterAttachIndex = attachModel.GetIndex(ATTACH_SISTER);
            daughterAttachIndex = attachModel.GetIndex(ATTACH_DAUGHTER);
            // The NON_ATTACH lookup is left disabled:
            // nonAttachIndex = attachModel.GetIndex(NON_ATTACH);
            attachments = new[] { daughterAttachIndex, sisterAttachIndex };

            // State derived from the check model.
            this.checkModel = checkModel;
            checkContextGenerator = new CheckContextGenerator(punctSet);
            cProbs = new double[checkModel.GetNumOutcomes()];
            completeIndex = checkModel.GetIndex(COMPLETE);
        }
Beispiel #3
0
        public void Setup() {
            // Train a chunker model on the sample stream with 70 iterations and a cutoff of 1,
            // then wrap the resulting model in the chunker under test.
            var trainingParameters = new TrainingParameters();
            trainingParameters.Set(Parameters.Iterations, "70");
            trainingParameters.Set(Parameters.Cutoff, "1");

            chunker = new ChunkerME(
                ChunkerME.Train("en", CreateSampleStream(), trainingParameters, new ChunkerFactory()));
        }
Beispiel #4
0
        /// <summary>
        /// Initializes a new instance of the <see cref="ChunkerAnalyzer" /> class with the given chunker and weight.
        /// </summary>
        /// <param name="chunker">The chunker assigned to <c>Chunker</c>; must not be <c>null</c>.</param>
        /// <param name="weight">The analyzer weight, forwarded to the base constructor.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="chunker"/> is <c>null</c>.</exception>
        public ChunkerAnalyzer(IChunker chunker, float weight)
            : base(weight) {

            if (chunker == null) {
                throw new ArgumentNullException("chunker");
            }

            Chunker = chunker;
        }
Beispiel #5
0
        /// <summary>
        /// Initializes a new instance of the <see cref="NLPToolsController"/> class, creating the sentence
        /// detector, tokenizer, POS tagger and phrase chunker from their <c>.nbin</c> model files.
        /// </summary>
        public NLPToolsController()
        {
            // NOTE(review): absolute, machine-specific directory; the models only load where this path exists.
            const string modelDirectory = @"C:\Users\Garrett\Documents\Visual Studio 2015\Projects\MindysTermExtractionLibrary\src\sharpnlp-nbin-files\";

            sentenceDetector = new EnglishMaximumEntropySentenceDetector(modelDirectory + "EnglishSD.nbin");
            tokenizer = new EnglishMaximumEntropyTokenizer(modelDirectory + "EnglishTok.nbin");
            posTagger = new EnglishMaximumEntropyPosTagger(modelDirectory + "EnglishPOS.nbin");
            phraseChunker = new EnglishTreebankChunker(modelDirectory + "EnglishChunk.nbin");
        }
 /// <summary>
 /// Initializes a new instance of the <see cref="DedupNodeOrChunkHashAlgorithm"/> class around the given chunker.
 /// </summary>
 /// <param name="chunker">The chunker to store; its configuration must pass <c>ChunkerConfiguration.IsValidChunkSize</c>.</param>
 /// <exception cref="NotImplementedException">The chunker's configuration is rejected by <c>ChunkerConfiguration.IsValidChunkSize</c>.</exception>
 public DedupNodeOrChunkHashAlgorithm(IChunker chunker)
 {
     bool chunkSizeSupported = ChunkerConfiguration.IsValidChunkSize(chunker.Configuration);
     if (!chunkSizeSupported)
     {
         throw new NotImplementedException($"Unsupported chunk size specified: {chunker.Configuration.AvgChunkSize} in bytes.");
     }

     _chunker = chunker;

     Initialize();
 }
Beispiel #7
0
        /// <summary>
        /// Creates a <see cref="ChunkerAnalyzer" /> that uses the supplied chunker and analyzer weight.
        /// </summary>
        /// <param name="chunker">The chunker assigned to <c>Chunker</c>; a <c>null</c> value is rejected.</param>
        /// <param name="weight">The analyzer weight handed to the base constructor.</param>
        /// <exception cref="System.ArgumentNullException">Thrown when <paramref name="chunker"/> is <c>null</c>.</exception>
        public ChunkerAnalyzer(IChunker chunker, float weight)
            : base(weight)
        {
            // Reject a missing chunker before storing it.
            if (chunker == null)
            {
                throw new ArgumentNullException("chunker");
            }

            Chunker = chunker;
        }
Beispiel #8
0
            /// <summary>
            /// Creates a session for the given chunker and callback, taking one pooled value from
            /// <c>PushBufferPool</c> and one from <c>ChunksSeenPool</c>.
            /// </summary>
            /// <param name="chunker">The chunker stored in <c>_chunker</c>.</param>
            /// <param name="callback">The callback stored in <c>_callback</c>.</param>
            public Session(IChunker chunker, Action<ChunkInfo> callback)
            {
                _chunker = chunker;
                _callback = callback;

                // Keep both the pool handle and the value it wraps.
                _pushBufferHandle = PushBufferPool.Get();
                _pushBuffer = _pushBufferHandle.Value;

                _chunksSeenHandle = ChunksSeenPool.Get();
                _chunksSeen = _chunksSeenHandle.Value;
            }
Beispiel #9
0
        public void Setup()
        {
            // Training settings: 70 iterations, cutoff 1.
            var settings = new TrainingParameters();
            settings.Set(Parameters.Iterations, "70");
            settings.Set(Parameters.Cutoff, "1");

            // Train on the sample stream and keep a chunker built from the trained model.
            var trainedModel = ChunkerME.Train("en", CreateSampleStream(), settings, new ChunkerFactory());
            chunker = new ChunkerME(trainedModel);
        }
Beispiel #10
0
 /// <summary>
 /// Initializes the state of an <see cref="AbstractBottomUpParser"/>: collaborators, beam settings and heaps.
 /// </summary>
 /// <param name="tagger">The pos-tagger that the parser uses.</param>
 /// <param name="chunker">The chunker that the parser uses to chunk non-recursive structures.</param>
 /// <param name="headRules">The head rules for the parser; also the source of the punctuation tag set.</param>
 /// <param name="beamSize">The beam size, assigned to both <c>M</c> and <c>K</c>.</param>
 /// <param name="advancePercentage">The advance percentage, assigned to <c>Q</c>.</param>
 protected AbstractBottomUpParser(
     IPOSTagger tagger,
     IChunker chunker,
     AbstractHeadRules headRules,
     int beamSize,
     double advancePercentage)
 {
     // Collaborators.
     this.tagger = tagger;
     this.chunker = chunker;
     this.headRules = headRules;
     punctSet = headRules.PunctuationTags;

     // Search settings.
     M = beamSize;
     K = beamSize;
     Q = advancePercentage;
     ReportFailedParse = true;

     // All three heaps are created with K as their constructor argument.
     odh = new ListHeap<Parse>(K);
     ndh = new ListHeap<Parse>(K);
     completeParses = new ListHeap<Parse>(K);
 }
Beispiel #11
0
        public void Setup() {
            // The same settings (70 iterations, cutoff 1) are given to both implementations.
            var sharpParams = new TrainingParameters();
            sharpParams.Set(Parameters.Iterations, "70");
            sharpParams.Set(Parameters.Cutoff, "1");

            var javaParams = new opennlp.tools.util.TrainingParameters();
            javaParams.put("Iterations", "70");
            javaParams.put("Cutoff", "1");

            var sharpModel = ChunkerME.Train(
                "en",
                ChunkerMETest.CreateSampleStream(),
                sharpParams,
                new ChunkerFactory());

            var javaModel = opennlp.tools.chunker.ChunkerME.train(
                "en",
                JavaSampleStream(),
                javaParams,
                new opennlp.tools.chunker.ChunkerFactory());

            // Both trainings must produce a model before the chunkers are built.
            Assert.NotNull(sharpModel);
            Assert.NotNull(javaModel);

            sChunker = new ChunkerME(sharpModel);
            jChunker = new opennlp.tools.chunker.ChunkerME(javaModel);
        }
Beispiel #12
0
        /// <summary>
        /// Hashes a buffer of <paramref name="byteCount"/> bytes with a <c>DedupNodeHashAlgorithm</c> built on
        /// <paramref name="chunker"/> and asserts that the resulting node hash equals <paramref name="expectedHash"/>.
        /// </summary>
        /// <param name="chunker">The chunker given to the hash algorithm.</param>
        /// <param name="byteCount">The size of the buffer to hash.</param>
        /// <param name="expectedHash">The expected hex representation of the node hash.</param>
        /// <param name="seed">The seed passed to <c>FillBufferWithTestContent</c> for non-empty buffers.</param>
        /// <returns>The node produced by the hash algorithm.</returns>
        private DedupNode HashIsStableForChunker(IChunker chunker, uint byteCount, string expectedHash, int seed)
        {
            using (var hasher = new DedupNodeHashAlgorithm(chunker))
            {
                var content = new byte[byteCount];
                if (byteCount > 0)
                {
                    FillBufferWithTestContent(seed, content);
                }

                hasher.Initialize();
                hasher.ComputeHash(content, 0, content.Length);
                var node = hasher.GetNode();

                // The chunk leaves together must account for every input byte.
                long leafByteTotal = node.EnumerateChunkLeafsInOrder().Sum(leaf => (long)leaf.TransitiveContentBytes);
                Assert.Equal<long>((long)byteCount, leafByteTotal);

                // Both sides carry the same prefix so a failure message identifies the failing case.
                string header = $"Chunker:{chunker.GetType().Name} Seed:{seed} Length:{byteCount} Hash:";
                string expected = header + expectedHash;
                string actual = header + node.Hash.ToHex();
                Assert.Equal<string>(expected, actual);

                return node;
            }
        }
Beispiel #13
0
 /// <summary>
 /// Initializes a new instance of the <see cref="Parser"/> class from the build and check models.
 /// </summary>
 /// <param name="buildModel">The model used to size <c>bProbs</c>, to fill the start/continue type maps and to look up <c>TOP_START</c>.</param>
 /// <param name="checkModel">The model used to size <c>cProbs</c> and to look up the <c>COMPLETE</c> and <c>INCOMPLETE</c> outcomes.</param>
 /// <param name="tagger">Forwarded to the base constructor.</param>
 /// <param name="chunker">Forwarded to the base constructor.</param>
 /// <param name="headRules">Forwarded to the base constructor.</param>
 /// <param name="beamSize">Forwarded to the base constructor.</param>
 /// <param name="advancePercentage">Forwarded to the base constructor.</param>
 private Parser(IMaxentModel buildModel, IMaxentModel checkModel, IPOSTagger tagger, IChunker chunker,
     AbstractHeadRules headRules, int beamSize, double advancePercentage) :
         base(tagger, chunker, headRules, beamSize, advancePercentage) {
     this.buildModel = buildModel;
     this.checkModel = checkModel;
     bProbs = new double[buildModel.GetNumOutcomes()];
     cProbs = new double[checkModel.GetNumOutcomes()];
     buildContextGenerator = new BuildContextGenerator();
     checkContextGenerator = new CheckContextGenerator();
     startTypeMap = new Dictionary<string, string>();
     contTypeMap = new Dictionary<string, string>();

     // Map every build outcome that carries the START or CONT prefix to the text after that prefix.
     for (int boi = 0, bon = buildModel.GetNumOutcomes(); boi < bon; boi++) {
         var outcome = buildModel.GetOutcome(boi);

         // Outcome labels are identifiers, not linguistic text: compare ordinally so the prefix test
         // does not depend on the current culture.
         if (outcome.StartsWith(START, System.StringComparison.Ordinal)) {
             startTypeMap[outcome] = outcome.Substring(START.Length);
         } else if (outcome.StartsWith(CONT, System.StringComparison.Ordinal)) {
             contTypeMap[outcome] = outcome.Substring(CONT.Length);
         }
     }

     topStartIndex = buildModel.GetIndex(TOP_START);
     completeIndex = checkModel.GetIndex(COMPLETE);
     incompleteIndex = checkModel.GetIndex(INCOMPLETE);
 }
Beispiel #14
0
        public void Setup()
        {
            // SharpNL side: 70 iterations, cutoff 1.
            var managedParams = new TrainingParameters();
            managedParams.Set(Parameters.Iterations, "70");
            managedParams.Set(Parameters.Cutoff, "1");

            // Java side: the same two settings.
            var nativeParams = new opennlp.tools.util.TrainingParameters();
            nativeParams.put("Iterations", "70");
            nativeParams.put("Cutoff", "1");

            var managedModel = ChunkerME.Train("en", ChunkerMETest.CreateSampleStream(), managedParams, new ChunkerFactory());
            var nativeModel = opennlp.tools.chunker.ChunkerME.train(
                "en", JavaSampleStream(), nativeParams, new opennlp.tools.chunker.ChunkerFactory());

            Assert.NotNull(managedModel);
            Assert.NotNull(nativeModel);

            sChunker = new ChunkerME(managedModel);
            jChunker = new opennlp.tools.chunker.ChunkerME(nativeModel);
        }
Beispiel #15
0
        /// <summary>
        /// Hashes a random buffer with a node hasher built on <paramref name="chunker"/> and asserts that every
        /// chunk leaf of the resulting node has the hash a <c>DedupChunkHashAlgorithm</c> computes for the same bytes.
        /// </summary>
        /// <param name="chunker">The chunker given to the node hash algorithm.</param>
        private void HashOfChunksInNodeMatchesChunkHashAlgorithmInner(IChunker chunker)
        {
            using (var nodeHasher = new DedupNodeHashAlgorithm(chunker))
            using (var chunkHasher = new DedupChunkHashAlgorithm())
            {
                var content = new byte[2 * DedupNode.MaxDirectChildrenPerNode * (64 * 1024 /* avg chunk size */)];

                // Fixed seed, so the buffer content is the same on every run.
                var random = new Random(Seed: 0);
                random.NextBytes(content);

                nodeHasher.ComputeHash(content, 0, content.Length);
                var node = nodeHasher.GetNode();

                Assert.NotNull(node.Height);
                Assert.Equal(2u, node.Height.Value);

                // Walk the leaves in order; each one covers the next TransitiveContentBytes bytes of the buffer.
                ulong position = 0;
                foreach (var leaf in node.EnumerateChunkLeafsInOrder())
                {
                    byte[] expectedLeafHash = chunkHasher.ComputeHash(content, (int)position, (int)leaf.TransitiveContentBytes);
                    Assert.Equal(expectedLeafHash.ToHex(), leaf.Hash.ToHex());
                    position += leaf.TransitiveContentBytes;
                }

                Assert.Equal(position, node.TransitiveContentBytes);
            }
        }
Beispiel #16
0
 /// <summary>
 /// Initializes a new instance of the <see cref="Parser"/> class from the build and check models.
 /// </summary>
 /// <param name="buildModel">The model used to size <c>bProbs</c>, to fill the start/continue type maps and to look up <c>TOP_START</c>.</param>
 /// <param name="checkModel">The model used to size <c>cProbs</c> and to look up the <c>COMPLETE</c> and <c>INCOMPLETE</c> outcomes.</param>
 /// <param name="tagger">Forwarded to the base constructor.</param>
 /// <param name="chunker">Forwarded to the base constructor.</param>
 /// <param name="headRules">Forwarded to the base constructor.</param>
 /// <param name="beamSize">Forwarded to the base constructor.</param>
 /// <param name="advancePercentage">Forwarded to the base constructor.</param>
 private Parser(IMaxentModel buildModel, IMaxentModel checkModel, IPOSTagger tagger, IChunker chunker,
                AbstractHeadRules headRules, int beamSize, double advancePercentage) :
     base(tagger, chunker, headRules, beamSize, advancePercentage)
 {
     this.buildModel       = buildModel;
     this.checkModel       = checkModel;
     bProbs                = new double[buildModel.GetNumOutcomes()];
     cProbs                = new double[checkModel.GetNumOutcomes()];
     buildContextGenerator = new BuildContextGenerator();
     checkContextGenerator = new CheckContextGenerator();
     startTypeMap          = new Dictionary <string, string>();
     contTypeMap           = new Dictionary <string, string>();

     // Map every build outcome that carries the START or CONT prefix to the text after that prefix.
     for (int boi = 0, bon = buildModel.GetNumOutcomes(); boi < bon; boi++)
     {
         var outcome = buildModel.GetOutcome(boi);

         // Outcome labels are identifiers, not linguistic text: compare ordinally so the prefix test
         // does not depend on the current culture.
         if (outcome.StartsWith(START, System.StringComparison.Ordinal))
         {
             startTypeMap[outcome] = outcome.Substring(START.Length);
         }
         else if (outcome.StartsWith(CONT, System.StringComparison.Ordinal))
         {
             contTypeMap[outcome] = outcome.Substring(CONT.Length);
         }
     }

     topStartIndex   = buildModel.GetIndex(TOP_START);
     completeIndex   = checkModel.GetIndex(COMPLETE);
     incompleteIndex = checkModel.GetIndex(INCOMPLETE);
 }
Beispiel #17
0
         /// <summary>
 /// Initializes a new instance of the <see cref="ChunkerAnalyzer"/> class by delegating to the
 /// two-argument constructor with a weight of <c>5f</c>.
 /// </summary>
 /// <param name="chunker">The chunker passed on to the two-argument constructor.</param>
 public ChunkerAnalyzer(IChunker chunker)
     : this(chunker, 5f)
 {
 }
Beispiel #18
0
 /// <summary>
 /// Initializes a new instance of the <see cref="DeterministicChunker"/> class that keeps the given chunker.
 /// </summary>
 /// <param name="chunker">The chunker stored in the <c>_chunker</c> field.</param>
 public DeterministicChunker(IChunker chunker)
 {
     _chunker = chunker;
 }
 /// <summary>
 /// Sets up the collaborators, beam settings and heaps of an <see cref="AbstractBottomUpParser"/>.
 /// </summary>
 /// <param name="tagger">The pos-tagger that the parser uses.</param>
 /// <param name="chunker">The chunker that the parser uses to chunk non-recursive structures.</param>
 /// <param name="headRules">The head rules for the parser; its <c>PunctuationTags</c> become <c>punctSet</c>.</param>
 /// <param name="beamSize">Size of the beam; stored in both <c>M</c> and <c>K</c>.</param>
 /// <param name="advancePercentage">The advance percentage; stored in <c>Q</c>.</param>
 protected AbstractBottomUpParser(IPOSTagger tagger, IChunker chunker, AbstractHeadRules headRules, int beamSize,
     double advancePercentage) {

     this.tagger  = tagger;
     this.chunker = chunker;

     // Beam settings.
     M = beamSize;
     K = beamSize;
     Q = advancePercentage;

     ReportFailedParse = true;

     this.headRules = headRules;
     punctSet       = headRules.PunctuationTags;

     // Each heap receives K as its constructor argument.
     odh            = new ListHeap<Parse>(K);
     ndh            = new ListHeap<Parse>(K);
     completeParses = new ListHeap<Parse>(K);
 }
Beispiel #20
0
        /// <summary>
        /// Chunks both texts with <paramref name="chunker"/> and collects the blocks of deleted and inserted pieces.
        /// </summary>
        /// <param name="oldText">The old text; must not be <c>null</c>.</param>
        /// <param name="newText">The new text; must not be <c>null</c>.</param>
        /// <param name="ignoreWhiteSpace">Passed to <c>BuildPieceHashes</c> for both texts.</param>
        /// <param name="ignoreCase">Passed to <c>BuildPieceHashes</c> for both texts.</param>
        /// <param name="chunker">The chunker used to split both texts; must not be <c>null</c>.</param>
        /// <returns>A <see cref="DiffResult"/> built from the old pieces, the new pieces and the collected blocks.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="oldText"/>, <paramref name="newText"/> or <paramref name="chunker"/> is <c>null</c>.
        /// </exception>
        public DiffResult CreateDiffs(string oldText, string newText, bool ignoreWhiteSpace, bool ignoreCase, IChunker chunker)
        {
            if (oldText == null)
            {
                throw new ArgumentNullException(nameof(oldText));
            }

            if (newText == null)
            {
                throw new ArgumentNullException(nameof(newText));
            }

            if (chunker == null)
            {
                throw new ArgumentNullException(nameof(chunker));
            }

            var sharedPieceHash = new Dictionary<string, int>();
            var blocks = new List<DiffBlock>();

            var oldData = new ModificationData(oldText);
            var newData = new ModificationData(newText);

            // Both texts are hashed against the same dictionary.
            BuildPieceHashes(sharedPieceHash, oldData, ignoreWhiteSpace, ignoreCase, chunker);
            BuildPieceHashes(sharedPieceHash, newData, ignoreWhiteSpace, ignoreCase, chunker);

            BuildModificationData(oldData, newData);

            int oldCount = oldData.HashedPieces.Length;
            int newCount = newData.HashedPieces.Length;
            int oldPos = 0;
            int newPos = 0;

            do
            {
                // Advance both cursors over pieces that are unmodified on both sides.
                while (oldPos < oldCount
                       && newPos < newCount
                       && !oldData.Modifications[oldPos]
                       && !newData.Modifications[newPos])
                {
                    oldPos++;
                    newPos++;
                }

                int oldStart = oldPos;
                int newStart = newPos;

                // Consume the run of modified pieces on the old side, then on the new side.
                while (oldPos < oldCount && oldData.Modifications[oldPos])
                {
                    oldPos++;
                }

                while (newPos < newCount && newData.Modifications[newPos])
                {
                    newPos++;
                }

                int deleted = oldPos - oldStart;
                int inserted = newPos - newStart;
                if (deleted > 0 || inserted > 0)
                {
                    blocks.Add(new DiffBlock(oldStart, deleted, newStart, inserted));
                }
            }
            while (oldPos < oldCount && newPos < newCount);

            return new DiffResult(oldData.Pieces, newData.Pieces, blocks);
        }
Beispiel #21
0
 /// <summary>
 /// Initializes a new instance of the <see cref="DedupNodeOrChunkHashAlgorithm"/> class with an explicit
 /// tree algorithm and chunker, then calls <c>Initialize</c>.
 /// </summary>
 /// <param name="treeAlgorithm">The tree algorithm stored in <c>_treeAlgorithm</c>.</param>
 /// <param name="chunker">The chunker stored in <c>_chunker</c>.</param>
 public DedupNodeOrChunkHashAlgorithm(DedupNodeTree.Algorithm treeAlgorithm, IChunker chunker)
 {
     _treeAlgorithm = treeAlgorithm;
     _chunker = chunker;

     Initialize();
 }
 /// <summary>
 /// Initializes a new instance of the <see cref="DefaultTextMatcher"/> class.
 /// </summary>
 /// <param name="differ">The IDiffer implementation to use for determining which text replacements correspond with original text in ambiguous cases.</param>
 /// <param name="chunker">The IChunker to be used with the differ.</param>
 /// <exception cref="ArgumentNullException"><paramref name="chunker"/> or <paramref name="differ"/> is <c>null</c>.</exception>
 public DefaultTextMatcher(IDiffer differ, IChunker chunker)
 {
     // The chunker is validated before the differ.
     if (chunker == null)
     {
         throw new ArgumentNullException(nameof(chunker));
     }

     if (differ == null)
     {
         throw new ArgumentNullException(nameof(differ));
     }

     _chunker = chunker;
     _differ = differ;
 }
Beispiel #23
0
 /// <summary>
 /// Initializes a new instance of the <see cref="Parser"/> class; every argument is forwarded to the base constructor.
 /// </summary>
 /// <param name="tagger">The pos-tagger that the parser uses.</param>
 /// <param name="chunker">The chunker that the parser uses to chunk non-recursive structures.</param>
 /// <param name="headRules">The head rules for the parser.</param>
 /// <param name="beamSize">Size of the beam.</param>
 /// <param name="advancePercentage">The advance percentage.</param>
 public Parser(
     IPOSTagger tagger,
     IChunker chunker,
     AbstractHeadRules headRules,
     int beamSize,
     double advancePercentage)
     : base(tagger, chunker, headRules, beamSize, advancePercentage) { }
 /// <summary>
 /// Initializes a new instance of the <see cref="DedupNodeHashAlgorithm"/> class, forwarding the tree
 /// algorithm and the chunker to the base constructor.
 /// </summary>
 /// <param name="treeAlgorithm">The tree algorithm passed to the base constructor.</param>
 /// <param name="chunker">The chunker passed to the base constructor.</param>
 public DedupNodeHashAlgorithm(DedupNodeTree.Algorithm treeAlgorithm, IChunker chunker)
     : base(treeAlgorithm, chunker) { }
 /// <summary>
 /// Initializes a new instance of the <see cref="DedupNodeHashAlgorithm"/> class for the given chunker,
 /// using <c>DedupNodeTree.Algorithm.MaximallyPacked</c> as the tree algorithm.
 /// </summary>
 /// <param name="chunker">The chunker passed on to the two-argument constructor.</param>
 public DedupNodeHashAlgorithm(IChunker chunker)
     : this(DedupNodeTree.Algorithm.MaximallyPacked, chunker) { }
Beispiel #26
0
        /// <summary>
        /// Builds a <see cref="DiffPaneModel"/> from the diff of the two texts produced by the <c>differ</c> field.
        /// </summary>
        /// <param name="oldText">The old text; must not be <c>null</c>.</param>
        /// <param name="newText">The new text; must not be <c>null</c>.</param>
        /// <param name="ignoreWhitespace">Forwarded to <c>CreateDiffs</c>.</param>
        /// <param name="ignoreCase">Forwarded to <c>CreateDiffs</c>.</param>
        /// <param name="chunker">The chunker forwarded to <c>CreateDiffs</c>.</param>
        /// <returns>The model whose <c>Lines</c> were filled by <c>BuildDiffPieces</c>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="oldText"/> or <paramref name="newText"/> is <c>null</c>.</exception>
        public DiffPaneModel BuildDiffModel(string oldText, string newText, bool ignoreWhitespace, bool ignoreCase, IChunker chunker)
        {
            if (oldText == null)
            {
                throw new ArgumentNullException(nameof(oldText));
            }

            if (newText == null)
            {
                throw new ArgumentNullException(nameof(newText));
            }

            var model = new DiffPaneModel();

            BuildDiffPieces(
                differ.CreateDiffs(oldText, newText, ignoreWhitespace, ignoreCase, chunker),
                model.Lines);

            return model;
        }
Beispiel #27
0
 /// <summary>
 /// Gets the inline textual diffs, using <c>Differ.Instance</c> as the differ.
 /// </summary>
 /// <param name="oldText">The old text to diff.</param>
 /// <param name="newText">The new text.</param>
 /// <param name="ignoreWhiteSpace">true to ignore white space; otherwise, false.</param>
 /// <param name="ignoreCase">true if case-insensitive; otherwise, false.</param>
 /// <param name="chunker">The chunker, passed through unchanged to the overload that takes a differ.</param>
 /// <returns>The diffs result.</returns>
 public static DiffPaneModel Diff(string oldText, string newText, bool ignoreWhiteSpace = true, bool ignoreCase = false, IChunker chunker = null)
     => Diff(Differ.Instance, oldText, newText, ignoreWhiteSpace, ignoreCase, chunker);
Beispiel #28
0
 /// <summary>
 /// Creates a <see cref="ChunkerAnalyzer"/> for the given chunker, passing <c>5f</c> as the weight
 /// to the two-argument constructor.
 /// </summary>
 /// <param name="chunker">The chunker handed to the two-argument constructor.</param>
 public ChunkerAnalyzer(IChunker chunker)
     : this(chunker, 5f) { }
Beispiel #29
0
 /// <summary>
 /// Creates a <see cref="Parser"/> by handing all arguments to the base constructor.
 /// </summary>
 /// <param name="tagger">The pos-tagger that the parser uses.</param>
 /// <param name="chunker">The chunker that the parser uses to chunk non-recursive structures.</param>
 /// <param name="headRules">The head rules for the parser.</param>
 /// <param name="beamSize">Size of the beam.</param>
 /// <param name="advancePercentage">The advance percentage.</param>
 public Parser(IPOSTagger tagger, IChunker chunker, AbstractHeadRules headRules, int beamSize, double advancePercentage)
     : base(tagger, chunker, headRules, beamSize, advancePercentage)
 {
 }
Beispiel #30
0
        /// <summary>
        /// Chunks <c>data.RawData</c> and assigns every piece an integer id, reusing the id already stored in
        /// <paramref name="pieceHash"/> for a piece seen before and adding a new entry otherwise.
        /// </summary>
        /// <param name="pieceHash">Maps a normalized piece to its id; new pieces get the dictionary's current count as id.</param>
        /// <param name="data">Receives <c>Pieces</c>, <c>HashedPieces</c> and a fresh <c>Modifications</c> array.</param>
        /// <param name="ignoreWhitespace">When true, each piece is trimmed before it is looked up.</param>
        /// <param name="ignoreCase">When true, each piece is upper-cased (invariant culture) before it is looked up.</param>
        /// <param name="chunker">Splits the raw data into pieces; not called when the raw data is null or empty.</param>
        private static void BuildPieceHashes(IDictionary <string, int> pieceHash, ModificationData data, bool ignoreWhitespace, bool ignoreCase, IChunker chunker)
        {
            var pieces = string.IsNullOrEmpty(data.RawData)
                ? emptyStringArray
                : chunker.Chunk(data.RawData);

            data.Pieces        = pieces;
            data.HashedPieces  = new int[pieces.Length];
            data.Modifications = new bool[pieces.Length];

            for (int i = 0; i < pieces.Length; i++)
            {
                // Normalization only affects the lookup key; data.Pieces keeps the original text.
                string piece = pieces[i];
                if (ignoreWhitespace)
                {
                    piece = piece.Trim();
                }
                if (ignoreCase)
                {
                    piece = piece.ToUpperInvariant();
                }

                // Single lookup instead of ContainsKey followed by the indexer.
                if (!pieceHash.TryGetValue(piece, out int id))
                {
                    id               = pieceHash.Count;
                    pieceHash[piece] = id;
                }

                data.HashedPieces[i] = id;
            }
        }
        /// <summary>
        /// Hashes a random buffer of <c>expectedChunkCount * config.AvgChunkSize</c> bytes, asserts that each chunk
        /// leaf matches the hash of a <c>DedupChunkHashAlgorithm</c> over the same bytes, and asserts that the
        /// number of leaves is within 30% of <paramref name="expectedChunkCount"/>.
        /// </summary>
        /// <param name="expectedChunkCount">The number of chunks the buffer is sized for.</param>
        /// <param name="config">Supplies the average chunk size used to size the buffer.</param>
        /// <param name="chunker">The chunker given to the node hash algorithm.</param>
        private void HashOfChunksInNodeMatchesChunkHashAlgorithmInner(int expectedChunkCount, ChunkerConfiguration config, IChunker chunker)
        {
            using (var nodeHasher = new DedupNodeOrChunkHashAlgorithm(chunker))
            using (var chunkHasher = new DedupChunkHashAlgorithm())
            {
                var content = new byte[expectedChunkCount * config.AvgChunkSize];

                nodeHasher.SetInputLength(content.Length);

                // Fixed seed, so the buffer content is the same on every run.
                var random = new Random(Seed: 0);
                random.NextBytes(content);

                nodeHasher.ComputeHash(content, 0, content.Length);
                var node = nodeHasher.GetNode();
                Assert.NotNull(node.Height);

                bool expectsHeightTwo = expectedChunkCount >= 2 * DedupNode.MaxDirectChildrenPerNode;
                if (expectsHeightTwo)
                {
                    Assert.Equal(2u, node.Height.Value);
                }

                // Walk the leaves in order; each one covers the next TransitiveContentBytes bytes of the buffer.
                ulong position = 0;
                int leafCount = 0;
                foreach (var leaf in node.EnumerateChunkLeafsInOrder())
                {
                    byte[] expectedLeafHash = chunkHasher.ComputeHash(content, (int)position, (int)leaf.TransitiveContentBytes);
                    Assert.Equal(expectedLeafHash.ToHex(), leaf.Hash.ToHex());

                    position += leaf.TransitiveContentBytes;
                    leafCount++;
                }

                Assert.Equal(position, node.TransitiveContentBytes);

                double expectedToActual = (double)expectedChunkCount / leafCount;
                Assert.True(Math.Abs(expectedToActual - 1.0) < 0.3); // within 30% of expected
            }
        }
Beispiel #32
0
        /// <summary>
        /// Gets the inline textual diffs.
        /// </summary>
        /// <param name="differ">The differ instance; <c>Differ.Instance</c> is used when this is <c>null</c>.</param>
        /// <param name="oldText">The old text to diff.</param>
        /// <param name="newText">The new text.</param>
        /// <param name="ignoreWhiteSpace">true to ignore white space; otherwise, false.</param>
        /// <param name="ignoreCase">true if case-insensitive; otherwise, false.</param>
        /// <param name="chunker">The chunker; <c>LineChunker.Instance</c> is used when this is <c>null</c>.</param>
        /// <returns>The diffs result.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="oldText"/> or <paramref name="newText"/> is <c>null</c>.</exception>
        public static DiffPaneModel Diff(IDiffer differ, string oldText, string newText, bool ignoreWhiteSpace = true, bool ignoreCase = false, IChunker chunker = null)
        {
            if (oldText == null)
            {
                throw new ArgumentNullException(nameof(oldText));
            }

            if (newText == null)
            {
                throw new ArgumentNullException(nameof(newText));
            }

            var model = new DiffPaneModel();

            // Resolve the optional collaborators to their shared defaults.
            var effectiveDiffer = differ ?? Differ.Instance;
            var effectiveChunker = chunker ?? LineChunker.Instance;

            var diffResult = effectiveDiffer.CreateDiffs(oldText, newText, ignoreWhiteSpace, ignoreCase, effectiveChunker);
            BuildDiffPieces(diffResult, model.Lines);

            return model;
        }
Beispiel #33
0
        /// <summary>
        /// Creates a <see cref="Parser"/> from the build, attach and check models.
        /// </summary>
        /// <param name="buildModel">The build model; sizes <c>bProbs</c> and provides the <c>DONE</c> index.</param>
        /// <param name="attachModel">The attach model; sizes <c>aProbs</c> and provides the attach indexes.</param>
        /// <param name="checkModel">The check model; sizes <c>cProbs</c> and provides the <c>COMPLETE</c> index.</param>
        /// <param name="tagger">Handed to the base constructor.</param>
        /// <param name="chunker">Handed to the base constructor.</param>
        /// <param name="headRules">Handed to the base constructor.</param>
        /// <param name="beamSize">Handed to the base constructor.</param>
        /// <param name="advancePercentage">Handed to the base constructor.</param>
        private Parser(
            IMaxentModel buildModel,
            IMaxentModel attachModel,
            IMaxentModel checkModel,
            IPOSTagger tagger,
            IChunker chunker,
            AbstractHeadRules headRules,
            int beamSize,
            double advancePercentage)
            : base(tagger, chunker, headRules, beamSize, advancePercentage) {

            // Models.
            this.buildModel  = buildModel;
            this.attachModel = attachModel;
            this.checkModel  = checkModel;

            // Context generators; the attach and check generators receive punctSet.
            buildContextGenerator  = new BuildContextGenerator();
            attachContextGenerator = new AttachContextGenerator(punctSet);
            checkContextGenerator  = new CheckContextGenerator(punctSet);

            // One probability slot per outcome of the corresponding model.
            bProbs = new double[buildModel.GetNumOutcomes()];
            aProbs = new double[attachModel.GetNumOutcomes()];
            cProbs = new double[checkModel.GetNumOutcomes()];

            // Outcome indexes.
            doneIndex           = buildModel.GetIndex(DONE);
            sisterAttachIndex   = attachModel.GetIndex(ATTACH_SISTER);
            daughterAttachIndex = attachModel.GetIndex(ATTACH_DAUGHTER);
            // The NON_ATTACH lookup is left disabled:
            // nonAttachIndex = attachModel.GetIndex(NON_ATTACH);
            attachments   = new[] { daughterAttachIndex, sisterAttachIndex };
            completeIndex = checkModel.GetIndex(COMPLETE);
        }
Beispiel #34
0
 /// <summary>
 /// Initializes a new instance of the <see cref="SideBySideDiffBuilder"/> class.
 /// </summary>
 /// <param name="differ">The differ to use; <c>Differ.Instance</c> is used when this is <c>null</c>.</param>
 /// <param name="lineChunker">The line chunker; must not be <c>null</c>.</param>
 /// <param name="wordChunker">The word chunker; must not be <c>null</c>.</param>
 /// <exception cref="ArgumentNullException"><paramref name="lineChunker"/> or <paramref name="wordChunker"/> is <c>null</c>.</exception>
 public SideBySideDiffBuilder(IDiffer differ, IChunker lineChunker, IChunker wordChunker)
 {
     this.differ = differ ?? Differ.Instance;

     if (lineChunker == null)
     {
         throw new ArgumentNullException(nameof(lineChunker));
     }

     this.lineChunker = lineChunker;

     if (wordChunker == null)
     {
         throw new ArgumentNullException(nameof(wordChunker));
     }

     this.wordChunker = wordChunker;
 }
 /// <summary>
 /// Initializes a new instance of the <see cref="DedupNodeOrChunkHashAlgorithm"/> class with the given tree
 /// algorithm and a chunker obtained from <c>DedupNodeHashAlgorithm.CreateChunker()</c>, then calls <c>Initialize</c>.
 /// </summary>
 /// <param name="treeAlgorithm">The tree algorithm stored in <c>_treeAlgorithm</c>.</param>
 public DedupNodeOrChunkHashAlgorithm(DedupNodeTree.Algorithm treeAlgorithm)
 {
     _treeAlgorithm = treeAlgorithm;
     _chunker = DedupNodeHashAlgorithm.CreateChunker();

     Initialize();
 }
Beispiel #36
0
        /// <summary>
        /// Gets the side-by-side textual diffs.
        /// </summary>
        /// <param name="differ">The differ instance; <c>Differ.Instance</c> is used when this is <c>null</c>.</param>
        /// <param name="oldText">The old text to diff.</param>
        /// <param name="newText">The new text.</param>
        /// <param name="ignoreWhiteSpace">true to ignore white space; otherwise, false.</param>
        /// <param name="ignoreCase">true if case-insensitive; otherwise, false.</param>
        /// <param name="lineChunker">The line chunker; <c>LineChunker.Instance</c> is used when this is <c>null</c>.</param>
        /// <param name="wordChunker">The word chunker; <c>WordChunker.Instance</c> is used when this is <c>null</c>.</param>
        /// <returns>The diffs result.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="oldText"/> or <paramref name="newText"/> is <c>null</c>.</exception>
        public static SideBySideDiffModel Diff(IDiffer differ, string oldText, string newText, bool ignoreWhiteSpace = true, bool ignoreCase = false, IChunker lineChunker = null, IChunker wordChunker = null)
        {
            if (oldText == null)
            {
                throw new ArgumentNullException(nameof(oldText));
            }
            if (newText == null)
            {
                throw new ArgumentNullException(nameof(newText));
            }

            // A null differ falls back to the shared instance here rather than delegating to the
            // four-argument overload, so caller-supplied chunkers are still honored.
            var effectiveDiffer      = differ ?? Differ.Instance;
            var effectiveLineChunker = lineChunker ?? LineChunker.Instance;
            var effectiveWordChunker = wordChunker ?? WordChunker.Instance;

            var model      = new SideBySideDiffModel();
            var diffResult = effectiveDiffer.CreateDiffs(oldText, newText, ignoreWhiteSpace, ignoreCase, effectiveLineChunker);

            // The callback diffs a pair of sub-texts with the word chunker and builds their pieces.
            BuildDiffPieces(diffResult, model.OldText.Lines, model.NewText.Lines, (ot, nt, op, np, iw, ic) =>
            {
                var r = effectiveDiffer.CreateDiffs(ot, nt, iw, ic, effectiveWordChunker);
                return BuildDiffPieces(r, op, np, null, iw, ic);
            }, ignoreWhiteSpace, ignoreCase);

            return model;
        }
Beispiel #37
0
        /// <summary>
        /// Creates a <see cref="ChunkerEvaluator"/> for the given chunker with a fresh <c>FMeasure</c>.
        /// </summary>
        /// <param name="chunker">The chunker stored in the <c>chunker</c> field.</param>
        /// <param name="listeners">The evaluation listeners, forwarded to the base constructor.</param>
        public ChunkerEvaluator(IChunker chunker, params IEvaluationMonitor<ChunkSample>[] listeners)
            : base(listeners)
        {
            this.chunker = chunker;
            FMeasure = new FMeasure<Span>();
        }
Beispiel #38
0
 /// <summary>
 /// Creates a test case for the given chunker.
 /// </summary>
 /// <param name="chunker">The chunker assigned to <c>Chunker</c>.</param>
 public TestCase(IChunker chunker)
 {
     Chunker = chunker;
 }