コード例 #1
0
        /// <summary>
        /// Merges three partial results holding 3, 2 and 1 candidates with a candidate limit of 5
        /// and a cost slack of 3, and expects the merged result to hold only 4 candidates
        /// (fewer than the limit) with costs 7, 8, 8 and 9.
        /// </summary>
        public void TestMergerTooFew()
        {
            // Arrange: the merger first, then one partial result per text segment.
            const int candidateLimit = 5;
            const int slack          = 3;
            var mergerUnderTest = new MultiSearchMerger(candidateLimit, slack);
            var partialResults  = new List<MultiSearchResult>
            {
                MakeResult(
                    new[] { new[] { "a", "b" }, new[] { "c", "d" }, new[] { "e", "f" } },
                    new[] { 1, 2, 5 }),
                MakeResult(
                    new[] { new[] { "a", "b" }, new[] { "c", "d" } },
                    new[] { 1, 2 }),
                MakeResult(
                    new[] { new[] { "a", "b" } },
                    new[] { 5 }),
            };

            // Act
            MultiSearchResult merged = mergerUnderTest.Merge(partialResults);

            // Assert: expected candidates, indexed by their position in the merged result.
            int[]    expectedCosts  = { 7, 8, 8, 9 };
            string[] expectedTokens =
            {
                "a b a b a b",
                "c d a b a b",
                "a b c d a b",
                "c d c d a b",
            };

            Assert.AreEqual(expectedCosts.Length, merged.Count);

            // All costs are verified before any token sequence, as separate passes.
            for (int i = 0; i < expectedCosts.Length; i++)
            {
                Assert.AreEqual(expectedCosts[i], merged.GetCost(i));
            }

            for (int i = 0; i < expectedTokens.Length; i++)
            {
                Assert.AreEqual(expectedTokens[i], GetSpaceSeparatedTokens(merged.GetTokenizedResult(i)));
            }
        }
コード例 #2
0
        /// <summary>
        /// Tokenizes the provided text and returns up to <paramref name="maxCount"/> lists of tokens
        /// with various feature information. Each list corresponds to a possible tokenization with
        /// cost at most OPT + <paramref name="costSlack"/>, where OPT is the cost of the optimal solution.
        /// </summary>
        /// <remarks>
        /// When splitting is enabled and split positions are found, each segment is searched
        /// independently and the per-segment results are combined by a <c>MultiSearchMerger</c>;
        /// otherwise the whole text is searched in one pass.
        /// The original documentation states that this method is thread safe; that depends on the
        /// helpers it calls and cannot be confirmed from this method alone.
        /// </remarks>
        /// <param name="text">text to tokenize</param>
        /// <param name="maxCount">maximum number of different tokenizations</param>
        /// <param name="costSlack">maximum cost slack of a tokenization</param>
        /// <returns>list of token lists (element type <c>T</c>), one per tokenization</returns>
        protected List <LinkedList <T> > CreateMultiTokenList(string text, int maxCount, int costSlack)
        {
            if (split)
            {
                List<int> boundaries = GetSplitPositions(text);

                if (boundaries.Count > 0)
                {
                    var segmentResults = new List<MultiSearchResult>();
                    int segmentStart   = 0;

                    // Each boundary is the index of the last character of a segment (inclusive).
                    foreach (int boundary in boundaries)
                    {
                        int segmentEnd = boundary + 1;
                        string segment = text.Substring(segmentStart, segmentEnd - segmentStart);
                        segmentResults.Add(CreateMultiSearchResult(segment, maxCount, costSlack));
                        segmentStart = segmentEnd;
                    }

                    // Whatever follows the final boundary forms one last segment.
                    if (segmentStart < text.Length)
                    {
                        string tail = text.Substring(segmentStart);
                        segmentResults.Add(CreateMultiSearchResult(tail, maxCount, costSlack));
                    }

                    MultiSearchResult combined = new MultiSearchMerger(maxCount, costSlack).Merge(segmentResults);
                    return ConvertMultiSearchResultToList(combined);
                }
            }

            // Splitting is disabled or produced no split positions: search the whole text at once.
            MultiSearchResult whole = CreateMultiSearchResult(text, maxCount, costSlack);
            return ConvertMultiSearchResultToList(whole);
        }
コード例 #3
0
        /// <summary>
        /// Merges three partial results holding 3, 2 and 1 candidates with a candidate limit of 5
        /// and a cost slack of 3, and expects the merged result to hold only 4 candidates
        /// (fewer than the limit) with costs 7, 8, 8 and 9.
        /// </summary>
        public void TestMergerTooFew()
        {
            // Arrange: the merger first, then one partial result per text segment.
            int limit = 5;
            int slack = 3;
            MultiSearchMerger mergerUnderTest = new MultiSearchMerger(limit, slack);
            List<MultiSearchResult> partialResults = new List<MultiSearchResult>
            {
                MakeResult(
                    new[] { new[] { "a", "b" }, new[] { "c", "d" }, new[] { "e", "f" } },
                    new[] { 1, 2, 5 }),
                MakeResult(
                    new[] { new[] { "a", "b" }, new[] { "c", "d" } },
                    new[] { 1, 2 }),
                MakeResult(
                    new[] { new[] { "a", "b" } },
                    new[] { 5 }),
            };

            // Act
            MultiSearchResult merged = mergerUnderTest.Merge(partialResults);

            // Assert: expected candidates, indexed by their position in the merged result.
            int[]    expectedCosts  = { 7, 8, 8, 9 };
            string[] expectedTokens =
            {
                "a b a b a b",
                "c d a b a b",
                "a b c d a b",
                "c d c d a b",
            };

            merged.Count.Is(expectedCosts.Length);

            // All costs are verified before any token sequence, as separate passes.
            for (int i = 0; i < expectedCosts.Length; i++)
            {
                merged.GetCost(i).Is(expectedCosts[i]);
            }

            for (int i = 0; i < expectedTokens.Length; i++)
            {
                GetSpaceSeparatedTokens(merged.GetTokenizedResult(i)).Is(expectedTokens[i]);
            }
        }