示例#1
0
 // Builds a transducer that may insert symbols from a set S anywhere into an input string
 // which itself contains no symbols from S
 static Fst Intro(ISet <char> alphabet, ISet <char> symbols)
 {
     // Copies one input symbol that does not belong to S
     var keepOther = FsaBuilder.FromSymbolSet(alphabet.Except(symbols)).Identity();
     // Consumes nothing from the input and emits one symbol from S
     var insertOne = FsaBuilder.FromEpsilon().Product(FsaBuilder.FromSymbolSet(symbols));

     return keepOther.Union(insertOne).Star();
 }
示例#2
0
    // Variant of "Intro": the result maps the empty string, a single copied non-S symbol,
    // or an "Intro" section enclosed between two copied non-S symbols
    static Fst XintroX(ISet <char> alphabet, ISet <char> symbols)
    {
        var copyNonS  = FsaBuilder.FromSymbolSet(alphabet.Except(symbols)).Identity();
        var inserting = Intro(alphabet, symbols);
        var enclosed  = copyNonS.Concat(inserting, copyNonS);

        return enclosed.Union(copyNonS).Optional();
    }
示例#3
0
    // Checks that a* stays correct after its epsilon transitions have been removed
    public void EpsilonFreeSimpleConstructionTest()
    {
        // a*
        var automaton = FsaBuilder.FromWord("a").Star().EpsilonFree();
        var rejected  = new[] { "ca", "aaba", "b", "cc" };
        var accepted  = new[] { "aaaa", "a", "aa", string.Empty, "aaaaaaaa" };

        // No transition may carry an empty label any more
        Assert.DoesNotContain(automaton.Transitions, transition => string.IsNullOrEmpty(transition.Label));
        Assert.DoesNotContain(rejected, automaton.Recognize);
        Assert.True(accepted.All(automaton.Recognize));
    }
示例#4
0
    // Parses a (possibly empty) sequence of factors and concatenates their automata
    Fsa Term()
    {
        // The term is over at the end of the input, at a closing parenthesis or at an alternation bar
        if (!this.HasMoreChars() || this.Peek() == ')' || this.Peek() == '|')
        {
            return FsaBuilder.FromEpsilon();
        }

        var head = this.Factor();
        var rest = this.Term();

        return head.Concat(rest);
    }
示例#5
0
    // The epsilon automaton has a single state and accepts only the empty word
    public void EpsilonFsaBuilderTest()
    {
        var epsilon = FsaBuilder.FromEpsilon();

        Assert.Single(epsilon.States);

        foreach (var nonEmptyWord in new[] { "a", "abc" })
        {
            Assert.False(epsilon.Recognize(nonEmptyWord));
        }

        Assert.True(epsilon.Recognize(string.Empty));
    }
示例#6
0
    // Builds the bimachine of a tokenizer for English text over the printable ASCII characters.
    // Three rewrite rules are composed: runs of whitespace are collapsed into a single space,
    // a "\n" is inserted after every token, and a space directly following the start of the
    // text or a "\n" is removed.
    public static Bimachine CreateForEnglish()
    {
        // Printable ASCII (codes 32..126) plus the ASCII control whitespace characters
        var alphabet = Enumerable.Range(32, 95).Select(x => (char)x)
                       .Concat(new[] { '\t', '\n', '\v', '\f', '\r' })
                       .ToHashSet();
        var whitespaces = new[] { ' ', '\t', '\n' };

        // Exactly the 26 letters 'A'..'Z' and 'a'..'z'; a count of 27 would also
        // classify '[' and '{' as letters
        var upperCaseLetters = Enumerable.Range('A', 26).Select(x => (char)x);
        var lowerCaseLetters = Enumerable.Range('a', 26).Select(x => (char)x);
        var digits           = new[] { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' };
        var letters          = upperCaseLetters.Concat(lowerCaseLetters);

        // Maps every text to its upper-case form. The invariant culture is used so that the
        // mapping of ASCII letters does not depend on the current culture (e.g. Turkish 'i')
        var riseCase = alphabet
                       .Select(symbol =>
                               FstBuilder.FromWordPair(
                                   symbol.ToString(),
                                   char.IsLower(symbol)
                                       ? symbol.ToString().ToUpperInvariant()
                                       : symbol.ToString()))
                       .Aggregate((aggr, fst) => aggr.Union(fst))
                       .Star();

        // Multi-word expressions which are treated as a single token
        var multiWordExprList = new[] { "AT LEAST", "IN SPITE OF", "HEAD OVER HEELS" };
        var multiWordExpr     =
            multiWordExprList
            .Select(exp => FsaBuilder.FromWord(exp))
            .Aggregate((aggr, fsa) => aggr.Union(fsa));

        // A token is a run of letters, a run of digits, a multi-word expression in any
        // letter case, or any single symbol that is not one of the whitespaces above
        var token =
            FsaBuilder.FromSymbolSet(letters)
            .Plus()
            .Union(
                FsaBuilder.FromSymbolSet(digits).Plus(),
                riseCase.Compose(multiWordExpr.Identity()).Domain(),
                FsaBuilder.FromSymbolSet(alphabet.Except(whitespaces)));

        // Prepends "\n" to the text and copies the rest unchanged
        var insertLeadingNewLine =
            FstBuilder.FromWordPair(string.Empty, "\n")
            .Concat(FsaBuilder.FromSymbolSet(alphabet).Star().Identity());

        // Replaces every run of whitespace with a single space
        var clearSpaces =
            FsaBuilder.FromSymbolSet(whitespaces)
            .Plus()
            .Product(FsaBuilder.FromWord(" "))
            .ToLmlRewriter(alphabet);

        // Appends "\n" after every (leftmost-longest) token
        var markTokens =
            token.Identity()
            .Concat(FstBuilder.FromWordPair(string.Empty, "\n"))
            .ToLmlRewriter(alphabet);

        // Temporarily prepends "\n" so that a space at the very start of the text is handled
        // by the same "\n " -> "\n" rule, then undoes the prepending
        var clearLeadingSpace =
            insertLeadingNewLine.Compose(
                FstBuilder.FromWordPair("\n ", "\n").ToRewriter(alphabet),
                insertLeadingNewLine.Inverse());

        return(clearSpaces.Compose(markTokens, clearLeadingSpace).ToBimachine(alphabet));
    }
示例#7
0
    // An automaton built from a word has one state per prefix and accepts exactly that word
    public void WordFsaBuilderTest()
    {
        const string word = "abc";
        var automaton = FsaBuilder.FromWord(word);

        Assert.Equal(4, automaton.States.Count);

        foreach (var other in new[] { string.Empty, "a", "abca" })
        {
            Assert.False(automaton.Recognize(other));
        }

        Assert.True(automaton.Recognize(word));
    }
示例#8
0
    // Turns the transducer into an obligatory rewrite transducer over the given alphabet
    public static Fst ToRewriter(this Fst fst, ISet <char> alphabet)
    {
        var anyText = FsaBuilder.All(alphabet);

        // Texts which contain an occurrence of a word from the domain of the rule ...
        var containsMatch = anyText.Concat(fst.Domain()).Concat(anyText);
        // ... and the (optional) identity on all the texts which do not
        var untouched = anyText.Difference(containsMatch).Identity().Optional();

        // Every application of the rule is followed by text without any match
        var rewriteThenCopy = fst.Concat(untouched).Star();

        return untouched.Concat(rewriteThenCopy);
    }
示例#9
0
    // Checks the shape and the language of a*
    public void StarFsaTest()
    {
        var aStar    = FsaBuilder.FromWord("a").Star();
        var accepted = new[] { "aaaa", "a", "aa", string.Empty, "aaaaaaaa" };

        Assert.Equal(3, aStar.States.Count);
        Assert.Single(aStar.Initial);
        Assert.Equal(2, aStar.Final.Count);
        Assert.False(aStar.Recognize("ab"));
        Assert.True(accepted.All(word => aStar.Recognize(word)));
    }
示例#10
0
    // Concatenating several automata in one call: ab, cde and f*
    public void ConcatMultipleFsaTest()
    {
        var prefix   = FsaBuilder.FromWord("ab");
        var middle   = FsaBuilder.FromWord("cde");
        var trailing = FsaBuilder.FromWord("f").Star();
        var combined = prefix.Concat(middle, trailing);

        foreach (var accepted in new[] { "abcdef", "abcdefffffff" })
        {
            Assert.True(combined.Recognize(accepted));
        }

        Assert.False(combined.Recognize("abcdff"));
    }
示例#11
0
    // Checks the language of (a|b)*c
    public void ComplexFsaConstructionTest1()
    {
        var aOrB    = FsaBuilder.FromWord("a").Union(FsaBuilder.FromWord("b"));
        var pattern = aOrB.Star().Concat(FsaBuilder.FromWord("c"));

        var rejected = new[] { "ca", "aaba", string.Empty, "cc" };
        var accepted = new[] { "abbac", "ac", "bc", "ababbbbac", "c" };

        Assert.DoesNotContain(rejected, pattern.Recognize);
        Assert.True(accepted.All(pattern.Recognize));
    }
示例#12
0
    // Checks the shape and the language of (ab)?
    public void OptionFsaTest()
    {
        var optionalAb = FsaBuilder.FromWord("ab").Optional();

        Assert.Equal(4, optionalAb.States.Count);
        Assert.Equal(2, optionalAb.Initial.Count);
        Assert.Equal(2, optionalAb.Final.Count);

        // A proper fragment of the word is not accepted
        foreach (var fragment in new[] { "b", "a" })
        {
            Assert.False(optionalAb.Recognize(fragment));
        }

        Assert.True(new[] { "ab", string.Empty }.All(word => optionalAb.Recognize(word)));
    }
示例#13
0
    // An automaton built from a symbol set accepts exactly the one-symbol words over that set
    public void FromSymbolSetFsaTest()
    {
        var symbols   = new HashSet <char> { 'a', 'b', 'c' };
        var automaton = FsaBuilder.FromSymbolSet(symbols);

        Assert.Equal(2, automaton.States.Count);

        foreach (var rejected in new[] { string.Empty, "d", "ab" })
        {
            Assert.False(automaton.Recognize(rejected));
        }

        Assert.True(new[] { "b", "a", "c" }.All(automaton.Recognize));
    }
示例#14
0
    // Checks the shape and the language of the automaton accepting every word over {a, b, c}
    public void AllFsaTest()
    {
        var fsa = FsaBuilder.All(new HashSet <char> {
            'a', 'b', 'c'
        });

        Assert.Equal(3, fsa.States.Count);
        // Same single-element check as used by the other tests for "Initial"
        Assert.Single(fsa.Initial);
        Assert.Equal(2, fsa.Final.Count);
        // Words containing a symbol outside of the alphabet are rejected
        Assert.False(fsa.Recognize("d"));
        Assert.False(fsa.Recognize("ad"));
        Assert.True(new[] { "ab", string.Empty, "abc", "bbbac", "cba", "cbcbbcaaaaacb" }.All(fsa.Recognize));
    }
示例#15
0
    // Checks that (a|b)+c stays correct after its epsilon transitions have been removed
    public void EpsilonFreeConstructionTest()
    {
        var aOrB    = FsaBuilder.FromWord("a").Union(FsaBuilder.FromWord("b"));
        var pattern = aOrB.Plus().Concat(FsaBuilder.FromWord("c")).EpsilonFree();

        var accepted = new[] { "abbac", "ac", "bc", "ababbbbac", "aac" };
        var rejected = new[] { "ca", "aaba", string.Empty, "cc", "c" };

        // No transition may carry an empty label any more
        Assert.DoesNotContain(pattern.Transitions, transition => string.IsNullOrEmpty(transition.Label));
        Assert.True(accepted.All(pattern.Recognize));
        Assert.DoesNotContain(rejected, pattern.Recognize);
    }
示例#16
0
    // Checks the shape and the language of (abc)*
    public void StarFsaTest1()
    {
        var repeated = FsaBuilder.FromWord("abc").Star();

        Assert.Equal(5, repeated.States.Count);
        Assert.Single(repeated.Initial);
        Assert.Equal(2, repeated.Final.Count);

        foreach (var rejected in new[] { "abcabcabcb", "ab" })
        {
            Assert.False(repeated.Recognize(rejected));
        }

        foreach (var accepted in new[] { string.Empty, "abc", "abcabcabc" })
        {
            Assert.True(repeated.Recognize(accepted));
        }
    }
示例#17
0
    // Checks the language of ab*c
    public void ComplexFsaConstructionTest()
    {
        var first   = FsaBuilder.FromWord("a");
        var middle  = FsaBuilder.FromWord("b").Star();
        var last    = FsaBuilder.FromWord("c");
        var pattern = first.Concat(middle, last);

        foreach (var rejected in new[] { string.Empty, "ab" })
        {
            Assert.False(pattern.Recognize(rejected));
        }

        foreach (var accepted in new[] { "abc", "ac", "abbbbc" })
        {
            Assert.True(pattern.Recognize(accepted));
        }
    }
示例#18
0
    // The union of a word automaton with the epsilon automaton accepts the word and the empty word
    public void UnionEpsilonFsaTest()
    {
        var wordFsa    = FsaBuilder.FromWord("abc");
        var epsilonFsa = FsaBuilder.FromEpsilon();
        var either     = wordFsa.Union(epsilonFsa);

        // The states of both operands are kept side by side
        Assert.Equal(5, either.States.Count);
        Assert.Equal(2, either.Initial.Count);
        Assert.Equal(2, either.Final.Count);

        Assert.True(either.Recognize(string.Empty));
        Assert.False(either.Recognize("a"));
        Assert.True(either.Recognize("abc"));
        Assert.False(either.Recognize("abca"));
    }
示例#19
0
    // The concatenation of "abc" and "de" accepts only "abcde"
    public void ConcatFsaTest()
    {
        var left   = FsaBuilder.FromWord("abc");
        var right  = FsaBuilder.FromWord("de");
        var joined = left.Concat(right);

        Assert.Equal(7, joined.States.Count);
        Assert.Single(joined.Initial);
        Assert.Single(joined.Final);

        // Neither the empty word nor one of the operands alone is accepted
        foreach (var rejected in new[] { string.Empty, "a", "abc", "de" })
        {
            Assert.False(joined.Recognize(rejected));
        }

        Assert.True(joined.Recognize("abcde"));
    }
示例#20
0
    // Checks the determinized automaton of a simple e-mail like pattern
    public void ComplexFsaConstructionTest2()
    {
        // .*@.*\.com, where "." stands for any of the 26 lowercase letters 'a'..'z'
        // (a count of 27 would also put '{' into the alphabet)
        var all = FsaBuilder.All(
            Enumerable.Range('a', 26).Select(Convert.ToChar).ToHashSet());
        var fsa = all
                  .Concat(
            FsaBuilder.FromWord("@"),
            all,
            FsaBuilder.FromWord(".com"))
                  .Determinize();

        // Rejected: no '@', a doubled '@', a different or a truncated top-level domain
        Assert.DoesNotContain(new[] { "userexample.com", "you@@gmail.com", "user@example.org", "user@example.co" }, fsa.Recognize);
        // Accepted: lowercase letters around a single '@', ending in ".com"
        Assert.True(new[] { "user@example.com", "admin@example.com", "info@test.com" }.All(fsa.Recognize));
    }
示例#21
0
    // Convert to an obligatory leftmost-longest match rewrite transducer (van Noord, Gerdemann 1999)
    //
    // The text is first re-encoded so that every symbol is followed by a flag symbol
    // ('0' after an ordinary symbol, '1' after a bracket symbol), then a pipeline of
    // transducers ("r", "f", "leftToRight", "longestMatch", "auxReplace", "l1", "l2") is
    // composed and finally the flag encoding is undone again.
    // The helpers "Intro", "Ignore", "IgnoreX", "XIgnore" and "XIgnoreX" are defined outside of
    // this method.
    // NOTE(review): both the left and the right context are fixed to epsilon below, i.e. the
    // rule is applied without any context restriction — confirm that this is intended.
    public static Fst ToLmlRewriter2(this Fst fst, ISet <char> alphabet)
    {
        // Flag symbols which are written after each symbol of the encoded text
        const char notMarkerSymbol = '0';
        const char isMarkerSymbol  = '1';
        var        markers         = new[] { notMarkerSymbol, isMarkerSymbol };

        // One ordinary symbol in the encoded form: an alphabet symbol followed by '0'
        var sigFsa = FsaBuilder.FromSymbolSet(alphabet)
                     .Concat(FsaBuilder.FromSymbolSet(new[] { notMarkerSymbol }));
        // NOTE(review): "sigStarFsa" is not referenced anywhere else in this method
        var sigStarFsa  = sigFsa.Star().Minimal();
        // The alphabet extended with the two flag symbols
        var xSig        = alphabet.Concat(markers).ToHashSet();
        // NOTE(review): this is "sigFsa" followed by one more flag symbol (alphabet symbol, '0',
        // then '0' or '1'); the bracket symbols below are not part of it — confirm that this
        // is the intended definition of the extended alphabet automaton
        var xSigFsa     = sigFsa.Concat(FsaBuilder.FromSymbolSet(markers));
        var xSigStarFsa = xSigFsa.Star().Minimal();

        // Bracket symbols used to mark the rewrite occurrences
        const char lb1Marker = '<'; // <1
        const char lb2Marker = '≪'; // <2
        const char rb1Marker = '>'; // 1>
        const char rb2Marker = '≫'; // 2>

        // The brackets in the encoded form: the bracket symbol followed by '1'
        var lb1   = FsaBuilder.FromSymbol(lb1Marker).Concat(FsaBuilder.FromSymbol(isMarkerSymbol));
        var lb2   = FsaBuilder.FromSymbol(lb2Marker).Concat(FsaBuilder.FromSymbol(isMarkerSymbol));
        var rb2   = FsaBuilder.FromSymbol(rb2Marker).Concat(FsaBuilder.FromSymbol(isMarkerSymbol));
        var rb1   = FsaBuilder.FromSymbol(rb1Marker).Concat(FsaBuilder.FromSymbol(isMarkerSymbol));
        var lb    = lb1.Union(lb2);
        var rb    = rb1.Union(rb2);
        // NOTE(review): "b1", "b2" and "brack" are not referenced anywhere else in this method
        var b1    = lb1.Union(rb1);
        var b2    = lb2.Union(rb2);
        var brack = lb.Union(rb);

        // Complement with respect to "xSigStarFsa"
        Fsa Not(Fsa lang) => xSigStarFsa.Difference(lang);
        // Words which contain a word from "lang" surrounded by words from "xSigStarFsa"
        Fsa Contain(Fsa lang) => xSigStarFsa.Concat(lang, xSigStarFsa);

        // Complement of "l1" followed by the complement of "l2"
        Fsa IfPThenS(Fsa l1, Fsa l2) => Not(l1.Concat(Not(l2)));
        // Complement of the complement of "l1" followed by "l2"
        Fsa IfSThenP(Fsa l1, Fsa l2) => Not(Not(l1).Concat(l2));
        // Both of the conditions above at once
        Fsa PiffS(Fsa l1, Fsa l2) => IfPThenS(l1, l2).Intersect(IfSThenP(l1, l2));
        // "PiffS" where "l1" may be preceded and "l2" may be followed by any word from "xSigStarFsa"
        Fsa LiffR(Fsa l1, Fsa l2) => PiffS(xSigStarFsa.Concat(l1), l2.Concat(xSigStarFsa));

        // NOTE(review): "trueFsa" and "falseFsa" are only used by the commented-out helpers below
        var trueFsa  = xSigStarFsa;
        var falseFsa = FsaBuilder.FromEpsilon();

        // Fsa CoerceToBoolean(Fsa l) => l.Identity()
        //     .Compose(trueFsa.Product(trueFsa)).Range();

        // Fst If(Fsa cond, Fst then, Fst @else) =>
        //     CoerceToBoolean(cond).Identity().Compose(then)
        //         .Union(Not(CoerceToBoolean(cond)).Identity().Compose(@else));

        // Both contexts are empty; the domain of the rule gives the words to be rewritten
        var leftCtx  = FsaBuilder.FromEpsilon();
        var rightCtx = FsaBuilder.FromEpsilon();
        var domainT  = fst.Domain();

        // Copies one alphabet symbol and writes the '0' flag after it
        var nonMarkersFst = FsaBuilder.FromSymbolSet(alphabet)
                            .Identity()
                            .Concat(FstBuilder.FromWordPair(string.Empty, notMarkerSymbol.ToString()));

        // Range of "nonMarkersFst" restricted to the language "l"
        Fsa NonMarkers(Fsa l) => l.Identity().Compose(nonMarkersFst).Range();

        // begin R
        // NOTE(review): "cond", "then" and "@else" are built but never used, since the "If"
        // helper is commented out; "r" below repeats the expression of "then"
        var cond = FsaBuilder.FromEpsilon().Intersect(rightCtx);
        var then = FsaBuilder.FromEpsilon().Product(rb2).Concat(sigFsa.Identity()).Star()
                   .Concat(FsaBuilder.FromEpsilon().Product(rb2));
        var @else = Intro(xSig, new HashSet <char> {
            rb2Marker
        }).Compose(
            LiffR(rb2, XIgnore(NonMarkers(rightCtx), xSig, new HashSet <char> {
            rb2Marker
        })).Identity());

        // var r = If(cond, then, @else);
        // Writes the "2>" bracket before every encoded symbol and once more at the end
        var r = FsaBuilder.FromEpsilon().Product(rb2).Concat(sigFsa.Identity()).Star()
                .Concat(FsaBuilder.FromEpsilon().Product(rb2));
        // end R

        // Introduces "<2" brackets, filtered by a "LiffR" condition over the domain of the rule
        var f = Intro(xSig, new HashSet <char> {
            lb2Marker
        })
                .Compose(
            LiffR(lb2, XIgnoreX(NonMarkers(domainT), xSig, new HashSet <char> {
            lb2Marker, rb2Marker
        })
                  .Concat(lb2.Optional(), rb2)).Identity());

        // begin lr
        // One bracketed occurrence: "<2" becomes "<1", the body is a word of the domain
        // (composed with the inverse of the "<2" introduction) and "2>" becomes "1>"
        var leftToRightBody = lb2.Product(lb1)
                              .Concat(
            Ignore(NonMarkers(domainT), xSig, new HashSet <char> {
            lb2Marker, rb2Marker
        }).Identity()
            .Compose(Intro(xSig, new HashSet <char> {
            lb2Marker
        }).Inverse()))
                              .Concat(rb2.Product(rb1));

        // Any number of such occurrences, each preceded by copied text, followed by copied text
        var leftToRight = xSigStarFsa.Identity()
                          .Concat(leftToRightBody)
                          .Star()
                          .Concat(xSigStarFsa.Identity());
        // end lr

        // begin longest match
        // "<1", then a word of the domain which contains a "1>", then a right bracket
        var longestBody = lb1
                          .Concat(
            IgnoreX(NonMarkers(domainT), xSig, new HashSet <char> {
            lb1Marker, lb2Marker, rb1Marker, rb2Marker
        })
            .Intersect(Contain(rb1)))
                          .Concat(rb);

        // Keeps only the words without such a pattern, composed with the inverse of the
        // "2>" introduction
        var longestMatch = Not(Contain(longestBody)).Identity()
                           .Compose(Intro(xSig, new HashSet <char> {
            rb2Marker
        }).Inverse());
        // end longest match

        // Copies encoded symbols and "<2" brackets; between "<1" and "1>" the rule itself is
        // applied to the decoded text and the result is encoded again, "1>" is deleted
        var auxReplace = sigFsa.Union(lb2).Identity()
                         .Union(lb1.Identity()
                                .Concat(nonMarkersFst.Inverse().Compose(fst, nonMarkersFst))
                                .Concat(rb1.Product(FsaBuilder.FromEpsilon())))
                         .Star();

        // Condition on the "<1" brackets with respect to the left context, composed with
        // the inverse of the "<1" introduction
        var l1 = Ignore(
            IfSThenP(
                IgnoreX(xSigStarFsa.Concat(NonMarkers(leftCtx)), xSig, new HashSet <char> {
            lb1Marker
        }),
                lb1.Concat(xSigStarFsa)),
            xSig,
            new HashSet <char> {
            lb2Marker
        })
                 .Identity()
                 .Compose(Intro(xSig, new HashSet <char> {
            lb1Marker
        }).Inverse());

        // Condition on the "<2" brackets with respect to the complement of the left context,
        // composed with the inverse of the "<2" introduction
        var l2 = IfSThenP(
            IgnoreX(Not(xSigStarFsa.Concat(NonMarkers(leftCtx))), xSig, new HashSet <char> {
            lb2Marker
        }),
            lb2.Concat(xSigStarFsa))
                 .Identity()
                 .Compose(Intro(xSig, new HashSet <char> {
            lb2Marker
        }).Inverse());

        // Encode the text, run all the steps in order and decode the result
        var replace = nonMarkersFst.Compose(
            r, f,
            leftToRight, longestMatch, auxReplace,
            l1, l2,
            nonMarkersFst.Inverse());

        return(replace);
    }
示例#22
0
 // Variant of "Intro" in which a symbol from S can never be the first symbol of the string
 static Fst Xintro(ISet <char> alphabet, ISet <char> symbols)
 {
     // The string, if not empty, has to start with a copied symbol that does not belong to S
     var leadingNonS = FsaBuilder.FromSymbolSet(alphabet.Except(symbols)).Identity();
     var remainder   = Intro(alphabet, symbols);

     return leadingNonS.Concat(remainder).Optional();
 }
示例#23
0
    // Builds an obligatory leftmost-longest match rewrite transducer out of "fst" (Karttunen 1996).
    // Throws an ArgumentException when the alphabet contains one of the auxiliary marker symbols.
    public static Fst ToLmlRewriter(this Fst fst, ISet <char> alphabet)
    {
        // The auxiliary markers have to be distinguishable from the symbols of the text
        if (alphabet.Intersect(markers).Any())
        {
            throw new ArgumentException("The alphabet contains invalid symbols.");
        }

        var plainText      = FsaBuilder.All(alphabet).Minimal();
        var allSymbols     = alphabet.Concat(markers).ToHashSet();
        var anyMarkedText  = FsaBuilder.All(allSymbols).Minimal();

        // Complement: all the words (over the alphabet with markers) that are not in "lang"
        Fsa Complement(Fsa lang) => anyMarkedText.Difference(lang);

        // All the words that contain an occurrence of a word from "lang"
        Fsa Containing(Fsa lang) => anyMarkedText.Concat(lang, anyMarkedText);

        // All the words w where each prefix of w which is in "prefixes" is followed by a suffix from "suffixes"
        Fsa IfPrefixThenSuffix(Fsa prefixes, Fsa suffixes) => Complement(prefixes.Concat(Complement(suffixes)));

        // All the words where each suffix from "suffixes" is preceded by a prefix from "prefixes"
        Fsa IfSuffixThenPrefix(Fsa prefixes, Fsa suffixes) => Complement(Complement(prefixes).Concat(suffixes));

        Fsa PrefixIffSuffix(Fsa prefixes, Fsa suffixes) =>
            IfPrefixThenSuffix(prefixes, suffixes).Intersect(IfSuffixThenPrefix(prefixes, suffixes));

        // The words where every position is preceded by a string with a suffix in "left"
        // if and only if it is followed by a string with a prefix in "right"
        Fsa LeftIffRight(Fsa left, Fsa right) =>
            PrefixIffSuffix(anyMarkedText.Concat(left), right.Concat(anyMarkedText));

        var ruleDomain = fst.Domain();

        // Step 1: mark the beginnings of all the rewrite occurrences by inserting "cb"
        var insertCandidates = Intro(allSymbols, new HashSet <char> { cb });
        var onlyAtMatchStarts =
            LeftIffRight(
                FsaBuilder.FromSymbol(cb),
                XIgnore(ruleDomain, allSymbols, new HashSet <char> { cb }))
            .Identity();
        var initialMatch = insertCandidates.Compose(onlyAtMatchStarts);

        // Step 2: insert the boundary markers ("lb", "rb") around the leftmost rewrite occurrences
        var unmatchedPrefix = plainText.Identity();                                        // arbitrary text that is not matched by the rule
        var openOccurrence  = FstBuilder.FromWordPair(cb.ToString(), lb.ToString());       // the initial match marker becomes the left boundary marker
        var occurrenceBody  =
            IgnoreX(ruleDomain, allSymbols, new HashSet <char> { cb }).Identity();         // a match, possibly with leftover "cb" symbols in between
        var closeOccurrence = FstBuilder.FromWordPair(string.Empty, rb.ToString());        // the right boundary marker is inserted after the match
        var bracketed       =
            unmatchedPrefix.Concat(openOccurrence, occurrenceBody, closeOccurrence)
            .Star()                                                                        // any number of rewrite occurrences
            .Concat(plainText.Identity());                                                 // followed by arbitrary text that is not matched by the rule
        var dropLeftoverCb  =
            FstBuilder.FromWordPair(cb.ToString(), string.Empty).ToRewriter(allSymbols);   // delete the remaining initial match markers
        var leftToRight     = bracketed.Compose(dropLeftoverCb);

        // Step 3: amongst the occurrences with the same starting point, preserve only the longest ones
        var leftBoundary    = FsaBuilder.FromSymbol(lb);
        var matchAcrossRb   =
            IgnoreX(ruleDomain, allSymbols, new HashSet <char> { lb, rb })
            .Intersect(Containing(FsaBuilder.FromSymbol(rb)));
        var includesNotLongestMatches = Containing(leftBoundary.Concat(matchAcrossRb));
        var longestMatch    = Complement(includesNotLongestMatches).Identity();

        // Step 4: replace the rewrite occurrences and delete the left and the right boundary markers
        var dropLeftBoundary  = FstBuilder.FromWordPair(lb.ToString(), string.Empty);
        var dropRightBoundary = FstBuilder.FromWordPair(rb.ToString(), string.Empty);
        var replacement       = dropLeftBoundary.Concat(fst, dropRightBoundary).ToRewriter(allSymbols);

        return initialMatch.Compose(leftToRight, longestMatch, replacement);
    }
示例#24
0
 // Automaton accepting every word over the given alphabet, the empty word included
 public static Fsa All(IEnumerable <char> alphabet)
 {
     var anySymbol = FsaBuilder.FromSymbolSet(alphabet);

     return anySymbol.Star();
 }
示例#25
0
    // Turns the transducer into an optional rewrite transducer: the rule may be applied
    // any number of times, and the text around the applications is copied unchanged
    public static Fst ToOptionalRewriter(this Fst fst, ISet <char> alphabet)
    {
        var copyAnything    = FsaBuilder.All(alphabet).Identity();
        var rewriteThenCopy = fst.Concat(copyAnything).Star();

        return copyAnything.Concat(rewriteThenCopy);
    }