예제 #1
0
 // Introduce symbols from a set S into an input string not containing symbols in S.
 //
 // The result is the Star() closure of two alternatives:
 //   - the identity transducer over the alphabet with S removed, and
 //   - the product of the epsilon automaton with the automaton over S.
 static Fst Intro(ISet <char> alphabet, ISet <char> symbols)
 {
     // Symbols outside S are carried through by an identity relation.
     var passThrough = FsaBuilder
                       .FromSymbolSet(alphabet.Except(symbols))
                       .Identity();

     // Epsilon paired with any symbol of S.
     var insertion = FsaBuilder
                     .FromEpsilon()
                     .Product(FsaBuilder.FromSymbolSet(symbols));

     var singleStep = passThrough.Union(insertion);

     return singleStep.Star();
 }
예제 #2
0
    // Checks the automaton returned by FsaBuilder.FromEpsilon(): it has exactly one
    // state, rejects the non-empty samples "a" and "abc", and recognizes the empty string.
    public void EpsilonFsaBuilderTest()
    {
        var epsilonFsa = FsaBuilder.FromEpsilon();

        Assert.Single(epsilonFsa.States);

        // Non-empty words must be rejected.
        foreach (var word in new[] { "a", "abc" })
        {
            Assert.False(epsilonFsa.Recognize(word));
        }

        Assert.True(epsilonFsa.Recognize(string.Empty));
    }
예제 #3
0
    // Builds the automaton for a term: one Factor() concatenated with the Term() that
    // follows it. The term ends, yielding the epsilon automaton, when no characters
    // remain or the next character is ')' or '|'.
    Fsa Term()
    {
        // Same short-circuit order as the positive form: HasMoreChars() first, then Peek().
        var atTermEnd = !this.HasMoreChars() || this.Peek() == ')' || this.Peek() == '|';

        if (atTermEnd)
        {
            return FsaBuilder.FromEpsilon();
        }

        // Factor() must run before the recursive Term() call.
        var head = this.Factor();
        var rest = this.Term();

        return head.Concat(rest);
    }
예제 #4
0
    // Unions the automaton for the word "abc" with the epsilon automaton, then checks
    // the expected state/initial/final counts and which sample words are recognized.
    public void UnionEpsilonFsaTest()
    {
        var union = FsaBuilder.FromWord("abc").Union(FsaBuilder.FromEpsilon());

        // Structure of the resulting automaton.
        var stateCount = union.States.Count;
        Assert.Equal(5, stateCount);

        var initialCount = union.Initial.Count;
        Assert.Equal(2, initialCount);

        var finalCount = union.Final.Count;
        Assert.Equal(2, finalCount);

        // Language of the resulting automaton: only "" and "abc" are accepted.
        Assert.True(union.Recognize(string.Empty));
        Assert.False(union.Recognize("a"));
        Assert.True(union.Recognize("abc"));
        Assert.False(union.Recognize("abca"));
    }
예제 #5
0
    // Convert to an obligatory leftmost-longest match rewrite transducer (van Noord, Gerdemann 1999)
    //
    // fst      - the transducer to turn into a rewriter; its Domain() feeds the f,
    //            left-to-right and longest-match stages below.
    // alphabet - the ordinary input symbols. The flag characters '0' and '1' and the four
    //            bracket characters are introduced locally and are not taken from it.
    //
    // Returns the composition, in this order, of: nonMarkersFst, r, f, leftToRight,
    // longestMatch, auxReplace, l1, l2 and the inverse of nonMarkersFst.
    //
    // NOTE(review): Intro, Ignore, IgnoreX, XIgnore and XIgnoreX are defined outside this
    // method; their exact semantics are assumed from their names - confirm before relying
    // on the comments below.
    // NOTE(review): the left and right contexts are hard-coded to the epsilon automaton, and
    // several locals (sigStarFsa, b1, b2, brack, trueFsa, falseFsa, cond, then, @else) are
    // assigned but never read again in this method.
    public static Fst ToLmlRewriter2(this Fst fst, ISet <char> alphabet)
    {
        // Flag characters: '0' follows a symbol in sigFsa / nonMarkersFst, '1' follows a bracket character.
        const char notMarkerSymbol = '0';
        const char isMarkerSymbol  = '1';
        var        markers         = new[] { notMarkerSymbol, isMarkerSymbol };

        // sigFsa: one alphabet symbol followed by the '0' flag.
        var sigFsa = FsaBuilder.FromSymbolSet(alphabet)
                     .Concat(FsaBuilder.FromSymbolSet(new[] { notMarkerSymbol }));
        var sigStarFsa  = sigFsa.Star().Minimal();
        // xSig: the alphabet extended with the two flag characters (as a character set).
        var xSig        = alphabet.Concat(markers).ToHashSet();
        // NOTE(review): xSigFsa is built from sigFsa, i.e. alphabet symbol + '0' + ('0' | '1').
        // Presumably "any symbol followed by a flag" was intended - confirm against the paper.
        var xSigFsa     = sigFsa.Concat(FsaBuilder.FromSymbolSet(markers));
        var xSigStarFsa = xSigFsa.Star().Minimal();

        // Bracket characters; each is paired with the '1' flag when turned into an automaton below.
        const char lb1Marker = '<'; // <1
        const char lb2Marker = '≪'; // <2
        const char rb1Marker = '>'; // 1>
        const char rb2Marker = '≫'; // 2>

        var lb1   = FsaBuilder.FromSymbol(lb1Marker).Concat(FsaBuilder.FromSymbol(isMarkerSymbol));
        var lb2   = FsaBuilder.FromSymbol(lb2Marker).Concat(FsaBuilder.FromSymbol(isMarkerSymbol));
        var rb2   = FsaBuilder.FromSymbol(rb2Marker).Concat(FsaBuilder.FromSymbol(isMarkerSymbol));
        var rb1   = FsaBuilder.FromSymbol(rb1Marker).Concat(FsaBuilder.FromSymbol(isMarkerSymbol));
        // Unions of brackets: lb = either left bracket, rb = either right bracket.
        // lb is only used to build brack; b1, b2 and brack are not used afterwards.
        var lb    = lb1.Union(lb2);
        var rb    = rb1.Union(rb2);
        var b1    = lb1.Union(rb1);
        var b2    = lb2.Union(rb2);
        var brack = lb.Union(rb);

        // Complement relative to xSigStarFsa.
        Fsa Not(Fsa lang) => xSigStarFsa.Difference(lang);
        // lang surrounded by xSigStarFsa on both sides.
        Fsa Contain(Fsa lang) => xSigStarFsa.Concat(lang, xSigStarFsa);

        // Constraint builders over a prefix language l1 and a suffix language l2:
        //   IfPThenS - excludes l1 followed by something outside l2.
        //   IfSThenP - excludes something outside l1 followed by l2.
        //   PiffS    - both of the above.
        //   LiffR    - PiffS with l1 padded on the left and l2 padded on the right by xSigStarFsa.
        Fsa IfPThenS(Fsa l1, Fsa l2) => Not(l1.Concat(Not(l2)));
        Fsa IfSThenP(Fsa l1, Fsa l2) => Not(Not(l1).Concat(l2));
        Fsa PiffS(Fsa l1, Fsa l2) => IfPThenS(l1, l2).Intersect(IfSThenP(l1, l2));
        Fsa LiffR(Fsa l1, Fsa l2) => PiffS(xSigStarFsa.Concat(l1), l2.Concat(xSigStarFsa));

        // Only referenced by the commented-out CoerceToBoolean / If helpers below.
        var trueFsa  = xSigStarFsa;
        var falseFsa = FsaBuilder.FromEpsilon();

        // Fsa CoerceToBoolean(Fsa l) => l.Identity()
        //     .Compose(trueFsa.Product(trueFsa)).Range();

        // Fst If(Fsa cond, Fst then, Fst @else) =>
        //     CoerceToBoolean(cond).Identity().Compose(then)
        //         .Union(Not(CoerceToBoolean(cond)).Identity().Compose(@else));

        // Both contexts are fixed to the epsilon automaton in this version.
        var leftCtx  = FsaBuilder.FromEpsilon();
        var rightCtx = FsaBuilder.FromEpsilon();
        var domainT  = fst.Domain();

        // Identity over one alphabet symbol, followed by the word pair ("", "0").
        // Presumably this appends the '0' flag on the output side - confirm FromWordPair's argument order.
        var nonMarkersFst = FsaBuilder.FromSymbolSet(alphabet)
                            .Identity()
                            .Concat(FstBuilder.FromWordPair(string.Empty, notMarkerSymbol.ToString()));

        // Range of l's identity composed with nonMarkersFst.
        Fsa NonMarkers(Fsa l) => l.Identity().Compose(nonMarkersFst).Range();

        // begin R
        // cond, then and @else are the operands of the disabled If(...) call; they are still
        // computed here but none of them is read afterwards.
        var cond = FsaBuilder.FromEpsilon().Intersect(rightCtx);
        var then = FsaBuilder.FromEpsilon().Product(rb2).Concat(sigFsa.Identity()).Star()
                   .Concat(FsaBuilder.FromEpsilon().Product(rb2));
        var @else = Intro(xSig, new HashSet <char> {
            rb2Marker
        }).Compose(
            LiffR(rb2, XIgnore(NonMarkers(rightCtx), xSig, new HashSet <char> {
            rb2Marker
        })).Identity());

        // r repeats the expression assigned to `then` above instead of calling If(...).
        // var r = If(cond, then, @else);
        var r = FsaBuilder.FromEpsilon().Product(rb2).Concat(sigFsa.Identity()).Star()
                .Concat(FsaBuilder.FromEpsilon().Product(rb2));
        // end R

        // f: Intro of the lb2 bracket character, composed with the identity of a LiffR
        // constraint between lb2 and the (XIgnoreX-processed) domain followed by an
        // optional lb2 and then rb2.
        var f = Intro(xSig, new HashSet <char> {
            lb2Marker
        })
                .Compose(
            LiffR(lb2, XIgnoreX(NonMarkers(domainT), xSig, new HashSet <char> {
            lb2Marker, rb2Marker
        })
                  .Concat(lb2.Optional(), rb2)).Identity());

        // begin lr
        // Body: lb2 paired with lb1, then the domain (with lb2/rb2 passed to Ignore) composed
        // with the inverse of Intro for lb2, then rb2 paired with rb1.
        var leftToRightBody = lb2.Product(lb1)
                              .Concat(
            Ignore(NonMarkers(domainT), xSig, new HashSet <char> {
            lb2Marker, rb2Marker
        }).Identity()
            .Compose(Intro(xSig, new HashSet <char> {
            lb2Marker
        }).Inverse()))
                              .Concat(rb2.Product(rb1));

        // Star over (identity of xSigStarFsa followed by the body), then a trailing identity of xSigStarFsa.
        var leftToRight = xSigStarFsa.Identity()
                          .Concat(leftToRightBody)
                          .Star()
                          .Concat(xSigStarFsa.Identity());
        // end lr

        // begin longest match
        // Pattern: lb1, then a domain string (all four brackets passed to IgnoreX) that
        // contains rb1, then any right bracket.
        var longestBody = lb1
                          .Concat(
            IgnoreX(NonMarkers(domainT), xSig, new HashSet <char> {
            lb1Marker, lb2Marker, rb1Marker, rb2Marker
        })
            .Intersect(Contain(rb1)))
                          .Concat(rb);

        // Identity over strings that do not contain longestBody, composed with the inverse of Intro for rb2.
        var longestMatch = Not(Contain(longestBody)).Identity()
                           .Compose(Intro(xSig, new HashSet <char> {
            rb2Marker
        }).Inverse());
        // end longest match

        // auxReplace: Star over either an identity on (sigFsa | lb2), or an lb1 ... rb1 span
        // whose inside is mapped through fst (between nonMarkersFst inverse and nonMarkersFst)
        // and whose rb1 is paired with epsilon.
        var auxReplace = sigFsa.Union(lb2).Identity()
                         .Union(lb1.Identity()
                                .Concat(nonMarkersFst.Inverse().Compose(fst, nonMarkersFst))
                                .Concat(rb1.Product(FsaBuilder.FromEpsilon())))
                         .Star();

        // l1: IfSThenP constraint relating the left context to lb1, with lb2 passed to Ignore,
        // composed with the inverse of Intro for lb1.
        var l1 = Ignore(
            IfSThenP(
                IgnoreX(xSigStarFsa.Concat(NonMarkers(leftCtx)), xSig, new HashSet <char> {
            lb1Marker
        }),
                lb1.Concat(xSigStarFsa)),
            xSig,
            new HashSet <char> {
            lb2Marker
        })
                 .Identity()
                 .Compose(Intro(xSig, new HashSet <char> {
            lb1Marker
        }).Inverse());

        // l2: IfSThenP constraint relating the complement of the left context to lb2,
        // composed with the inverse of Intro for lb2.
        var l2 = IfSThenP(
            IgnoreX(Not(xSigStarFsa.Concat(NonMarkers(leftCtx))), xSig, new HashSet <char> {
            lb2Marker
        }),
            lb2.Concat(xSigStarFsa))
                 .Identity()
                 .Compose(Intro(xSig, new HashSet <char> {
            lb2Marker
        }).Inverse());

        // Final pipeline: nonMarkersFst first, every stage in order, nonMarkersFst inverse last.
        var replace = nonMarkersFst.Compose(
            r, f,
            leftToRight, longestMatch, auxReplace,
            l1, l2,
            nonMarkersFst.Inverse());

        return(replace);
    }