예제 #1
0
 // Introduce symbols from a set S into an input string not containing symbols in S.
 // Built as (Identity(alphabet \ S) | (epsilon x S))*.
 static Fst Intro(ISet <char> alphabet, ISet <char> symbols)
 {
     // Identity relation over every alphabet symbol that is not in S.
     var keepOthers = FsaBuilder.FromSymbolSet(alphabet.Except(symbols)).Identity();

     // Product of the epsilon automaton with the automaton over the symbols of S.
     var insertFromS = FsaBuilder.FromEpsilon().Product(FsaBuilder.FromSymbolSet(symbols));

     return keepOthers.Union(insertFromS).Star();
 }
예제 #2
0
    // Builds ((edge . Intro(alphabet, symbols) . edge) | edge).Optional(), where "edge" is the
    // identity relation over the alphabet symbols that are not in "symbols".
    static Fst XintroX(ISet <char> alphabet, ISet <char> symbols)
    {
        var edge  = FsaBuilder.FromSymbolSet(alphabet.Except(symbols)).Identity();
        var inner = Intro(alphabet, symbols);

        return edge.Concat(inner, edge).Union(edge).Optional();
    }
예제 #3
0
    // Builds the tokenizer bimachine for English text.
    //
    // The alphabet is the 95 printable ASCII characters (codes 32..126) plus
    // '\t', '\n', '\v', '\f' and '\r'. The returned bimachine is the composition of three rewriters:
    //   1. clearSpaces       - rewrites every run of ' ', '\t', '\n' to a single ' ';
    //   2. markTokens        - appends "\n" after every token (leftmost-longest match);
    //   3. clearLeadingSpace - prepends "\n", rewrites "\n " to "\n", then strips the prepended "\n".
    public static Bimachine CreateForEnglish()
    {
        // Number of letters in the basic Latin alphabet ('A'..'Z' and 'a'..'z').
        const int latinLetterCount = 26;

        var alphabet = Enumerable.Range(32, 95).Select(x => (char)x)
                       .Concat(new[] { '\t', '\n', '\v', '\f', '\r' })
                       .ToHashSet();
        var whitespaces = new[] { ' ', '\t', '\n' };

        // Exactly 26 symbols per case: a count of 27 would also pull in '[' (91) and '{' (123).
        var upperCaseLetters = Enumerable.Range('A', latinLetterCount).Select(x => (char)x);
        var lowerCaseLetters = Enumerable.Range('a', latinLetterCount).Select(x => (char)x);
        var digits           = new[] { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' };
        var letters          = upperCaseLetters.Concat(lowerCaseLetters);

        // Maps every lower-case symbol to its upper-case form and every other symbol to itself.
        // The invariant culture keeps the mapping inside the ASCII alphabet regardless of the
        // current thread culture (e.g. Turkish 'i' -> 'İ').
        var riseCase = alphabet
                       .Select(symbol =>
                               FstBuilder.FromWordPair(
                                   symbol.ToString(),
                                   char.IsLower(symbol)
                        ? symbol.ToString().ToUpperInvariant()
                        : symbol.ToString()))
                       .Aggregate((aggr, fst) => aggr.Union(fst))
                       .Star();

        var multiWordExprList = new[] { "AT LEAST", "IN SPITE OF", "HEAD OVER HEELS" };
        var multiWordExpr     =
            multiWordExprList
            .Select(exp => FsaBuilder.FromWord(exp))
            .Aggregate((aggr, fsa) => aggr.Union(fsa));

        // A token is one of: a run of letters, a run of digits, a multi-word expression in any
        // letter case (the domain of riseCase composed with the upper-case expression list),
        // or any single non-whitespace symbol.
        var token =
            FsaBuilder.FromSymbolSet(letters)
            .Plus()
            .Union(
                FsaBuilder.FromSymbolSet(digits).Plus(),
                riseCase.Compose(multiWordExpr.Identity()).Domain(),
                FsaBuilder.FromSymbolSet(alphabet.Except(whitespaces)));

        // Inserts "\n" in front of the input and copies the rest unchanged.
        var insertLeadingNewLine =
            FstBuilder.FromWordPair(string.Empty, "\n")
            .Concat(FsaBuilder.FromSymbolSet(alphabet).Star().Identity());

        var clearSpaces =
            FsaBuilder.FromSymbolSet(whitespaces)
            .Plus()
            .Product(FsaBuilder.FromWord(" "))
            .ToLmlRewriter(alphabet);

        var markTokens =
            token.Identity()
            .Concat(FstBuilder.FromWordPair(string.Empty, "\n"))
            .ToLmlRewriter(alphabet);

        // The temporary leading "\n" lets the "\n " -> "\n" rule also apply at the very start
        // of the text; the inverse of insertLeadingNewLine removes it again afterwards.
        var clearLeadingSpace =
            insertLeadingNewLine.Compose(
                FstBuilder.FromWordPair("\n ", "\n").ToRewriter(alphabet),
                insertLeadingNewLine.Inverse());

        return(clearSpaces.Compose(markTokens, clearLeadingSpace).ToBimachine(alphabet));
    }
예제 #4
0
    // Checks that the automaton built from the symbol set {a, b, c} has two states and
    // recognizes exactly the one-symbol words over that set among the probed inputs.
    public void FromSymbolSetFsaTest()
    {
        var symbols   = new HashSet <char> { 'a', 'b', 'c' };
        var automaton = FsaBuilder.FromSymbolSet(symbols);

        Assert.Equal(2, automaton.States.Count);

        // Rejected: the empty word, a symbol outside the set, and a two-symbol word.
        Assert.False(automaton.Recognize(string.Empty));
        Assert.False(automaton.Recognize("d"));
        Assert.False(automaton.Recognize("ab"));

        // Accepted: every single symbol of the set.
        var accepted = new[] { "b", "a", "c" };
        Assert.True(accepted.All(word => automaton.Recognize(word)));
    }
예제 #5
0
 // Returns the Star() of the automaton built from the symbols of the given alphabet.
 public static Fsa All(IEnumerable <char> alphabet)
 {
     var anySymbol = FsaBuilder.FromSymbolSet(alphabet);

     return anySymbol.Star();
 }
예제 #6
0
 // Same as "Intro" except symbols from S cannot occur at the beginning of the string.
 // Built as (Identity(alphabet \ S) . Intro(alphabet, S)).Optional().
 static Fst Xintro(ISet <char> alphabet, ISet <char> symbols)
 {
     var leading = FsaBuilder.FromSymbolSet(alphabet.Except(symbols)).Identity();
     var rest    = Intro(alphabet, symbols);

     return leading.Concat(rest).Optional();
 }
예제 #7
0
    // Convert to an obligatory leftmost-longest match rewrite transducer (van Noord, Gerdemann 1999)
    //
    // fst      - the transducer to turn into a rewrite rule; only its domain and the relation
    //            itself are used below.
    // alphabet - the symbols of the input language.
    //
    // The result is the composition, in this order, of: nonMarkersFst, r, f, leftToRight,
    // longestMatch, auxReplace, l1, l2 and the inverse of nonMarkersFst.
    //
    // NOTE(review): the locals sigStarFsa, b1, b2, brack, trueFsa, falseFsa, cond, then and @else
    // are built but never referenced by the returned composition (they are only mentioned in the
    // commented-out "If" helper). Presumably leftovers of an unfinished port - confirm before
    // relying on or removing them.
    public static Fst ToLmlRewriter2(this Fst fst, ISet <char> alphabet)
    {
        // Marker symbols that follow a symbol to flag it as "not a marker" or "is a marker".
        // NOTE(review): '0' and '1' (and '<' / '>' below) may also occur in the caller's alphabet;
        // verify that this cannot clash.
        const char notMarkerSymbol = '0';
        const char isMarkerSymbol  = '1';
        var        markers         = new[] { notMarkerSymbol, isMarkerSymbol };

        // sigFsa: one alphabet symbol followed by notMarkerSymbol.
        var sigFsa = FsaBuilder.FromSymbolSet(alphabet)
                     .Concat(FsaBuilder.FromSymbolSet(new[] { notMarkerSymbol }));
        var sigStarFsa  = sigFsa.Star().Minimal();
        // xSig: the alphabet extended with the two marker symbols.
        var xSig        = alphabet.Concat(markers).ToHashSet();
        // xSigFsa: sigFsa followed by one more marker symbol.
        // NOTE(review): as written this is <symbol> '0' <marker>, i.e. two markers per symbol -
        // confirm this is intended rather than <symbol> <marker>.
        var xSigFsa     = sigFsa.Concat(FsaBuilder.FromSymbolSet(markers));
        var xSigStarFsa = xSigFsa.Star().Minimal();

        // Bracket symbols; each bracket automaton below is the symbol followed by isMarkerSymbol.
        const char lb1Marker = '<'; // <1
        const char lb2Marker = '≪'; // <2
        const char rb1Marker = '>'; // 1>
        const char rb2Marker = '≫'; // 2>

        var lb1   = FsaBuilder.FromSymbol(lb1Marker).Concat(FsaBuilder.FromSymbol(isMarkerSymbol));
        var lb2   = FsaBuilder.FromSymbol(lb2Marker).Concat(FsaBuilder.FromSymbol(isMarkerSymbol));
        var rb2   = FsaBuilder.FromSymbol(rb2Marker).Concat(FsaBuilder.FromSymbol(isMarkerSymbol));
        var rb1   = FsaBuilder.FromSymbol(rb1Marker).Concat(FsaBuilder.FromSymbol(isMarkerSymbol));
        var lb    = lb1.Union(lb2);
        var rb    = rb1.Union(rb2);
        var b1    = lb1.Union(rb1);
        var b2    = lb2.Union(rb2);
        var brack = lb.Union(rb);

        // Not: difference of xSigStarFsa and the given language.
        Fsa Not(Fsa lang) => xSigStarFsa.Difference(lang);
        // Contain: the given language surrounded by xSigStarFsa on both sides.
        Fsa Contain(Fsa lang) => xSigStarFsa.Concat(lang, xSigStarFsa);

        // Implication / equivalence helpers expressed through Not and Concat.
        Fsa IfPThenS(Fsa l1, Fsa l2) => Not(l1.Concat(Not(l2)));
        Fsa IfSThenP(Fsa l1, Fsa l2) => Not(Not(l1).Concat(l2));
        Fsa PiffS(Fsa l1, Fsa l2) => IfPThenS(l1, l2).Intersect(IfSThenP(l1, l2));
        Fsa LiffR(Fsa l1, Fsa l2) => PiffS(xSigStarFsa.Concat(l1), l2.Concat(xSigStarFsa));

        var trueFsa  = xSigStarFsa;
        var falseFsa = FsaBuilder.FromEpsilon();

        // Fsa CoerceToBoolean(Fsa l) => l.Identity()
        //     .Compose(trueFsa.Product(trueFsa)).Range();

        // Fst If(Fsa cond, Fst then, Fst @else) =>
        //     CoerceToBoolean(cond).Identity().Compose(then)
        //         .Union(Not(CoerceToBoolean(cond)).Identity().Compose(@else));

        // Both contexts are fixed to the epsilon automaton here; the method takes no context
        // parameters.
        var leftCtx  = FsaBuilder.FromEpsilon();
        var rightCtx = FsaBuilder.FromEpsilon();
        var domainT  = fst.Domain();

        // Copies one alphabet symbol and inserts notMarkerSymbol after it.
        // NOTE(review): no Star() is applied here, yet this transducer is the first and (inverted)
        // last stage of the final composition - confirm inputs longer than one symbol are handled.
        var nonMarkersFst = FsaBuilder.FromSymbolSet(alphabet)
                            .Identity()
                            .Concat(FstBuilder.FromWordPair(string.Empty, notMarkerSymbol.ToString()));

        // Range of the given language after it is passed through nonMarkersFst.
        Fsa NonMarkers(Fsa l) => l.Identity().Compose(nonMarkersFst).Range();

        // begin R
        // cond / then / @else are the operands of the commented-out If(...) above; only the
        // expression duplicated from "then" is actually assigned to r.
        var cond = FsaBuilder.FromEpsilon().Intersect(rightCtx);
        var then = FsaBuilder.FromEpsilon().Product(rb2).Concat(sigFsa.Identity()).Star()
                   .Concat(FsaBuilder.FromEpsilon().Product(rb2));
        var @else = Intro(xSig, new HashSet <char> {
            rb2Marker
        }).Compose(
            LiffR(rb2, XIgnore(NonMarkers(rightCtx), xSig, new HashSet <char> {
            rb2Marker
        })).Identity());

        // var r = If(cond, then, @else);
        // r: (epsilon x rb2 followed by an identity sigFsa step), starred, followed by one more
        // epsilon x rb2.
        var r = FsaBuilder.FromEpsilon().Product(rb2).Concat(sigFsa.Identity()).Star()
                .Concat(FsaBuilder.FromEpsilon().Product(rb2));
        // end R

        // f: Intro over lb2Marker, composed with the identity of an LiffR constraint that ties lb2
        // to XIgnoreX(NonMarkers(domainT), ...) followed by an optional lb2 and an rb2.
        var f = Intro(xSig, new HashSet <char> {
            lb2Marker
        })
                .Compose(
            LiffR(lb2, XIgnoreX(NonMarkers(domainT), xSig, new HashSet <char> {
            lb2Marker, rb2Marker
        })
                  .Concat(lb2.Optional(), rb2)).Identity());

        // begin lr
        // One bracketed occurrence: lb2 is rewritten to lb1 and rb2 to rb1 around the middle part.
        var leftToRightBody = lb2.Product(lb1)
                              .Concat(
            Ignore(NonMarkers(domainT), xSig, new HashSet <char> {
            lb2Marker, rb2Marker
        }).Identity()
            .Compose(Intro(xSig, new HashSet <char> {
            lb2Marker
        }).Inverse()))
                              .Concat(rb2.Product(rb1));

        // Any number of (xSigStarFsa identity followed by a body), then a trailing xSigStarFsa
        // identity.
        var leftToRight = xSigStarFsa.Identity()
                          .Concat(leftToRightBody)
                          .Star()
                          .Concat(xSigStarFsa.Identity());
        // end lr

        // begin longest match
        // Pattern: lb1, then IgnoreX(NonMarkers(domainT), ...) intersected with "contains rb1",
        // then any right bracket.
        var longestBody = lb1
                          .Concat(
            IgnoreX(NonMarkers(domainT), xSig, new HashSet <char> {
            lb1Marker, lb2Marker, rb1Marker, rb2Marker
        })
            .Intersect(Contain(rb1)))
                          .Concat(rb);

        // Identity over everything that does not contain longestBody, composed with the inverse
        // of Intro over rb2Marker.
        var longestMatch = Not(Contain(longestBody)).Identity()
                           .Compose(Intro(xSig, new HashSet <char> {
            rb2Marker
        }).Inverse());
        // end longest match

        // Starred union of: identity over (sigFsa | lb2), and lb1 followed by fst applied between
        // nonMarkersFst.Inverse() and nonMarkersFst, followed by rb1 x epsilon.
        var auxReplace = sigFsa.Union(lb2).Identity()
                         .Union(lb1.Identity()
                                .Concat(nonMarkersFst.Inverse().Compose(fst, nonMarkersFst))
                                .Concat(rb1.Product(FsaBuilder.FromEpsilon())))
                         .Star();

        // l1: constraint over lb1 built from leftCtx, composed with the inverse of Intro over
        // lb1Marker.
        var l1 = Ignore(
            IfSThenP(
                IgnoreX(xSigStarFsa.Concat(NonMarkers(leftCtx)), xSig, new HashSet <char> {
            lb1Marker
        }),
                lb1.Concat(xSigStarFsa)),
            xSig,
            new HashSet <char> {
            lb1Marker
        })
                 .Identity()
                 .Compose(Intro(xSig, new HashSet <char> {
            lb1Marker
        }).Inverse());

        // l2: the same shape for lb2, using Not(...) of the left-context language, composed with
        // the inverse of Intro over lb2Marker.
        var l2 = IfSThenP(
            IgnoreX(Not(xSigStarFsa.Concat(NonMarkers(leftCtx))), xSig, new HashSet <char> {
            lb2Marker
        }),
            lb2.Concat(xSigStarFsa))
                 .Identity()
                 .Compose(Intro(xSig, new HashSet <char> {
            lb2Marker
        }).Inverse());

        // Chain every stage; the trailing nonMarkersFst.Inverse() undoes the first stage.
        var replace = nonMarkersFst.Compose(
            r, f,
            leftToRight, longestMatch, auxReplace,
            l1, l2,
            nonMarkersFst.Inverse());

        return(replace);
    }