/// <summary>
/// Builds the tokenizer <see cref="Bimachine"/> for English text over an alphabet of
/// printable ASCII (code points 32..126) plus tab, newline, vertical tab, form feed
/// and carriage return.
/// </summary>
/// <remarks>
/// The returned machine is the composition of three rewriters built below:
/// <list type="number">
/// <item><description>runs of space/tab/newline are rewritten to a single space;</description></item>
/// <item><description>a "\n" is inserted after every recognized token;</description></item>
/// <item><description>the sequence "\n " is rewritten to "\n".</description></item>
/// </list>
/// </remarks>
/// <returns>The bimachine produced from the composed rewriters.</returns>
public static Bimachine CreateForEnglish()
{
    var alphabet = Enumerable.Range(32, 95)
        .Select(code => (char)code)
        .Concat(new[] { '\t', '\n', '\v', '\f', '\r' })
        .ToHashSet();
    var whitespaces = new[] { ' ', '\t', '\n' };

    // Exactly 'A'..'Z' and 'a'..'z'. The count is derived from the end points so the
    // ranges cannot spill over into '[' (91) or '{' (123).
    var upperCaseLetters = Enumerable.Range('A', 'Z' - 'A' + 1).Select(code => (char)code);
    var lowerCaseLetters = Enumerable.Range('a', 'z' - 'a' + 1).Select(code => (char)code);
    var digits = new[] { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' };
    var letters = upperCaseLetters.Concat(lowerCaseLetters);

    // Transducer mapping every symbol to its upper-case form (non-letters map to
    // themselves). The invariant culture is used so the result never depends on the
    // current thread culture (e.g. Turkish 'i' -> 'İ', which is outside the alphabet).
    var toUpperCase = alphabet
        .Select(symbol => FstBuilder.FromWordPair(
            symbol.ToString(),
            char.ToUpperInvariant(symbol).ToString()))
        .Aggregate((aggr, fst) => aggr.Union(fst))
        .Star();

    var multiWordExprList = new[] { "AT LEAST", "IN SPITE OF", "HEAD OVER HEELS" };
    var multiWordExpr = multiWordExprList
        .Select(exp => FsaBuilder.FromWord(exp))
        .Aggregate((aggr, fsa) => aggr.Union(fsa));

    // A token is one of: a run of letters, a run of digits, any casing of a listed
    // multi-word expression (the domain of upper-casing composed with the list),
    // or a single non-whitespace symbol.
    var token = FsaBuilder.FromSymbolSet(letters)
        .Plus()
        .Union(
            FsaBuilder.FromSymbolSet(digits).Plus(),
            toUpperCase.Compose(multiWordExpr.Identity()).Domain(),
            FsaBuilder.FromSymbolSet(alphabet.Except(whitespaces)));

    // Prefixes any input with "\n"; its inverse is used below to strip that prefix again.
    var insertLeadingNewLine = FstBuilder.FromWordPair(string.Empty, "\n")
        .Concat(FsaBuilder.FromSymbolSet(alphabet).Star().Identity());

    // NOTE(review): "Lml" presumably stands for leftmost-longest-match rewriting - confirm.
    var clearSpaces = FsaBuilder.FromSymbolSet(whitespaces)
        .Plus()
        .Product(FsaBuilder.FromWord(" "))
        .ToLmlRewriter(alphabet);
    var markTokens = token.Identity()
        .Concat(FstBuilder.FromWordPair(string.Empty, "\n"))
        .ToLmlRewriter(alphabet);

    // Add a leading "\n", rewrite "\n " to "\n", then undo the leading "\n".
    var clearLeadingSpace = insertLeadingNewLine.Compose(
        FstBuilder.FromWordPair("\n ", "\n").ToRewriter(alphabet),
        insertLeadingNewLine.Inverse());

    return clearSpaces.Compose(markTokens, clearLeadingSpace).ToBimachine(alphabet);
}
/// <summary>
/// Builds <c>a*</c>, removes epsilon transitions and checks that no transition has an
/// empty label while the sample words are still accepted or rejected as expected.
/// </summary>
public void EpsilonFreeSimpleConstructionTest()
{
    // a*
    var automaton = FsaBuilder.FromWord("a").Star().EpsilonFree();
    var rejected = new[] { "ca", "aaba", "b", "cc" };
    var accepted = new[] { "aaaa", "a", "aa", string.Empty, "aaaaaaaa" };

    Assert.DoesNotContain(automaton.Transitions, tr => string.IsNullOrEmpty(tr.Label));
    Assert.DoesNotContain(rejected, word => automaton.Recognize(word));
    Assert.True(accepted.All(word => automaton.Recognize(word)));
}
/// <summary>
/// Checks that the automaton built from the word "abc" has four states and accepts
/// exactly that word among the samples.
/// </summary>
public void WordFsaBuilderTest()
{
    var wordFsa = FsaBuilder.FromWord("abc");

    Assert.Equal(4, wordFsa.States.Count);

    // The empty word, a proper prefix and an over-long word must all be rejected.
    foreach (var candidate in new[] { string.Empty, "a", "abca" })
    {
        Assert.False(wordFsa.Recognize(candidate));
    }

    Assert.True(wordFsa.Recognize("abc"));
}
/// <summary>
/// Checks the shape (3 states, 1 initial, 2 final) and the language of the Kleene
/// star applied to the single-letter word "a".
/// </summary>
public void StarFsaTest()
{
    var starred = FsaBuilder.FromWord("a").Star();
    var repetitions = new[] { "aaaa", "a", "aa", string.Empty, "aaaaaaaa" };

    Assert.Equal(3, starred.States.Count);
    Assert.Single(starred.Initial);
    Assert.Equal(2, starred.Final.Count);

    Assert.False(starred.Recognize("ab"));
    Assert.True(repetitions.All(word => starred.Recognize(word)));
}
/// <summary>
/// Checks concatenation of three automata: "ab", "cde" and "f" under Kleene star.
/// </summary>
public void ConcatMultipleFsaTest()
{
    // ab cde f*
    var chained = FsaBuilder.FromWord("ab").Concat(
        FsaBuilder.FromWord("cde"),
        FsaBuilder.FromWord("f").Star());

    Assert.True(chained.Recognize("abcdef"));
    Assert.True(chained.Recognize("abcdefffffff"));

    // The 'e' of "cde" is missing here, so the word must be rejected.
    Assert.False(chained.Recognize("abcdff"));
}
/// <summary>
/// Checks the automaton for <c>(a|b)*c</c> against sample accepted and rejected words.
/// </summary>
public void ComplexFsaConstructionTest1()
{
    // (a|b)*c
    var aOrB = FsaBuilder.FromWord("a").Union(FsaBuilder.FromWord("b"));
    var automaton = aOrB.Star().Concat(FsaBuilder.FromWord("c"));

    var rejected = new[] { "ca", "aaba", string.Empty, "cc" };
    var accepted = new[] { "abbac", "ac", "bc", "ababbbbac", "c" };

    Assert.DoesNotContain(rejected, word => automaton.Recognize(word));
    Assert.True(accepted.All(word => automaton.Recognize(word)));
}
/// <summary>
/// Checks the shape (4 states, 2 initial, 2 final) and the language of the optional
/// word "ab": only "ab" and the empty word are accepted among the samples.
/// </summary>
public void OptionFsaTest()
{
    var optionalWord = FsaBuilder.FromWord("ab").Optional();

    Assert.Equal(4, optionalWord.States.Count);
    Assert.Equal(2, optionalWord.Initial.Count);
    Assert.Equal(2, optionalWord.Final.Count);

    // Neither single letter of "ab" is a member on its own.
    foreach (var fragment in new[] { "b", "a" })
    {
        Assert.False(optionalWord.Recognize(fragment));
    }

    var members = new[] { "ab", string.Empty };
    Assert.True(members.All(word => optionalWord.Recognize(word)));
}
/// <summary>
/// Builds <c>(a|b)+c</c>, removes epsilon transitions and checks that no transition has
/// an empty label while the sample words are still accepted or rejected as expected.
/// </summary>
public void EpsilonFreeConstructionTest()
{
    // (a|b)+c
    var aOrB = FsaBuilder.FromWord("a").Union(FsaBuilder.FromWord("b"));
    var automaton = aOrB
        .Plus()
        .Concat(FsaBuilder.FromWord("c"))
        .EpsilonFree();

    var accepted = new[] { "abbac", "ac", "bc", "ababbbbac", "aac" };
    var rejected = new[] { "ca", "aaba", string.Empty, "cc", "c" };

    Assert.DoesNotContain(automaton.Transitions, tr => string.IsNullOrEmpty(tr.Label));
    Assert.True(accepted.All(word => automaton.Recognize(word)));
    Assert.DoesNotContain(rejected, word => automaton.Recognize(word));
}
/// <summary>
/// Checks the shape (5 states, 1 initial, 2 final) and the language of the Kleene
/// star applied to the word "abc".
/// </summary>
public void StarFsaTest1()
{
    var starred = FsaBuilder.FromWord("abc").Star();

    Assert.Equal(5, starred.States.Count);
    Assert.Single(starred.Initial);
    Assert.Equal(2, starred.Final.Count);

    // Words that are not a whole number of "abc" repetitions.
    foreach (var nonMember in new[] { "abcabcabcb", "ab" })
    {
        Assert.False(starred.Recognize(nonMember));
    }

    // Zero, one and three repetitions.
    foreach (var member in new[] { string.Empty, "abc", "abcabcabc" })
    {
        Assert.True(starred.Recognize(member));
    }
}
/// <summary>
/// Checks the union of the word automaton for "abc" with the epsilon automaton:
/// 5 states, 2 initial, 2 final, accepting the empty word and "abc" among the samples.
/// </summary>
public void UnionEpsilonFsaTest()
{
    // abc | epsilon
    var union = FsaBuilder.FromWord("abc").Union(FsaBuilder.FromEpsilon());

    Assert.Equal(5, union.States.Count);
    Assert.Equal(2, union.Initial.Count);
    Assert.Equal(2, union.Final.Count);

    Assert.True(union.Recognize(string.Empty));
    Assert.False(union.Recognize("a"));
    Assert.True(union.Recognize("abc"));
    Assert.False(union.Recognize("abca"));
}
/// <summary>
/// Checks the automaton for <c>ab*c</c> against sample accepted and rejected words.
/// </summary>
public void ComplexFsaConstructionTest()
{
    // ab*c
    var leading = FsaBuilder.FromWord("a");
    var repeated = FsaBuilder.FromWord("b").Star();
    var trailing = FsaBuilder.FromWord("c");
    var automaton = leading.Concat(repeated, trailing);

    // Words lacking the mandatory trailing 'c' are rejected.
    foreach (var nonMember in new[] { string.Empty, "ab" })
    {
        Assert.False(automaton.Recognize(nonMember));
    }

    // One, zero and several 'b's between 'a' and 'c'.
    foreach (var member in new[] { "abc", "ac", "abbbbc" })
    {
        Assert.True(automaton.Recognize(member));
    }
}
/// <summary>
/// Checks the concatenation of the word automata for "abc" and "de":
/// 7 states, one initial, one final, accepting only "abcde" among the samples.
/// </summary>
public void ConcatFsaTest()
{
    var joined = FsaBuilder.FromWord("abc").Concat(FsaBuilder.FromWord("de"));

    Assert.Equal(7, joined.States.Count);
    Assert.Single(joined.Initial);
    Assert.Single(joined.Final);

    // Neither operand alone, nor a prefix, nor the empty word is a member.
    foreach (var nonMember in new[] { string.Empty, "a", "abc", "de" })
    {
        Assert.False(joined.Recognize(nonMember));
    }

    Assert.True(joined.Recognize("abcde"));
}
/// <summary>
/// Checks the determinized automaton for <c>.*@.*\.com</c>, where the wildcard ranges
/// over the lower-case ASCII letters 'a'..'z' only.
/// </summary>
public void ComplexFsaConstructionTest2()
{
    // .*@.*\.com
    // Exactly 'a'..'z'; the count is derived from the end points so '{' is not included.
    var lowerCaseLetters = Enumerable.Range('a', 'z' - 'a' + 1)
        .Select(code => (char)code)
        .ToHashSet();

    // NOTE(review): FsaBuilder.All presumably builds "any word over the given
    // symbols" - confirm against its definition.
    var all = FsaBuilder.All(lowerCaseLetters);
    var fsa = all
        .Concat(
            FsaBuilder.FromWord("@"),
            all,
            FsaBuilder.FromWord(".com"))
        .Determinize();

    // Sample words are spelled out explicitly and the two lists are disjoint:
    // the same word cannot be required to be both accepted and rejected.
    var rejected = new[]
    {
        "megmail.com",    // no '@'
        "you@@gmail.com", // second '@' is not a letter
        "me@gmailcom",    // missing '.' before "com"
        "me@gmail.org",   // wrong suffix
    };
    var accepted = new[] { "me@abv.com", "you@gmail.com", "info@company.com" };

    Assert.DoesNotContain(rejected, word => fsa.Recognize(word));
    Assert.True(accepted.All(word => fsa.Recognize(word)));
}