/// <summary>
/// Builds a lattice of all possible segmentations using only words
/// present in the lexicon.
/// </summary>
/// <remarks>
/// Builds a lattice of all possible segmentations using only words
/// present in the lexicon. This function must be run prior to
/// running maxMatchSegmentation.
/// </remarks>
private void BuildSegmentationLattice(string s)
{
    edgesNb = 0;
    len = s.Length;
    // One lattice state per character boundary (0..len inclusive).
    states = new List<DFSAState<Word, int>>();
    lattice = new DFSA<Word, int>("wordLattice");
    for (int boundary = 0; boundary <= len; ++boundary)
    {
        states.Add(new DFSAState<Word, int>(boundary, lattice));
    }
    // First boundary is the start state; the final boundary accepts.
    lattice.SetInitialState(states[0]);
    states[len].SetAccepting(true);
    // Add an edge for every lexicon word occurring in s, plus
    // single-character fallback edges so every input is segmentable.
    for (int from = 0; from < len; ++from)
    {
        for (int to = len; to > from; --to)
        {
            string candidate = s.Substring(from, to - from);
            System.Diagnostics.Debug.Assert((candidate.Length > 0));
            bool singleChar = (from + 1 == to);
            bool inLexicon = words.Contains(candidate);
            if (inLexicon || singleChar)
            {
                // Known words are cheap (1); unknown single characters
                // carry a high penalty (100) so they are used as a last resort.
                double cost = inLexicon ? 1 : 100;
                DFSATransition<Word, int> trans = new DFSATransition<Word, int>(null, states[from], states[to], new Word(candidate), null, cost);
                states[from].AddTransition(trans);
                ++edgesNb;
            }
        }
    }
}
/// <summary>
/// Command-line driver: builds a DFSA from the automaton description file
/// in <c>args[0]</c>, analyses the source file in <c>args[1]</c>, and
/// reports whether the automaton's language accepts it.
/// </summary>
/// <param name="args">Arguments: automaton file, source file, optional flags (-q, --help).</param>
/// <returns>0 when the source file is accepted; 1 otherwise (including help and errors).</returns>
private static int Call(string[] args)
{
    try
    {
        if (args.Length >= 2 && !args.Contains("--help"))
        {
            // NOTE: the original also computed a verbosity level from
            // -q/-v/-vv into an unused local; removed as dead code — quiet
            // mode is checked directly below.

            // Create new Deterministic Finite State Automaton from given file
            var automaton = DFSA.CreateInstance(args[0] ?? Console.ReadLine(), 1);

            // Display automaton unless quiet mode was requested.
            if (!args.Contains("-q"))
            {
                automaton.Print();
            }

            // Analyse the given source code & display the results.
            var accepted = automaton.Analyse(File.ReadAllText(args[1] ?? Console.ReadLine()));
            Console.WriteLine($@"{'\n'}{((accepted) ? '\u2713' : '\u2717')} The source file `{Path.GetFileName(args[1])}` is {((accepted) ? string.Empty : "NOT " )}accepted by the automaton's described language !");
            return((accepted) ? 0 : 1);
        }
        Console.WriteLine(Help);
        return(1);
    }
    catch (Exception e)
    {
        // File not found or any other error: report and signal failure.
        Console.WriteLine($"Error occurred : {e.Message}\n");
        return(1);
    }
}
/// <summary>
/// Move the node with value dSt from this partition to blk.
/// </summary>
/// <param name="dSt">value to be moved; must currently belong to this block's member list</param>
/// <param name="blk">destination partition</param>
internal void MoveMember(DFSA.DState dSt, PartitionBlock blk)
{
    // Assert: dSt must belong to LinkedList this.members
    // The state caches its own LinkedListNode, so removal is O(1).
    LinkedListNode<DFSA.DState> node = dSt.listNode;
    this.members.Remove(node);
    // NOTE(review): this decrements predCount (not an obvious member count)
    // when a member leaves — presumably splitting bookkeeping for the
    // minimization algorithm; verify against the invariants the caller
    // maintains, since nothing in this block establishes them.
    this.predCount--;
    // Re-link the very same node into the destination block.
    blk.AddNode(node);
}
/// <summary>
/// Find an existing partition block with which dSt is compatible,
/// or construct a new partition into which dSt can be placed.
/// </summary>
/// <param name="dSt">accept state looking for a home partition</param>
/// <returns>a compatible (possibly freshly created) partition block</returns>
PartitionBlock FindPartition(DFSA.DState dSt)
{
    // Every partition in acceptStates has at least one member, and every
    // member shares the same semantic action.  Right context complicates
    // this: for a pattern R1/R2 the regex R1.R2 is recognized and the
    // buffer is backed up to the '/' position — yyless(N) for fixed-length
    // R1, yyless(yyleng-N) for fixed-length R2.  If the first state of a
    // partition has both lengths fixed, the block must commit to one
    // backup action, and later members must be compatible with that
    // choice (the unused context length is zeroed to record the commitment).
    foreach (PartitionBlock candidate in acceptStates)
    {
        DFSA.DState representative = candidate.FirstMember;
        if (!DFSA.SpansEqual(representative.accept.aSpan, dSt.accept.aSpan))
        {
            continue;
        }
        if (!representative.HasRightContext && !dSt.HasRightContext)
        {
            return(candidate);
        }
        if (representative.lhCntx > 0 && representative.lhCntx == dSt.lhCntx)
        {
            // From now on only add states with matching lhs length.
            representative.rhCntx = 0;
            dSt.rhCntx = 0;
            return(candidate);
        }
        if (representative.rhCntx > 0 && representative.rhCntx == dSt.rhCntx)
        {
            // From now on only add states with matching rhs length.
            representative.lhCntx = 0;
            dSt.lhCntx = 0;
            return(candidate);
        }
    }
    // No compatible block found: start a fresh one for dSt.
    PartitionBlock fresh = MkNewBlock();
    acceptStates.Add(fresh);
    return(fresh);
}
/// <summary>
/// Interactive driver: builds a DFSA from the automaton description in
/// args[0], then repeatedly reads words from the console and reports
/// whether the automaton accepts them.  Enter "-1" (or close stdin) to exit.
/// </summary>
/// <param name="args">args[0] is the automaton description file.</param>
private static void Main(string[] args)
{
    try
    {
        var automaton = DFSA.CreateInstance(args[0]);
        automaton.Print();
        var word = "∅";
        while (word != "-1")
        {
            Console.WriteLine("Enter a word (-1 to exit) : ");
            word = Console.ReadLine();
            // Console.ReadLine() returns null at end-of-stream; the original
            // passed that null on to Accept and exited via the catch block.
            // Treat it as an explicit exit request instead.
            if (word == null)
            {
                break;
            }
            var accepted = automaton.Accept(word);
            Console.WriteLine($@"{((accepted) ? '\u2713' : '\u2717')} The word `{word}` is {((accepted) ? String.Empty : "NOT " )}accepted by the automaton's described language !");
        }
    }
    catch (Exception e)
    {
        // File not found or error occurred
        Console.WriteLine(e.Message);
    }
}
/// <summary>
/// Converts the given binary tag lattice into a word (answer) lattice and
/// prints it in ATT FSM format to the supplied writer.
/// </summary>
/// <param name="tagLattice">Viterbi search graph of binary predictions.</param>
/// <param name="doc">Labels for the characters of the input document.</param>
/// <param name="out">Destination for the ATT FSM output.</param>
public virtual void PrintLattice(DFSA <string, int> tagLattice, IList <CoreLabel> doc, PrintWriter @out)
{
    CoreLabel[] docArray = Sharpen.Collections.ToArray(doc, new CoreLabel[doc.Count]);
    // Create answer lattice:
    MutableInteger nodeId = new MutableInteger(0);
    DFSA <string, int> answerLattice = new DFSA <string, int>(null);
    DFSAState <string, int> aInitState = new DFSAState <string, int>(nodeId, answerLattice);
    answerLattice.SetInitialState(aInitState);
    IDictionary <DFSAState <string, int>, DFSAState <string, int> > stateLinks = Generics.NewHashMap();
    // Convert binary lattice into word lattice:
    TagLatticeToAnswerLattice(tagLattice.InitialState(), aInitState, new StringBuilder(string.Empty), nodeId, 0, 0.0, stateLinks, answerLattice, docArray);
    try
    {
        answerLattice.PrintAttFsmFormat(@out);
    }
    catch (IOException e)
    {
        // Fixed: System.Exception has no (Exception) constructor — the
        // original `new Exception(e)` (a Java-conversion artifact) does not
        // compile.  Wrap with the message and keep the original as the
        // inner exception so the stack trace is preserved.
        throw new Exception(e.Message, e);
    }
}
/// <summary>
/// Creates a minimizer for the given DFSA and allocates the two seed
/// partition blocks ("other" states and start states).  Accepting states
/// presumably get their blocks later via FindPartition — confirm with caller.
/// </summary>
/// <param name="dfsa">the automaton to be minimized</param>
internal Minimizer(DFSA dfsa)
{
    this.dfsa = dfsa;
    // NOTE(review): if MkNewBlock numbers blocks sequentially, the
    // allocation order (otherStates before startStates) is significant.
    otherStates = MkNewBlock();
    startStates = MkNewBlock();
}
/// <summary>
/// Append a new linked-list node holding dSt to this block's members,
/// recording the node on the state itself so it can later be removed in O(1).
/// </summary>
/// <param name="dSt">state to be added to this partition</param>
internal void AddState(DFSA.DState dSt)
{
    var node = new LinkedListNode<DFSA.DState>(dSt);
    dSt.listNode = node;
    members.AddLast(node);
}
/// <summary>
/// Maps old dfsa states to new states in the minimized set: a state that is
/// its block's only member maps to itself, otherwise to the block's first
/// (representative) member.
/// </summary>
/// <param name="dSt">The state to be mapped</param>
/// <returns>The replacement state</returns>
internal static DFSA.DState PMap(DFSA.DState dSt)
{
    var blk = dSt.block as PartitionBlock;
    return blk.MemberCount == 1 ? dSt : blk.FirstMember;
}
/// <summary>
/// Find an existing partition block with which dSt is compatible,
/// or construct a new partition into which dSt can be placed.
/// </summary>
/// <param name="dSt">accept state to be placed</param>
/// <returns>the compatible or newly created partition block</returns>
PartitionBlock FindPartition(DFSA.DState dSt)
{
    foreach (PartitionBlock block in acceptStates)
    {
        // Each partition in acceptStates is non-empty and all its members
        // share one semantic action.  Right context (pattern R1/R2, regex
        // R1.R2 with a backup to the '/' position) means the action may be
        // yyless(N) for fixed-length R1 or yyless(yyleng-N) for fixed-length
        // R2.  Once the block's first member commits to one backup form,
        // only states compatible with that choice may join, and the unused
        // context length is zeroed to record the commitment.
        DFSA.DState leader = block.FirstMember;
        if (DFSA.SpansEqual(leader.accept.aSpan, dSt.accept.aSpan))
        {
            bool neitherHasRightContext = !leader.HasRightContext && !dSt.HasRightContext;
            if (neitherHasRightContext)
            {
                return block;
            }
            bool lhsLengthsMatch = leader.lhCntx > 0 && leader.lhCntx == dSt.lhCntx;
            if (lhsLengthsMatch)
            {
                // From now on only add states with matching lhs length.
                leader.rhCntx = 0;
                dSt.rhCntx = 0;
                return block;
            }
            bool rhsLengthsMatch = leader.rhCntx > 0 && leader.rhCntx == dSt.rhCntx;
            if (rhsLengthsMatch)
            {
                // From now on only add states with matching rhs length.
                leader.lhCntx = 0;
                dSt.lhCntx = 0;
                return block;
            }
        }
    }
    // Nothing compatible: allocate a fresh partition for dSt.
    PartitionBlock newBlock = MkNewBlock();
    acceptStates.Add(newBlock);
    return newBlock;
}
/// <summary>
/// Drives the whole pipeline for one input file: parse the lex
/// specification, build the NFSA, convert it to a DFSA, optionally
/// minimize, and emit the generated scanner.
/// </summary>
/// <param name="fileArg">name of the lex specification file</param>
internal void Process(string fileArg)
{
    GetNames(fileArg);
    // check for file exists
    OpenSource();
    // parse source file
    if (inputFile != null)
    {
        DateTime start = DateTime.Now;
        try
        {
            handler = new ErrorHandler();
            scanner = new QUT.Gplex.Lexer.Scanner(inputFile);
            parser = new QUT.Gplex.Parser.Parser(scanner);
            scanner.yyhdlr = handler;
            parser.Initialize(this, scanner, handler, new OptionParser2(ParseOption));
            aast = parser.Aast;
            parser.Parse();
            // aast.DiagnosticDump();
            if (verbose)
                Status(start);
            CheckOptions();
            if (!Errors && !ParseOnly)
            {
                // build NFSA
                if (ChrClasses)
                {
                    DateTime t0 = DateTime.Now;
                    partition = new Partition(TargetSymCardinality, this);
                    partition.FindClasses(aast);
                    partition.FixMap();
                    if (verbose)
                        ClassStatus(t0, partition.Length);
                }
                else
                    CharRange.Init(TargetSymCardinality);
                nfsa = new NFSA(this);
                nfsa.Build(aast);
                if (!Errors)
                {
                    // convert to DFSA
                    dfsa = new DFSA(this);
                    dfsa.Convert(nfsa);
                    if (!Errors)
                    {
                        // minimize automaton
                        if (minimize)
                            dfsa.Minimize();
                        if (!Errors && !checkOnly)
                        {
                            // Emit the scanner to the output file.  The using
                            // blocks close the frame reader and output writer
                            // even if EmitScanner throws (the original leaked
                            // both streams on exception); `using` on a null
                            // reference is a no-op, matching the original
                            // null-checked Close calls.
                            using (TextReader frameRdr = FrameReader())
                            using (TextWriter outputWrtr = OutputWriter())
                            {
                                dfsa.EmitScanner(frameRdr, outputWrtr);
                                if (!embedBuffers)
                                    CopyBufferCode();
                            }
                        }
                    }
                }
            }
        }
        catch (Exception ex)
        {
            // If the failure happened before the AAST was created (e.g. in
            // the scanner constructor), aast is null and the original code
            // threw NullReferenceException here, masking the real error.
            if (aast != null)
                handler.AddError(ex.Message, aast.AtStart);
            throw;
        }
    }
}
/// <summary>
/// Recursively builds an answer lattice (Chinese words) from a Viterbi search graph
/// of binary predictions.
/// </summary>
/// <remarks>
/// Recursively builds an answer lattice (Chinese words) from a Viterbi search graph
/// of binary predictions. This function does a limited amount of post-processing:
/// preserve white spaces of the input, and not segment between two latin characters or
/// between two digits. Consequently, the probabilities of all paths in answerLattice
/// may not sum to 1 (they do sum to 1 if no post processing applies).
/// </remarks>
/// <param name="tSource">Current node in Viterbi search graph.</param>
/// <param name="aSource">Current node in answer lattice.</param>
/// <param name="answer">Partial word starting at aSource.</param>
/// <param name="nodeId">Currently unused node identifier for answer graph.</param>
/// <param name="pos">Current position in docArray.</param>
/// <param name="cost">Current cost of answer.</param>
/// <param name="stateLinks">
/// Maps nodes of the search graph to nodes in answer lattice
/// (when paths of the search graph are recombined, paths of the answer lattice should be
/// recombined as well, if at word boundary).
/// </param>
private void TagLatticeToAnswerLattice(DFSAState <string, int> tSource, DFSAState <string, int> aSource, StringBuilder answer, MutableInteger nodeId, int pos, double cost, IDictionary <DFSAState <string, int>, DFSAState <string, int> > stateLinks, DFSA <string, int> answerLattice, CoreLabel[] docArray)
{
    // Add "1" prediction after the end of the sentence, if applicable:
    if (tSource.IsAccepting() && tSource.ContinuingInputs().IsEmpty())
    {
        tSource.AddTransition(new DFSATransition <string, int>(string.Empty, tSource, new DFSAState <string, int>(-1, null), "1", string.Empty, 0));
    }
    // Get current label, character, and prediction:
    CoreLabel curLabel = (pos < docArray.Length) ? docArray[pos] : null;
    string curChr = null;
    string origSpace = null;
    if (curLabel != null)
    {
        curChr = curLabel.Get(typeof(CoreAnnotations.OriginalCharAnnotation));
        System.Diagnostics.Debug.Assert((curChr.Length == 1));
        origSpace = curLabel.Get(typeof(CoreAnnotations.SpaceBeforeAnnotation));
    }
    // Get set of successors in search graph:
    ICollection <string> inputs = tSource.ContinuingInputs();
    // Only keep most probable transition out of initial state:
    string answerConstraint = null;
    if (pos == 0)
    {
        double minCost = double.PositiveInfinity;
        // DFSATransition<String, Integer> bestTransition = null;
        foreach (string predictSpace in inputs)
        {
            DFSATransition <string, int> transition = tSource.Transition(predictSpace);
            double transitionCost = transition.Score();
            if (transitionCost < minCost)
            {
                if (predictSpace != null)
                {
                    // Fixed: the original used Java "%s"/"%e"/"%n" specifiers,
                    // which C# string.Format renders literally (args ignored).
                    logger.Info(string.Format("mincost ({0}): {1:e} -> {2:e}", predictSpace, minCost, transitionCost));
                    minCost = transitionCost;
                    answerConstraint = predictSpace;
                }
            }
        }
    }
    // Follow along each transition:
    foreach (string predictSpace_1 in inputs)
    {
        DFSATransition <string, int> transition = tSource.Transition(predictSpace_1);
        DFSAState <string, int> tDest = transition.Target();
        DFSAState <string, int> newASource = aSource;
        //logger.info(String.format("tsource=%s tdest=%s asource=%s pos=%d predictSpace=%s%n", tSource, tDest, newASource, pos, predictSpace));
        StringBuilder newAnswer = new StringBuilder(answer.ToString());
        int answerLen = newAnswer.Length;
        // Last character of the partial word, if any.  Fixed: .NET
        // StringBuilder has no Substring method (Java artifact);
        // ToString(start, length) extracts the same single character.
        string prevChr = (answerLen > 0) ? newAnswer.ToString(answerLen - 1, 1) : null;
        double newCost = cost;
        // Ignore paths starting with zero:
        if (answerConstraint != null && !answerConstraint.Equals(predictSpace_1))
        {
            logger.Info(string.Format("Skipping transition {0} at pos 0.", predictSpace_1));
            continue;
        }
        // Ignore paths not consistent with input segmentation:
        if (flags.keepAllWhitespaces && "0".Equals(predictSpace_1) && "1".Equals(origSpace))
        {
            logger.Info(string.Format("Skipping non-boundary at pos {0}, since space in the input.", pos));
            continue;
        }
        // Ignore paths adding segment boundaries between two latin characters, or between two digits:
        // (unless already present in original input)
        if ("1".Equals(predictSpace_1) && "0".Equals(origSpace) && prevChr != null && curChr != null)
        {
            char p = prevChr[0];
            char c = curChr[0];
            if (ChineseStringUtils.IsLetterASCII(p) && ChineseStringUtils.IsLetterASCII(c))
            {
                logger.Info(string.Format("Not hypothesizing a boundary at pos {0}, since between two ASCII letters ({1} and {2}).", pos, prevChr, curChr));
                continue;
            }
            if (ChineseUtils.IsNumber(p) && ChineseUtils.IsNumber(c))
            {
                logger.Info(string.Format("Not hypothesizing a boundary at pos {0}, since between two numeral characters ({1} and {2}).", pos, prevChr, curChr));
                continue;
            }
        }
        // If predictSpace==1, create a new transition in answer search graph:
        if ("1".Equals(predictSpace_1))
        {
            if (newAnswer.ToString().Length > 0)
            {
                // If answer destination node visited before, create a new edge and leave.
                // Fixed: IDictionary<K, V> has no Contains(key) member (Java
                // containsKey artifact); ContainsKey is the correct call.
                if (stateLinks.ContainsKey(tSource))
                {
                    DFSAState <string, int> aDest = stateLinks[tSource];
                    newASource.AddTransition(new DFSATransition <string, int>(string.Empty, newASource, aDest, newAnswer.ToString(), string.Empty, newCost));
                    //logger.info(String.format("new transition: asource=%s adest=%s edge=%s%n", newASource, aDest, newAnswer));
                    continue;
                }
                // If answer destination node not visited before, create it + new edge:
                nodeId.IncValue(1);
                DFSAState <string, int> aDest_1 = new DFSAState <string, int>(nodeId, answerLattice, 0.0);
                stateLinks[tSource] = aDest_1;
                newASource.AddTransition(new DFSATransition <string, int>(string.Empty, newASource, aDest_1, newAnswer.ToString(), string.Empty, newCost));
                //logger.info(String.format("new transition: asource=%s adest=%s edge=%s%n%n%n", newASource, aDest, newAnswer));
                // Reached an accepting state:
                if (tSource.IsAccepting())
                {
                    aDest_1.SetAccepting(true);
                    continue;
                }
                // Start new answer edge:
                newASource = aDest_1;
                newAnswer = new StringBuilder();
                newCost = 0.0;
            }
        }
        System.Diagnostics.Debug.Assert((curChr != null));
        newAnswer.Append(curChr);
        newCost += transition.Score();
        // Recurse unless the accumulated cost crosses the pruning threshold;
        // ASCII letters are never pruned so latin tokens stay whole.
        if (newCost < flags.searchGraphPrune || ChineseStringUtils.IsLetterASCII(curChr[0]))
        {
            TagLatticeToAnswerLattice(tDest, newASource, newAnswer, nodeId, pos + 1, newCost, stateLinks, answerLattice, docArray);
        }
    }
}
/// <summary>
/// Runs the full pipeline for one input file: parse the lex specification,
/// build the NFSA, convert it to a DFSA, optionally minimize the automaton,
/// and emit the generated scanner to the output file.
/// </summary>
/// <param name="fileArg">name of the lex specification file</param>
internal void Process(string fileArg)
{
    GetNames(fileArg);
    // check for file exists
    OpenSource();
    // parse source file
    if (inputFile != null)
    {
        DateTime start = DateTime.Now;
        try
        {
            handler = new ErrorHandler();
            scanner = new QUT.Gplex.Lexer.Scanner(inputFile);
            parser = new QUT.Gplex.Parser.Parser(scanner);
            scanner.yyhdlr = handler;
            parser.Initialize(this, scanner, handler, new OptionParser2(ParseOption));
            aast = parser.Aast;
            parser.Parse();
            // aast.DiagnosticDump();
            if (verbose)
            {
                Status(start);
            }
            CheckOptions();
            if (!Errors && !ParseOnly)
            {
                // build NFSA
                if (ChrClasses)
                {
                    DateTime t0 = DateTime.Now;
                    partition = new Partition(TargetSymCardinality, this);
                    partition.FindClasses(aast);
                    partition.FixMap();
                    if (verbose)
                    {
                        ClassStatus(t0, partition.Length);
                    }
                }
                else
                {
                    CharRange.Init(TargetSymCardinality);
                }
                nfsa = new NFSA(this);
                nfsa.Build(aast);
                if (!Errors)
                {
                    // convert to DFSA
                    dfsa = new DFSA(this);
                    dfsa.Convert(nfsa);
                    if (!Errors)
                    {
                        // minimize automaton
                        if (minimize)
                        {
                            dfsa.Minimize();
                        }
                        if (!Errors && !checkOnly)
                        {
                            // Emit the scanner to the output file.  The using
                            // blocks dispose the frame reader and output
                            // writer even when EmitScanner throws (the
                            // original leaked both streams on exception);
                            // `using` tolerates a null resource, matching the
                            // original null-checked Close calls.
                            using (TextReader frameRdr = FrameReader())
                            using (TextWriter outputWrtr = OutputWriter())
                            {
                                dfsa.EmitScanner(frameRdr, outputWrtr);
                                if (!embedBuffers)
                                {
                                    CopyBufferCode();
                                }
                            }
                        }
                    }
                }
            }
        }
        catch (Exception ex)
        {
            // If the failure happened before the AAST existed (e.g. in the
            // scanner constructor), aast is null and the original code threw
            // a NullReferenceException here, masking the real error.
            if (aast != null)
            {
                handler.AddError(ex.Message, aast.AtStart);
            }
            throw;
        }
    }
}
/// <summary>
/// Builds the Viterbi search graph (as a DFSA) for the given sequence model:
/// a single start state, per-position state rows, and a single accepting end
/// state, with transition costs equal to the negated window scores.
/// </summary>
/// <param name="ts">sequence model supplying lengths, windows, possible values and scores</param>
/// <param name="classIndex">maps tag numbers to their string labels (edge labels)</param>
/// <returns>the constructed search graph</returns>
public static DFSA <string, int> GetGraph(ISequenceModel ts, IIndex <string> classIndex)
{
    DFSA <string, int> viterbiSearchGraph = new DFSA <string, int>(null);
    // Set up tag options
    int length = ts.Length();
    int leftWindow = ts.LeftWindow();
    int rightWindow = ts.RightWindow();
    // Right context is not supported by this construction.
    System.Diagnostics.Debug.Assert((rightWindow == 0));
    int padLength = length + leftWindow + rightWindow;
    // NOTE: tags[i][j] : i is index into pos, and j into product
    int[][] tags = new int[padLength][];
    int[] tagNum = new int[padLength];
    for (int pos = 0; pos < padLength; pos++)
    {
        tags[pos] = ts.GetPossibleValues(pos);
        tagNum[pos] = tags[pos].Length;
    }
    // Set up Viterbi search graph:
    DFSAState <string, int>[][] graphStates = null;
    DFSAState <string, int> startState = null;
    DFSAState <string, int> endState = null;
    // NOTE(review): viterbiSearchGraph was constructed just above, so this
    // guard is always true — dead check carried over from the Java original.
    if (viterbiSearchGraph != null)
    {
        int stateId = -1;
        startState = new DFSAState <string, int>(++stateId, viterbiSearchGraph, 0.0);
        viterbiSearchGraph.SetInitialState(startState);
        // NOTE(review): raw `new DFSAState[...]` arrays assigned to
        // DFSAState<string, int>[][] are a Sharpen conversion artifact that a
        // standard C# compiler rejects — confirm against the project's build.
        graphStates = new DFSAState[length][];
        for (int pos_1 = 0; pos_1 < length; ++pos_1)
        {
            //System.err.printf("%d states at pos %d\n",tags[pos].length,pos);
            graphStates[pos_1] = new DFSAState[tags[pos_1].Length];
            for (int product = 0; product < tags[pos_1].Length; ++product)
            {
                graphStates[pos_1][product] = new DFSAState <string, int>(++stateId, viterbiSearchGraph);
            }
        }
        // Accepting state:
        endState = new DFSAState <string, int>(++stateId, viterbiSearchGraph, 0.0);
        endState.SetAccepting(true);
    }
    int[] tempTags = new int[padLength];
    // Set up product space sizes: a sliding product of the window's
    // tag counts, one entry per position.
    int[] productSizes = new int[padLength];
    int curProduct = 1;
    for (int i = 0; i < leftWindow; i++)
    {
        curProduct *= tagNum[i];
    }
    for (int pos_2 = leftWindow; pos_2 < padLength; pos_2++)
    {
        if (pos_2 > leftWindow + rightWindow)
        {
            curProduct /= tagNum[pos_2 - leftWindow - rightWindow - 1];
        }
        // shift off
        curProduct *= tagNum[pos_2];
        // shift on
        productSizes[pos_2 - rightWindow] = curProduct;
    }
    double[][] windowScore = new double[padLength][];
    // Score all of each window's options
    for (int pos_3 = leftWindow; pos_3 < leftWindow + length; pos_3++)
    {
        windowScore[pos_3] = new double[productSizes[pos_3]];
        Arrays.Fill(tempTags, tags[0][0]);
        for (int product = 0; product < productSizes[pos_3]; product++)
        {
            // Decode the product index into a concrete tag assignment for
            // the window ending at pos_3 (mixed-radix decomposition).
            int p = product;
            int shift = 1;
            for (int curPos = pos_3; curPos >= pos_3 - leftWindow; curPos--)
            {
                tempTags[curPos] = tags[curPos][p % tagNum[curPos]];
                p /= tagNum[curPos];
                if (curPos > pos_3)
                {
                    shift *= tagNum[curPos];
                }
            }
            // Score once per shared history, then spread across the row.
            if (tempTags[pos_3] == tags[pos_3][0])
            {
                // get all tags at once
                double[] scores = ts.ScoresOf(tempTags, pos_3);
                // fill in the relevant windowScores
                for (int t = 0; t < tagNum[pos_3]; t++)
                {
                    windowScore[pos_3][product + t * shift] = scores[t];
                }
            }
        }
    }
    // loop over the classification spot
    for (int pos_4 = leftWindow; pos_4 < length + leftWindow; pos_4++)
    {
        // loop over window product types
        for (int product = 0; product < productSizes[pos_4]; product++)
        {
            if (pos_4 == leftWindow)
            {
                // all nodes in the first spot link to startState:
                int curTag = tags[pos_4][product % tagNum[pos_4]];
                //System.err.printf("pos=%d, product=%d, tag=%d score=%.3f\n",pos,product,curTag,windowScore[pos][product]);
                DFSATransition <string, int> tr = new DFSATransition <string, int>(string.Empty, startState, graphStates[pos_4][product], classIndex.Get(curTag), string.Empty, -windowScore[pos_4][product]);
                startState.AddTransition(tr);
            }
            else
            {
                // Link every predecessor product consistent with this
                // product's shared window suffix.
                int sharedProduct = product / tagNum[pos_4 + rightWindow];
                int factor = productSizes[pos_4] / tagNum[pos_4 + rightWindow];
                for (int newTagNum = 0; newTagNum < tagNum[pos_4 - leftWindow - 1]; newTagNum++)
                {
                    int predProduct = newTagNum * factor + sharedProduct;
                    int predTag = tags[pos_4 - 1][predProduct % tagNum[pos_4 - 1]];
                    int curTag = tags[pos_4][product % tagNum[pos_4]];
                    //log.info("pos: "+pos);
                    //log.info("product: "+product);
                    //System.err.printf("pos=%d-%d, product=%d-%d, tag=%d-%d score=%.3f\n",pos-1,pos,predProduct,product,predTag,curTag,
                    // windowScore[pos][product]);
                    // NOTE(review): the source row is indexed by predTag (a tag
                    // *value*) rather than a product index — verify against the
                    // upstream implementation before relying on this.
                    DFSAState <string, int> sourceState = graphStates[pos_4 - leftWindow][predTag];
                    DFSAState <string, int> destState = (pos_4 - leftWindow + 1 == graphStates.Length) ? endState : graphStates[pos_4 - leftWindow + 1][curTag];
                    DFSATransition <string, int> tr = new DFSATransition <string, int>(string.Empty, sourceState, destState, classIndex.Get(curTag), string.Empty, -windowScore[pos_4][product]);
                    graphStates[pos_4 - leftWindow][predTag].AddTransition(tr);
                }
            }
        }
    }
    return (viterbiSearchGraph);
}