/// <summary>
/// At the failure position there may be "garbage": a run of characters that cannot be parsed by any token
/// rule available in the CompositeGrammar at this point. No rule will ever parse this code, so we simply
/// look for the next valid token and skip everything up to it (the garbage).
/// </summary>
/// <returns>true - if garbage was deleted</returns>
private bool TryDeleteGarbage(RecoveryParser rp, int maxPos, ParsedSequence sequence)
{
    var text = rp.ParseResult.Text;
    if (maxPos >= text.Length)
    {
        return(false);
    }

    var parseResult = rp.ParseResult;
    var grammar = parseResult.RuleParser.Grammar;
    var res = grammar.ParseAllGrammarTokens(maxPos, parseResult);
    RemoveEmpty(res, maxPos);

    if (res.Count == 0)
    {
        var i = maxPos + 1;
        for (; i < text.Length; i++) // spin until a token is recognized or the end of the text is reached
        {
            var res2 = grammar.ParseAllGrammarTokens(i, parseResult);
            RemoveEmpty(res2, i);
            if (res2.Count > 0)
            {
                break;
            }
        }

        // Record the skipped span [maxPos, i) as a deleted "token" (true = garbage) and report it parsed.
        _deletedToken[new ParsedSequenceAndSubrule(sequence, new ParsedSubrule(maxPos, i, s_loopState))] = true;
        rp.SubruleParsed(maxPos, i, new ParseRecord(sequence, 0, maxPos));
        return(true);
    }

    return(false);
}
/// <summary>
/// Folds the best paths of all sub-sequences of <paramref name="subrule"/> into the running minimum.
/// When the subrule has no sub-sequences, an empty subrule costs its mandatory token count and a
/// non-empty one costs nothing.
/// </summary>
private TokenChanges LocalMinForSubSequence(ParsedSequence seq, Dictionary<ParsedSequenceKey, SequenceTokenChanges> memiozation, ParsedSubrule subrule, TokenChanges localMin)
{
    var subSequences = seq.GetSequencesForSubrule(subrule).ToArray();

    if (subSequences.Length == 0)
    {
        // No sub-sequences at all: cost depends only on whether the subrule is empty.
        return subrule.IsEmpty
            ? new TokenChanges(seq.SubruleMandatoryTokenCount(subrule), 0)
            : new TokenChanges();
    }

    var best = localMin;
    for (var i = 0; i < subSequences.Length; i++)
    {
        var candidate = FindBestPath(subSequences[i], subrule.End, memiozation);
        best = TokenChanges.Min(best, candidate);
    }

    return best;
}
/// <summary>
/// Tries to delete up to <paramref name="tokensToDelete"/> tokens starting at <paramref name="pos"/>,
/// branching over every non-void token that can be parsed at that position.
/// </summary>
private void DeleteTokens(RecoveryParser rp, int pos, ParsedSequence sequence, int tokensToDelete)
{
    // Deletion budget exhausted — nothing more to try.
    if (tokensToDelete <= 0)
        return;

    var parseResult = rp.ParseResult;
    var text = parseResult.Text;
    var grammar = parseResult.RuleParser.Grammar;

    var tokenEnds = grammar.ParseAllNonVoidGrammarTokens(pos, parseResult);
    RemoveEmpty(tokenEnds, pos);

    if (tokenEnds.Count == 0)
        return;

    foreach (var tokenEnd in tokenEnds)
    {
        if (!CanDelete(text, pos, tokenEnd))
            continue;
        ContinueDeleteTokens(rp, sequence, pos, tokenEnd, tokensToDelete);
    }
}
/// <summary>
/// Flattens a parsed sequence into the set of linear subrule chains whose total token changes equal the
/// memoized optimum for this (sequence, end) pair. Sequences whose recorded cost is Fail, or whose cost
/// does not match <paramref name="sequenceInsertedTokens"/>, contribute nothing.
/// </summary>
/// <param name="prevs">Chains accumulated so far; each flattened subrule is prepended to them.</param>
/// <param name="parseResult">The parse result the sequence belongs to.</param>
/// <param name="seq">The parsed sequence being flattened.</param>
/// <param name="end">The end position of the sequence.</param>
/// <param name="sequenceInsertedTokens">The expected token-change cost; must match the memoized total.</param>
/// <param name="memiozation">Memoization table previously filled by FindBestPath.</param>
private FlattenSequences FlattenSequence(
    FlattenSequences prevs,
    ParseResult parseResult,
    ParsedSequence seq,
    int end,
    TokenChanges sequenceInsertedTokens,
    Dictionary<ParsedSequenceKey, SequenceTokenChanges> memiozation)
{
    SequenceTokenChanges first;
    var key = new ParsedSequenceKey(seq, end);

    // FindBestPath must have been run for this key before flattening.
    if (!memiozation.TryGetValue(key, out first))
    {
        Debug.Assert(false);
    }

    var parses = first.SubrulesTokenChanges;

    if (first.TotalTokenChanges.IsFail)
    {
        return new FlattenSequences();
    }

    // Only paths whose cost matches the memoized optimum are expanded.
    if (sequenceInsertedTokens != first.TotalTokenChanges)
    {
        return new FlattenSequences();
    }

    var firstSubrules = seq.GetFirstSubrules(parses.Keys).ToArray();
    var total = new FlattenSequences();

    foreach (var firstSubrule in firstSubrules)
    {
        var insertedTokens = parses[firstSubrule];
        if (insertedTokens.IsFail)
        {
            continue;
        }

        var result = FlattenSubrule(prevs, parseResult, seq, parses, firstSubrule, insertedTokens, memiozation);
        total.AddRange(result);
    }

    return total;
}
/// <summary>
/// Removes non-optimal paths: first computes, by fixed-point iteration, the cumulative cost of reaching
/// each subrule from a start state, then walks backward from the cheapest last subrules, keeping only
/// predecessors lying on a minimal-cost path.
/// </summary>
/// <param name="comulativeMin">Receives the minimal cumulative cost over the last subrules ending at <paramref name="end"/>.</param>
/// <returns>The subset of <paramref name="parses"/> that lies on some minimal-cost path.</returns>
private static SubruleParses RemoveWorstPaths(ParsedSequence seq, int end, SubruleParses parses, out int comulativeMin)
{
    var comulativeCost = new SubruleParses();
    bool updated = true;
    // Fixed-point: iterate until no subrule's cumulative cost changes.
    while (updated)
    {
        updated = false;
        foreach (var parse in parses)
        {
            var subrule = parse.Key;
            int oldCount;
            // First sighting of this subrule forces at least one more pass.
            if (!comulativeCost.TryGetValue(subrule, out oldCount))
                updated = true;
            int min;
            if (seq.StartPos == subrule.Begin && seq.ParsingSequence.StartStates.Contains(subrule.State))
                min = 0; // a start subrule costs nothing to reach
            else
            {
                min = Fail;
                int prevCount;
                // Take the cheapest already-known predecessor.
                foreach (var prevSubrule in seq.GetPrevSubrules(subrule, parses.Keys))
                    if (comulativeCost.TryGetValue(prevSubrule, out prevCount))
                        min = Math.Min(min, prevCount);
            }
            var newCount = AddOrFail(min, parses[subrule]);
            comulativeCost[subrule] = newCount;
            updated = updated || oldCount != newCount;
        }
    }

    // Seed the backward walk with the last subrules, keeping only those of minimal cumulative cost.
    var toProcess = new SCG.Queue<ParsedSubrule>(seq.GetLastSubrules(parses.Keys, end));
    var comulativeMin2 = toProcess.Min(s => comulativeCost[s]);
    comulativeMin = comulativeMin2;
    toProcess = new SCG.Queue<ParsedSubrule>(toProcess.Where(s => comulativeCost[s] == comulativeMin2));
    var good = new SubruleParses();
    while (toProcess.Count > 0)
    {
        var subrule = toProcess.Dequeue();
        if (good.ContainsKey(subrule))
            continue;
        good.Add(subrule, parses[subrule]);
        var prev = seq.GetPrevSubrules(subrule, parses.Keys).ToList();
        if (prev.Count > 0)
        {
            int min;
            if (seq.StartPos == subrule.Begin && seq.ParsingSequence.StartStates.Contains(subrule.State))
                min = 0;
            else
                min = prev.Min(s => comulativeCost[s]);
            // Enqueue only predecessors that are on a minimal-cost path.
            foreach (var prevSubrule in prev)
                if (comulativeCost[prevSubrule] == min)
                    toProcess.Enqueue(prevSubrule);
        }
    }
    return good;
}
/// <summary>
/// Peptide constructor.
/// </summary>
/// <param name="Seq">Amino acid sequence of the peptide (1-letter code).</param>
public Peptide(string Seq)
{
    // NOTE(review): assigning fields directly on Parsed[i] assumes ParsedSequence is a struct
    // (array elements usable without construction) — confirm; if it is a class this throws NRE.
    Parsed = new ParsedSequence[Seq.Length];
    Sequence = Seq;
    for (int i = 0; i < Seq.Length; i++)
    {
        Parsed[i].AminoAcid = Convert.ToString(Seq[i]);
        Parsed[i].ModifMass = 0.0; // no modification by default
    }
    ParseSeq();
}
/// <summary>
/// Computes the minimal number of token changes needed to parse <paramref name="seq"/> up to
/// <paramref name="end"/>, memoizing per (sequence, end) key and pruning non-optimal subrule paths
/// into the memoization table via RemoveWorstPaths.
/// </summary>
/// <returns>The minimal change count, or Fail when the sequence cannot be completed.</returns>
/// <exception cref="OperationCanceledException">Thrown when parsing has been terminated.</exception>
private int FindBestPath(ParsedSequence seq, int end, Dictionary<ParsedSequenceKey, SubruleParsesAndEnd> memiozation)
{
    if (_parseResult.TerminateParsing)
        throw new OperationCanceledException();

    SubruleParsesAndEnd result;
    var key = new ParsedSequenceKey(seq, end);
    if (memiozation.TryGetValue(key, out result))
        return result.End;

    // Empty span: the whole sequence must be inserted — cost is its mandatory token count.
    if (seq.StartPos == end)
    {
        memiozation.Add(key, new SubruleParsesAndEnd(new Dictionary<ParsedSubrule, int>(), seq.ParsingSequence.MandatoryTokenCount));
        return seq.ParsingSequence.MandatoryTokenCount;
    }

    var results = new Dictionary<ParsedSubrule, int>();
    var validSubrules = seq.GetValidSubrules(end).ToList();
    if (validSubrules.Count == 0)
    {
        memiozation.Add(key, new SubruleParsesAndEnd(results, 0));
        return 0;
    }

    // Pre-seed with Fail to break recursion cycles while sub-sequences are being evaluated.
    memiozation.Add(key, new SubruleParsesAndEnd(results, Fail));

    foreach (var subrule in validSubrules)
    {
        int localMin;
        if (_deletedToken.ContainsKey(new ParsedSequenceAndSubrule(seq, subrule)))
            localMin = 1; // a deletion is priced like a single insertion
        else
            localMin = LocalMinForSubSequence(seq, memiozation, subrule, Fail);

        results[subrule] = localMin;
    }

    int comulativeMin;
    var bestResults = RemoveWorstPaths(seq, end, results, out comulativeMin);
    var result2 = new SubruleParsesAndEnd(bestResults, comulativeMin);
    memiozation[key] = result2;
    return result2.End;
}
/// <summary>
/// Registers the token at [pos, nextPos) as deleted, then continues deleting: first skipping any void
/// tokens (presumably whitespace/comments — confirm) that follow, then recursing with a decremented
/// deletion budget.
/// </summary>
private void ContinueDeleteTokens(RecoveryParser rp, ParsedSequence sequence, int pos, int nextPos, int tokensToDelete)
{
    // false: a real (non-garbage) token was deleted, as opposed to skipped garbage.
    _deletedToken[new ParsedSequenceAndSubrule(sequence, new ParsedSubrule(pos, nextPos, s_loopState))] = false;
    rp.SubruleParsed(pos, nextPos, new ParseRecord(sequence, 0, pos));

    var parseResult = rp.ParseResult;
    var grammar = parseResult.RuleParser.Grammar;
    var res2 = grammar.ParseAllVoidGrammarTokens(nextPos, parseResult);
    RemoveEmpty(res2, nextPos);

    // No void tokens follow — continue deletion directly from nextPos.
    if (res2.Count == 0)
        DeleteTokens(rp, nextPos, sequence, tokensToDelete - 1);

    // Otherwise skip past each run of void tokens before continuing deletion.
    foreach (var nextPos2 in res2)
    {
        //_deletedToken[new ParsedSequenceAndSubrule(sequence, new ParsedSubrule(pos, nextPos2, s_loopState))] = false;
        rp.SubruleParsed(nextPos, nextPos2, new ParseRecord(sequence, s_loopState, pos));
        DeleteTokens(rp, nextPos2, sequence, tokensToDelete - 1);
    }
}
/// <summary>
/// Folds the best paths of all sub-sequences of <paramref name="subrule"/> into the running minimum.
/// When the subrule has no sub-sequences, an empty subrule costs its mandatory token count and a
/// non-empty one costs nothing.
/// </summary>
private int LocalMinForSubSequence(ParsedSequence seq, Dictionary<ParsedSequenceKey, SubruleParsesAndEnd> memiozation, ParsedSubrule subrule, int localMin)
{
    var subSequences = seq.GetSequencesForSubrule(subrule).ToArray();

    // No sub-sequences at all: cost depends only on whether the subrule is empty.
    if (subSequences.Length == 0)
        return subrule.IsEmpty ? seq.SubruleMandatoryTokenCount(subrule) : 0;

    var best = localMin;
    foreach (var subSequence in subSequences)
        best = Math.Min(best, FindBestPath(subSequence, subrule.End, memiozation));

    return best;
}
/// <summary>
/// Tries to delete up to <paramref name="tokensToDelete"/> tokens starting at <paramref name="pos"/>,
/// branching over every non-void token that can be parsed at that position.
/// </summary>
private void DeleteTokens(RecoveryParser rp, int pos, ParsedSequence sequence, int tokensToDelete)
{
    // Deletion budget exhausted — nothing more to try.
    if (tokensToDelete <= 0)
        return;

    var text = rp.ParseResult.Text;
    var parseResult = rp.ParseResult;
    var grammar = parseResult.RuleParser.Grammar;
    var res = grammar.ParseAllNonVoidGrammarTokens(pos, parseResult);
    RemoveEmpty(res, pos);

    // No token can be parsed here — nothing to delete.
    if (res.Count == 0)
        return;

    foreach (var nextPos in res)
        if (CanDelete(text, pos, nextPos))
            ContinueDeleteTokens(rp, sequence, pos, nextPos, tokensToDelete);
}
/// <summary>
/// At the failure position there may be "garbage": characters that no token rule available in the
/// CompositeGrammar can parse at this point. Since no rule will ever match it, scan ahead to the next
/// position where some token is recognized and skip everything in between.
/// </summary>
/// <returns>true when garbage was skipped (deleted)</returns>
private bool TryDeleteGarbage(RecoveryParser rp, int maxPos, ParsedSequence sequence)
{
    var parseResult = rp.ParseResult;
    var text = parseResult.Text;

    if (maxPos >= text.Length)
        return false;

    var grammar = parseResult.RuleParser.Grammar;
    var tokens = grammar.ParseAllGrammarTokens(maxPos, parseResult);
    RemoveEmpty(tokens, maxPos);

    // Some token parses here — this is not garbage.
    if (tokens.Count != 0)
        return false;

    // Scan forward until a token parses or the text ends.
    var garbageEnd = maxPos + 1;
    while (garbageEnd < text.Length)
    {
        var tokensAhead = grammar.ParseAllGrammarTokens(garbageEnd, parseResult);
        RemoveEmpty(tokensAhead, garbageEnd);
        if (tokensAhead.Count > 0)
            break;
        garbageEnd++;
    }

    // Record the garbage span as a deleted "token" (true = garbage) and report it parsed.
    _deletedToken[new ParsedSequenceAndSubrule(sequence, new ParsedSubrule(maxPos, garbageEnd, s_loopState))] = true;
    rp.SubruleParsed(maxPos, garbageEnd, new ParseRecord(sequence, 0, maxPos));
    return true;
}
/// <summary>
/// Removes non-optimal paths: first computes, by fixed-point iteration, the cumulative cost of reaching
/// each subrule from a start state, then walks backward from the cheapest last subrules, keeping only
/// predecessors lying on a minimal-cost path.
/// </summary>
/// <param name="comulativeMin">Receives the minimal cumulative cost over the last subrules ending at <paramref name="end"/>.</param>
/// <returns>The subset of <paramref name="parses"/> that lies on some minimal-cost path.</returns>
private static SubrulesTokenChanges RemoveWorstPaths(ParsedSequence seq, int end, SubrulesTokenChanges parses, out TokenChanges comulativeMin)
{
    var comulativeCost = new SubrulesTokenChanges();
    bool updated = true;
    // Fixed-point: iterate until no subrule's cumulative cost changes.
    while (updated)
    {
        updated = false;
        foreach (var parse in parses)
        {
            var subrule = parse.Key;
            TokenChanges oldCount;
            // First sighting of this subrule forces at least one more pass.
            if (!comulativeCost.TryGetValue(subrule, out oldCount))
                updated = true;
            TokenChanges min;
            if (seq.StartPos == subrule.Begin && seq.ParsingSequence.StartStates.Contains(subrule.State))
                min = new TokenChanges(); // a start subrule costs nothing to reach
            else
            {
                min = TokenChanges.Fail;
                TokenChanges prevCount;
                // Take the cheapest already-known predecessor.
                foreach (var prevSubrule in seq.GetPrevSubrules(subrule, parses.Keys))
                    if (comulativeCost.TryGetValue(prevSubrule, out prevCount))
                        min = TokenChanges.Min(min, prevCount);
            }
            var newCount = AddOrFail(min, parses[subrule]);
            comulativeCost[subrule] = newCount;
            updated = updated || oldCount != newCount;
        }
    }

    // Seed the backward walk with the last subrules, keeping only those of minimal cumulative cost.
    var toProcess = new SCG.Queue<ParsedSubrule>(seq.GetLastSubrules(parses.Keys, end));
    var comulativeMin2 = toProcess.Min(s => comulativeCost[s]);
    comulativeMin = comulativeMin2;
    toProcess = new SCG.Queue<ParsedSubrule>(toProcess.Where(s => comulativeCost[s] == comulativeMin2));
    var good = new SubrulesTokenChanges();
    while (toProcess.Count > 0)
    {
        var subrule = toProcess.Dequeue();
        if (good.ContainsKey(subrule))
            continue;
        good.Add(subrule, parses[subrule]);
        var prev = seq.GetPrevSubrules(subrule, parses.Keys).ToList();
        if (prev.Count > 0)
        {
            TokenChanges min;
            if (seq.StartPos == subrule.Begin && seq.ParsingSequence.StartStates.Contains(subrule.State))
                min = new TokenChanges();
            else
                min = prev.Min(s => comulativeCost[s]);
            // Enqueue only predecessors that are on a minimal-cost path.
            foreach (var prevSubrule in prev)
                if (comulativeCost[prevSubrule] == min)
                    toProcess.Enqueue(prevSubrule);
        }
    }
    return good;
}
/// <summary>
/// Returns the minimal token changes over all sub-sequences of <paramref name="subrule"/>;
/// when there are none, an empty subrule costs its mandatory token count and a non-empty one costs nothing.
/// </summary>
private TokenChanges LocalMinForSubSequence(ParsedSequence seq, Dictionary<ParsedSequenceKey, SequenceTokenChanges> memiozation, ParsedSubrule subrule, TokenChanges localMin)
{
    var subSeqs = seq.GetSequencesForSubrule(subrule).ToArray();
    var hasSequence = false;
    foreach (var subSeq in subSeqs)
    {
        hasSequence = true;
        var localRes = FindBestPath(subSeq, subrule.End, memiozation);
        localMin = TokenChanges.Min(localMin, localRes);
    }

    if (!hasSequence)
    {
        if (subrule.IsEmpty)
            localMin = new TokenChanges(seq.SubruleMandatoryTokenCount(subrule), 0);
        else
            localMin = new TokenChanges();
    }

    return localMin;
}
/// <summary>
/// Computes the minimal token changes required to parse <paramref name="seq"/> up to <paramref name="end"/>,
/// memoizing per (sequence, end) key and pruning non-optimal subrule paths into the memoization table via
/// RemoveWorstPaths.
/// </summary>
/// <returns>The minimal <see cref="TokenChanges"/>, or TokenChanges.Fail when the sequence cannot be completed.</returns>
/// <exception cref="OperationCanceledException">Thrown when parsing has been terminated.</exception>
private TokenChanges FindBestPath(ParsedSequence seq, int end, Dictionary<ParsedSequenceKey, SequenceTokenChanges> memiozation)
{
    if (_parseResult.TerminateParsing)
        throw new OperationCanceledException();

    SequenceTokenChanges result;
    var key = new ParsedSequenceKey(seq, end);
    if (memiozation.TryGetValue(key, out result))
        return result.TotalTokenChanges;

    // Empty span: the whole sequence must be inserted — cost is its mandatory token count.
    if (seq.StartPos == end)
    {
        var tokenChanges = new TokenChanges(seq.ParsingSequence.MandatoryTokenCount, 0);
        memiozation.Add(key, new SequenceTokenChanges(new SubrulesTokenChanges(), tokenChanges));
        return tokenChanges;
    }

    var results = new SubrulesTokenChanges();
    var validSubrules = seq.GetValidSubrules(end).ToList();
    if (validSubrules.Count == 0)
    {
        var tokenChanges = new TokenChanges();
        memiozation.Add(key, new SequenceTokenChanges(results, tokenChanges));
        return tokenChanges;
    }

    // Pre-seed with Fail to break recursion cycles while sub-sequences are being evaluated.
    memiozation.Add(key, new SequenceTokenChanges(results, TokenChanges.Fail));

    foreach (var subrule in validSubrules)
    {
        TokenChanges localMin;
        if (_deletedToken.ContainsKey(new ParsedSequenceAndSubrule(seq, subrule)))
            localMin = new TokenChanges(0, 1); // a deleted token is priced as one change
        else
            localMin = LocalMinForSubSequence(seq, memiozation, subrule, TokenChanges.Fail);

        results[subrule] = localMin;
    }

    TokenChanges comulativeMin;
    var bestResults = RemoveWorstPaths(seq, end, results, out comulativeMin);
    var result2 = new SequenceTokenChanges(bestResults, comulativeMin);
    memiozation[key] = result2;
    return result2.TotalTokenChanges;
}
/// <summary>
/// Flattens a parsed sequence into linear subrule chains whose total token changes equal the memoized
/// optimum for this (sequence, end) pair; failed or non-matching costs contribute nothing.
/// </summary>
private FlattenSequences FlattenSequence(
    FlattenSequences prevs,
    ParseResult parseResult,
    ParsedSequence seq,
    int end,
    TokenChanges sequenceInsertedTokens,
    Dictionary<ParsedSequenceKey, SequenceTokenChanges> memiozation)
{
    //var seqTxt = parseResult.Text.Substring(seq.StartPos, end - seq.StartPos);

    // NOTE(review): leftover debug anchor — a no-op breakpoint site for one specific position; safe to remove.
    if (seq.StartPos == 8 && end == 15)
        Debug.Assert(true);

    SequenceTokenChanges first;
    var key = new ParsedSequenceKey(seq, end);
    // FindBestPath is expected to have populated the memoization table for this key.
    if (!memiozation.TryGetValue(key, out first))
        Debug.Assert(false);

    var parses = first.SubrulesTokenChanges;

    if (first.TotalTokenChanges.IsFail)
        return new FlattenSequences();

    // Only chains whose cost matches the memoized optimum are expanded.
    if (sequenceInsertedTokens != first.TotalTokenChanges)
    {
        //Debug.Assert(false);
        return new FlattenSequences();
    }

    var firstSubrules = seq.GetFirstSubrules(parses.Keys).ToArray();

    var total = new FlattenSequences();

    foreach (var firstSubrule in firstSubrules)
    {
        //var txt = parseResult.Text.Substring(firstSubrule.Begin, firstSubrule.End - firstSubrule.Begin);
        //var stateIndex = firstSubrule.State;
        //var state = stateIndex < 0 ? null : seq.ParsingSequence.States[stateIndex];

        var insertedTokens = parses[firstSubrule];
        if (insertedTokens.IsFail)
            continue;

        var result = FlattenSubrule(prevs, parseResult, seq, parses, firstSubrule, insertedTokens, memiozation);
        total.AddRange(result);
    }

    return total;
}
/// <summary>
/// Flattens one subrule and everything reachable after it into linear chains of
/// (sequence, subrule, token changes). Iterates via goto when exactly one subrule follows
/// (tail-call elimination) and recurses only when the path branches.
/// </summary>
private FlattenSequences FlattenSubrule(FlattenSequences prevs, ParseResult parseResult, ParsedSequence seq, SubrulesTokenChanges parses, ParsedSubrule subrule, TokenChanges tokenChanges, Dictionary<ParsedSequenceKey, SequenceTokenChanges> memiozation)
{
Begin:
    var currentNodes = new FlattenSequences();

    // Empty subrules have no sub-sequences to expand; non-empty ones are flattened recursively.
    if (!subrule.IsEmpty)
    {
        foreach (var subSequence in seq.GetSequencesForSubrule(subrule))
        {
            var result = FlattenSequence(prevs, parseResult, subSequence, subrule.End, tokenChanges, memiozation);
            currentNodes.AddRange(result);
        }
    }

    // If no sub-sequences produced anything, extend every incoming chain with the current subrule itself.
    if (currentNodes.Count == 0)
    {
        foreach (var prev in prevs)
            currentNodes.Add(new ParsedSequenceAndSubrules.Cons(new SubruleTokenChanges(seq, subrule, tokenChanges), prev));
    }

    var nextSubrules = seq.GetNextSubrules(subrule, parses.Keys).ToArray();
    switch (nextSubrules.Length)
    {
        case 0:
            return currentNodes;

        case 1:
        {
            var nextSubrule = nextSubrules[0];
            tokenChanges = parses[nextSubrule];
            if (tokenChanges.IsFail)
                return currentNodes;

            // Tail-call elimination: loop instead of recursing for the single-successor case.
            prevs = currentNodes;
            subrule = nextSubrule;
            goto Begin;
        }

        default:
        {
            var resultNodes = new FlattenSequences();
            foreach (var nextSubrule in nextSubrules)
            {
                var newSubruleInsertedTokens = parses[nextSubrule];
                if (newSubruleInsertedTokens.IsFail)
                    continue;

                var result = FlattenSubrule(currentNodes, parseResult, seq, parses, nextSubrule, newSubruleInsertedTokens, memiozation);
                resultNodes.AddRange(result);
            }
            return resultNodes;
        }
    }
}
/// <summary>
/// Flattens one subrule and all subrules reachable after it into linear chains of
/// (sequence, subrule, token changes); iterates via goto for the single-successor case and
/// recurses only when the path branches.
/// </summary>
private FlattenSequences FlattenSubrule(FlattenSequences prevs, ParseResult parseResult, ParsedSequence seq, SubrulesTokenChanges parses, ParsedSubrule subrule, TokenChanges tokenChanges, Dictionary <ParsedSequenceKey, SequenceTokenChanges> memiozation)
{
Begin:
    //var txt = parseResult.Text.Substring(subrule.Begin, subrule.End - subrule.Begin);
    //var stateIndex = subrule.State;
    //var state = stateIndex < 0 ? null : seq.ParsingSequence.States[stateIndex];

    // NOTE(review): leftover debug anchor (breakpoint site); it has no effect.
    if (subrule.End == 11)
    {
    }

    var currentNodes = new FlattenSequences();

    //var subruledDesc = seq.GetSubruleDescription(subrule.State);
    if (subrule.IsEmpty)
    {
        //if (subruleInsertedTokens > 0)
        //  Debug.WriteLine("Inserted = " + subruleInsertedTokens + " - " + subruledDesc + " Seq: " + seq);
    }
    else
    {
        var sequences = seq.GetSequencesForSubrule(subrule).ToArray();

        // NOTE(review): leftover debug anchor; no effect.
        if (sequences.Length > 1)
        {
        }

        foreach (var subSequences in sequences)
        {
            //Debug.WriteLine(subruledDesc);
            var result = FlattenSequence(prevs, parseResult, subSequences, subrule.End, tokenChanges, memiozation);
            currentNodes.AddRange(result);
        }
    }

    // If there were no sub-sequences, create continuations from the current subrule itself.
    if (currentNodes.Count == 0)
    {
        foreach (var prev in prevs)
        {
            currentNodes.Add(new ParsedSequenceAndSubrules.Cons(new SubruleTokenChanges(seq, subrule, tokenChanges), prev));
        }
    }

    var nextSubrules = seq.GetNextSubrules(subrule, parses.Keys).ToArray();

    switch (nextSubrules.Length)
    {
    case 0:
        return(currentNodes);

    case 1:
    {
        var nextSubrule = nextSubrules[0];

        // NOTE(review): leftover debug anchor; no effect.
        if (nextSubrule.State == 9 && nextSubrule.Begin == 8 && nextSubrule.End == 15)
        {
        }
        tokenChanges = parses[nextSubrule];
        if (tokenChanges.IsFail)
        {
            return(currentNodes);
        }

        // recursive self call... (tail-call eliminated via goto for the single-successor case)
        prevs = currentNodes;
        subrule = nextSubrule;
        goto Begin;
        // NOTE(review): unreachable — the goto above always transfers control.
        return(null);
    }

    default:
    {
        var resultNodes = new FlattenSequences();

        foreach (var nextSubrule in nextSubrules)
        {
            var newSubruleInsertedTokens = parses[nextSubrule];
            if (newSubruleInsertedTokens.IsFail)
            {
                continue;
            }

            var result = FlattenSubrule(currentNodes, parseResult, seq, parses, nextSubrule, newSubruleInsertedTokens, memiozation);
            resultNodes.AddRange(result);
        }

        return(resultNodes);
    }
    }
}
/// <summary>
/// Computes the minimal token changes needed to parse <paramref name="seq"/> up to <paramref name="end"/>,
/// memoizing per (sequence, end) key and pruning non-optimal subrule paths via RemoveWorstPaths.
/// </summary>
private TokenChanges FindBestPath(ParsedSequence seq, int end, Dictionary <ParsedSequenceKey, SequenceTokenChanges> memiozation)
{
    if (_parseResult.TerminateParsing)
    {
        throw new OperationCanceledException();
    }

    SequenceTokenChanges result;
    var key = new ParsedSequenceKey(seq, end);

    if (memiozation.TryGetValue(key, out result))
    {
        return(result.TotalTokenChanges);
    }

    // Empty span: the whole sequence must be inserted — cost is its mandatory token count.
    if (seq.StartPos == end)
    {
        var tokenChanges = new TokenChanges(seq.ParsingSequence.MandatoryTokenCount, 0);
        memiozation.Add(key, new SequenceTokenChanges(new SubrulesTokenChanges(), tokenChanges));
        return(tokenChanges);
    }

    var results = new SubrulesTokenChanges();
    var validSubrules = seq.GetValidSubrules(end).ToList();

    if (validSubrules.Count == 0)
    {
        var tokenChanges = new TokenChanges();
        memiozation.Add(key, new SequenceTokenChanges(results, tokenChanges));
        return(tokenChanges);
    }
    // Pre-seed with Fail to break recursion cycles while sub-sequences are being evaluated.
    memiozation.Add(key, new SequenceTokenChanges(results, TokenChanges.Fail));
    foreach (var subrule in validSubrules)
    {
        TokenChanges localMin = TokenChanges.Fail;
        if (_deletedToken.ContainsKey(new ParsedSequenceAndSubrule(seq, subrule)))
        {
            localMin = new TokenChanges(0, 1); // a deleted token is priced as one change
        }
        else
        {
            localMin = LocalMinForSubSequence(seq, memiozation, subrule, localMin);
        }
        results[subrule] = localMin;
    }
    TokenChanges comulativeMin;
    // NOTE(review): empty debug anchor; has no effect.
    if (results.Count == 0)
    {
    }
    var bestResults = RemoveWorstPaths(seq, end, results, out comulativeMin);
    var result2 = new SequenceTokenChanges(bestResults, comulativeMin);
    memiozation[key] = result2;
    return(result2.TotalTokenChanges);
}