/// <summary>
/// Builds an array run by parsing a format string shaped like <c>[segments]length</c>,
/// optionally prefixed with the <c>AnchorStart</c> character.
/// </summary>
/// <param name="data">Model the run reads from; also handed to the segment parser.</param>
/// <param name="format">Full format string. Stored unmodified in <c>FormatString</c>.</param>
/// <param name="start">Start address, forwarded to the base constructor.</param>
/// <param name="pointerSources">Pointer sources, forwarded to the base constructor.</param>
/// <exception cref="ArrayRunParseException">
/// The format is not wrapped in the array start/end characters, or it contains no segments.
/// </exception>
private ArrayRun(IDataModel data, string format, int start, IReadOnlyList<int> pointerSources) : base(start, pointerSources) {
   owner = data;
   FormatString = format;

   // A leading anchor-start character turns on pointers-to-elements; it is not part of the array body.
   var body = format;
   SupportsPointersToElements = body.StartsWith(AnchorStart.ToString());
   if (SupportsPointersToElements) {
      body = body.Substring(1);
   }

   var closeIndex = body.LastIndexOf(ArrayEnd.ToString());
   var isWrapped = body.StartsWith(ArrayStart.ToString()) && closeIndex != -1;
   if (!isWrapped) {
      throw new ArrayRunParseException($"Array Content must be wrapped in {ArrayStart}{ArrayEnd}.");
   }

   // Everything between the brackets describes one element; everything after the closing bracket is the length token.
   var segmentText = body.Substring(1, closeIndex - 1);
   var lengthToken = body.Substring(closeIndex + 1);

   ElementContent = ParseSegments(segmentText, data);
   if (ElementContent.Count == 0) {
      throw new ArrayRunParseException("Array Content must not be empty.");
   }

   ElementLength = ElementContent.Sum(segment => segment.Length);

   var flags = ElementContent.Count == 1 ? FormatMatchFlags.IsSingleSegment : default(FormatMatchFlags);

   int requestedCount;
   if (lengthToken.Length == 0) {
      // No length given: count how many consecutive elements match the format before the next meaningful run.
      var boundary = owner.GetNextRun(Start);
      while (boundary is NoInfoRun && boundary.Start < owner.Count) {
         boundary = owner.GetNextRun(boundary.Start + 1);
      }

      requestedCount = 0;
      for (var cursor = Start;
           cursor + ElementLength <= boundary.Start && DataMatchesElementFormat(owner, cursor, ElementContent, flags, boundary);
           cursor += ElementLength) {
         requestedCount++;
      }

      LengthFromAnchor = string.Empty;
   } else if (int.TryParse(lengthToken, out requestedCount)) {
      // A numeric token is a fixed element count.
      LengthFromAnchor = string.Empty;
   } else {
      // Anything else is treated as a name that ParseLengthFromAnchor resolves; it must be stored first.
      LengthFromAnchor = lengthToken;
      requestedCount = ParseLengthFromAnchor();
   }

   // If the user said there's a format here, then there is, even if the data doesn't match: never fewer than one element.
   ElementCount = Math.Max(1, requestedCount);
   Length = ElementLength * ElementCount;
}
/// <summary>
/// Checks whether the bytes at <paramref name="start"/> are acceptable content for one <paramref name="segment"/>.
/// </summary>
/// <param name="owner">Model the bytes are read from.</param>
/// <param name="start">Address of the first byte of the segment.</param>
/// <param name="segment">Segment description; its <c>Type</c> selects the check and its <c>Length</c> is the byte width.</param>
/// <param name="flags">Hints that tighten or relax the text-width heuristics.</param>
/// <param name="nextAnchor">The next known run. When it is an <c>ArrayRun</c>, the segment may not extend past its start.</param>
/// <returns><c>true</c> when the data passes the check for the segment's type.</returns>
/// <exception cref="NotImplementedException">The segment type is not PCS, Integer, Pointer or BitArray.</exception>
/// <remarks>
/// The caller is expected to have verified that <c>start + segment.Length</c> does not exceed <c>owner.Count</c>.
/// The PCS heuristic also looks at bytes after the segment; those reads are bounds-checked here.
/// </remarks>
private static bool DataMatchesSegmentFormat(IDataModel owner, int start, ArrayRunElementSegment segment, FormatMatchFlags flags, IFormattedRun nextAnchor) {
   var segmentEnd = start + segment.Length;

   if (segmentEnd > nextAnchor.Start && nextAnchor is ArrayRun) {
      return false; // don't overwrite existing arrays
   }

   switch (segment.Type) {
      case ElementContentType.PCS: {
         int readLength = PCSString.ReadString(owner, start, true, segment.Length);
         if (readLength < 2) {
            return false;
         }
         if (readLength > segment.Length) {
            return false;
         }
         if (Enumerable.Range(start, segment.Length).All(i => owner[i] == 0xFF)) {
            return false;
         }

         // If we end with a space, and the next one starts with a space, we probably have the data width wrong.
         // We might be the start of a different data segment that is no longer pointed to. (Example: Vega/pokenames)
         // Only do this check if the current element seems useful.
         var isBlank = Enumerable.Range(start, segment.Length).All(i => owner[i] == 0x00 || owner[i] == 0xFF);

         // The heuristic needs a byte after the segment. When the segment ends exactly at the end of the data,
         // there is no "next element" to inspect, so the heuristic is skipped instead of reading out of range.
         var hasFollowingByte = segmentEnd < owner.Count;
         if (!isBlank &&
             flags.HasFlag(FormatMatchFlags.IsSingleSegment) &&
             start % 4 == 0 &&
             hasFollowingByte &&
             owner[segmentEnd - 1] == 0x00 &&
             owner[segmentEnd] == 0x00) {
            // If the next one starts on a 4-byte boundary, then we probably just skipped a few bytes between different
            // data types, and _this_ section is still part of the _last_ run (example: Emerald Ability names).
            // If the next one doesn't start on a 4-byte boundary, then we probably have the length wrong.
            var nextWordStart = (segmentEnd + 3) / 4 * 4;

            // No byte exists at the next word boundary, so there is no real data starting there: treat it the same
            // as finding 0x00 at that address.
            if (nextWordStart >= owner.Count) {
               return false;
            }

            var paddingHasData = Enumerable.Range(segmentEnd, nextWordStart - segmentEnd).Any(i => owner[i] != 0x00);
            if (paddingHasData || owner[nextWordStart] == 0x00) {
               return false;
            }
         }

         // Require that the overall thing still ends with 'FF' or '00' to avoid finding text of the wrong width.
         // The width check is less important if we have more complex data, so relax the condition (example: Clover).
         // The width check is less important if we're already known to be in a long run (example: Gaia moves).
         var lastByteInText = owner[segmentEnd - 1];
         var lastByteIsReasonableEnd = lastByteInText == 0x00 || lastByteInText == 0xFF;
         if (!flags.HasFlag(FormatMatchFlags.AllowJunkAfterText) &&
             !lastByteIsReasonableEnd &&
             flags.HasFlag(FormatMatchFlags.IsSingleSegment)) {
            return false;
         }

         return true;
      }

      case ElementContentType.Integer:
         // Plain integers always match; enum-backed integers must index into the available options.
         if (segment is ArrayRunEnumSegment enumSegment) {
            return owner.ReadMultiByteValue(start, segment.Length) < enumSegment.GetOptions(owner).Count;
         } else {
            return true;
         }

      case ElementContentType.Pointer: {
         var destination = owner.ReadPointer(start);
         if (destination == Pointer.NULL) {
            return true;
         }
         if (0 > destination || destination > owner.Count) {
            return false;
         }
         if (segment is ArrayRunPointerSegment pointerSegment) {
            if (!pointerSegment.DestinationDataMatchesPointerFormat(owner, new NoDataChangeDeltaModel(), destination)) {
               return false;
            }
         }
         return true;
      }

      case ElementContentType.BitArray: {
         var bitArraySegment = (ArrayRunBitArraySegment)segment;
         var bits = bitArraySegment.GetOptions(owner).Count;
         bits %= 8;
         if (bits == 0) {
            return true;
         }

         // All the unneeded high bits of the final byte should be set to zero.
         var finalByte = owner[start + bitArraySegment.Length - 1];
         finalByte >>= bits;
         return finalByte == 0;
      }

      default:
         throw new NotImplementedException();
   }
}
/// <summary>
/// Searches pointer destinations for data that matches the element format for a known number of elements.
/// </summary>
/// <param name="data">Model to search.</param>
/// <param name="elementContent">Segments that make up one element.</param>
/// <param name="elementLength">Byte length of one element.</param>
/// <param name="lengthToken">Either an integer element count, or an anchor name whose array supplies the count.</param>
/// <param name="bestLength">
/// On entry to the search, the expected element count; on success, the number of elements up to the last one that matched.
/// </param>
/// <param name="runFilter">Optional extra condition that a candidate run must satisfy; may be null.</param>
/// <returns>Start address of the first acceptable candidate, or <c>Pointer.NULL</c> when none is found.</returns>
private static int KnownLengthSearch(IDataModel data, List<ArrayRunElementSegment> elementContent, int elementLength, string lengthToken, out int bestLength, Func<IFormattedRun, bool> runFilter) {
   var noChange = new NoDataChangeDeltaModel();

   // A non-numeric token names another array; borrow that array's element count.
   if (!int.TryParse(lengthToken, out bestLength)) {
      var anchorAddress = data.GetAddressFromAnchor(noChange, -1, lengthToken);
      if (anchorAddress == Pointer.NULL) {
         return Pointer.NULL;
      }

      if (!(data.GetNextRun(anchorAddress) is ArrayRun sizingArray)) {
         return Pointer.NULL;
      }

      bestLength = sizingArray.ElementCount;
   }

   var flags = elementContent.Count == 1 ? FormatMatchFlags.IsSingleSegment : default(FormatMatchFlags);

   for (var run = data.GetNextRun(0); run.Start < data.Count; run = data.GetNextRun(run.Start + run.Length + 1)) {
      // Only pointers are followed, and only to destinations that are not already arrays.
      if (!(run is PointerRun)) {
         continue;
      }

      var candidate = data.GetNextRun(data.ReadPointer(run.Start));
      if (candidate is ArrayRun) {
         continue;
      }

      // Some searches allow special conditions on the run. For example, we could be interested only in runs with >100 pointers leading to them.
      if (runFilter != null && !runFilter(candidate)) {
         continue;
      }

      // Tolerate a few errors in the data. We know what length we're looking for, so if most of the elements match,
      // then most likely we're just looking at the right collection but with some user-created bugs.
      int allowedErrors = bestLength / 80;
      int errorCount = 0;
      int matchedThrough = 0;
      int address = candidate.Start;
      bool tooShort = false;

      for (int checkedCount = 1; checkedCount <= bestLength; checkedCount++, address += elementLength) {
         var upcomingAnchor = data.GetNextAnchor(address + 1);
         if (DataMatchesElementFormat(data, address, elementContent, flags, upcomingAnchor)) {
            matchedThrough = checkedCount;
            continue;
         }

         errorCount++;
         if (errorCount <= allowedErrors) {
            continue;
         }

         // As long as this array is at least 80% of the passed in array, we're fine and can say that these are matched.
         // (The other one might have bad data at the end that needs to be removed.) (Example: see Gaia)
         tooShort = bestLength * .8 > matchedThrough;
         break;
      }

      if (tooShort) {
         continue;
      }

      // Don't accept the run if it contains no data (nothing but 0x00 / 0xFF).
      var hasContent = Enumerable.Range(candidate.Start, matchedThrough * elementLength)
         .Select(i => data[i])
         .Any(value => value != 0xFF && value != 0x00);
      if (!hasContent) {
         continue;
      }

      bestLength = matchedThrough;
      return candidate.Start;
   }

   return Pointer.NULL;
}
/// <summary>
/// Checks whether the bytes beginning at <paramref name="start"/> match every segment of one element, in order.
/// </summary>
/// <param name="owner">Model the bytes are read from.</param>
/// <param name="start">Address of the element's first byte.</param>
/// <param name="segments">Segments that make up the element; each is checked at the address following the previous one.</param>
/// <param name="flags">Match hints passed through to the per-segment check.</param>
/// <param name="nextAnchor">Next known run, passed through to the per-segment check.</param>
/// <returns>
/// <c>false</c> as soon as a segment would extend past <c>owner.Count</c> or fails its format check; otherwise <c>true</c>.
/// </returns>
private static bool DataMatchesElementFormat(IDataModel owner, int start, IReadOnlyList<ArrayRunElementSegment> segments, FormatMatchFlags flags, IFormattedRun nextAnchor) {
   var cursor = start;

   for (int index = 0; index < segments.Count; index++) {
      var segment = segments[index];
      var segmentEnd = cursor + segment.Length;

      // The size check comes first so the segment check never starts on a segment that runs off the end of the data.
      var fits = segmentEnd <= owner.Count;
      if (!fits || !DataMatchesSegmentFormat(owner, cursor, segment, flags, nextAnchor)) {
         return false;
      }

      cursor = segmentEnd;
   }

   return true;
}
/// <summary>
/// Walks the anchors in <paramref name="data"/> and finds the one followed by the longest stretch of
/// elements matching <paramref name="elementContent"/>.
/// </summary>
/// <param name="data">Model to search.</param>
/// <param name="elementContent">Segments that make up one element.</param>
/// <param name="elementLength">Byte length of one element.</param>
/// <param name="bestLength">Element count of the best candidate found; 0 when nothing is found.</param>
/// <param name="runFilter">Optional extra condition that a candidate run must satisfy; may be null.</param>
/// <returns>Start address of the best candidate, or <c>Pointer.NULL</c> when none qualifies.</returns>
private static int StandardSearch(IDataModel data, List<ArrayRunElementSegment> elementContent, int elementLength, out int bestLength, Func<IFormattedRun, bool> runFilter) {
   int bestAddress = Pointer.NULL;
   bestLength = 0;

   var run = data.GetNextAnchor(0);
   for (var nextRun = data.GetNextAnchor(run.Start + run.Length); run.Start < int.MaxValue; nextRun = data.GetNextAnchor(nextRun.Start + nextRun.Length)) {
      // Existing arrays and runs without pointer sources are not candidates.
      if (run is ArrayRun || run.PointerSources == null) {
         run = nextRun;
         continue;
      }

      var nextArray = nextRun;

      // Some searches allow special conditions on the run. For example, we could be interested only in runs with >100 pointers leading to them.
      if (runFilter != null && !runFilter(run)) {
         run = nextRun;
         continue;
      }

      // Flags are rebuilt per candidate because AllowJunkAfterText is earned by each candidate separately.
      FormatMatchFlags flags = default;
      if (elementContent.Count == 1) {
         flags |= FormatMatchFlags.IsSingleSegment;
      }

      int currentLength = 0;
      int currentAddress = run.Start;
      while (true) {
         if (currentLength > 100) {
            // We've gone long enough without junk data to be fairly sure that we're looking at something real.
            flags |= FormatMatchFlags.AllowJunkAfterText;
         }

         if (nextArray.Start < currentAddress) {
            nextArray = data.GetNextAnchor(nextArray.Start + 1);
         }

         if (!DataMatchesElementFormat(data, currentAddress, elementContent, flags, nextArray)) {
            break;
         }

         currentLength++;
         currentAddress += elementLength;
      }

      // If what we found is just a text array, then remove any trailing elements starting with a space.
      // The currentLength guard stops the trim at the start of the run: without it, the loop could walk
      // backwards past run.Start (down to a negative address) and drive currentLength below zero.
      if (elementContent.Count == 1 && elementContent[0].Type == ElementContentType.PCS) {
         while (currentLength > 0 && data[currentAddress - elementLength] == 0x00) {
            currentLength--;
            currentAddress -= elementLength;
         }
      }

      // We think we found some data! Make sure it's not just a bunch of 00's and FF's.
      // Only the first currentLength bytes are inspected, and only when this candidate could beat the current best.
      var dataEmpty = true;
      for (int i = 0; i < currentLength && currentLength > bestLength && dataEmpty; i++) {
         dataEmpty = data[run.Start + i] == 0xFF || data[run.Start + i] == 0x00;
      }

      if (bestLength < currentLength && !dataEmpty) {
         bestLength = currentLength;
         bestAddress = run.Start;
      }

      run = nextRun;
   }

   return bestAddress;
}