/// <summary>
/// Recognises a numeral immediately followed by a Chinese measure word, starting at
/// <paramref name="startIndex"/> in the context text.
/// </summary>
/// <param name="startIndex">Index into the context text at which scanning begins.</param>
/// <returns>
/// A collection holding the numeral (<c>POSType.A_M</c>) followed by the single-character
/// quantity (<c>POSType.A_Q</c>) when both are present; otherwise an empty collection.
/// </returns>
public ParseResultCollection Parse(int startIndex)
{
    string text = context.Text;
    ParseResultCollection results = new ParseResultCollection();

    int cursor = startIndex;
    char current = text[cursor];

    while (true)
    {
        bool isNumeralChar = NumeralUtil.IsArabicNumeral(current)
                             || NumeralUtil.IsChineseNumeralChars(current)
                             || current == '.';

        // Stop on the first non-numeral character, or when there is no following character to read.
        if (!isNumeralChar || cursor + 1 >= text.Length)
        {
            break;
        }

        // A positional (carry) digit other than '十' cannot open a number.
        if (cursor == startIndex && NumeralUtil.IsChineseGenDigit(current) && current != '十')
        {
            return results;
        }

        current = text[++cursor];
    }

    // Nothing was consumed, so there is no numeral here.
    if (cursor == startIndex)
    {
        return results;
    }

    // The loop never advances past the last character, so cursor is always a valid index here.
    if (IsChineseQuantity(text[cursor]))
    {
        results.Add(ParseResult.Create(text.Substring(startIndex, cursor - startIndex), startIndex, POSType.A_M));
        results.Add(ParseResult.Create(text[cursor].ToString(), cursor, POSType.A_Q));
    }

    return results;
}
/// <summary>
/// Wraps <paramref name="pathfile"/> in a <c>data</c> element tagged with this plugin's name.
/// </summary>
/// <param name="pathfile">The path text to embed as the element's content.</param>
/// <returns>A parse result carrying the plugin name and the generated document.</returns>
public ParseResult Parse(string pathfile)
{
    // Build the tree from objects instead of parsing interpolated markup: characters such as
    // '&', '<' or '"' in the path or plugin name are then escaped by the XML writer rather than
    // corrupting the document or making XDocument.Parse throw.
    // The interpolations keep the original string formatting (null becomes an empty string).
    var xml = new XDocument(
        new XElement("data",
            new XAttribute("plugin", $"{Name}"),
            $"{pathfile}"));

    return ParseResult.Create(Name, xml);
}
/// <summary>
/// Reads only the ID3v2 tag (when one is present at the start of the stream) and returns an
/// <c>Mp3File</c> whose metadata wraps that tag, or wraps <c>null</c> when no tag is found.
/// </summary>
/// <param name="stream">The stream to read; its position is reset to 0 first.</param>
/// <returns>A parse result containing the file with tag-only metadata.</returns>
public static async Task<ParseResult<Mp3File>> ParseMetadataAsync(IBinaryStream stream)
{
    // The length value is not used, but the query is kept so the call sequence on the stream is unchanged.
    await stream.GetLengthAsync().ConfigureAwait(false);
    stream.Position = 0;

    Id3v2 tag = null;
    if (await Id3v2.IsPointingToHeaderAsync(stream).ConfigureAwait(false))
    {
        tag = await Id3v2.ParseAsync(stream).ConfigureAwait(false);
    }

    var parsedFile = new Mp3File() { Metadata = new Mp3Metadata(tag) };
    return ParseResult<Mp3File>.Create(parsedFile);
}
/// <summary>
/// Runs <paramref name="parser"/> and forwards its matches; if it completes without producing any,
/// a single zero-length result carrying <paramref name="defaultResult"/> is yielded instead.
/// </summary>
/// <typeparam name="TSource">The type of the source elements.</typeparam>
/// <typeparam name="TResult">The type of the elements that are generated from parsing the source elements.</typeparam>
/// <param name="parser">The parser whose matches are forwarded.</param>
/// <param name="defaultResult">The value yielded when <paramref name="parser"/> produces no match.</param>
/// <returns>A parser that yields the matches of <paramref name="parser"/>, or the default result when there are none.</returns>
public static IObservableParser<TSource, TResult> WithDefault<TSource, TResult>(
    this IObservableParser<TSource, TResult> parser,
    TResult defaultResult)
{
    Contract.Requires(parser != null);
    Contract.Ensures(Contract.Result<IObservableParser<TSource, TResult>>() != null);

    return parser.Yield(
        "WithDefault",
        (source, observer) =>
        {
            // Flipped by the first forwarded match; checked once the inner parser completes.
            bool matched = false;

            return parser.Parse(source).SubscribeSafe(
                match =>
                {
                    matched = true;
                    observer.OnNext(match);
                },
                observer.OnError,
                () =>
                {
                    if (!matched)
                    {
                        observer.OnNext(ParseResult.Create(defaultResult, length: 0));
                    }

                    observer.OnCompleted();
                });
        });
}
/// <summary>
/// Yields one zero-length result carrying <paramref name="successResult"/> when the cursor is at
/// the end of its sequence, and nothing otherwise.
/// </summary>
/// <param name="source">The cursor to inspect.</param>
/// <param name="parser">Only checked for null by the contract; not otherwise used here.</param>
/// <param name="successResult">The value to yield when the cursor is at the end.</param>
private static IEnumerable<IParseResult<TSuccess>> AtEndOfSequenceIterator<TSource, TResult, TSuccess>(
    this ICursor<TSource> source,
    IParser<TSource, TResult> parser,
    TSuccess successResult)
{
    Contract.Requires(source != null);
    Contract.Requires(parser != null);
    Contract.Ensures(Contract.Result<IEnumerable<IParseResult<TSuccess>>>() != null);

    if (!source.IsSequenceTerminated)
    {
        // Termination is not known yet: try to pull an element. Any element means we are not at the end.
        foreach (var element in source)
        {
            yield break;
        }

        Contract.Assume(source.AtEndOfSequence);
    }
    else if (!source.AtEndOfSequence)
    {
        yield break;
    }

    yield return ParseResult.Create(successResult, length: 0);
}
/// <summary>
/// Describes the file at <paramref name="pathfile"/> as a <c>file</c> element with its name,
/// directory, length and last write time (converted to UTC).
/// </summary>
/// <param name="pathfile">Full path of the file to describe.</param>
/// <returns>A parse result carrying the plugin name and the generated document.</returns>
public ParseResult Parse(string pathfile)
{
    // GetDirectoryName drops the drive name when there is no directory or file part.
    // That is acceptable here because the input is always a file.
    // https://docs.microsoft.com/en-us/dotnet/api/system.io.path.getdirectoryname
    string directory = System.IO.Path.GetDirectoryName(pathfile);
    string name = System.IO.Path.GetFileName(pathfile);

    // new FileInfo(...).Length cannot be used as it throws an exception; go through the file system abstraction.
    // See https://stackoverflow.com/questions/44029830/how-do-i-mock-the-fileinfo-information-for-a-file
    var size = fileSystem.FileInfo.FromFileName(pathfile).Length;

    // GetLastWriteTime returns a local-kind value; normalise it so the output is always UTC.
    var modifiedUtc = fileSystem.File.GetLastWriteTime(pathfile).ToUniversalTime();

    var fileElement = new XElement("file");
    fileElement.Add(new XAttribute("name", name));
    fileElement.Add(new XAttribute("path", directory ?? string.Empty));
    fileElement.Add(new XAttribute("length", size));
    fileElement.Add(new XAttribute("lastWriteTime", modifiedUtc));

    return ParseResult.Create(Name, new XDocument(fileElement));
}
/// <summary>
/// Succeeds only when <paramref name="parser"/> produces no match.
/// </summary>
/// <typeparam name="TSource">The type of the source elements.</typeparam>
/// <typeparam name="TResult">The type of the elements that are generated from parsing the source elements.</typeparam>
/// <typeparam name="TSuccess">The type of the success value.</typeparam>
/// <param name="parser">The parser whose matches count as failure.</param>
/// <param name="successResult">The value yielded (with length 0) when <paramref name="parser"/> does not match.</param>
/// <returns>A parser that yields nothing when <paramref name="parser"/> matches and
/// <paramref name="successResult"/> when it does not.</returns>
public static IObservableParser<TSource, TSuccess> None<TSource, TResult, TSuccess>(
    this IObservableParser<TSource, TResult> parser,
    TSuccess successResult)
{
    Contract.Requires(parser != null);
    Contract.Ensures(Contract.Result<IObservableParser<TSource, TSuccess>>() != null);

    if (!(parser is IObservableParserCursor<TSource>))
    {
        return parser.Yield<TSource, TResult, TSuccess>(
            "None",
            (source, observer) => parser.Parse(source).Any().SubscribeSafe(
                matchedAnything =>
                {
                    if (!matchedAnything)
                    {
                        observer.OnNext(ParseResult.Create(successResult, length: 0));
                    }
                },
                observer.OnError,
                observer.OnCompleted));
    }

    // Cursor parsers are delegated to the end-of-sequence check.
    return parser.AtEndOfSequence(successResult);
}
/// <summary>
/// Collects the run of digit characters starting at <paramref name="startIndex"/> and reports it
/// as a numeral when its length fits the active pattern (6 digits for China, 5 for North America;
/// any length for other patterns).
/// </summary>
/// <param name="startIndex">Index into the context text at which scanning begins.</param>
/// <returns>A collection with one <c>POSType.A_M</c> result, or an empty collection when the
/// digit count does not match the pattern.</returns>
public ParseResultCollection Parse(int startIndex)
{
    string text = context.Text;
    ParserPattern pattern = context.Pattern;
    ParseResultCollection prc = new ParseResultCollection();
    StringBuilder digits = new StringBuilder(6);

    // Check the bound BEFORE reading the character. The original tested `i < Length` only as part
    // of the second operand of `||` and only after `_text[++i]` had already been evaluated, so a
    // digit run that reached the end of the text threw IndexOutOfRangeException.
    int i = startIndex;
    while (i < text.Length
           && (NumeralUtil.IsArabicNumeral(text[i]) || (text[i] >= '0' && text[i] <= '9')))
    {
        digits.Append(text[i]);
        i++;
    }

    string source = digits.ToString();

    if (pattern == ParserPattern.China)
    {
        if (source.Length != 6)
        {
            return prc;
        }
    }
    else if (pattern == ParserPattern.NorthAmerica)
    {
        if (source.Length != 5)
        {
            return prc;
        }
    }

    prc.Add(ParseResult.Create(source, startIndex, POSType.A_M));
    return prc;
}
/// <summary>
/// Builds an observable sequence that emits exactly one <see cref="IParseResult{TValue}"/>
/// made from <paramref name="value"/> and <paramref name="length"/>.
/// </summary>
/// <typeparam name="TValue">The type of the parse result's value.</typeparam>
/// <param name="value">The value carried by the parse result.</param>
/// <param name="length">The number of consumed elements recorded on the parse result; must not be negative.</param>
/// <returns>A single-element observable sequence containing the parse result.</returns>
public static IObservable<IParseResult<TValue>> Return<TValue>(
    TValue value,
    int length)
{
    Contract.Requires(length >= 0);
    Contract.Ensures(Contract.Result<IObservable<IParseResult<TValue>>>() != null);

    var single = ParseResult.Create(value, length);
    return Observable.Return(single);
}
/// <summary>
/// Treats a non-blank string as a successful parse of itself; null, empty or whitespace-only
/// input yields an unsuccessful result carrying <paramref name="defaultValue"/>.
/// </summary>
/// <param name="value">The text to inspect.</param>
/// <param name="defaultValue">The value reported when <paramref name="value"/> is blank.</param>
/// <returns>The parse result, whose third argument is the original input.</returns>
public static ParseResult<string> TryParseString(this string value, string defaultValue = "")
{
    if (value == null)
    {
        return ParseResult<string>.Create(defaultValue, false, null);
    }

    bool hasContent = !string.IsNullOrWhiteSpace(value);
    return hasContent
        ? ParseResult<string>.Create(value, true, value)
        : ParseResult<string>.Create(defaultValue, false, value);
}
/// <summary>
/// Succeeds, without consuming anything, when parsing starts at the end of the input sequence.
/// </summary>
/// <typeparam name="TSource">The type of the source elements.</typeparam>
/// <typeparam name="TResult">The type of the elements that are generated from parsing the source elements.</typeparam>
/// <typeparam name="TSuccess">The type of the success value.</typeparam>
/// <param name="parser">The parser that provides the context for the end-of-sequence check.</param>
/// <param name="successResult">The value yielded (with length 0) when the cursor is at the end of the input.</param>
/// <returns>A parser that yields <paramref name="successResult"/> at the end of the input and no results otherwise.</returns>
public static IObservableParser<TSource, TSuccess> AtEndOfSequence<TSource, TResult, TSuccess>(
    this IObservableParser<TSource, TResult> parser,
    TSuccess successResult)
{
    Contract.Requires(parser != null);
    Contract.Ensures(Contract.Result<IObservableParser<TSource, TSuccess>>() != null);

    return parser.Yield<TSource, TResult, TSuccess>(
        "AtEndOfSequence",
        (source, observer) =>
        {
            if (source.IsSequenceTerminated)
            {
                // Termination is already known, so the answer can be given synchronously.
                if (source.AtEndOfSequence)
                {
                    observer.OnNext(ParseResult.Create(successResult, length: 0));
                }

                observer.OnCompleted();
                return Disposable.Empty;
            }

            // Otherwise request one element; completing without receiving it means end of sequence.
            bool sawElement = false;

            return source.Subscribe(
                Observer.Create<TSource>(
                    element => sawElement = true,
                    observer.OnError,
                    () =>
                    {
                        if (!sawElement)
                        {
                            Contract.Assume(source.AtEndOfSequence);

                            observer.OnNext(ParseResult.Create(successResult, length: 0));
                        }

                        observer.OnCompleted();
                    }),
                count: 1);
        });
}
/// <summary>
/// Attempts to read <paramref name="value"/> as a <see cref="bool"/> using <see cref="bool.TryParse(string, out bool)"/>.
/// </summary>
/// <param name="value">The text to parse.</param>
/// <param name="defaultValue">The value reported when the text is blank or not a boolean.</param>
/// <returns>A successful result with the parsed value, or an unsuccessful one with
/// <paramref name="defaultValue"/>; both carry the original input.</returns>
public static ParseResult<bool> TryParseBool(this string value, bool defaultValue = false)
{
    bool parsed = false;
    bool succeeded = !string.IsNullOrWhiteSpace(value) && bool.TryParse(value, out parsed);

    return succeeded
        ? ParseResult<bool>.Create(parsed, true, value)
        : ParseResult<bool>.Create(defaultValue, false, value);
}
/// <summary>
/// Attempts to read <paramref name="value"/> as an <see cref="int"/> using <see cref="int.TryParse(string, out int)"/>.
/// </summary>
/// <param name="value">The text to parse.</param>
/// <param name="defaultValue">The value reported when the text is blank or not an integer.</param>
/// <returns>A successful result with the parsed value, or an unsuccessful one with
/// <paramref name="defaultValue"/>; both carry the original input.</returns>
public static ParseResult<int> TryParseInt32(this string value, int defaultValue = default)
{
    int parsed = 0;
    bool succeeded = !string.IsNullOrWhiteSpace(value) && int.TryParse(value, out parsed);

    return succeeded
        ? ParseResult<int>.Create(parsed, true, value)
        : ParseResult<int>.Create(defaultValue, false, value);
}
/// <summary>
/// Reads the optional ID3v2 tag and derives bitrate and duration for the audio that follows it.
/// </summary>
/// <remarks>
/// One frame is sampled first. With no frame, only the tag is reported. For a constant bitrate the
/// duration is estimated from the first frame; otherwise the Xing frame count is used when
/// available, and as a last resort all samples are collected.
/// </remarks>
/// <param name="stream">The stream to read; its position is reset to 0 first.</param>
/// <returns>A parse result containing the file with its metadata populated.</returns>
public static async Task<ParseResult<Mp3File>> ParseFullMetadataAsync(IBinaryStream stream)
{
    var totalLength = await stream.GetLengthAsync().ConfigureAwait(false);
    stream.Position = 0;

    var file = new Mp3File();

    var tag = await Id3v2.IsPointingToHeaderAsync(stream).ConfigureAwait(false)
        ? await Id3v2.ParseAsync(stream).ConfigureAwait(false)
        : null;

    var probe = await CollectSamplesAsync(stream, totalLength, 1).ConfigureAwait(false);

    // No frame found: report the tag alone.
    if (probe.Item1.Count == 0)
    {
        file.Metadata = new Mp3Metadata(tag);
        return ParseResult<Mp3File>.Create(file);
    }

    var firstFrame = probe.Item1[0];
    var contentLength = totalLength - stream.Position;

    if (probe.Item2 == BitrateType.Constant)
    {
        // NOTE(review): if both operands are integral, contentLength / firstFrame.Length truncates
        // to whole frames before the multiplication - confirm that is intended.
        var cbrDuration = TimeSpan.FromSeconds(Math.Ceiling((contentLength / firstFrame.Length) * firstFrame.Duration.TotalSeconds));
        var cbrBitrate = firstFrame.Bitrate;
        file.Metadata = new Mp3Metadata(tag, cbrBitrate, cbrDuration);
        return ParseResult<Mp3File>.Create(file);
    }

    var xingFrame = await XingFrame.CreateAsync(stream, firstFrame).ConfigureAwait(false);

    if (!xingFrame.FramesCount.HasValue)
    {
        // No frame count in the Xing frame: fall back to collecting every sample.
        var fullScan = await CollectSamplesAsync(stream, totalLength, null).ConfigureAwait(false);
        file.Metadata = new Mp3Metadata(tag, fullScan.Item4, fullScan.Item3);
        return ParseResult<Mp3File>.Create(file);
    }

    // NOTE(review): with integral operands this division yields whole seconds only - confirm.
    var vbrDuration = TimeSpan.FromSeconds(xingFrame.FramesCount.Value * firstFrame.SamplesCount / firstFrame.Frequency);

    // NOTE(review): the cast applies before the multiplication by 8 - confirm that is intended.
    var averageBitrate = (UInt32)(contentLength / vbrDuration.TotalSeconds) * 8;
    file.Metadata = new Mp3Metadata(tag, averageBitrate, vbrDuration);
    return ParseResult<Mp3File>.Create(file);
}
/// <summary>
/// Reads the lines nested below <paramref name="first"/> and collects the event's date and place.
/// </summary>
/// <param name="first">The line that opened the event; its level bounds the event's sub-lines.</param>
/// <param name="lineProvider">Source of the following raw lines.</param>
/// <returns>The parsed event together with the last line that was read (the line that ended the
/// event, or the last line of input).</returns>
public static ParseResult<GedcomEvent> Parse(GedcomLine first, ILineProvider lineProvider)
{
    var parsedEvent = new GedcomEvent();
    var initialLevel = first.Level;
    GedcomLine line = default;

    for (string raw = lineProvider.ReadLine(); raw != null; raw = lineProvider.ReadLine())
    {
        line = ParserHelper.ParseLine(raw);

        // A line at the same or a shallower level belongs to the next record.
        if (line.Level <= first.Level)
        {
            break;
        }

        string tag = line.GetTagOrRef();

        // Only direct children are taken, so DATE/PLAC nested under CREA or CHAN are skipped.
        // TODO: should actually put CREA and CHAN into a different parser
        if (tag == "DATE")
        {
            if (line.Level == initialLevel + 1)
            {
                parsedEvent.Date = line.GetLineContent();
            }
        }
        else if (tag == "PLAC")
        {
            if (line.Level == initialLevel + 1)
            {
                parsedEvent.Location = line.GetLineContent();
            }
        }
    }

    return ParseResult.Create(parsedEvent, line);
}
/// <summary>
/// Reads the EXIF camera model of the file at <paramref name="pathfile"/> and reports it as an
/// <c>exif</c> element; a missing model is written as the literal text <c>null</c>.
/// </summary>
/// <param name="pathfile">Path of the image file to inspect.</param>
/// <returns>A parse result carrying the plugin name and the generated document.</returns>
public ParseResult Parse(string pathfile)
{
    string cameraModel = null;

    using (var exif = new ExifLib.ExifReader(pathfile))
    {
        exif.GetTagValue<string>(ExifLib.ExifTags.Model, out cameraModel);
    }

    var modelAttribute = new XAttribute("model", cameraModel ?? "null");
    var document = new XDocument(new XElement("exif", modelAttribute));

    return ParseResult.Create(Name, document);
}
/// <summary>
/// Yields every result of <paramref name="parser"/>; when there are none, yields a single
/// zero-length result carrying <paramref name="defaultResult"/>.
/// </summary>
/// <param name="source">The cursor to parse from.</param>
/// <param name="parser">The parser to run.</param>
/// <param name="defaultResult">The fallback value.</param>
private static IEnumerable<IParseResult<TResult>> WithDefaultIterator<TSource, TResult>(
    ICursor<TSource> source,
    IParser<TSource, TResult> parser,
    TResult defaultResult)
{
    bool yieldedAny = false;

    using (var matches = parser.Parse(source).GetEnumerator())
    {
        while (matches.MoveNext())
        {
            yieldedAny = true;
            yield return matches.Current;
        }
    }

    if (yieldedAny)
    {
        yield break;
    }

    yield return ParseResult.Create(defaultResult, length: 0);
}
/// <summary>
/// Requests one element from <paramref name="source"/> and emits it as a parse result of length 1.
/// </summary>
/// <param name="source">The cursor to read from.</param>
/// <returns>An observable of the resulting parse result; errors and completion are forwarded.</returns>
public IObservable<IParseResult<T>> Parse(IObservableCursor<T> source)
{
    return Observable.Create<IParseResult<T>>(
        observer =>
        {
            var forwarder = Observer.Create<T>(
                element =>
                {
#if !SILVERLIGHT && !PORT_45 && !PORT_40
                    ParserTraceSources.TraceInput(element);
#endif

                    observer.OnNext(ParseResult.Create(element, length: 1));
                },
                observer.OnError,
                observer.OnCompleted);

            return source.Subscribe(forwarder, count: 1);
        });
}
/// <summary>
/// Loads the image at <paramref name="pathfile"/> and reports its pixel dimensions as an
/// <c>image</c> element with <c>width</c> and <c>height</c> attributes.
/// </summary>
/// <param name="pathfile">Path of the image file to inspect.</param>
/// <returns>A parse result carrying the plugin name and the generated document.</returns>
public ParseResult Parse(string pathfile)
{
    XDocument xml;

    // Image.FromFile keeps the file locked and holds a GDI+ handle until the Image is disposed,
    // so the image is released as soon as the dimensions have been read.
    using (var image = Image.FromFile(pathfile))
    {
        xml = new XDocument(
            new XElement("image",
                new XAttribute("width", image.Width),
                new XAttribute("height", image.Height)));
    }

    // There is more info for an image.
    // One is Flags https://docs.microsoft.com/en-us/dotnet/api/system.drawing.image.flags
    // Another is properties.
    // https://docs.microsoft.com/en-us/dotnet/desktop/winforms/advanced/how-to-read-image-metadata
    // https://docs.microsoft.com/en-us/dotnet/api/system.drawing.imaging.propertyitem.id
    return ParseResult.Create(Name, xml);
}
/// <summary>
/// Produces the final result by pairing format placeholders with format values.
/// </summary>
/// <remarks>
/// When the two lists differ in length an error result is returned and the shorter list is then
/// padded with <c>#MISSING#</c>. With no placeholders at all an error result with no pairs is
/// returned. Otherwise the result is successful exactly when there are no validation errors.
/// </remarks>
/// <returns>The parse result carrying the placeholder/value pairs and any validation errors.</returns>
public ParseResult<string, object> GetResult()
{
    // Zip is deferred; a fresh query is created at each use site and, as before, the padding below
    // is applied only after the error result has been created from it.
    Func<IEnumerable<KeyValuePair<string, object>>> pairUp =
        () => FormatPlaceholders.Zip(FormatValues, (name, value) => new KeyValuePair<string, object>(name, value));

    if (FormatValueCountUnequalToFormatPlaceholderCount)
    {
        var errors = ValidationErrors.Concat(new[] { $"Format values count ({FormatValues.Count}) is not equal to column placeholders count ({FormatPlaceholders.Count}), see #MISSING# in format placeholders list (keys)" });
        var failure = ParseResult.Error(errors, pairUp());

        int missingPlaceholders = FormatValues.Count - FormatPlaceholders.Count;
        FormatPlaceholders.AddRange(Enumerable.Range(1, missingPlaceholders).Select(_ => "#MISSING#"));
        return failure;
    }

    if (FormatPlaceholderCountUnequalToFormatValueCount)
    {
        var errors = ValidationErrors.Concat(new[] { $"Format placeholders count ({FormatPlaceholders.Count}) is not equal to column values count ({FormatValues.Count}), see #MISSING# in format values list (values)" });
        var failure = ParseResult.Error(errors, pairUp());

        int missingValues = FormatPlaceholders.Count - FormatValues.Count;
        FormatValues.AddRange(Enumerable.Range(1, missingValues).Select(_ => "#MISSING#"));
        return failure;
    }

    if (FormatPlaceholders.Count == 0)
    {
        return ParseResult.Error(
            ValidationErrors.Concat(new[] { "No format placeholders were found" }),
            Array.Empty<KeyValuePair<string, object>>());
    }

    return ParseResult.Create(ValidationErrors.Count == 0, pairUp(), ValidationErrors);
}
/// <summary>
/// Matches a US address in the context text from <paramref name="startIndex"/> onwards.
/// </summary>
/// <param name="startIndex">Index into the context text at which matching begins.</param>
/// <returns>A collection with one <c>POSType.A_M</c> result carrying the normalised address, or an
/// empty collection when the remaining text is empty or does not match.</returns>
/// <exception cref="InvalidOperationException">The context pattern is not <c>NorthAmerica</c>.</exception>
public ParseResultCollection Parse(int startIndex)
{
    ParseResultCollection results = new ParseResultCollection();
    string remainder = context.Text.Substring(startIndex);

    if (context.Pattern != ParserPattern.NorthAmerica)
    {
        throw new InvalidOperationException("To use USAddressParser, Parser pattern must be NorthAmerica");
    }

    if (string.IsNullOrEmpty(remainder))
    {
        return results;
    }

    // The regex is applied to an upper-cased copy of the remaining text.
    var match = addressRegex.Match(remainder.ToUpperInvariant());
    if (!match.Success)
    {
        return results;
    }

    var address = new Address(Normalize(GetApplicableFields(match)));
    results.Add(ParseResult.Create(address.ToString(), startIndex, POSType.A_M, address));
    return results;
}
/// <summary>
/// Parses the box structure of an MPEG-4 stream and, when requested, its metadata.
/// </summary>
/// <param name="stream">The stream to read. The four characters at offset 4 must be <c>ftyp</c>.</param>
/// <param name="options">When equal to <c>ParseOptions.Metadata</c>, cover, duration, time scale and
/// media data length are gathered from the <c>ilst</c>, <c>mdhd</c> and <c>mdat</c> boxes if present.</param>
/// <returns>
/// The parsed file; an unknown-format result when the <c>ftyp</c> marker is absent; or a result
/// wrapping the exception when anything throws while parsing.
/// </returns>
public static async Task<ParseResult<Mpeg4File>> ParseAsync(IBinaryStream stream, ParseOptions options)
{
    try
    {
        stream.Position = 4;

        // ConfigureAwait(false) added here and on ReadBoxAsync so every await in this method
        // consistently avoids capturing the caller's synchronization context.
        if (await stream.ReadStringAsync(4).ConfigureAwait(false) != "ftyp")
        {
            return ParseResult<Mpeg4File>.CreateUnknownFormat();
        }

        var file = new Mpeg4File();
        stream.Position = 0;

        var streamLength = await stream.GetLengthAsync().ConfigureAwait(false);
        while (stream.Position < streamLength)
        {
            // Reuse the length obtained above instead of querying the stream again for every box.
            file.Boxes.Add(await Mpeg4File.ReadBoxAsync(stream, streamLength).ConfigureAwait(false));
        }

        if (options == ParseOptions.Metadata)
        {
            var mediaData = file.Contains("mdat") ? file["mdat"] : null;
            var metadataItems = file.Contains("moov.udta.meta.ilst")
                ? await MetadataItemsBox.CreateAsync(stream, file["moov.udta.meta.ilst"]).ConfigureAwait(false)
                : null;
            var mediaHeader = file.Contains("moov.trak.mdia.mdhd")
                ? await MediaHeaderBox.CreateAsync(stream, file["moov.trak.mdia.mdhd"]).ConfigureAwait(false)
                : null;

            file.Metadata = new Mpeg4Metadata(metadataItems?.Cover, mediaHeader?.Duration, mediaHeader?.TimeScale, mediaData?.Length);
        }

        return ParseResult<Mpeg4File>.Create(file);
    }
    catch (Exception exception)
    {
        // Failures are reported through the result rather than thrown to the caller.
        return ParseResult<Mpeg4File>.Create(exception);
    }
}
/// <summary>
/// Skips an ID3v2 tag if one is present and collects the samples that follow it.
/// </summary>
/// <param name="stream">The stream to read; its position is reset to 0 first.</param>
/// <returns>A file populated with the collected samples and their bitrate type, or an
/// unknown-format result when no sample was found.</returns>
public static async Task<ParseResult<Mp3File>> ParseSamplesAsync(IBinaryStream stream)
{
    var totalLength = await stream.GetLengthAsync().ConfigureAwait(false);
    stream.Position = 0;

    bool startsWithTag = await Id3v2.IsPointingToHeaderAsync(stream).ConfigureAwait(false);
    if (startsWithTag)
    {
        await Id3v2.SkipAsync(stream).ConfigureAwait(false);
    }

    var collected = await CollectSamplesAsync(stream, totalLength, null).ConfigureAwait(false);
    if (collected.Item1.Count == 0)
    {
        return ParseResult<Mp3File>.CreateUnknownFormat();
    }

    var file = new Mp3File
    {
        Type = collected.Item2,
        Samples = collected.Item1
    };

    return ParseResult<Mp3File>.Create(file);
}
/// <summary>
/// Requires <paramref name="parser"/> to match exactly <paramref name="count"/> times in a row.
/// </summary>
/// <typeparam name="TSource">The type of the source elements.</typeparam>
/// <typeparam name="TResult">The type of the elements that are generated from parsing the source elements.</typeparam>
/// <param name="parser">The parser to be matched.</param>
/// <param name="count">How many consecutive matches are required; must not be negative.</param>
/// <returns>A parser that matches <paramref name="parser"/> the given number of times.</returns>
public static IObservableParser<TSource, IObservable<TResult>> Exactly<TSource, TResult>(
    this IObservableParser<TSource, TResult> parser,
    int count)
{
    Contract.Requires(parser != null);
    Contract.Requires(count >= 0);
    Contract.Ensures(Contract.Result<IObservableParser<TSource, IObservable<TResult>>>() != null);

    if (count == 0)
    {
        return parser.Yield(_ => ObservableParseResult.ReturnSuccessMany<TResult>(length: 0));
    }

    if (count == 1)
    {
        // Profiling has shown this to be about 50% faster than Repeat(parser, 1).All()
        return parser.Amplify();
    }

    if (!(parser is IObservableParserCursor<TSource>))
    {
        return System.Linq.Enumerable.Repeat(parser, count).All();
    }

    /* Cursor fast path: take `count` elements directly from the source.
     * Profiling has shown this to be exponentially faster in next.Exactly(largeN) queries for Ix.
     * It hasn't been profiled in Rx, but it is assumed to be faster for similar reasons, and
     * possibly more so because of the extra Rx plumbing that this optimization avoids.
     */
    return parser.Yield<TSource, TResult, IObservable<TResult>>(
        "Exactly",
        source => source.Take(count).ToList()
            .Where(list => list.Count == count)
            .Select(list => ParseResult.Create(list.Cast<TResult>().ToObservable(Scheduler.Immediate), count)));
}
/// <summary>
/// Reads header sub-lines until the next level-0 line and collects source, GEDCOM version and
/// character set information.
/// </summary>
/// <param name="first">The line that opened the header (not inspected here).</param>
/// <param name="lineProvider">Source of the following raw lines.</param>
/// <returns>The parsed header together with the last line that was read.</returns>
public static ParseResult<GedcomHeader> Parse(GedcomLine first, ILineProvider lineProvider)
{
    var header = new GedcomHeader();

    // Remembers which level-1 section (SOUR or GEDC) subsequent level-2 lines belong to.
    CurrentLevel section = CurrentLevel.None;
    GedcomLine line = default;

    for (string raw = lineProvider.ReadLine(); raw != null; raw = lineProvider.ReadLine())
    {
        line = ParserHelper.ParseLine(raw);

        // Level 0 starts the next record.
        if (line.Level == 0)
        {
            break;
        }

        if (line.Level == 1)
        {
            string tag = line.GetTagOrRef();
            if (tag == "SOUR")
            {
                section = CurrentLevel.Sour;
            }
            else if (tag == "GEDC")
            {
                section = CurrentLevel.Gedc;
            }
            else if (tag == "CHAR")
            {
                header.GedcomCharacterSet = line.GetLineContent();
            }
        }
        else if (line.Level == 2)
        {
            if (section == CurrentLevel.Sour)
            {
                string tag = line.GetTagOrRef();
                if (tag == "NAME")
                {
                    header.SourceName = line.GetLineContent();
                }
                else if (tag == "VERS")
                {
                    header.SourceVers = line.GetLineContent();
                }
                else if (tag == "CORP")
                {
                    header.SourceCorp = line.GetLineContent();
                }
            }
            else if (section == CurrentLevel.Gedc && line.GetTagOrRef() == "VERS")
            {
                header.GedcomVers = line.GetLineContent();
            }
        }
    }

    return ParseResult.Create(header, line);
}
//public static ParseResultCollection Parse(string text)
//{
//    return ParseResultCollection.InternalParse(text, new OrgNameParser(text));
//}

/// <summary>
/// Tries to recognise a Chinese organisation name starting at <paramref name="startIndex"/>.
/// </summary>
/// <remarks>
/// A window of at most <c>maxChineseOrgNameLength</c> characters is searched for the first entry of
/// <c>suffixList</c> that occurs at a position greater than zero. The candidate (window start
/// through the suffix) is accepted when a place name is found at the start of the window close
/// enough to the suffix, or when a place name is found right after the first "(" of the full
/// context text. Otherwise <c>MatchOrgName</c> is consulted.
/// </remarks>
/// <param name="startIndex">Index into the context text at which the candidate begins.</param>
/// <returns>A collection with at most one <c>POSType.A_NT</c> result.</returns>
public ParseResultCollection Parse(int startIndex)
{
    string _text = context.Text;
    ParseResultCollection prc = new ParseResultCollection();
    string temp = _text.Substring(startIndex, Math.Min(maxChineseOrgNameLength, _text.Length - startIndex));
    int pos = -1;
    string suffix = null;
    // Take the first suffix (in list order) that occurs somewhere after the first character.
    for (int i = 0; i < suffixList.Length; i++)
    {
        pos = temp.IndexOf(suffixList[i]);
        if (pos > 0)
        {
            suffix = suffixList[i];
            break;
        }
    }
    if (pos <= 0) // No suffix found; return immediately.
    {
        return(prc);
    }
    // Look for a leading place name.
    string placeName = null;
    ParserContext context1 = this.context.Clone();
    context1.Text = temp;
    IParser placeNameParser = new PlaceNameParser(context1);
    ParseResultCollection prc1 = placeNameParser.Parse(0);
    if (prc1.Count > 0)
    {
        placeName = (string)prc1[0].Text;
    }
    // Accept when the part between the place name and the suffix is short enough.
    if (placeName != null && pos - placeName.Length < maxMiddlePartLength)
    {
        prc.Add(ParseResult.Create(temp.Substring(0, pos + suffix.Length), startIndex, POSType.A_NT));
    }
    // NOTE(review): this searches the whole context text, not the window `temp` - confirm intended.
    else if (context.Text.IndexOf("(") > 0)
    {
        int bracePos = context.Text.IndexOf("(");
        IParser placeNameParser2 = new PlaceNameParser(context);
        ParseResultCollection prc2 = placeNameParser2.Parse(bracePos + 1);
        if (prc2.Count > 0)
        {
            // placeName is assigned here but not read again before the method returns.
            placeName = (string)prc2[0].Text;
            prc.Add(ParseResult.Create(temp.Substring(0, pos + suffix.Length), startIndex, POSType.A_NT));
        }
    }
    else
    {
        // No place name found.
        string orgName = MatchOrgName(temp, 0);
        if (orgName != null)
        {
            prc.Add(ParseResult.Create(orgName, startIndex, POSType.A_NT));
        }
        else
        {
            // Not in the dictionary; use predicates to locate the boundary (not implemented).
        }
    }
    return(prc);
    /*
     * From "Modern Chinese Vocabulary Research - Chinese Information Processing".
     * Confirmation rules
     *  a. If the word before the candidate place-name string is a place-name boundary word and the
     *     word after it is a place-name feature word, both boundaries of the candidate are confirmed.
     *  b. If the word before the candidate is a place-name boundary word, its left boundary is confirmed.
     *  c. If the word after the candidate is a place-name boundary word, its right boundary is confirmed.
     *  d. If two candidates stand in a coordinate (parallel) relation and one of them is confirmed,
     *     the other is confirmed too.
     * Rejection rules
     *  Title-word rule: if the word before the candidate is a personal title and the candidate
     *     contains no place-name feature word, reject the candidate.
     *  Boundary-word rule: if the word after the candidate is a personal-name boundary word and the
     *     candidate contains no place-name feature word, reject the candidate.
     *  Coordinate rule: if two candidates stand in a coordinate relation and one is rejected, the
     *     other is rejected too.
     *  Other-object rule: if the word after the candidate is a feature word of another object class,
     *     reject the candidate, e.g. 红塔山香烟 (Hongtashan cigarettes).
     *  Non-single-character-word rule: if the word before the candidate is not a single-character
     *     word, or the word after it is not a single-character word, reject the candidate.
     * Boundary correction rules
     *  Title/feature-word rule: if the word before the candidate is a personal title and the
     *     candidate contains a place-name feature word, correct the boundary of the place name.
     */
}
/// <summary>
/// Splits a Chinese address starting at <paramref name="startIndex"/> into its components.
/// </summary>
/// <remarks>
/// A window of at most <c>maxChineseAddressLength</c> characters is scanned once. Ordinary
/// characters accumulate in <c>sb</c>; a marker character (city, district, province, street, ...)
/// closes the accumulated text as the corresponding field of a <c>ChineseAddress</c> and appends it
/// to <c>whole</c>. A marker seen while <c>sb</c> is empty is treated as ordinary text. Scanning
/// stops at Chinese punctuation or a space.
/// </remarks>
/// <param name="startIndex">Index into the context text at which the address begins.</param>
/// <returns>A collection with one <c>POSType.D_S</c> result carrying the recognised text and the
/// <c>ChineseAddress</c>, or an empty collection when nothing was recognised.</returns>
public ParseResultCollection Parse(int startIndex)
{
    string _text = context.Text;
    ParseResultCollection prc = new ParseResultCollection();
    string temp = _text.Substring(startIndex, Math.Min(maxChineseAddressLength, _text.Length - startIndex));
    char[] chars = temp.ToCharArray();
    //int lastStartPos = 0;
    StringBuilder sb = new StringBuilder();       // text collected since the last recognised component
    StringBuilder whole = new StringBuilder();    // all recognised components, in order
    ChineseAddress ca = new ChineseAddress();
    int startpos = 0;
    //TODO: look up country names in a dictionary
    if (temp.StartsWith("中国"))
    {
        startpos = 2;
        ca.country = "中国";
        whole.Append("中国");
    }
    for (int i = startpos; i < chars.Length; i++)
    {
        char ch = chars[i];
        // City marker ('市' or '场').
        if (ch == '市' || ch == '场')
        {
            if (sb.Length == 0)
            {
                sb.Append(ch);
                continue;
            }
            sb.Append(ch);
            string subStr = sb.ToString();
            string city = GetMaximumMatch(subStr, 0, 5);
            // Without a match the text (including the marker) simply keeps accumulating.
            if (city != null)
            {
                ca.city = city;
                whole.Append(ca.city);
                sb = new StringBuilder();
            }
        }
        // District marker.
        else if (ch == '区')
        {
            if (sb.Length == 0)
            {
                sb.Append(ch);
                continue;
            }
            sb.Append(ch);
            string subStr = sb.ToString();
            string district = GetMaximumMatch(subStr, 0, 5);
            if (district != null)
            {
                if (!district.EndsWith("区"))
                {
                    // The match does not end with the district marker: treat it as the city and
                    // the remainder of the accumulated text as the district.
                    ca.city = district;
                    whole.Append(ca.city);
                    ca.district = subStr.Substring(ca.city.Length);
                    whole.Append(ca.district);
                }
                else
                {
                    //string district = NEParser.GetMaximumMatch(subStr, 0, 5, "district", _cityNames, null);
                    ca.district = district;
                    whole.Append(ca.district);
                }
            }
            else
            {
                ca.district = subStr;
                whole.Append(ca.district);
            }
            sb = new StringBuilder();
        }
        // Province marker.
        else if (ch == '省')
        {
            if (sb.Length == 0)
            {
                sb.Append(ch);
                continue;
            }
            sb.Append(ch);
            string subStr = sb.ToString();
            string province = GetMaximumMatch(subStr, 0, 5); // province
            if (province != null)
            {
                ca.province = province;
                whole.Append(ca.province);
                sb = new StringBuilder();
            }
        }
        // Township / village / county / town markers all go to the county field.
        else if (ch == '乡' || ch == '村' || ch == '县' || ch == '镇')
        {
            if (sb.Length == 0)
            {
                sb.Append(ch);
                continue;
            }
            sb.Append(ch);
            ca.county = sb.ToString();
            whole.Append(ca.county);
            sb = new StringBuilder();
        }
        // NOTE(review): '巷' (alley) is neither recorded nor appended to sb - the character is dropped.
        else if (ch == '巷')
        {
        }
        // Floor / lane / number / room: only accepted when the accumulated text is numeric.
        else if (ch == '楼' || ch == '弄' || ch == '号' || ch == '室')
        {
            if (sb.Length == 0)
            {
                sb.Append(ch);
                continue;
            }
            string substr = NumeralUtil.ConvertChineseNumeral2Arabic(sb.ToString());
            int x;
            sb.Append(ch);
            if (Int32.TryParse(substr, out x))
            {
                if (ch == '楼')
                {
                    ca.floor = sb.ToString();
                }
                else if (ch == '弄')
                {
                    ca.lane = sb.ToString();
                }
                else if (ch == '号')
                {
                    ca.no = sb.ToString();
                }
                else if (ch == '室')
                {
                    ca.room = sb.ToString();
                }
                whole.Append(sb.ToString());
                sb = new StringBuilder();
            }
        }
        // Street markers.
        else if (ch == '道' || ch == '路' || ch == '街')
        {
            if (sb.Length == 0)
            {
                sb.Append(ch);
                continue;
            }
            sb.Append(ch);
            ca.street = sb.ToString();
            whole.Append(ca.street);
            sb = new StringBuilder();
        }
        // Opening parenthesis: discard whatever was accumulated and start a parenthesised remark.
        // NOTE(review): both operands of this comparison read as the same character here; one was
        // possibly a full-width variant before the text was normalised - confirm against the repository.
        else if (ch == '(' || ch == '(')
        {
            sb = new StringBuilder();
            sb.Append(ch);
        }
        // Closing parenthesis: store the remark as the extra field (same NOTE(review) as above).
        else if (ch == ')' || ch == ')')
        {
            sb.Append(ch);
            string extra1 = sb.ToString();
            whole.Append(extra1);
            ca.extra = extra1;
            sb = new StringBuilder();
        }
        // Punctuation or a space ends the address (same NOTE(review) for the two space literals).
        else if (CharacterUtil.IsChinesePunctuation(ch) || (ch == ' ' || ch == ' '))
        {
            break;
        }
        // '大' followed by '桥' / '厦' (bridge / building) or '酒店' (hotel).
        // NOTE(review): when sb is non-empty and none of these follow, '大' is not appended to sb.
        else if (ch == '大')
        {
            if (sb.Length == 0)
            {
                sb.Append(ch);
                continue;
            }
            if (i + 1 < chars.Length)
            {
                char nextchar = chars[i + 1];
                if (nextchar == '桥' || nextchar == '厦')
                {
                    string extra1 = sb.ToString() + "大" + nextchar;
                    whole.Append(extra1);
                    if (nextchar == '桥')
                    {
                        ca.extra += extra1;
                    }
                    else
                    {
                        ca.building = extra1;
                    }
                    i += 2 - 1; // two characters consumed; the loop increment accounts for one
                    sb = new StringBuilder();
                }
                else if (i + 2 < chars.Length && nextchar == '酒')
                {
                    char nextchar2 = chars[i + 2];
                    if (nextchar2 == '店')
                    {
                        string extra1 = sb.ToString() + "大" + nextchar + nextchar2;
                        string city = GetMaximumMatch(extra1, 0, 5); // city or province
                        if (city != null)
                        {
                            ca.city = city;
                            whole.Append(ca.city);
                            extra1 = extra1.Substring(ca.city.Length);
                        }
                        whole.Append(extra1);
                        ca.building = extra1;
                        i += 3 - 1; // three characters consumed; the loop increment accounts for one
                        sb = new StringBuilder();
                    }
                }
            }
        }
        // '餐厅' (restaurant) is appended to the extra field.
        // NOTE(review): when sb is non-empty and '厅' does not follow, '餐' is not appended to sb.
        else if (ch == '餐')
        {
            if (sb.Length == 0)
            {
                sb.Append(ch);
                continue;
            }
            if (i + 1 < chars.Length)
            {
                char nextchar = chars[i + 1];
                if (nextchar == '厅')
                {
                    string extra1 = sb.ToString() + "餐" + nextchar;
                    whole.Append(extra1);
                    ca.extra += extra1;
                    i += 2 - 1;
                    sb = new StringBuilder();
                }
            }
        }
        // Any other character: accumulate, and close a building name ending in '中心' or '酒店'.
        else
        {
            //if (sb.Length == 0)
            //    lastStartPos = i;
            sb.Append(ch);
            string extra = sb.ToString();
            if (extra.EndsWith("中心") || extra.EndsWith("酒店"))
            {
                string city = GetMaximumMatch(extra, 0, 5); // city
                if (city != null)
                {
                    // The city is stored but, unlike in the other branches, not appended to `whole`.
                    ca.city = city;
                    extra = extra.Substring(city.Length);
                }
                ca.building = extra;
                whole.Append(extra);
                if (i + 2 < chars.Length && chars[i + 1] == '大' && chars[i + 2] == '厦') // handle "中心大厦"
                {
                    ca.building += "大厦";
                    whole.Append("大厦");
                    i += 2;
                    sb = new StringBuilder();
                    continue;
                }
                sb = new StringBuilder();
            }
        }
    }
    if (whole.Length > 0)
    {
        // Unclassified trailing text replaces the extra field.
        if (sb.Length > 0)
        {
            ca.extra = sb.ToString();
        }
        prc.Add(ParseResult.Create(whole.ToString(), startIndex, POSType.D_S, ca));
    }
    return(prc);
}
/// <summary>
/// Tries to recognise a Chinese personal name (optional prefix, surname, given name, optional
/// title suffix) starting at <paramref name="startIndex"/>.
/// </summary>
/// <remarks>
/// A surname is mandatory. The given name is delimited by the first of these that applies: a
/// following title suffix, a position reported by <c>MatchVerb</c>, a punctuation position
/// reported by <c>MatchPunctation</c>, or the end of the text when few enough characters remain.
/// </remarks>
/// <param name="startIndex">Index into the context text at which the candidate begins.</param>
/// <returns>The recognised parts in order (prefix and suffix as <c>POSType.D_N</c>, surname and
/// given name as <c>POSType.A_NR</c>); possibly empty.</returns>
public ParseResultCollection Parse(int startIndex)
{
    string _text = context.Text;
    ParseResultCollection prc = new ParseResultCollection();
    //TODO: handle Chinese renderings of foreign names (no surname)
    //3 Find the prefix
    string prefix = MatchPrefix(_text, startIndex);
    int prefixlength = 0;
    if (prefix != null)
    {
        prefixlength = prefix.Length;
    }
    //1 Scan for a surname from the "Hundred Family Surnames" list
    // Look up a single-character surname
    int currentPos = startIndex + prefixlength;
    string surname = MatchSurname(_text, currentPos);
    if (surname == null)
    {
        return(prc);
    }
    bool surnameInserted = false;
    bool givennameInserted = false;
    // With a prefix present, prefix and surname are emitted right away.
    if (prefix != null && surname != null)
    {
        prc.Add(ParseResult.Create(prefix, startIndex, POSType.D_N)); // prefix
        surnameInserted = true;
        prc.Add(ParseResult.Create(surname, currentPos, POSType.A_NR));
        currentPos += surname.Length;
    }
    //2 If the surname is followed by punctuation, treat it as not being a personal name
    // NOTE(review): prc may already contain the prefix and surname added above when this returns,
    // and the character tested is at currentPos + 1 - confirm both are intended.
    if (currentPos + 1 < _text.Length && CharacterUtil.IsChinesePunctuation(_text[currentPos + 1]))
    {
        return(prc);
    }
    //1.1 Use maximum matching to search the dictionary for the full name; if it matches with a
    //    high weight, treat it as a personal name directly
    //string fullname = MatchFullname(_text, startIndex);
    //if (fullname != null)
    //{
    //    prc.Add(ParseResult.Create(surname, startIndex, POSType.A_NR));
    //    prc.Add(ParseResult.Create(fullname.Substring(surname.Length), startIndex + surname.Length, POSType.A_NR));
    //    return prc;
    //}
    //3 Find the given name
    //TODO: narrow the range of given names, otherwise mismatches are likely
    //string givenname = MatchGivenname(_text, startIndex + surname.Length);
    //if (givenname != null)
    //{
    //    string suffix2 = MatchSuffix(_text, startIndex + surname.Length + givenname.Length, _siblingWordDB);
    //    if (suffix != null && givenname.Length <= suffix.Length)
    //    {
    //        givenname = null;
    //    }
    //    else
    //    {
    //        suffix = suffix2;
    //    }
    //}
    //4 If a title follows, e.g. 先生 (Mr.), 小姐 (Miss), 博士 (Dr.), 医生 (doctor), treat it as a personal name
    int resultStartPos = -1;
    if (surname != null)
    {
        // Start searching after the surname; currentPos is already past it when it was inserted above.
        resultStartPos = currentPos + (surnameInserted?0:surname.Length);
        string suffix = MatchSuffix(_text, resultStartPos, out resultStartPos);
        if (suffix != null)
        {
            if (!surnameInserted)
            {
                prc.Add(ParseResult.Create(surname, currentPos, POSType.A_NR));
                surnameInserted = true;
                currentPos += surname.Length;
            }
            if (resultStartPos > currentPos)
            {
                // Text between the surname and the suffix is the given name.
                string givenname = _text.Substring(currentPos, resultStartPos - currentPos);
                prc.Add(ParseResult.Create(givenname, currentPos, POSType.A_NR));
                prc.Add(ParseResult.Create(suffix, resultStartPos, POSType.D_N));
                currentPos += givenname.Length + suffix.Length;
                givennameInserted = true;
            }
            else
            {
                // The suffix follows the surname directly.
                prc.Add(ParseResult.Create(suffix, currentPos, POSType.D_N));
                currentPos += suffix.Length;
            }
            return(prc);
        }
    }
    // 5 If there is a verb or causative verb in front, it can be treated as a personal name
    // NOTE(review): the original comment says "in front", but the search starts after the surname - confirm.
    if (surname != null)
    {
        resultStartPos = currentPos + (surnameInserted ? 0 : surname.Length);
        bool verbFound = MatchVerb(_text, resultStartPos, out resultStartPos);
        if (verbFound && resultStartPos > currentPos + (surnameInserted ? 0 : surname.Length))
        {
            if (!surnameInserted)
            {
                prc.Add(ParseResult.Create(surname, currentPos, POSType.A_NR));
                surnameInserted = true;
                currentPos += surname.Length;
            }
            if (!givennameInserted)
            {
                string givenname = _text.Substring(currentPos, resultStartPos - currentPos);
                prc.Add(ParseResult.Create(givenname, currentPos, POSType.A_NR));
                currentPos += givenname.Length;
                givennameInserted = true;
            }
        }
    }
    if (surname != null)
    {
        // Punctuation directly after the name: treat it as a personal name
        int punctuationPos = MatchPunctation(_text, currentPos + (surnameInserted ? 0 : surname.Length), 4);
        if (punctuationPos > 0)
        {
            if (!surnameInserted)
            {
                prc.Add(ParseResult.Create(surname, currentPos, POSType.A_NR));
                surnameInserted = true;
                currentPos += surname.Length;
            }
            if (!givennameInserted)
            {
                string givenname = _text.Substring(currentPos, punctuationPos - currentPos);
                prc.Add(ParseResult.Create(givenname, currentPos, POSType.A_NR));
                currentPos += givenname.Length;
                givennameInserted = true;
            }
        }
    }
    // The case where no characters follow the name
    // NOTE(review): surname.Length is subtracted even when currentPos is already past the surname - confirm.
    if (surname != null && _text.Length - currentPos - surname.Length <= MaximumGivennameLength && _text.Length - currentPos - surname.Length > 0)
    {
        if (!surnameInserted)
        {
            prc.Add(ParseResult.Create(surname, currentPos, POSType.A_NR));
            surnameInserted = true;
            currentPos += surname.Length;
        }
        if (!givennameInserted)
        {
            // Everything up to the end of the text becomes the given name.
            string givenname = _text.Substring(currentPos, _text.Length - currentPos);
            prc.Add(ParseResult.Create(givenname, currentPos, POSType.A_NR));
            currentPos += givenname.Length;
            givennameInserted = true;
        }
    }
    return(prc);
}
/// <summary>
/// Reads the lines of a family record and collects spouse ids, child ids and marriage details.
/// </summary>
/// <param name="first">The line that opened the record; its tag/ref supplies the family id and
/// its level marks where the record ends.</param>
/// <param name="lineProvider">Source of the following raw lines.</param>
/// <returns>The parsed family together with the last line that was read.</returns>
public static ParseResult<Family> Parse(GedcomLine first, ILineProvider lineProvider)
{
    var family = new Family { ID = ParserHelper.ParseID(first.GetTagOrRef()) };

    // True from a MARR tag until any tag other than DATE, PLAC, HUSB, WIFE or CHIL is seen.
    bool withinMarriage = false;
    GedcomLine line = default;

    for (string raw = lineProvider.ReadLine(); raw != null; raw = lineProvider.ReadLine())
    {
        line = ParserHelper.ParseLine(raw);

        // A line at the record's own level starts the next record.
        if (line.Level == first.Level)
        {
            break;
        }

        string tag = line.GetTagOrRef();

        if (tag == "MARR")
        {
            withinMarriage = true;
        }
        else if (tag == "DATE")
        {
            if (withinMarriage) // TODO: should have MARR parser
            {
                var date = line.GetLineContent();
                var marriage = family.Marriage ?? (family.Marriage = new GedcomEvent());
                marriage.Date = date;
            }
        }
        else if (tag == "PLAC")
        {
            if (withinMarriage) // Assume level + 1 is MARR
            {
                var place = line.GetLineContent();
                var marriage = family.Marriage ?? (family.Marriage = new GedcomEvent());
                marriage.Location = place;
            }
        }
        else if (tag == "HUSB")
        {
            // The tag may appear without content (e.g. "2 HUSB" inside a marriage tag in
            // torture-test files); such lines are ignored.
            var husbandRef = line.GetLineContent();
            if (!string.IsNullOrEmpty(husbandRef))
            {
                family.HusbandID = ParserHelper.ParseID(husbandRef);
            }
        }
        else if (tag == "WIFE")
        {
            // Same handling as HUSB: a tag without content is ignored.
            var wifeRef = line.GetLineContent();
            if (!string.IsNullOrEmpty(wifeRef))
            {
                family.WifeID = ParserHelper.ParseID(wifeRef);
            }
        }
        else if (tag == "CHIL")
        {
            family.ChildIDs.Add(ParserHelper.ParseID(line.GetLineContent()));
        }
        else
        {
            withinMarriage = false;
        }
    }

    return ParseResult.Create(family, line);
}
/// <summary>
/// Repeatedly applies <c>parser</c> at successive positions of <paramref name="source"/>, emitting
/// one result per attempt whose value is the sequence of that attempt's match values.
/// </summary>
/// <remarks>
/// Iteration stops when the source is at its end, when <c>untilCount</c> matching attempts have
/// been made (unless it equals <c>unlimitedCount</c>), or when <c>untilParser</c> (if any) matches.
/// </remarks>
/// <param name="source">The cursor to parse from; each attempt runs on a branch of it.</param>
/// <returns>An observable of results of length 1, optionally followed by one final result that
/// accounts for the remaining length of the last match.</returns>
public IObservable<IParseResult<IObservable<TResult>>> Parse(IObservableCursor<TSource> source)
{
    return(Observable.Create<IParseResult<IObservable<TResult>>>(
        observer =>
        {
            // Number of attempts that produced at least one result.
            int matchCount = 0;
            // Elements still covered by the most recent match beyond the one already reported.
            int remainingLength = 0;

            // Runs one attempt; moveNext is invoked once the attempt's results complete.
            Action<Action> iterate = moveNext =>
            {
                bool hasResult = false;
                int length = 0;

                var branch = source.Branch();

                var values = parser.Parse(branch)
                    .Finally(branch.Dispose)
                    .Select(result =>
                    {
                        if (!hasResult)
                        {
                            matchCount++;
                            hasResult = true;
                        }

                        // Track the longest result of this attempt.
                        length = Math.Max(length, result.Length);

                        return result.Value;
                    })
                    .Do(
                        __ => { },
                        () =>
                        {
                            /* We must respect the greediness of the results unless the length is zero since the
                             * cursor would have already moved to the following element. It is acceptable to ignore
                             * zero-length results because marking an entirely non-greedy parser as ambiguous would
                             * otherwise cause the parser to continuously parse the first element indefinitely.
                             */
                            if (length > 0)
                            {
                                remainingLength = length - 1;
                            }
                            else if (remainingLength > 0)
                            {
                                remainingLength--;
                            }

                            moveNext();
                        });

                // Each attempt is reported with length 1; `values` is not subscribed to here.
                observer.OnNext(ParseResult.Create(values, length: 1));
            };

            // Emits the outstanding length of the last match (if any) and completes.
            Action complete = () =>
            {
                if (remainingLength > 0)
                {
                    observer.OnNext(ObservableParseResult.SuccessMany<TResult>(remainingLength));
                }

                observer.OnCompleted();
            };

            var untilSubscription = new SerialDisposable();

            // `self` reschedules this action; it is handed to iterate as its moveNext callback.
            var schedule = Scheduler.Immediate.Schedule(
                self =>
                {
                    if (!source.AtEndOfSequence
                        && (untilCount == unlimitedCount || matchCount < untilCount))
                    {
                        if (untilParser == null)
                        {
                            iterate(self);
                        }
                        else
                        {
                            // Probe the terminating parser first; continue only when it does not match.
                            untilSubscription.SetDisposableIndirectly(() =>
                                untilParser.Parse(source).Any().Subscribe(
                                    any =>
                                    {
                                        if (!any)
                                        {
                                            iterate(self);
                                        }
                                        else
                                        {
                                            complete();
                                        }
                                    },
                                    observer.OnError));
                        }
                    }
                    else
                    {
                        complete();
                    }
                });

            return new CompositeDisposable(schedule, untilSubscription);
        }));
}