/// <summary>
/// Runs a <c>Normalizer</c> constructed with the flags <c>(false, false, true)</c> over several
/// Persian phrases and asserts that each result equals the expected text.
/// </summary>
public void AffixSpacing()
{
    // Each row is { text handed to Run, text Run is expected to return }.
    string[][] cases =
    {
        new[]
        {
            "خانه ی پدری",
            "خانهی پدری"
        },
        new[]
        {
            "فاصله میان پیشوند ها و پسوند ها را اصلاح می کند.",
            "فاصله میان پیشوندها و پسوندها را اصلاح میکند."
        },
        new[]
        {
            "می روم",
            "میروم"
        },
        new[]
        {
            "حرفه ای",
            "حرفهای"
        },
    };

    Normalizer affixNormalizer = new Normalizer(false, false, true);
    foreach (string[] pair in cases)
    {
        string source = pair[0];
        string wanted = pair[1];
        string produced = affixNormalizer.Run(source);

        Assert.AreEqual(wanted, produced, "Failed to affix spacing of '" + source + "'");
    }
}
/// <summary>
/// Creates a reader, remembering the corpus file path, the joined-verb-parts flag and the POS map
/// path, and preparing the normalizer and word tokenizer used by the reader.
/// </summary>
/// <param name="bijankhanFile">Stored in <c>_bijankhanFile</c>.</param>
/// <param name="joinedVerbParts">Stored in <c>_joinedVerbParts</c>.</param>
/// <param name="posMap">Stored in <c>_posMap</c>.</param>
public BijankhanReader(string bijankhanFile, bool joinedVerbParts, string posMap)
{
    // Caller-supplied settings.
    _bijankhanFile = bijankhanFile;
    _joinedVerbParts = joinedVerbParts;
    _posMap = posMap;

    // Text helpers owned by this reader.
    normalizer = new Normalizer(true, false, true);
    tokenizer = new WordTokenizer();
}
/// <summary>
/// Runs a <c>Normalizer</c> constructed with the flags <c>(false, true, false)</c> over a sentence
/// with loosely spaced parentheses and full stop, and asserts the tightened result.
/// </summary>
public void PunctuationSpacing()
{
    const string source = "اصلاح ( پرانتزها ) در متن .";
    const string wanted = "اصلاح (پرانتزها) در متن.";

    Normalizer spacingNormalizer = new Normalizer(false, true, false);
    string produced = spacingNormalizer.Run(source);

    Assert.AreEqual(wanted, produced, "Failed to punctuation spacing of '" + source + "'");
}
/// <summary>
/// Runs a <c>Normalizer</c> constructed with the flags <c>(true, false, false)</c> over three inputs
/// (Arabic letter forms, a stretched word, and Latin digits followed by three dots) and asserts
/// the expected output for each.
/// </summary>
public void CharacterRefinementTest()
{
    // Each row is { text handed to Run, text Run is expected to return }.
    string[][] cases =
    {
        new[]
        {
            "اصلاح كاف و ياي عربي",
            "اصلاح کاف و یای عربی"
        },
        new[]
        {
            "رمــــان",
            "رمان"
        },
        new[]
        {
            "1,2,3,...",
            "۱,۲,۳, …"
        },
    };

    Normalizer refiningNormalizer = new Normalizer(true, false, false);
    foreach (string[] pair in cases)
    {
        string source = pair[0];
        string wanted = pair[1];
        string produced = refiningNormalizer.Run(source);

        Assert.AreEqual(wanted, produced, "Failed to character refinement of '" + source + "'");
    }
}
/// <summary>
/// Handler that a concrete state must implement for the character <paramref name="c"/>.
/// NOTE(review): judging by the sibling handlers (SpecialChars, Space) this is presumably the
/// fallback for characters that are neither whitespace nor special — confirm against the code
/// that dispatches to these handlers.
/// </summary>
/// <param name="normalizer">The normalizer the handler operates on.</param>
/// <param name="c">The character being processed.</param>
public abstract void TheOthers(Normalizer normalizer, char c);
/// <summary>
/// Appends a space followed by <paramref name="c"/> to the normalizer's string builder and
/// switches the normalizer to <c>Normalizer._specialCharsState</c>.
/// </summary>
/// <param name="normalizer">The normalizer whose buffer and state are updated.</param>
/// <param name="c">The character to append.</param>
public override void SpecialChars(Normalizer normalizer, char c)
{
    var output = normalizer._stringBuilder;

    // Always separate the character from what precedes it.
    output.Append(' ');
    output.Append(c);

    normalizer._state = Normalizer._specialCharsState;
}
/// <summary>
/// Appends <paramref name="c"/> to the normalizer's string builder, preceded by a space only when
/// <c>normalizer.IsNotEmpty</c> is true. The normalizer's state is left unchanged.
/// </summary>
/// <param name="normalizer">The normalizer whose buffer is updated.</param>
/// <param name="c">The character to append.</param>
public override void SpecialChars(Normalizer normalizer, char c)
{
    bool needsSeparator = normalizer.IsNotEmpty;
    var output = normalizer._stringBuilder;

    if (needsSeparator)
    {
        output.Append(' ');
    }

    output.Append(c);
}
/// <summary>
/// Handler that a concrete state must implement for the character <paramref name="c"/>.
/// NOTE(review): the name suggests this is invoked for whitespace characters — confirm against
/// the code that dispatches to these handlers.
/// </summary>
/// <param name="normalizer">The normalizer the handler operates on.</param>
/// <param name="c">The character being processed.</param>
public abstract void Space(Normalizer normalizer, char c);
/// <summary>
/// Builds the configuration output files for the web.config named in the form and saves them to
/// the paths returned by <c>GetShowConfigFilePath</c>. When "require default configuration" is
/// checked, the default configuration of the selected release is produced as well: either in a
/// sibling folder built from the downloaded default files (when the output lives next to the
/// web.config) or as extra files next to the output.
/// Any failure is shown in a message box and appended to the log file; nothing is rethrown.
/// </summary>
private void Save()
{
    try
    {
        // Strip surrounding spaces/quotes from the typed path, then let the proxy resolve it.
        var webConfigPath = this.FilePathTextbox.Text.Trim(" \"".ToCharArray());
        webConfigPath = ConfigBuilderProxy.ConvertLinkToRealFile(webConfigPath);

        var buildWebConfigResult = this.BuildWebConfigResult.Checked;
        var normalizeOutput = this.NormalizeOutput.Checked;
        var requireDefaultConfiguration = this.RequireDefaultConfiguration.Checked;
        var selectedItem = (ComboboxItem)this.SitecoreVersionComboBox.SelectedItem;
        var releaseInfo = selectedItem.Value;

        var outputShowConfigFile = this.GetShowConfigFilePath(ShowconfigFileName);
        if (string.IsNullOrEmpty(outputShowConfigFile))
        {
            // No output path was provided, so there is nothing to do.
            return;
        }

        var outputWebConfigFile = string.Empty;
        if (buildWebConfigResult)
        {
            outputWebConfigFile = this.GetShowConfigFilePath(WebConfigResultFileName);
            Assert.IsNotNull(outputWebConfigFile, "outputWebConfigFile");
        }

        this.BuildAndSaveOutputs(webConfigPath, outputShowConfigFile, outputWebConfigFile, buildWebConfigResult, normalizeOutput);

        if (requireDefaultConfiguration)
        {
            var websiteFolder = Path.GetDirectoryName(webConfigPath);

            // Windows paths are case-insensitive, so "c:\site" and "C:\Site" are the same folder.
            if (string.Equals(websiteFolder, Path.GetDirectoryName(outputShowConfigFile), StringComparison.OrdinalIgnoreCase))
            {
                // Output sits next to the web.config: build the defaults in "<folder> <version>".
                websiteFolder += " " + releaseInfo.Version.MajorMinorUpdate;
                webConfigPath = Path.Combine(websiteFolder, Path.GetFileName(webConfigPath));
                outputShowConfigFile = Path.Combine(websiteFolder, Path.GetFileName(outputShowConfigFile));

                var filesZipPath = Path.GetTempFileName();

                // GetTempFileName creates a file; delete it and reuse the unique name for a folder.
                var extractionRoot = Path.GetTempFileName();
                File.Delete(extractionRoot);

                try
                {
                    using (var client = new WebClient())
                    {
                        client.DownloadFile(releaseInfo.DefaultDistribution.Defaults.Configs.FilesUrl, filesZipPath);
                    }

                    Directory.CreateDirectory(extractionRoot);
                    ZipFile.ExtractToDirectory(filesZipPath, extractionRoot);

                    var extractedWebsite = Directory
                        .GetDirectories(extractionRoot, "Website", SearchOption.AllDirectories)
                        .FirstOrDefault();
                    if (extractedWebsite == null)
                    {
                        throw new InvalidOperationException("The downloaded default files do not contain a Website folder.");
                    }

                    // Replace a previous copy only once the new one is ready; on the first run the
                    // folder does not exist and an unconditional delete would throw.
                    if (Directory.Exists(websiteFolder))
                    {
                        Directory.Delete(websiteFolder, true);
                    }

                    // NOTE(review): Directory.Move fails when the temp folder and the target are
                    // on different volumes — confirm whether that setup needs to be supported.
                    Directory.Move(extractedWebsite, websiteFolder);
                }
                finally
                {
                    DeleteTempArtifacts(filesZipPath, extractionRoot);
                }

                var defaultWebConfigResultFile = string.Empty;
                if (buildWebConfigResult)
                {
                    defaultWebConfigResultFile = Path.Combine(websiteFolder, "web.config.result.xml");
                }

                this.BuildAndSaveOutputs(webConfigPath, outputShowConfigFile, defaultWebConfigResultFile, buildWebConfigResult, normalizeOutput);
            }
            else
            {
                // Best effort: a failure to write the defaults must not fail the whole save,
                // but it is recorded in the log instead of being dropped silently.
                try
                {
                    var release = releaseInfo.Version.MajorMinorUpdate;
                    var defaultShowConfig = outputShowConfigFile + "." + release + ".xml";
                    var defaults = releaseInfo.DefaultDistribution.Defaults;
                    Assert.IsNotNull(defaults, $"Defaults are not available for {release}");

                    defaults.Configs.ShowConfig.Save(defaultShowConfig);

                    var normalizer = new Normalizer();
                    normalizer.Normalize(defaultShowConfig, GetNormalizedPath(defaultShowConfig));

                    if (buildWebConfigResult)
                    {
                        var defaultWebConfigResult = outputWebConfigFile + "." + release + ".xml";
                        defaults.Configs.Configuration.Save(defaultWebConfigResult);
                        normalizer.Normalize(defaultWebConfigResult, GetNormalizedPath(defaultWebConfigResult));
                    }
                }
                catch (Exception ex)
                {
                    AppendErrorToLog(ex);
                }
            }
        }

        if (this.OpenFolder.Checked && File.Exists(outputWebConfigFile))
        {
            string argument = @"/select, """ + outputWebConfigFile + @"""";
            Process.Start("explorer.exe", argument);
        }

        if (this.CloseWhenDone.Checked)
        {
            this.Close();
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show(
            "The action failed with exception. " + ex.Message + Environment.NewLine + "Find details in the ConfigBuilder.ConfigBuilder.dll.log file",
            "Sitecore ConfigBuilder");
        AppendErrorToLog(ex);
    }
}

/// <summary>
/// Builds and saves the showconfig output for <paramref name="webConfigPath"/>, plus its
/// normalized variant and the web.config-result files when the corresponding flags are set.
/// </summary>
private void BuildAndSaveOutputs(string webConfigPath, string showConfigFile, string webConfigResultFile, bool buildWebConfigResult, bool normalizeOutput)
{
    ConfigBuilderProxy.Build(webConfigPath, false, false).Save(showConfigFile);
    if (normalizeOutput)
    {
        ConfigBuilderProxy.Build(webConfigPath, false, true).Save(GetNormalizedPath(showConfigFile));
    }

    if (!buildWebConfigResult)
    {
        return;
    }

    ConfigBuilderProxy.Build(webConfigPath, true, false).Save(webConfigResultFile);
    if (normalizeOutput)
    {
        ConfigBuilderProxy.Build(webConfigPath, true, true).Save(GetNormalizedPath(webConfigResultFile));
    }
}

/// <summary>
/// Removes the temporary zip file and extraction folder; leftovers never fail the caller.
/// </summary>
private static void DeleteTempArtifacts(string tempFile, string tempFolder)
{
    try
    {
        File.Delete(tempFile); // no-op when the file is already gone
        if (Directory.Exists(tempFolder))
        {
            Directory.Delete(tempFolder, true);
        }
    }
    catch (IOException)
    {
        // Deliberately ignored: a locked temp file is not worth failing the save for.
    }
    catch (UnauthorizedAccessException)
    {
        // Deliberately ignored for the same reason.
    }
}

/// <summary>
/// Appends an error entry for <paramref name="ex"/> to the ConfigBuilder log file.
/// </summary>
private static void AppendErrorToLog(Exception ex)
{
    try
    {
        // "HH" = 24-hour clock; the previous "hh" made afternoon entries look like morning ones.
        var timestamp = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss", System.Globalization.CultureInfo.InvariantCulture);
        File.AppendAllText(
            "ConfigBuilder.ConfigBuilder.dll.log",
            timestamp + " ERROR " + ex.GetType().FullName + Environment.NewLine
            + "Message: " + ex.Message + Environment.NewLine
            + "Stack trace:" + Environment.NewLine + ex.StackTrace + Environment.NewLine);
    }
    catch (IOException)
    {
        // Deliberately ignored: failing to write the log must not throw out of an error handler.
    }
    catch (UnauthorizedAccessException)
    {
        // Deliberately ignored for the same reason.
    }
}
/// <summary>
/// Reads every corpus file, parses each sentence and tallies the multi-word subject strings that
/// contain a letter from <c>Token.AlphabetUpper</c>. Each newly seen string is printed together
/// with the running count of sentences that failed to process; the final tally is printed in
/// ascending order of frequency.
/// </summary>
public void FindProperModifiers()
{
    // Number of sentences whose processing threw.
    int failures = 0;

    Dialect dialect = Dialect.LooseyGoosey;
    dialect.InferCompoundsPrepositionsForeignText = false;

    ParserUtils parser = new ParserUtils(dialect);
    Normalizer textNormalizer = new Normalizer(dialect);
    CorpusFileReader corpus = new CorpusFileReader();
    Dictionary<string, int> tally = new Dictionary<string, int>(500);
    SentenceSplitter splitter = new SentenceSplitter(dialect);

    foreach (string fileText in corpus.NextFile())
    {
        foreach (string original in splitter.ParseIntoNonNormalizedSentences(fileText))
        {
            try
            {
                string normalized = textNormalizer.NormalizeText(original);
                string subjects = parser.ParsedSentenceFactory(normalized, original).Subjects.ToString();

                bool uninteresting =
                    !subjects.Contains(" ")          // single word
                    || subjects.Contains(@"""")      // foreign
                    || subjects.StartsWith(@"nanpa") // implicit number
                    || subjects.StartsWith(@"#");    // explicit number by punctuation
                if (uninteresting || !subjects.ContainsLetter(Token.AlphabetUpper))
                {
                    continue;
                }

                int seen;
                if (tally.TryGetValue(subjects, out seen))
                {
                    tally[subjects] = seen + 1;
                }
                else
                {
                    tally.Add(subjects, 1);
                    Console.WriteLine(failures + " : " + subjects);
                }
            }
            catch (Exception)
            {
                // Sentences that cannot be processed are only counted.
                failures++;
            }
        }
    }

    foreach (KeyValuePair<string, int> entry in tally.OrderBy(e => e.Value))
    {
        Console.WriteLine(entry.Key + " : " + entry.Value);
    }
}
/// <summary>
/// Runs <paramref name="node"/> through the Scoper, Looker, Resolver, Checker and Normalizer
/// visitors, in that order, and returns the tree produced by the last pass.
/// When <paramref name="scope"/> is a <c>BlockScope</c>, the enclosing type and method are derived
/// from the scope chain and handed to each pass.
/// </summary>
/// <param name="node">Root of the parse tree to compile.</param>
/// <param name="scope">Scope in which the tree is compiled.</param>
/// <param name="targetModule">Module (possibly an assembly) the passes are pointed at.</param>
/// <param name="errorNodes">List that receives the errors reported through the error handler.</param>
/// <returns>The tree returned by the Normalizer pass.</returns>
public override Node CompileParseTree(Node node, Scope scope, Module targetModule, ErrorNodeList errorNodes)
{
    // State shared between the passes.
    TrivialHashtable ambiguousTypeTable = new TrivialHashtable();
    TrivialHashtable labelTable = new TrivialHashtable();
    TrivialHashtable scopeTable = new TrivialHashtable();
    ErrorHandler handler = new ErrorHandler(errorNodes);
    SpecSharpCompilation compilationState = new SpecSharpCompilation();
    AssemblyNode targetAssembly = targetModule as AssemblyNode;

    // Work out the enclosing type and method when compiling inside a block scope.
    TypeNode enclosingType = null;
    Method enclosingMethod = null;
    BlockScope blockScope = scope as BlockScope;
    bool insideBlock = blockScope != null;
    if (insideBlock)
    {
        // Walk the scope chain until a scope whose base is a MethodScope is found.
        MethodScope owner = null;
        for (Class walker = blockScope; walker != null; walker = walker.BaseClass)
        {
            owner = walker.BaseClass as MethodScope;
            if (owner != null)
            {
                break;
            }
        }

        if (owner != null)
        {
            enclosingType = owner.ThisType;
            if (enclosingType == null && owner.BaseClass is TypeScope)
            {
                enclosingType = ((TypeScope)owner.BaseClass).Type;
            }

            enclosingMethod = owner.DeclaringMethod;
        }
    }

    // Pass 1: attach scopes to namespaces and types.
    scopeTable[node.UniqueKey] = scope;
    Scoper scoper = new Scoper(scopeTable);
    scoper.currentScope = scope;
    node = scoper.Visit(node);

    // Pass 2: look up names.
    Looker looker = new Looker(scope, handler, scopeTable, ambiguousTypeTable, labelTable);
    // Change by drunje (this method is called from the debugger only).
    looker.AllowPointersToManagedStructures = true;
    if (insideBlock)
    {
        looker.currentType = enclosingType;
        looker.currentMethod = enclosingMethod;
    }

    looker.currentAssembly = targetAssembly;
    looker.currentModule = targetModule;
    node = looker.Visit(node);

    // Pass 3: infer types and resolve overloads.
    TypeSystem typeSystem = new TypeSystem(handler);
    Resolver resolver = new Resolver(handler, typeSystem);
    if (insideBlock)
    {
        resolver.currentType = enclosingType;
        resolver.currentMethod = enclosingMethod;
    }

    resolver.currentAssembly = targetAssembly;
    resolver.currentModule = targetModule;
    node = resolver.Visit(node);

    // Pass 4: check for semantic errors and repair the tree so the next walk will work.
    // TODO: need to set the state of the checker for compiling Expression; STOP using this method
    // when the shift is complete.
    Checker checker = new Checker(compilationState, handler, typeSystem, scopeTable, ambiguousTypeTable, labelTable);
    if (insideBlock)
    {
        checker.currentType = enclosingType;
        checker.currentMethod = enclosingMethod;
    }

    checker.currentAssembly = targetAssembly;
    checker.currentModule = targetModule;
    node = checker.Visit(node);

    // Pass 5: reduce the tree to nodes that have predefined mappings to MD+IL.
    Normalizer normalizer = new Normalizer(typeSystem);
    if (insideBlock)
    {
        normalizer.currentType = enclosingType;
        normalizer.currentMethod = enclosingMethod;
        normalizer.WrapToBlockExpression = false;
    }

    normalizer.currentModule = targetModule;
    return normalizer.Visit(node);
}
/// <summary>
/// Extracts the name of party A (the buyer / tendering side) from the document.
/// Candidates that follow one of the leading "keyword:" labels win outright; otherwise
/// candidates are collected from tender-style and contract-style start/end phrases and the most
/// company-like one is returned.
/// </summary>
/// <returns>The chosen party A name.</returns>
public string GetJiaFang()
{
    // Highest-confidence extraction: text following a leading "keyword:" label.
    EntityProperty colonExtractor = new EntityProperty();
    colonExtractor.ExcludeContainsWordList = new string[] { "招标代理" };
    colonExtractor.LeadingColonKeyWordList = new string[]
    {
        "甲方:", "合同买方:",
        "发包人:", "发包单位:", "发包方:", "发包机构:", "发包人名称:",
        "招标人:", "招标单位:", "招标方:", "招标机构:", "招标人名称:",
        "业主:", "业主单位:", "业主方:", "业主机构:", "业主名称:",
        "采购单位:", "采购单位名称:", "采购人:", "采购人名称:", "采购方:", "采购方名称:"
    };
    colonExtractor.CandidatePreprocess =
        x => CompanyNameLogic.AfterProcessFullName(Normalizer.ClearTrailing(x)).secFullName;
    colonExtractor.MaxLength = ContractTraning.JiaFangES.MaxLength;
    colonExtractor.MaxLengthCheckPreprocess = Utility.TrimEnglish;
    colonExtractor.MinLength = 3;
    colonExtractor.Extract(this);

    // No Distinct here on purpose: the more often a candidate appears, the more credible it is.
    // Several different party A candidates may mean there is no party A at all.
    var labelled = colonExtractor.LeadingColonKeyWordCandidate;
    if (labelled.Distinct().Count() > 1)
    {
        foreach (var candidate in labelled)
        {
            Program.Logger.WriteLine("发现多个甲方:" + candidate);
        }
    }

    if (labelled.Count > 0)
    {
        return labelled[0];
    }

    var fallbackCandidates = new List<String>();

    // Tender-style phrasing.
    var phraseExtractor = new ExtractPropertyByHTML();
    var startWords = new string[] { "招标单位", "业主", "收到", "接到" };
    var endWords = new string[] { "发来", "发出", "的中标" };
    phraseExtractor.StartEndFeature = Utility.GetStartEndStringArray(startWords, endWords);
    phraseExtractor.Extract(root);
    foreach (var hit in phraseExtractor.CandidateWord)
    {
        var company = CompanyNameLogic.AfterProcessFullName(hit.Value.Trim());
        if (company.secFullName.Contains("招标代理"))
        {
            continue; // special business rule
        }

        company.secFullName = company.secFullName
            .Replace("业主", String.Empty).Trim()
            .Replace("招标单位", String.Empty).Trim();

        bool tooLong = Utility.TrimEnglish(company.secFullName).Length > ContractTraning.JiaFangES.MaxLength;
        // The real length is used here so that all-English names are excluded.
        if (tooLong || company.secFullName.Length < 3)
        {
            continue;
        }

        if (!Program.IsMultiThreadMode)
        {
            Program.Logger.WriteLine("甲方候补词(招标):[" + company.secFullName + "]");
        }

        fallbackCandidates.Add(company.secFullName);
    }

    // Contract-style phrasing.
    phraseExtractor = new ExtractPropertyByHTML();
    startWords = new string[] { "与", "与业主" };
    endWords = new string[] { "签署", "签订" };
    phraseExtractor.StartEndFeature = Utility.GetStartEndStringArray(startWords, endWords);
    phraseExtractor.Extract(root);
    foreach (var hit in phraseExtractor.CandidateWord)
    {
        var company = CompanyNameLogic.AfterProcessFullName(hit.Value.Trim());
        company.secFullName = company.secFullName.Replace("业主", String.Empty).Trim();
        if (company.secFullName.Contains("招标代理"))
        {
            continue; // special business rule
        }

        bool tooLong = Utility.TrimEnglish(company.secFullName).Length > ContractTraning.JiaFangES.MaxLength;
        // The real length is used here so that all-English names are excluded.
        if (tooLong || company.secFullName.Length < 3)
        {
            continue;
        }

        if (!Program.IsMultiThreadMode)
        {
            Program.Logger.WriteLine("甲方候补词(合同):[" + company.secFullName + "]");
        }

        fallbackCandidates.Add(company.secFullName);
    }

    return CompanyNameLogic.MostLikeCompanyName(fallbackCandidates);
}
/// <summary>
/// Creates the three components of the pipeline: an LPC feature extractor for the given sample
/// rate, an autocorrelation-based voice activity detector and a normalizer.
/// </summary>
/// <param name="sampleRate">Sample rate handed to the LPC feature extractor.</param>
public PreprocessorAndFeatureExtractor(float sampleRate)
{
    // The second argument (20) is passed through unchanged.
    // NOTE(review): presumably the LPC order / coefficient count — confirm in LpcFeaturesExtractor.
    this.extractor = new LpcFeaturesExtractor(sampleRate, 20);

    this.voiceDetector = new AutocorrellatedVoiceActivityDetector();

    this.normalizer = new Normalizer();
}
/// <summary>
/// Re-parses every compilation unit snippet of <paramref name="compilation"/> after replacing the
/// source text of the snippet for <paramref name="fname"/> with <paramref name="text"/>, rebuilds
/// the symbol table, and then checks, normalizes and analyzes the member found at
/// (<paramref name="line"/>, <paramref name="col"/>) in that snippet.
/// Errors for the edited snippet go to <paramref name="errors"/>; errors for every other snippet
/// are collected in throw-away lists.
/// </summary>
/// <param name="fname">Name used to find the snippet whose text is replaced.</param>
/// <param name="text">New source text for that snippet.</param>
/// <param name="line">Line handed to the MemberFinder; (0, 0) selects the whole snippet.</param>
/// <param name="col">Column handed to the MemberFinder.</param>
/// <param name="errors">Receives the errors reported for the edited snippet.</param>
/// <param name="compilation">Compilation whose units are re-parsed; its TargetModule is replaced.</param>
/// <param name="sink">Not used by this method.</param>
public override void ParseAndAnalyzeCompilationUnit(string fname, string text, int line, int col, ErrorNodeList errors, Compilation compilation, AuthoringSink sink) {
  if (fname == null || text == null || errors == null || compilation == null){Debug.Assert(false); return;}
  // Spec# extensions are allowed unless the options ask for compatibility mode.
  if (compilation != null && compilation.CompilerParameters is SpecSharpCompilerOptions)
    this.allowSpecSharpExtensions = !((SpecSharpCompilerOptions)compilation.CompilerParameters).Compatibility;
  CompilationUnitList compilationUnitSnippets = compilation.CompilationUnits;
  if (compilationUnitSnippets == null){Debug.Assert(false); return;}
  //Fix up the CompilationUnitSnippet corresponding to fname with the new source text
  CompilationUnitSnippet cuSnippet = this.GetCompilationUnitSnippet(compilation, fname);
  if (cuSnippet == null) return;
  Compiler compiler = new Compiler();
  compiler.CurrentCompilation = compilation;
  cuSnippet.SourceContext.Document = compiler.CreateDocument(fname, 1, new DocumentText(text));
  cuSnippet.SourceContext.EndPos = text.Length;
  //Parse all of the compilation unit snippets
  Module symbolTable = compilation.TargetModule = compiler.CreateModule(compilation.CompilerParameters, errors);
  // When the target is an assembly, assembly and module attributes are tracked separately;
  // otherwise the module's own attributes are used as module attributes.
  AttributeList assemblyAttributes = symbolTable is AssemblyNode ? symbolTable.Attributes : null;
  AttributeList moduleAttributes = symbolTable is AssemblyNode ? ((AssemblyNode)symbolTable).ModuleAttributes : symbolTable.Attributes;
  int n = compilationUnitSnippets.Count;
  for (int i = 0; i < n; i++){
    CompilationUnitSnippet compilationUnitSnippet = compilationUnitSnippets[i] as CompilationUnitSnippet;
    if (compilationUnitSnippet == null){Debug.Assert(false); continue;}
    Document doc = compilationUnitSnippet.SourceContext.Document;
    // NOTE(review): this repeats the assignment made in the declaration above.
    doc = compilationUnitSnippet.SourceContext.Document;
    if (doc == null || doc.Text == null){Debug.Assert(false); continue;}
    IParserFactory factory = compilationUnitSnippet.ParserFactory;
    if (factory == null) continue;
    // Discard the results of any previous parse.
    compilationUnitSnippet.Nodes = null;
    compilationUnitSnippet.PreprocessorDefinedSymbols = null;
    // Only the edited snippet reports into the caller's error list.
    IParser p = factory.CreateParser(doc.Name, doc.LineNumber, doc.Text, symbolTable, compilationUnitSnippet == cuSnippet ? errors : new ErrorNodeList(), compilation.CompilerParameters);
    if (p == null){Debug.Assert(false); continue;}
    if (p is ResgenCompilerStub) continue;
    Parser specSharpParser = p as Parser;
    if (specSharpParser == null)
      p.ParseCompilationUnit(compilationUnitSnippet);
    else
      specSharpParser.ParseCompilationUnit(compilationUnitSnippet, compilationUnitSnippet != cuSnippet, false);
    //TODO: this following is a good idea only if the files will not be frequently reparsed from source
    //StringSourceText stringSourceText = doc.Text.TextProvider as StringSourceText;
    //if (stringSourceText != null && stringSourceText.IsSameAsFileContents)
    //  doc.Text.TextProvider = new CollectibleSourceText(doc.Name, doc.Text.Length);
  }
  //Construct symbol table for entire project
  ErrorHandler errorHandler = new ErrorHandler(errors);
  SpecSharpCompilation ssCompilation = new SpecSharpCompilation();
  TrivialHashtable ambiguousTypes = new TrivialHashtable();
  TrivialHashtable referencedLabels = new TrivialHashtable();
  TrivialHashtable scopeFor = this.scopeFor = new TrivialHashtable();
  Scoper scoper = new Scoper(scopeFor);
  scoper.currentModule = symbolTable;
  // symLooker reports into a throw-away error list; looker reports into the real one.
  Looker symLooker = new Looker(null, new ErrorHandler(new ErrorNodeList(0)), scopeFor, ambiguousTypes, referencedLabels);
  symLooker.currentAssembly = (symLooker.currentModule = symbolTable) as AssemblyNode;
  Looker looker = new Looker(null, errorHandler, scopeFor, ambiguousTypes, referencedLabels);
  looker.currentAssembly = (looker.currentModule = symbolTable) as AssemblyNode;
  looker.VisitAttributeList(assemblyAttributes);
  bool dummyCompilation = compilation.CompilerParameters is SpecSharpCompilerOptions && ((SpecSharpCompilerOptions)compilation.CompilerParameters).DummyCompilation;
  if (dummyCompilation){
    //This happens when there is no project. In this case, semantic errors should be ignored since the references and options are unknown.
    //But proceed with the full analysis anyway so that some measure of Intellisense can still be provided.
    errorHandler.Errors = new ErrorNodeList(0);
  }
  for (int i = 0; i < n; i++){
    CompilationUnit cUnit = compilationUnitSnippets[i];
    scoper.VisitCompilationUnit(cUnit);
  }
  for (int i = 0; i < n; i++){
    CompilationUnit cUnit = compilationUnitSnippets[i];
    if (cUnit == cuSnippet)
      looker.VisitCompilationUnit(cUnit); //Uses real error message list and populate the identifier info lists
    else
      symLooker.VisitCompilationUnit(cUnit); //Errors are discarded
  }
  //Run resolver over symbol table so that custom attributes on member signatures are known and can be used
  //to error check the given file.
  TypeSystem typeSystem = new TypeSystem(errorHandler);
  Resolver resolver = new Resolver(errorHandler, typeSystem);
  resolver.currentAssembly = (resolver.currentModule = symbolTable) as AssemblyNode;
  // NOTE(review): only currentAssembly is set on symResolver; its currentModule is never assigned here.
  Resolver symResolver = new Resolver(new ErrorHandler(new ErrorNodeList(0)), typeSystem);
  symResolver.currentAssembly = resolver.currentAssembly;
  symResolver.VisitAttributeList(assemblyAttributes);
  for (int i = 0; i < n; i++) {
    CompilationUnit cUnit = compilationUnitSnippets[i];
    if (cUnit == cuSnippet)
      resolver.VisitCompilationUnit(cUnit); //Uses real error message list and populate the identifier info lists
    else
      symResolver.VisitCompilationUnit(cUnit); //Errors are discarded
  }
  // Without a project there is nothing further to check.
  if (dummyCompilation) return;
  //Now analyze the given file for errors
  Checker checker = new Checker(ssCompilation, errorHandler, typeSystem, scopeFor, ambiguousTypes, referencedLabels);
  checker.currentAssembly = (checker.currentModule = symbolTable) as AssemblyNode;
  checker.VisitAttributeList(assemblyAttributes, checker.currentAssembly);
  checker.VisitModuleAttributes(moduleAttributes);
  checker.VisitCompilationUnit(cuSnippet);
  // Narrow the remaining work to the member at (line, col); (0, 0) means the whole snippet.
  MemberFinder finder = new MemberFinder(line, col);
  finder.VisitCompilationUnit(cuSnippet);
  Node node = finder.Member;
  if (node == null){
    if (line == 0 && col == 0)
      node = cuSnippet;
    else
      return;
  }
  // NOTE(review): unlike the guarded uses above, this is an unconditional cast and would throw
  // if CompilerParameters is not a SpecSharpCompilerOptions — confirm that cannot happen here.
  SpecSharpCompilerOptions options = (SpecSharpCompilerOptions) compilation.CompilerParameters;
  if (options.IsContractAssembly) return;
  ssCompilation.RunPlugins(node, errorHandler);
  Normalizer normalizer = new Normalizer(typeSystem);
  normalizer.Visit(node);
  Analyzer analyzer = new Analyzer(typeSystem, compilation);
  analyzer.Visit(node);
  if (options.RunProgramVerifierWhileEditing)
    ssCompilation.AddProgramVerifierPlugin(typeSystem, compilation);
  ssCompilation.analyzer = analyzer; // make the analyzer available to plugins for access to method CFGs
  ssCompilation.RunPlugins(node, errorHandler);
  ssCompilation.analyzer = null;
  analyzer = null;
}
/// <summary>
/// Extracts the name of party A (the buyer / tendering side) from the document.
/// Candidates that follow one of the leading "keyword:" labels win outright. Otherwise the
/// result of <c>SearchJiaFang</c> is used — unless it merely repeats party B — preferring a
/// phrase-based candidate that ends with it; with no such result, the most company-like
/// phrase-based candidate is returned.
/// </summary>
/// <param name="YiFang">Name of party B, used to reject a result that is really party B. May be null or empty.</param>
/// <returns>The chosen party A name.</returns>
string GetJiaFang(String YiFang)
{
    // Highest-confidence extraction: text following a leading "keyword:" label.
    EntityProperty e = new EntityProperty();
    e.ExcludeContainsWordList = new string[] { "招标代理" };
    e.LeadingColonKeyWordList = new string[]
    {
        "甲方:", "合同买方:",
        "发包人:", "发包单位:", "发包方:", "发包机构:", "发包人名称:",
        "招标人:", "招标单位:", "招标方:", "招标机构:", "招标人名称:", "项目招标人:",
        "业主:", "业主单位:", "业主方:", "业主机构:", "业主名称:",
        "采购单位:", "采购单位名称:", "采购人:", "采购人名称:", "采购方:", "采购方名称:"
    };
    e.CandidatePreprocess = (x =>
    {
        x = Normalizer.ClearTrailing(x);
        return CompanyNameLogic.AfterProcessFullName(x).secFullName;
    });
    e.MaxLength = 32;
    e.MaxLengthCheckPreprocess = Utility.TrimEnglish;
    e.MinLength = 3;
    e.Extract(this);

    // No Distinct here on purpose: the more often a candidate appears, the more credible it is.
    // Several different party A candidates may mean there is no party A at all.
    if (e.LeadingColonKeyWordCandidate.Distinct().Count() > 1)
    {
        foreach (var candidate in e.LeadingColonKeyWordCandidate)
        {
            Program.Logger.WriteLine("发现多个甲方:" + candidate);
        }
    }

    if (e.LeadingColonKeyWordCandidate.Count > 0)
    {
        return e.LeadingColonKeyWordCandidate[0];
    }

    var ner = SearchJiaFang();
    var NerJia = String.Empty;
    if (!String.IsNullOrEmpty(ner))
    {
        // Expand a known short name to the full company name.
        foreach (var cn in companynamelist)
        {
            if (cn.secShortName == ner)
            {
                ner = cn.secFullName;
            }
        }

        // Accept the result unless it is just party B again. The null/empty test must guard the
        // Equals call: previously a null YiFang passed the first check and then threw a
        // NullReferenceException on YiFang.Equals.
        if (String.IsNullOrEmpty(YiFang) || !YiFang.Equals(ner))
        {
            NerJia = ner;
        }
    }

    // Tender-style phrasing.
    var Extractor = new ExtractPropertyByHTML();
    var CandidateWord = new List<String>();
    var StartArray = new string[] { "招标单位", "业主", "收到", "接到" };
    var EndArray = new string[] { "发来", "发出", "的中标" };
    Extractor.StartEndFeature = Utility.GetStartEndStringArray(StartArray, EndArray);
    Extractor.Extract(root);
    foreach (var item in Extractor.CandidateWord)
    {
        var JiaFang = CompanyNameLogic.AfterProcessFullName(item.Value.Trim());
        if (JiaFang.secFullName.Contains("招标代理"))
        {
            continue; // special business rule
        }

        JiaFang.secFullName = JiaFang.secFullName.Replace("业主", String.Empty).Trim();
        JiaFang.secFullName = JiaFang.secFullName.Replace("招标单位", String.Empty).Trim();
        if (Utility.TrimEnglish(JiaFang.secFullName).Length > 32)
        {
            continue;
        }

        if (JiaFang.secFullName.Length < 3)
        {
            continue; // the real length is used so that all-English names are excluded
        }

        if (!Program.IsMultiThreadMode)
        {
            Program.Logger.WriteLine("甲方候补词(招标):[" + JiaFang.secFullName + "]");
        }

        CandidateWord.Add(JiaFang.secFullName);
    }

    // Contract-style phrasing.
    Extractor = new ExtractPropertyByHTML();
    StartArray = new string[] { "与", "与业主" };
    EndArray = new string[] { "签署", "签订" };
    Extractor.StartEndFeature = Utility.GetStartEndStringArray(StartArray, EndArray);
    Extractor.Extract(root);
    foreach (var item in Extractor.CandidateWord)
    {
        var JiaFang = CompanyNameLogic.AfterProcessFullName(item.Value.Trim());
        JiaFang.secFullName = JiaFang.secFullName.Replace("业主", String.Empty).Trim();
        if (JiaFang.secFullName.Contains("招标代理"))
        {
            continue; // special business rule
        }

        if (Utility.TrimEnglish(JiaFang.secFullName).Length > 32)
        {
            continue;
        }

        if (JiaFang.secFullName.Length < 3)
        {
            continue; // the real length is used so that all-English names are excluded
        }

        if (!Program.IsMultiThreadMode)
        {
            Program.Logger.WriteLine("甲方候补词(合同):[" + JiaFang.secFullName + "]");
        }

        CandidateWord.Add(JiaFang.secFullName);
    }

    if (String.IsNullOrEmpty(NerJia))
    {
        return CompanyNameLogic.MostLikeCompanyName(CandidateWord);
    }

    // In principle the SearchJiaFang result is used. A phrase-based candidate that ends with it
    // may be the complete answer, though: e.g. the result is "(集团)有限公司" while the real name is
    // "XXXX(集团)有限公司".
    foreach (var c in CandidateWord)
    {
        if (c.EndsWith(NerJia))
        {
            return c;
        }
    }

    return NerJia;
}
/// <summary>
/// Creates a Formula from a string that consists of a standard infix expression composed
/// from non-negative floating-point numbers (using C#-like syntax for double/int literals),
/// variable symbols (a letter followed by zero or more letters and/or digits), left and right
/// parentheses, and the four binary operator symbols +, -, *, and /.  White space is
/// permitted between tokens, but is not required.
///
/// Examples of a valid parameter to this constructor are:
///     "2.5e9 + x5 / 17"
///     "(5 * 2) + 8"
///     "x*y-2+35/9"
///
/// Examples of invalid parameters are:
///     "_"
///     "-5.3"
///     "2 5 + 3"
///
/// If the formula is syntactically invalid, throws a FormulaFormatException with an
/// explanatory Message.
/// </summary>
/// <param name="formula">The infix expression to parse.</param>
/// <exception cref="ArgumentNullException"><paramref name="formula"/> is null.</exception>
/// <exception cref="FormulaFormatException">The formula is empty or syntactically invalid.</exception>
public Formula(String formula)
{
    if (formula == null)
    {
        throw new ArgumentNullException("formula");
    }

    tokensList = new List<string>(GetTokens(formula));

    if (tokensList.Count == 0)
    {
        throw new FormulaFormatException("The formula is empty.");
    }

    foreach (string token in tokensList)
    {
        if (!LooksLikeKnownToken(token))
        {
            throw new FormulaFormatException("There is an invalid character in the formula.");
        }
    }

    // Parentheses must balance and never close before they open.
    int left = 0;
    int right = 0;
    foreach (string token in tokensList)
    {
        if (token == "(")
        {
            left++;
        }
        else if (token == ")")
        {
            right++;
            if (right > left)
            {
                throw new FormulaFormatException("Closing parenthesis with no opening parenthesis.");
            }
        }
    }

    if (left != right)
    {
        throw new FormulaFormatException("Parentheses imbalance.");
    }

    if (IsOperator(tokensList[0]))
    {
        throw new FormulaFormatException("Formula cannot start with an operator");
    }

    if (IsOperator(tokensList[tokensList.Count - 1]))
    {
        throw new FormulaFormatException("Formula cannot end with an operator");
    }

    // Adjacency rules for every pair of neighbouring tokens.
    for (int x = 0; x < tokensList.Count - 1; x++)
    {
        string current = tokensList[x];
        string next = tokensList[x + 1];

        if (IsOperator(current) || current == "(")
        {
            // After an operator or "(" an operand or "(" must follow.
            if (IsOperator(next) || next == ")")
            {
                throw new FormulaFormatException("There is an operator or closing parenthesis immediately following an operator or opening parenthesis.");
            }
        }
        else if (!IsOperator(next) && next != ")")
        {
            // After a number, variable or ")" an operator or ")" must follow.
            throw new FormulaFormatException("There is a number, variable, or opening parenthesis immediately following a number, variable, or closing parenthesis.");
        }
    }

    // An operator is exactly one of the four symbols. The earlier test was "contains an operator
    // character and does not parse as a double", applied inconsistently, so a number with a
    // signed exponent such as "2e-5" counted as an operator in the adjacency rules and inputs
    // like "3 2e-5" or "2e-5 x" were accepted.
    bool IsOperator(string token) =>
        token == "+" || token == "-" || token == "*" || token == "/";

    // True when the token contains something from one of the five token classes
    // (parentheses, operator, variable, number).
    bool LooksLikeKnownToken(string token) =>
        Regex.IsMatch(token, @"\(")
        || Regex.IsMatch(token, @"\)")
        || Regex.IsMatch(token, @"[\+\-*/]")
        || Regex.IsMatch(token, @"[a-zA-Z][0-9a-zA-Z]*")
        || Regex.IsMatch(token, @"(?: \d+\.\d* | \d*\.\d+ | \d+ ) (?: e[\+-]?\d+)?", RegexOptions.IgnorePatternWhitespace);
}
/// <summary>
/// Creates one category (or "Repeat" copies of it) from <paramref name="categoryToken"/>, adds
/// each to <c>dataContainer.Categories</c> and recursively seeds its "SubCategories".
/// After every copy the first entry of <paramref name="numbers"/> is incremented.
/// </summary>
/// <param name="parent">Parent category, or null; its Id becomes the new category's ParentId.</param>
/// <param name="categoryToken">JSON description of the category.</param>
/// <param name="numbers">Counters passed to <c>PrepareText</c>; element 0 is advanced per copy.</param>
/// <param name="instanceTitle">Material type title used when the token does not define one.</param>
private void SeedCategory(
    Category parent,
    JToken categoryToken,
    IList<int> numbers,
    string instanceTitle = null)
{
    JToken repeatToken = categoryToken["Repeat"];
    int copies = repeatToken != null ? (int)repeatToken : 1;

    for (int copy = 0; copy < copies; copy++)
    {
        int id = dataContainer.NextCategoryId();
        string name = PrepareText((string)categoryToken["Name"], numbers);
        string materialTypeTitle = (string)categoryToken["MaterialTypeTitle"] ?? instanceTitle;

        Category category = new Category();
        category.Id = id;
        category.ParentId = parent?.Id;
        category.Name = name;
        category.NameNormalized = Normalizer.Normalize(name);
        category.Title = PrepareText((string)categoryToken["Title"], numbers);
        category.Header = PrepareText((string)categoryToken["Header"], numbers);
        category.MaterialTypeTitle = materialTypeTitle;
        category.SortNumber = id;

        // Optional settings are applied only when the token defines them.
        JToken isContainer = categoryToken["IsMaterialsContainer"];
        if (isContainer != null)
        {
            category.IsMaterialsContainer = (bool)isContainer;
        }

        JToken layoutName = categoryToken["LayoutName"];
        if (layoutName != null)
        {
            category.LayoutName = (string)layoutName;
        }

        JToken nameEditable = categoryToken["IsMaterialsNameEditable"];
        if (nameEditable != null)
        {
            category.IsMaterialsNameEditable = (bool)nameEditable;
        }

        JToken subTitleEditable = categoryToken["IsMaterialsSubTitleEditable"];
        if (subTitleEditable != null)
        {
            category.IsMaterialsSubTitleEditable = (bool)subTitleEditable;
        }

        dataContainer.Categories.Add(category);

        JToken children = categoryToken["SubCategories"];
        if (children != null)
        {
            // Children get their own counter (starting at 1) in front of the inherited ones.
            var childNumbers = new List<int> { 1 };
            childNumbers.AddRange(numbers);

            foreach (JToken child in (JArray)children)
            {
                SeedCategory(category, child, childNumbers, materialTypeTitle);
            }
        }

        numbers[0]++;
    }
}
/// <summary>
/// See if the decomposition of <paramref name="comp"/> is found in <paramref name="segment"/>
/// starting at <paramref name="segmentPos"/> (with canonical rearrangement!).
/// If so, take the remainder, and return the equivalents.
/// </summary>
/// <param name="comp">Code point whose decomposition is matched; when it has no decomposition the code point itself is used.</param>
/// <param name="segment">Text in which the decomposition is looked for.</param>
/// <param name="segmentPos">Index in <paramref name="segment"/> at which matching starts.</param>
/// <param name="buf">Working buffer; it is cleared on entry and receives the code points that are not part of the match.</param>
/// <returns>
/// <c>null</c> when the decomposition is not fully matched or the result is not canonically
/// equivalent to the segment; <c>SET_WITH_NULL_STRING</c> when it matches with no remainder;
/// otherwise the result of <c>GetEquivalents2</c> for the remainder.
/// </returns>
private ISet <string> Extract(int comp, string segment, int segmentPos, StringBuffer buf)
{
    if (PROGRESS)
    {
        Console.Out.WriteLine(" extract: " + Utility.Hex(UTF16.ValueOf(comp)) + ", " + Utility.Hex(segment.Substring(segmentPos)));
    }

    string decomp = nfcImpl.GetDecomposition(comp);
    if (decomp == null)
    {
        decomp = UTF16.ValueOf(comp);
    }

    // See if it matches the start of segment (at segmentPos)
    bool ok = false;
    int cp;
    int decompPos = 0;
    int decompCp = UTF16.CharAt(decomp, 0);
    decompPos += UTF16.GetCharCount(decompCp); // adjust position to skip first char
    //int decompClass = getClass(decompCp);
    buf.Length = 0; // initialize working buffer, shared among callees

    // Advance by the width of the code point read in the body (cp is assigned on every pass).
    for (int i = segmentPos; i < segment.Length; i += UTF16.GetCharCount(cp))
    {
        cp = UTF16.CharAt(segment, i);
        if (cp == decompCp)
        { // if equal, eat another cp from decomp
            if (PROGRESS)
            {
                Console.Out.WriteLine(" matches: " + Utility.Hex(UTF16.ValueOf(cp)));
            }
            if (decompPos == decomp.Length)
            { // done, have all decomp characters!
                buf.Append(segment.Substring(i + UTF16.GetCharCount(cp))); // add remaining segment chars
                ok = true;
                break;
            }
            decompCp = UTF16.CharAt(decomp, decompPos);
            decompPos += UTF16.GetCharCount(decompCp);
            //decompClass = getClass(decompCp);
        }
        else
        {
            if (PROGRESS)
            {
                Console.Out.WriteLine(" buffer: " + Utility.Hex(UTF16.ValueOf(cp)));
            }
            // brute force approach: keep every non-matching code point as part of the remainder
            UTF16.Append(buf, cp);
            /* TODO: optimize
             * // since we know that the classes are monotonically increasing, after zero
             * // e.g. 0 5 7 9 0 3
             * // we can do an optimization
             * // there are only a few cases that work: zero, less, same, greater
             * // if both classes are the same, we fail
             * // if the decomp class < the segment class, we fail
             *
             * segClass = getClass(cp);
             * if (decompClass <= segClass) return null;
             */
        }
    }
    if (!ok)
    {
        return(null); // we failed, characters left over
    }
    if (PROGRESS)
    {
        Console.Out.WriteLine("Matches");
    }
    if (buf.Length == 0)
    {
        return(SET_WITH_NULL_STRING); // succeed, but no remainder
    }
    string remainder = buf.ToString();

    // brute force approach
    // to check to make sure result is canonically equivalent
    /*
     * String trial = Normalizer.normalize(UTF16.valueOf(comp) + remainder, Normalizer.DECOMP, 0);
     * if (!segment.regionMatches(segmentPos, trial, 0, segment.length() - segmentPos)) return null;
     */
    // Reject the match when comp + remainder does not compare equal to the rest of the segment.
    if (0 != Normalizer.Compare(UTF16.ValueOf(comp) + remainder, segment.Substring(segmentPos), 0))
    {
        return(null);
    }

    // get the remaining combinations
    return(GetEquivalents2(remainder));
}
/// <summary>
/// Builds a <c>Normalizer</c> for this instance and <paramref name="format"/> and returns the
/// document it creates.
/// </summary>
/// <param name="format">Format argument handed to the <c>Normalizer</c> constructor.</param>
/// <returns>The result of <c>CreateNormDocument</c>.</returns>
public NormDocument Normalize(string format)
{
    return new Normalizer(this, format).CreateNormDocument();
}
/// <summary>
/// Turns a while statement into basic blocks. Inside an atomic block a coalescable loop becomes
/// a single block; otherwise a test block is created that branches to the block built from the
/// loop body when the condition holds and to the current continuation otherwise.
/// </summary>
/// <param name="While">The while statement; returned unchanged.</param>
/// <returns>The statement that was passed in.</returns>
public override Statement VisitWhile(While While)
{
    if (While == null)
    {
        return While;
    }

    if (While.Condition == null)
    {
        return While;
    }

    if (insideAtomicBlock && IsCoalescableWhileStmt(While))
    {
        BasicBlock coalesced = new BasicBlock(While, CurrentContinuation);
        CurrentContinuation = AddBlock(coalesced);
        return While;
    }

    Normalizer conditionNormalizer = new Normalizer(splicer, null, false);

    BasicBlock loopTest = new BasicBlock(null);
    loopTest.SourceContext = While.Condition.SourceContext;

    // Visit the body with the test block as its continuation, so the body loops back to the test.
    PushContinuationStack(loopTest);
    this.Visit(While.Body);
    BasicBlock loopBody = PopContinuationStack();

    loopTest.ConditionalExpression = conditionNormalizer.VisitExpression(While.Condition);
    loopTest.ConditionalTarget = loopBody;
    loopTest.UnconditionalTarget = CurrentContinuation;
    loopTest.ConditionalExpression.SourceContext = loopTest.SourceContext;

    CurrentContinuation = AddBlock(loopTest);
    return While;
}
/// <summary>
/// Gets the contract name by configuring an <c>EntityProperty</c> extractor (keyword lists,
/// dependency-parse keywords, candidate pre-processors and exclusion lists), running it
/// against this instance and evaluating the result.
/// </summary>
/// <returns>The value produced by <c>EntityProperty.EvaluateCI()</c>.</returns>
string GetContractName()
{
    var e = new EntityProperty();
    e.PropertyName = "合同名称";
    e.PropertyType = EntityProperty.enmType.NER;
    e.MaxLength = ContractTraning.ContractES.MaxLength;
    e.MinLength = ContractTraning.ContractES.MinLength;

    /* In training mode:
     * e.LeadingColonKeyWordList = ContractTraning.ContractNameLeadingDict
     *     .Where((x) => { return x.Value >= 40; })    // threshold: 40% and above
     *     .Select((x) => { return x.Key + ":"; }).ToArray();
     */
    e.LeadingColonKeyWordList = new string[] { "合同名称:" };
    e.QuotationTrailingWordList = new string[] { "协议书", "合同书", "确认书", "合同", "协议" };
    e.QuotationTrailingWordList_IsSkipBracket = true; // for now only true can be chosen

    var KeyList = new List<ExtractPropertyByDP.DPKeyWord>();
    KeyList.Add(new ExtractPropertyByDP.DPKeyWord()
    {
        StartWord = new string[] { "签署", "签订" }, // obtained through SRL training
        StartDPValue = new string[] { LTPTrainingDP.核心关系, LTPTrainingDP.定中关系, LTPTrainingDP.并列关系 },
        EndWord = new string[] { "补充协议", "合同书", "合同", "协议书", "协议", },
        EndDPValue = new string[] { LTPTrainingDP.核心关系, LTPTrainingDP.定中关系, LTPTrainingDP.并列关系, LTPTrainingDP.动宾关系, LTPTrainingDP.主谓关系 }
    });
    e.DpKeyWordList = KeyList;

    var StartArray = new string[] { "签署了", "签订了" }; // obtained through context training
    var EndArray = new string[] { "合同" };
    e.ExternalStartEndStringFeature = Utility.GetStartEndStringArray(StartArray, EndArray);
    e.ExternalStartEndStringFeatureCandidatePreprocess = (x) => { return x + "合同"; };
    e.MaxLengthCheckPreprocess = str => { return Utility.TrimEnglish(str); };

    // Highest confidence level: special pre-processor.
    e.LeadingColonKeyWordCandidatePreprocess = str =>
    {
        return Normalizer.ClearTrailing(TrimEndJianCheng(str));
    };

    e.CandidatePreprocess = str =>
    {
        var c = Normalizer.ClearTrailing(TrimEndJianCheng(str));
        var RightQMarkIdx = c.IndexOf("”");
        // For a `"XXX" contract` candidate (a right quote exists but is not the last
        // character) the leading left quote is kept; otherwise it is trimmed.
        if (RightQMarkIdx == -1 || RightQMarkIdx == c.Length - 1)
        {
            c = c.TrimStart("“".ToCharArray());
        }
        c = c.TrimStart("《".ToCharArray());
        c = c.TrimEnd("》".ToCharArray()).TrimEnd("”".ToCharArray());
        return c;
    };

    e.ExcludeContainsWordList = new string[] { "日常经营重大合同" };
    // The justification for the list below is weak.
    e.ExcludeEqualsWordList = new string[] { "合同", "重大合同", "项目合同", "终止协议", "经营合同", "特别重大合同", "相关项目合同" };
    e.Extract(this);

    // Check whether the candidates contain the labelled target (not usable on the test set).
    // Any() stops at the first match instead of counting the whole filtered sequence.
    var contractlist = TraningDataset.ContractList.Where((x) => { return x.Id == this.Id; });
    if (contractlist.Any())
    {
        var contract = contractlist.First();
        var contractname = contract.ContractName;
        if (!String.IsNullOrEmpty(contractname))
        {
            e.CheckIsCandidateContainsTarget(contractname);
        }
    }

    // Confidence
    e.Confidence = ContractTraning.ContractES.GetStardardCI();
    return e.EvaluateCI();
}
/// <summary>
/// Runs the compiler passes over a parsed compilation in a fixed order: scoping, name
/// lookup, type resolution, semantic checking, plugins, normalization, optional guarded
/// field instrumentation, analysis, plugins again and finally optimization.
/// </summary>
/// <param name="compilation">The compilation to process. The method asserts and returns when it, its compilation units or its target module is null.</param>
/// <param name="errorNodes">List that the created <c>ErrorHandler</c> is constructed over.</param>
public override void CompileParseTree(Compilation compilation, ErrorNodeList errorNodes)
{
    if (compilation == null || compilation.CompilationUnits == null || compilation.TargetModule == null)
    {
        Debug.Assert(false);
        return;
    }
    if (compilation.CompilationUnits.Count == 0)
    {
        return;
    }

    TrivialHashtable ambiguousTypes = new TrivialHashtable();
    TrivialHashtable referencedLabels = new TrivialHashtable();
    TrivialHashtable scopeFor = new TrivialHashtable(64);
    ErrorHandler errorHandler = new ErrorHandler(errorNodes);
    SpecSharpCompilation ssCompilation = new SpecSharpCompilation();
    // NOTE(review): options is null-checked before some uses below but dereferenced
    // without a check in others; confirm whether it can actually be null here.
    SpecSharpCompilerOptions options = (SpecSharpCompilerOptions)compilation.CompilerParameters;

    // Attach scopes to namespaces and types so that forward references to base types
    // can be looked up in the appropriate namespace scope.
    new Scoper(scopeFor).VisitCompilation(compilation);

    AssemblyNode targetAssembly = compilation.TargetModule as AssemblyNode;
    if (options.NoStandardLibrary && targetAssembly != null)
    {
        if (compilation.TargetModule.IsValidTypeName(StandardIds.System, StandardIds.CapitalObject))
        {
            SystemAssemblyLocation.ParsedAssembly = targetAssembly;
            // So that mscorlib can have contracts but no reference to another assembly.
            SystemCompilerRuntimeAssemblyLocation.ParsedAssembly = targetAssembly;
        }
        else if (compilation.TargetModule.IsValidTypeName(Identifier.For("System.Compiler"), Identifier.For("ComposerAttribute")))
        {
            SystemCompilerRuntimeAssemblyLocation.ParsedAssembly = targetAssembly;
        }
        else if (compilation.TargetModule.IsValidTypeName(Identifier.For("Microsoft.SpecSharp"), Identifier.For("dummy")))
        {
            RuntimeAssemblyLocation.ParsedAssembly = targetAssembly;
        }
    }

    object ObjectType = SystemTypes.Object;
    if (ObjectType == null)
    {
        return; // system types did not initialize
    }

    // Walk IR looking up names.
    Looker looker = new Looker(compilation.GlobalScope, errorHandler, scopeFor, ambiguousTypes, referencedLabels);
    if (options != null && options.EmitSourceContextsOnly)
    {
        looker.DontInjectDefaultConstructors = true;
    }
    // Change by drunje: pointers to managed structures are controlled by a compiler option.
    looker.AllowPointersToManagedStructures = options.AllowPointersToManagedStructures;
    looker.VisitCompilation(compilation);
    looker = null;

    if (options != null && options.EmitSourceContextsOnly)
    {
        return; // stop after looker to have resolved types
    }

    // Walk IR inferring types and resolving overloads.
    TypeSystem typeSystem = new TypeSystem(errorHandler);
    new Resolver(errorHandler, typeSystem).VisitCompilation(compilation);

    // Walk IR checking for semantic errors and repairing it so that the next walk will work.
    new Checker(ssCompilation, errorHandler, typeSystem, scopeFor, ambiguousTypes, referencedLabels)
        .VisitCompilation(compilation);
    scopeFor = null;
    ambiguousTypes = null;
    referencedLabels = null;

    if (!options.IsContractAssembly)
    {
        if (options.RunProgramVerifier)
        {
            ssCompilation.AddProgramVerifierPlugin(typeSystem, compilation);
        }
        // Allow third party extensions to analyze AST IR for further errors.
        ssCompilation.RunPlugins(compilation, errorHandler);
    }

    // Walk IR reducing it to nodes that have predefined mappings to MD+IL.
    new Normalizer(typeSystem).VisitCompilation(compilation);

    if (options.IsContractAssembly)
    {
        return;
    }

    // Walk normalized IR instrumenting accesses of fields of guarded classes with checks.
    // This happens only when the first compilation unit defines "GuardedFieldAccessChecks"
    // and no errors have been reported.
    CompilationUnit cu = compilation.CompilationUnits[0];
    if (cu != null
        && cu.PreprocessorDefinedSymbols != null
        && cu.PreprocessorDefinedSymbols.ContainsKey("GuardedFieldAccessChecks")
        && errorNodes.Count == 0)
    {
        new GuardedFieldAccessInstrumenter().VisitCompilation(compilation);
    }

    // Walk normalized IR doing code analysis.
    Analyzer analyzer = new Analyzer(typeSystem, compilation);
    analyzer.Visit(compilation);

    // Allow third party extensions to analyze normalized IR for further errors.
    // The analyzer is exposed to the plugins (for access to method CFGs) only for this call.
    ssCompilation.analyzer = analyzer;
    ssCompilation.RunPlugins(compilation, errorHandler);
    ssCompilation.analyzer = null;
    ssCompilation = null;
    analyzer = null;
    errorHandler = null;

    // Walk IR to optimize code further after analyses were performed,
    // e.g. to remove debug only code.
    new Optimizer().Visit(compilation);
}
/// <summary>
/// Sets up a Gaussian-Bernoulli deep belief network and the pieces of a data pipeline
/// (data file -> text reader -> ? -> normalizer -> unsupervised learning).
/// </summary>
/// <remarks>
/// NOTE(review): most objects created here are never used afterwards, and the transform
/// block only counts samples - nothing is pushed into the returned observable. This looks
/// like work in progress; confirm before relying on it.
/// </remarks>
public void DBN()
{
    var dbn = DeepBeliefNetwork.CreateGaussianBernoulli(1024, 10);
    new GaussianWeights(dbn).Randomize();
    dbn.UpdateVisibleWeights();

    var dblNode = new UnsupervisedLearning(new DeepBeliefNetworkLearning(dbn));
    var dataNorm = new Normalizer();
    var fromFile = new TextReader();
    var dataFile = new System.IO.FileInfo(@"C:\Users\Dan\Documents\Visual Studio 2015\Projects\AccordNETSamples\framework-master\Samples\Neuro\Deep Learning\Resources\optdigits-tra.txt");

    // POST DataFile -> TextReader -> ? -> Normalizer -> UnsupervisedLearning
    var dataProc = new TransformBlock<Instance<string>, IObservable<List<Instance<double>>>>(inst =>
    {
        var reader = new System.IO.StringReader(inst.Data);
        var samples = new Subject<List<Instance<double>>>();
        var epochSamples = new List<MachineLearningData<double[], double>>();

        // One sample is 32 rows of 32 chars + \n, followed by a label line.
        var buffer = new char[(32 + 1) * 32];
        var count = 0;

        for (;;)
        {
            var read = reader.ReadBlock(buffer, 0, buffer.Length);
            var label = reader.ReadLine();

            // Stop on a short block or a missing label line.
            var complete = read >= buffer.Length && label != null;
            if (!complete)
            {
                break;
            }

            var currMLS = new MachineLearningData<double[], double>();
            currMLS.Class = Convert.ToInt32(label);
            count++;
        }

        return samples.AsObservable();
    });
}
/// <summary>
/// Replaces the <c>_normalizer</c> member with a newly constructed default <c>Normalizer</c>.
/// </summary>
public void TestInitialize()
{
    _normalizer = new Normalizer();
}
/// <summary>
/// Builds the expression that constructs the context attribute found in
/// <paramref name="attrs"/>, based on the "ContextAttributeConstructor" template.
/// </summary>
/// <param name="attrs">Attribute list that is searched with <c>GetContextAttribute</c>.</param>
/// <returns>
/// A null literal of type <c>SystemTypes.Object</c> when no context attribute is found;
/// otherwise the template construct with the attribute name substituted and the
/// normalized attribute expressions as operands.
/// </returns>
internal Expression ContextAttributeConstructor(AttributeList attrs)
{
    AttributeNode contextAttr = this.GetContextAttribute(attrs);
    if (contextAttr == null)
    {
        return new Literal(null, SystemTypes.Object);
    }

    // Work on a duplicate of the attribute.
    AttributeNode attrCopy = new Duplicator(null, null).VisitAttributeNode(contextAttr);

    Construct construct = (Construct)Templates.GetExpressionTemplate("ContextAttributeConstructor");
    Replacer.Replace(construct, "_AttributeName", attrCopy.Type.Name);

    construct.Operands = new Normalizer(false).VisitExpressionList(attrCopy.Expressions);
    return construct;
}
/// <summary>
/// Performs an implicit coercion with this object's <c>ErrorHandler</c> temporarily set to
/// null, and normalizes the result when it is a <c>CoerceTuple</c>, <c>BlockExpression</c>
/// or <c>ConstructTuple</c>.
/// </summary>
/// <param name="source">Expression to coerce.</param>
/// <param name="targetType">Type to coerce to.</param>
/// <param name="typeViewer">Type viewer forwarded to <c>ImplicitCoercion</c>.</param>
/// <returns>The coerced (and possibly normalized) expression.</returns>
public Expression ImplicitCoercionAdapter(Expression source, TypeNode targetType, TypeViewer typeViewer)
{
    ErrorHandler previousHandler = this.ErrorHandler;
    this.ErrorHandler = null;
    try
    {
        Expression coerced = this.ImplicitCoercion(source, targetType, typeViewer);

        bool needsNormalizing = coerced is CoerceTuple || coerced is BlockExpression || coerced is ConstructTuple;
        if (!needsNormalizing)
        {
            return coerced;
        }

        return new Normalizer(this).VisitExpression(coerced);
    }
    finally
    {
        // Always put the original error handler back, even when the coercion throws.
        this.ErrorHandler = previousHandler;
    }
}
/// <summary>
/// Emits the method generated for one basic block: instantiates the "BlockMethod"
/// template, adds bookkeeping assignments, trace calls for the block's attributes, the
/// block's own statement and finally the transfer of control to the next block(s) or out
/// of the method.
/// </summary>
/// <param name="zMethod">Method the block belongs to; its source context is used for an implicit return.</param>
/// <param name="methodClass">Class that receives the generated block method.</param>
/// <param name="block">The basic block to generate code for.</param>
/// <param name="nonTrivialScopes">Forwarded to <c>AddScopeCleanupCalls</c>.</param>
private void AddBlockMethod(ZMethod zMethod, Class methodClass, BasicBlock block, List<Scope> nonTrivialScopes)
{
    Method blockMethod = (Method)Templates.GetTypeTemplateByName("BlockMethod").Members[0];
    blockMethod.Name = new Identifier(block.Name);
    methodClass.Members.Add(blockMethod);
    blockMethod.DeclaringType = methodClass;

    // Generate the appropriate closing statements for the block. Indicate if the
    // block terminates an atomic region and establish the transfer of control to
    // the next block(s) or out of the method.
    bool endsTransition = ZingCompilerOptions.IsPreemtive && !block.MiddleOfTransition && !block.IsReturn;
    if (endsTransition || block.Yields)
    {
        // p.MiddleOfTransition = false;
        QualifiedIdentifier transitionFlag =
            new QualifiedIdentifier(Identifier.For("p"), Identifier.For("MiddleOfTransition"));
        AssignmentStatement clearFlag =
            new AssignmentStatement(transitionFlag, new Literal(false, SystemTypes.Boolean));
        blockMethod.Body.Statements.Add(new ExpressionStatement(new AssignmentExpression(clearFlag)));
    }

    // p.AtomicityLevel = this.SavedAtomicityLevel + X;
    QualifiedIdentifier levelTarget =
        new QualifiedIdentifier(Identifier.For("p"), Identifier.For("AtomicityLevel"));
    BinaryExpression levelValue = new BinaryExpression(
        new QualifiedIdentifier(new This(), Identifier.For("SavedAtomicityLevel")),
        new Literal(block.RelativeAtomicLevel, SystemTypes.Int32),
        NodeType.Add);
    AssignmentStatement setLevel = new AssignmentStatement(levelTarget, levelValue);
    blockMethod.Body.Statements.Add(new ExpressionStatement(new AssignmentExpression(setLevel)));

#if false
    //
    // The following code was added for summarization, but isn't quite right. It
    // updates the nextBlock too early for some blocks. -- Tony
    //
    // when generating summaries of type MaxCall, we need to
    // know the value of nextBlock before we invoke p.Call().
    // the first of the two basic blocks of a Zing method call
    // is guaranteed to fall through, so we only need to lift
    // the assignment of nextBlock for fall-through blocks.
    if (block.ConditionalTarget == null && block.UnconditionalTarget != null)
    {
        stmt = Templates.GetStatementTemplate("UnconditionalBlockTransfer");
        Replacer.Replace(stmt, "_UnconditionalBlock", new Identifier(block.UnconditionalTarget.Name));
        blockMethod.Body.Statements.Add(stmt);
    }
#endif

    // One trace call per non-null attribute attached to the block.
    if (block.Attributes != null)
    {
        Duplicator duplicator = new Duplicator(null, null);
        int attributeCount = block.Attributes.Count;
        for (int index = 0; index < attributeCount; index++)
        {
            if (block.Attributes[index] == null)
            {
                continue;
            }

            AttributeNode dupAttr = duplicator.VisitAttributeNode(block.Attributes[index]);
            Normalizer attrNormalizer = new Normalizer(false);
            ExpressionList attrParams = attrNormalizer.VisitExpressionList(dupAttr.Expressions);

            // application.Trace(_context, _contextAttr, new Z.Attributes._attrName(...) );
            QualifiedIdentifier traceMethod =
                new QualifiedIdentifier(Identifier.For("application"), Identifier.For("Trace"));
            ExpressionList traceArgs = new ExpressionList(
                SourceContextConstructor(dupAttr.SourceContext),
                new Literal(null, SystemTypes.Object),
                new Construct(
                    new MemberBinding(
                        null,
                        new TypeExpression(
                            new QualifiedIdentifier(
                                new QualifiedIdentifier(Identifier.For("Z"), Identifier.For("Attributes")),
                                dupAttr.Type.Name
                            )
                        )
                    ),
                    attrParams
                )
            );
            ExpressionStatement traceStmt = new ExpressionStatement(new MethodCall(traceMethod, traceArgs));
            blockMethod.Body.Statements.Add(traceStmt);
        }
    }

    // The block's own statement, normalized unless the block opted out.
    if (block.Statement != null)
    {
        if (block.SkipNormalizer)
        {
            blockMethod.Body.Statements.Add(block.Statement);
        }
        else
        {
            // Do statement-level code-gen pass on the block's statement.
            Normalizer stmtNormalizer = new Normalizer(this, block.Attributes, block.SecondOfTwo);
            blockMethod.Body.Statements.Add((Statement)stmtNormalizer.Visit(block.Statement));
        }
    }

    if (block.ConditionalTarget != null && block.ConditionalExpression != null)
    {
        // if (_conditionalExpression)
        //     nextBlock = Blocks._conditionalTarget;
        // else
        //     nextBlock = Blocks._unconditionalTarget;
        Block trueBlock = new Block(new StatementList(
            new ExpressionStatement(
                new AssignmentExpression(
                    new AssignmentStatement(
                        Identifier.For("nextBlock"),
                        new QualifiedIdentifier(
                            Identifier.For("Blocks"),
                            Identifier.For(block.ConditionalTarget.Name)
                        )
                    )
                )
            )
        ));
        Block falseBlock = new Block(new StatementList(
            new ExpressionStatement(
                new AssignmentExpression(
                    new AssignmentStatement(
                        Identifier.For("nextBlock"),
                        new QualifiedIdentifier(
                            Identifier.For("Blocks"),
                            Identifier.For(block.UnconditionalTarget.Name)
                        )
                    )
                )
            )
        ));
        blockMethod.Body.Statements.Add(new If(block.ConditionalExpression, trueBlock, falseBlock));

        AddScopeCleanupCalls(trueBlock.Statements, block, block.ConditionalTarget, nonTrivialScopes);
        AddScopeCleanupCalls(falseBlock.Statements, block, block.UnconditionalTarget, nonTrivialScopes);
    }
    else if (block.UnconditionalTarget != null)
    {
        // nextBlock = Blocks.X;
        AssignmentStatement setNextBlock = new AssignmentStatement(
            Identifier.For("nextBlock"),
            new QualifiedIdentifier(Identifier.For("Blocks"), Identifier.For(block.UnconditionalTarget.Name)));
        blockMethod.Body.Statements.Add(new ExpressionStatement(new AssignmentExpression(setNextBlock)));

        AddScopeCleanupCalls(blockMethod.Body.Statements, block, block.UnconditionalTarget, nonTrivialScopes);
    }
    else if (block.IsReturn)
    {
        Debug.Assert(block.UnconditionalTarget == null);

        Statement returnCall = Templates.GetStatementTemplate("ReturnBlockTransfer");

        SourceContext context;
        Return ret = block.Statement as Return;
        if (ret == null)
        {
            // If not a return stmt, the context is the closing brace of the method.
            context = zMethod.SourceContext;
            context.StartPos = context.EndPos - 1;
        }
        else
        {
            context = ret.SourceContext;
        }

        Replacer.Replace(returnCall, "_context", SourceContextConstructor(context));
        Replacer.Replace(returnCall, "_contextAttr", ContextAttributeConstructor(block.Attributes));
        blockMethod.Body.Statements.Add(returnCall);

        // StateImpl.IsReturn = true;
        QualifiedIdentifier isReturnFlag =
            new QualifiedIdentifier(Identifier.For("StateImpl"), Identifier.For("IsReturn"));
        AssignmentStatement markReturn =
            new AssignmentStatement(isReturnFlag, new Literal(true, SystemTypes.Boolean));
        blockMethod.Body.Statements.Add(new ExpressionStatement(new AssignmentExpression(markReturn)));
    }
}
/// <summary>
/// Adds one case to the switch in the generated "IsAtomicEntryBlock" method for every
/// basic block that is an atomic entry; each case is built from the
/// "RunnableSwitchSelect" template with a literal <c>true</c> as its expression.
/// </summary>
/// <param name="methodClass">Class whose members contain the "IsAtomicEntryBlock" method.</param>
/// <param name="basicBlocks">Blocks to inspect.</param>
private void PatchIsAtomicEntryMethod(Class methodClass, List<BasicBlock> basicBlocks)
{
    Method atomicEntryMethod = (Method)Templates.GetMemberByName(methodClass.Members, "IsAtomicEntryBlock");

    // The template method is expected to start with the switch that gets patched.
    Debug.Assert(atomicEntryMethod.Body.Statements[0] is System.Compiler.Switch);
    System.Compiler.Switch entrySwitch = (System.Compiler.Switch)atomicEntryMethod.Body.Statements[0];

    // NOTE(review): this normalizer is never used below; it is still constructed here
    // in case the constructor has side effects - confirm and remove if it does not.
    Normalizer normalizer = new Normalizer(this, null, false);

    foreach (BasicBlock block in basicBlocks)
    {
        if (!block.IsAtomicEntry)
        {
            continue;
        }

        Debug.Assert(block.RelativeAtomicLevel == 1);

        Expression alwaysTrue = new Literal(true, SystemTypes.Boolean);
        SwitchCase entryCase =
            ((System.Compiler.Switch)Templates.GetStatementTemplate("RunnableSwitchSelect")).Cases[0];
        Replacer.Replace(entryCase, "_BlockName", new Identifier(block.Name));
        Replacer.Replace(entryCase, "_expr", alwaysTrue);
        entrySwitch.Cases.Add(entryCase);
    }
}
/// <summary>
/// Adds cases to the switch in the generated "GetRunnableJoinStatements" method. For
/// every block that holds a select statement, the case expression ORs together one
/// "JoinStatementRunnableBit" per join statement that has a runnable predicate; blocks
/// that flow atomically into such a block get a "RelatedSwitchSelect" case as well.
/// </summary>
/// <param name="methodClass">Class whose members contain the "GetRunnableJoinStatements" method.</param>
/// <param name="basicBlocks">All basic blocks of the method being generated.</param>
private void PatchRunnableMethod(Class methodClass, List<BasicBlock> basicBlocks)
{
    Method runnableJSMethod = (Method)Templates.GetMemberByName(methodClass.Members, "GetRunnableJoinStatements");

    // The template method is expected to start with the switch that gets patched.
    Debug.Assert(runnableJSMethod.Body.Statements[0] is System.Compiler.Switch);
    System.Compiler.Switch switchStmt = (System.Compiler.Switch)runnableJSMethod.Body.Statements[0];

    Normalizer normalizer = new Normalizer(this, null, false);

    foreach (BasicBlock block in basicBlocks)
    {
        SwitchCase newCase;

        if (block.selectStmt != null)
        {
            Expression runnableExpr = null;

            for (int i = 0, n = block.selectStmt.joinStatementList.Length; i < n; i++)
            {
                Expression jsRunnable = normalizer.GetRunnablePredicate(block.selectStmt.joinStatementList[i]);
                if (jsRunnable == null)
                    continue;

                Expression jsRunnableBit = Templates.GetExpressionTemplate("JoinStatementRunnableBit");
                Replacer.Replace(jsRunnableBit, "_jsRunnableBoolExpr", jsRunnable);

                // The mask literal is a UInt64, so the shift must be done in 64 bits.
                // The previous (ulong)(1 << i) shifted a 32-bit int: i == 31 sign-extended
                // into the upper 32 bits and i >= 32 wrapped around to a low bit.
                Replacer.Replace(jsRunnableBit, "_jsBitMask", (Expression)new Literal(1UL << i, SystemTypes.UInt64));

                if (runnableExpr == null)
                    runnableExpr = jsRunnableBit;
                else
                    runnableExpr = new BinaryExpression(runnableExpr, jsRunnableBit, NodeType.Or, block.selectStmt.SourceContext);
            }

            if (runnableExpr != null)
            {
                newCase = ((System.Compiler.Switch)Templates.GetStatementTemplate("RunnableSwitchSelect")).Cases[0];
                Replacer.Replace(newCase, "_BlockName", new Identifier(block.Name));
                Replacer.Replace(newCase, "_expr", runnableExpr);
                switchStmt.Cases.Add(newCase);
            }

            //
            // Now check for blocks that flow atomically into this one and add cases for them
            // as well. This can happen when the target of a "goto" is a label preceding a
            // select statement.
            //
            foreach (BasicBlock previousBlock in basicBlocks)
            {
                if (previousBlock.UnconditionalTarget == block && previousBlock.ConditionalTarget == null &&
                    previousBlock.Statement == null && previousBlock.MiddleOfTransition)
                {
                    newCase = ((System.Compiler.Switch)Templates.GetStatementTemplate("RelatedSwitchSelect")).Cases[0];
                    Replacer.Replace(newCase, "_BlockName", new Identifier(previousBlock.Name));
                    Replacer.Replace(newCase, "_TargetName", new Identifier(block.Name));
                    switchStmt.Cases.Add(newCase);
                }
            }
        }
    }
}
/// <summary>
/// Override that deliberately does nothing: the body is empty, so neither the
/// normalizer nor the character is touched. Presumably invoked when the current input
/// character is a space - confirm against the caller.
/// </summary>
/// <param name="normalizer">Normalizer passed by the caller; unused here.</param>
/// <param name="c">Character passed by the caller; unused here.</param>
public override void Space(Normalizer normalizer, char c) { }
/// <summary>
/// Moves the given global fields into the generated "GlobalVars" class. For each global
/// it adds a renamed public instance field, a static integer id field, get/set switch
/// cases, an accessor property, an initialization statement in the application
/// constructor (when the field has an initializer) and the matching writer, copier and
/// traverser statements.
/// </summary>
/// <param name="globals">Members to process; every element is cast to <c>Field</c>.</param>
private void ProcessGlobals(MemberList globals)
{
    // Locate the "Globals" struct so we can add fields to it.
    Class globalsClass = (Class)Templates.GetMemberByName(appClass.Members, "GlobalVars");

    // Locate the "initialization" constructor so we can add initialization
    // statements to it.
    Debug.Assert(appClass.Members[1].Name.Name == ".ctor");
    Method initCtor = (Method)appClass.Members[1];

    // Locate the methods that get one statement per global.
    Method writer = (Method)Templates.GetMemberByName(globalsClass.Members, "WriteString");
    Method copier = (Method)Templates.GetMemberByName(globalsClass.Members, "CopyContents");
    Method traverser = (Method)Templates.GetMemberByName(globalsClass.Members, "TraverseFields");

    Normalizer normalizer = new Normalizer(false);

    // GetValue / SetValue each receive a switch with one case per global.
    Method getValue = (Method)Templates.GetMemberByName(globalsClass.Members, "GetValue");
    Method setValue = (Method)Templates.GetMemberByName(globalsClass.Members, "SetValue");

    System.Compiler.Switch switchGetValue =
        (System.Compiler.Switch)Templates.GetStatementTemplate("GetFieldInfoSwitch");
    getValue.Body.Statements.Add(switchGetValue);

    System.Compiler.Switch switchSetValue =
        (System.Compiler.Switch)Templates.GetStatementTemplate("SetFieldInfoSwitch");
    setValue.Body.Statements.Add(switchSetValue);

    int globalCount = globals.Count;
    for (int i = 0; i < globalCount; i++)
    {
        // Make a shallow copy of the field since we're going to tinker with it.
        Field f = (Field)((Field)globals[i]).Clone();
        string name = f.DeclaringType.Name.Name + "_" + f.Name.Name;
        TypeNode zingType = f.Type;

        if (GetTypeClassification(f.Type) == TypeClassification.Heap)
        {
            f.Type = this.ZingPtrType;
        }
        else if (!IsPredefinedType(f.Type))
        {
            f.Type = new TypeExpression(
                new QualifiedIdentifier(new Identifier("Application"), zingType.Name),
                f.Type.SourceContext);
        }

        // Mangle the name to guarantee uniqueness across the globals.
        f.Name = new Identifier("priv_" + name);
        f.Flags &= ~FieldFlags.Static;

        // Sriram: the field is made public so that the field lookup below works; with an
        // internal field the lookup came back null (some access permission problem).
        // Need to check with Tony whether this is the best solution. Tony's original code:
        //     f.Flags &= (FieldFlags)(~TypeFlags.VisibilityMask);
        //     f.Flags |= FieldFlags.Assembly;
        f.Flags |= FieldFlags.Public;

        // The id field holds the index of the global. (A commented-out earlier version
        // initialized it from a "GetFieldInfo" template with a System.Reflection.FieldInfo type.)
        Identifier idFieldName = new Identifier("id_" + name);
        Field idf = new Field(globalsClass, null, FieldFlags.Public | FieldFlags.Static,
            idFieldName, SystemTypes.Int32, null);
        idf.Initializer = new Literal(i, SystemTypes.Int32);

        SwitchCase getCase =
            ((System.Compiler.Switch)Templates.GetStatementTemplate("GetFieldInfoCase")).Cases[0];
        Replacer.Replace(getCase, "_fieldId", new Literal(i, SystemTypes.Int32));
        Replacer.Replace(getCase, "_fieldName", new Identifier(f.Name.Name));
        switchGetValue.Cases.Add(getCase);

        SwitchCase setCase =
            ((System.Compiler.Switch)Templates.GetStatementTemplate("SetFieldInfoCase")).Cases[0];
        Replacer.Replace(setCase, "_fieldId", new Literal(i, SystemTypes.Int32));
        Replacer.Replace(setCase, "_fieldName", new Identifier(f.Name.Name));

        TypeExpression typeExpr = f.Type as TypeExpression;
        Expression fieldTypeExpr;
        if (typeExpr != null)
        {
            fieldTypeExpr = typeExpr.Expression;
        }
        else
        {
            fieldTypeExpr = new Identifier(f.Type.Name.Name);
        }
        Replacer.Replace(setCase, "_fieldType", fieldTypeExpr);
        switchSetValue.Cases.Add(setCase);

        // The last argument to the call below is a don't care.
        Property accessor = GetAccessorProperty("globalAccessor", f.Type,
            new Identifier(name), f.Name, idFieldName, f.Name);

        globalsClass.Members.Add(f);
        globalsClass.Members.Add(idf);
        globalsClass.Members.Add(accessor);
        f.DeclaringType = globalsClass;
        idf.DeclaringType = globalsClass;
        accessor.DeclaringType = globalsClass;

        if (accessor.Getter != null)
        {
            globalsClass.Members.Add(accessor.Getter);
            accessor.Getter.DeclaringType = globalsClass;
        }
        if (accessor.Setter != null)
        {
            globalsClass.Members.Add(accessor.Setter);
            accessor.Setter.DeclaringType = globalsClass;
        }

        if (zingType is Struct && !zingType.IsPrimitive && f.Type != SystemTypes.Decimal)
        {
            collectStructAccessors(true, (Struct)zingType, f.Name, name, globalsClass);
        }

        // Move the initializer into the application constructor.
        if (f.Initializer != null)
        {
            Statement stmt = Templates.GetStatementTemplate("InitializeGlobal");
            Replacer.Replace(stmt, "_FieldName", f.Name);
            Replacer.Replace(stmt, "_FieldInitializer", normalizer.VisitFieldInitializer(f.Initializer));
            initCtor.Body.Statements.Add(stmt);
            f.Initializer = null;
        }

        writer.Body.Statements.Add(GetWriterStatement(null, zingType, f.Name));
        copier.Body.Statements.Add(GetCopyStatement(f.Name));
        traverser.Body.Statements.Add(GetTraverserStatement(null, zingType, f.Name));
        // (A separate reference traverser for heap-typed fields is commented out in the original.)
    }
}
/// <summary>
/// Appends a single space followed by <paramref name="c"/> to the normalizer's string
/// builder and switches the normalizer to <c>Normalizer._theOthersState</c>.
/// </summary>
/// <param name="normalizer">Normalizer whose output and state are updated.</param>
/// <param name="c">Character to append after the space.</param>
public override void TheOthers(Normalizer normalizer, char c)
{
    // Emit the separator first, then the character itself.
    normalizer._stringBuilder.Append(' ');
    normalizer._stringBuilder.Append(c);

    normalizer._state = Normalizer._theOthersState;
}
/// <summary>
/// Generates the emitted class for a Zing class from the "Class" template: copies the
/// interface list, transfers every typed non-static field (with id field, get/set switch
/// cases, accessor property and writer/traverser/clone statements), generates a nested
/// class per method and installs the result. Nothing is generated for native ZOM classes.
/// </summary>
/// <param name="c">The class to generate code for.</param>
private void GenerateClass(Class c)
{
    // Added by Jiri Adamek: do not generate any code for native ZOM classes (they were
    // manually written and their code is placed in another assembly).
    if (c is NativeZOM)
    {
        return;
    }

    TypeNode newClass = Templates.GetTypeTemplateByName("Class");

    if (c.Interfaces != null)
    {
        int interfaceCount = c.Interfaces.Count;
        for (int k = 0; k < interfaceCount; k++)
        {
            string iname = c.Interfaces[k].Name.Name;
            QualifiedIdentifier createMethodsId =
                new QualifiedIdentifier(new Identifier(iname), new Identifier("CreateMethods"));
            newClass.Interfaces.Add(new InterfaceExpression(createMethodsId));
        }
    }

    Method writer = (Method)Templates.GetMemberByName(newClass.Members, "WriteString");
    Method traverser = (Method)Templates.GetMemberByName(newClass.Members, "TraverseFields");

    // Replace all references to the class name.
    Replacer.Replace(newClass, newClass.Name, c.Name);
    SetTypeId(newClass);

    // Collects the per-field clone statements spliced into Clone() at the end.
    Block cloneFields = new Block();
    cloneFields.Statements = new StatementList();

    // GetValue / SetValue each receive a switch with one case per field.
    Method getValue = (Method)Templates.GetMemberByName(newClass.Members, "GetValue");
    Method setValue = (Method)Templates.GetMemberByName(newClass.Members, "SetValue");

    System.Compiler.Switch switchGetValue =
        (System.Compiler.Switch)Templates.GetStatementTemplate("GetFieldInfoSwitch");
    getValue.Body.Statements.Add(switchGetValue);

    System.Compiler.Switch switchSetValue =
        (System.Compiler.Switch)Templates.GetStatementTemplate("SetFieldInfoSwitch");
    setValue.Body.Statements.Add(switchSetValue);

    // Transfer non-static fields and methods to the emitted class.
    int memberCount = c.Members.Count;
    for (int i = 0; i < memberCount; i++)
    {
        Field f = c.Members[i] as Field;
        if (f != null && f.Type != null && !f.IsStatic)
        {
            // Clone the field since we might tinker with it.
            Field newField = (Field)f.Clone();

            // Change the name of the field, so that the accessor can be named appropriately.
            newField.Name = new Identifier("priv_" + f.Name.Name);

            if (GetTypeClassification(f.Type) == TypeClassification.Heap)
            {
                newField.Type = this.ZingPtrType;
            }
            else if (!IsPredefinedType(f.Type))
            {
                newField.Type = new TypeExpression(
                    new QualifiedIdentifier(new Identifier("Application"), f.Type.Name),
                    f.Type.SourceContext);
            }

            if (newField.Initializer != null)
            {
                // Move the initialization to our constructor.
                Expression initializer = newField.Initializer;
                newField.Initializer = null;

                Statement initStmt = Templates.GetStatementTemplate("InitComplexInstanceField");
                Replacer.Replace(initStmt, "_FieldName", newField.Name);
                Normalizer normalizer = new Normalizer(false);
                Replacer.Replace(initStmt, "_expr", normalizer.VisitFieldInitializer(initializer));

                Method ctor = (Method)newClass.Members[0];
                Debug.Assert(ctor.Parameters.Count == 1);
                ctor.Body.Statements.Add(initStmt);
            }

            // The id field holds the member index of the original field.
            Identifier idFieldName = new Identifier("id_" + f.Name.Name);
            Field idf = new Field(newClass, null, FieldFlags.Public | FieldFlags.Static,
                idFieldName, SystemTypes.Int32, null);
            idf.Initializer = new Literal(i, SystemTypes.Int32);

            SwitchCase getCase =
                ((System.Compiler.Switch)Templates.GetStatementTemplate("GetFieldInfoCase")).Cases[0];
            Replacer.Replace(getCase, "_fieldId", new Literal(i, SystemTypes.Int32));
            Replacer.Replace(getCase, "_fieldName", new Identifier(newField.Name.Name));
            switchGetValue.Cases.Add(getCase);

            SwitchCase setCase =
                ((System.Compiler.Switch)Templates.GetStatementTemplate("SetFieldInfoCase")).Cases[0];
            Replacer.Replace(setCase, "_fieldId", new Literal(i, SystemTypes.Int32));
            Replacer.Replace(setCase, "_fieldName", new Identifier(newField.Name.Name));

            TypeExpression typeExpr = newField.Type as TypeExpression;
            Expression fieldTypeExpr;
            if (typeExpr != null)
            {
                fieldTypeExpr = typeExpr.Expression;
            }
            else
            {
                fieldTypeExpr = new Identifier(newField.Type.Name.Name);
            }
            Replacer.Replace(setCase, "_fieldType", fieldTypeExpr);
            switchSetValue.Cases.Add(setCase);

            newClass.Members.Add(newField);
            newField.DeclaringType = newClass;
            newClass.Members.Add(idf);
            idf.DeclaringType = newClass;

            // Add property for the field.
            Property accessor = GetFieldAccessorProperty(f.Type, newField.Type, f.Name, newField.Name, idFieldName);
            newClass.Members.Add(accessor);
            accessor.DeclaringType = newClass;

            if (accessor.Getter != null)
            {
                newClass.Members.Add(accessor.Getter);
                accessor.Getter.DeclaringType = newClass;
            }
            if (accessor.Setter != null)
            {
                newClass.Members.Add(accessor.Setter);
                accessor.Setter.DeclaringType = newClass;
            }

            writer.Body.Statements.Add(GetWriterStatement("this", f.Type, newField.Name));
            traverser.Body.Statements.Add(GetTraverserStatement("this", f.Type, newField.Name));
            // (A separate reference traverser for heap-typed fields is commented out in the original.)

            Statement cloneStmt = GetCloneStatement(newField.Name);
            cloneFields.Statements.Add(cloneStmt);
        }

        ZMethod zMethod = c.Members[i] as ZMethod;
        if (zMethod != null)
        {
            InterfaceList matches = FindMatchingInterfaces(c, zMethod);
            Interface x = null;
            if (matches.Count != 0)
            {
                // TODO: Handle the case when one method implements methods declared in multiple interfaces
                Debug.Assert(matches.Count == 1);
                x = matches[0];
            }

            Class methodClass = GenerateClassMethod(zMethod, x);
            newClass.Members.Add(methodClass);
            methodClass.DeclaringType = newClass;
            methodClass.Flags = (methodClass.Flags & ~TypeFlags.VisibilityMask) | TypeFlags.NestedFamORAssem;

            if (x != null)
            {
                // Add a Create<Method> member that links the interface method to this class's method.
                TypeNode extras = Templates.GetTypeTemplateByName("ClassExtras");
                Method member = (Method)Templates.GetMemberByName(extras.Members, "__CreateInterfaceMethod");
                member.Name = new Identifier("Create" + methodClass.Name.Name);
                member.DeclaringType = newClass;
                Replacer.Replace(member, new Identifier("__InterfaceMethod"), new QualifiedIdentifier(x.Name, methodClass.Name));
                Replacer.Replace(member, new Identifier("__ClassMethod"), new QualifiedIdentifier(c.Name, methodClass.Name));
                newClass.Members.Add(member);
            }
        }
    }

    // Splice the cloning assignment statements into the class's Clone method
    // at the appropriate place.
    Method cloner = (Method)Templates.GetMemberByName(newClass.Members, "Clone");
    Replacer.Replace(cloner.Body, "cloneFields", cloneFields);

    // Add the emitted class to our Zing application class.
    InstallType(newClass);
}
/// <summary>
/// Abstract handler that every concrete subclass must implement. Presumably called
/// when the current input character is one of the "special" characters - confirm
/// against the caller and the definition of that character class.
/// </summary>
/// <param name="normalizer">Normalizer instance the handler operates on.</param>
/// <param name="c">Character being handled.</param>
public abstract void SpecialChars(Normalizer normalizer, char c);
/// <summary>
/// Splits a <c>foreach</c> statement into basic blocks built from templates:
/// init -> test -> deref -> body -> increment -> test, with the test block falling
/// through to the continuation that was current when the loop was reached. The init
/// block becomes the new current continuation.
/// </summary>
/// <param name="forEach">The loop to process.</param>
/// <returns>The <paramref name="forEach"/> node that was passed in.</returns>
public override Statement VisitForEach(ForEach forEach)
{
    Normalizer normalizer = new Normalizer(false);

    // Iterator variable name derived from the loop's unique key.
    Identifier iterator = new Identifier("____" + forEach.UniqueKey.ToString(CultureInfo.InvariantCulture));
    Expression sourceEnumerable = normalizer.VisitExpression(forEach.SourceEnumerable);

    // Increment block: advances the iterator; its target is set once the test block exists.
    Statement incrementStmt = Templates.GetStatementTemplate("foreachIncrementer");
    Replacer.Replace(incrementStmt, "_iterator", iterator);
    BasicBlock incrementBlock = new BasicBlock(incrementStmt);
    AddBlock(incrementBlock);
    incrementBlock.MiddleOfTransition = true;
    incrementBlock.SkipNormalizer = true;
    incrementBlock.SourceContext = forEach.SourceContext;

    // Body: visited with the increment block pushed on the continuation stack.
    PushContinuationStack(incrementBlock);
    this.Visit(forEach.Body);
    BasicBlock loopBody = PopContinuationStack();

    // Deref block: loads the current element into the target variable, then runs the body.
    Statement derefStmt = Templates.GetStatementTemplate("foreachDeref");
    Replacer.Replace(derefStmt, "_tmpVar", normalizer.VisitExpression(forEach.TargetVariable));
    Replacer.Replace(derefStmt, "_collectionExpr", sourceEnumerable);
    Replacer.Replace(derefStmt, "_collectionType", new Identifier(forEach.SourceEnumerable.Type.FullName));
    Replacer.Replace(derefStmt, "_iterator", iterator);
    BasicBlock derefBlock = new BasicBlock(derefStmt, loopBody);
    AddBlock(derefBlock);
    derefBlock.MiddleOfTransition = true;
    derefBlock.SkipNormalizer = true;

    // Test block: goes to the deref block when the test holds, otherwise leaves the loop.
    Expression testExpr = Templates.GetExpressionTemplate("foreachTest");
    Replacer.Replace(testExpr, "_iterator", iterator);
    Replacer.Replace(testExpr, "_sourceEnumerable", sourceEnumerable);
    BasicBlock testBlock = new BasicBlock(null, testExpr, derefBlock, CurrentContinuation);
    AddBlock(testBlock);
    testBlock.SkipNormalizer = true;

    // Close the loop: after incrementing, test again.
    incrementBlock.UnconditionalTarget = testBlock;

    // Init block: sets up the iterator and enters the test.
    Statement initStmt = Templates.GetStatementTemplate("foreachInit");
    Replacer.Replace(initStmt, "_iterator", iterator);
    BasicBlock initBlock = new BasicBlock(initStmt, testBlock);
    AddBlock(initBlock);
    initBlock.MiddleOfTransition = true;
    initBlock.SkipNormalizer = true;
    initBlock.SourceContext = forEach.SourceContext;

    CurrentContinuation = initBlock;
    return forEach;
}
/// <summary>
/// Switches the supplied normalizer to the shared space state.
/// </summary>
/// <param name="normalizer">Normalizer whose <c>_state</c> is replaced with <c>Normalizer._spaceState</c>.</param>
/// <param name="c">The character being handled; not used by this override.</param>
public override void Space(Normalizer normalizer, char c)
{
    normalizer._state = Normalizer._spaceState;
}
/// <summary>
/// Lowers an <c>if</c> statement into basic blocks. Inside an atomic block a
/// coalescable <c>if</c> is kept whole in a single block; otherwise both branches are
/// visited and a conditional block targeting them becomes the current continuation.
/// </summary>
/// <param name="If">The if-statement node to lower.</param>
/// <returns>The <paramref name="If"/> node that was passed in.</returns>
public override Statement VisitIf(If If)
{
    // Fast path: keep the whole statement in one block.
    if (insideAtomicBlock && IsCoalescableIfStmt(If))
    {
        BasicBlock coalesced = new BasicBlock(If, CurrentContinuation);
        CurrentContinuation = AddBlock(coalesced);
        return If;
    }

    // Build the "true" branch.
    PushContinuationStack();
    this.Visit(If.TrueBlock);
    BasicBlock whenTrue = PopContinuationStack();

    // Build the "false" branch.
    PushContinuationStack();
    this.Visit(If.FalseBlock);
    BasicBlock whenFalse = PopContinuationStack();

    // Prefer the condition's own source context; fall back to the statement's.
    SourceContext conditionContext = (If.Condition != null)
        ? If.Condition.SourceContext
        : If.SourceContext;

    // All conditional expressions are normalized here, which makes life
    // easier for "select".
    Normalizer conditionNormalizer = new Normalizer(splicer, null, false);
    Expression normalizedCondition = conditionNormalizer.VisitExpression(If.Condition);
    BasicBlock branch = new BasicBlock(null, normalizedCondition, whenTrue, whenFalse);

    CurrentContinuation = AddBlock(branch);
    CurrentContinuation.SourceContext = conditionContext;

    if (CurrentContinuation.ConditionalExpression != null)
    {
        CurrentContinuation.ConditionalExpression.SourceContext = conditionContext;
    }

    return If;
}
/// <summary>
/// Appends the character unchanged to the normalizer's string builder.
/// </summary>
/// <param name="normalizer">Normalizer whose <c>_stringBuilder</c> receives the character.</param>
/// <param name="c">The character to append.</param>
public override void TheOthers(Normalizer normalizer, char c)
{
    normalizer._stringBuilder.Append(c);
}
/// <summary>
/// Segments the text between <paramref name="startPos"/> (inclusive) and
/// <paramref name="endPos"/> (exclusive) into words using dictionary matches plus a
/// Katakana-run heuristic, and pushes the resulting break positions onto
/// <paramref name="foundBreaks"/>.
/// </summary>
/// <param name="inText">Iterator over the text; on return it is positioned at the top of
/// <paramref name="foundBreaks"/> when that collection is not empty.</param>
/// <param name="startPos">Start of the range to segment.</param>
/// <param name="endPos">End of the range to segment.</param>
/// <param name="foundBreaks">Receives the break positions found in the range.</param>
/// <returns>
/// The number of breaks pushed, minus one if a trailing break equal to
/// <paramref name="endPos"/> was popped again; 0 when the range is empty.
/// </returns>
public override int DivideUpDictionaryRange(CharacterIterator inText, int startPos, int endPos, DequeI foundBreaks)
{
    if (startPos >= endPos)
    {
        return 0;
    }

    inText.SetIndex(startPos);

    int rangeLength = endPos - startPos;
    int[] charPositions = new int[rangeLength + 1];

    // Copy the requested range out of the iterator.
    StringBuffer rawText = new StringBuffer("");
    inText.SetIndex(startPos);
    while (inText.Index < endPos)
    {
        rawText.Append(inText.Current);
        inText.Next();
    }
    string prenormstr = rawText.ToString();

    // Cheap check first; the full check only runs when the quick one is not a definite YES.
    bool alreadyNfkc = Normalizer.QuickCheck(prenormstr, Normalizer.NFKC) == Normalizer.YES;
    if (!alreadyNfkc)
    {
        alreadyNfkc = Normalizer.IsNormalized(prenormstr, Normalizer.NFKC, 0);
    }

    // charPositions[k] records the offset (relative to startPos) associated with the
    // k-th character of the text being segmented.
    CharacterIterator text;
    int numChars = 0;
    if (alreadyNfkc)
    {
        text = new StringCharacterIterator(prenormstr);
        charPositions[0] = 0;
        for (int offset = 0; offset < prenormstr.Length; )
        {
            int codePoint = prenormstr.CodePointAt(offset);
            offset += Character.CharCount(codePoint);
            numChars++;
            charPositions[numChars] = offset;
        }
    }
    else
    {
        string normalizedText = Normalizer.Normalize(prenormstr, Normalizer.NFKC);
        text = new StringCharacterIterator(normalizedText);
        charPositions = new int[normalizedText.Length + 1];

        // Step an incremental normalizer over the raw text and record its index after each step.
        Normalizer stepper = new Normalizer(prenormstr, Normalizer.NFKC, 0);
        int offset = 0;
        charPositions[0] = 0;
        while (offset < stepper.EndIndex)
        {
            stepper.Next();
            numChars++;
            offset = stepper.Index;
            charPositions[numChars] = offset;
        }
    }

    // From this point on all indices refer to positions within the normalized string.
    // bestSnlp[k] is the lowest accumulated cost found for a segmentation ending at k;
    // prev[k] is the start of the last word in that segmentation.
    int[] bestSnlp = new int[numChars + 1];
    int[] prev = new int[numChars + 1];
    for (int k = 0; k <= numChars; k++)
    {
        bestSnlp[k] = kint32max;
        prev[k] = -1;
    }
    bestSnlp[0] = 0;

    const int maxWordSize = 20;
    int[] values = new int[numChars];
    int[] lengths = new int[numChars];

    // Dynamic programming pass to find the best segmentation.
    bool previousWasKatakana = false;
    for (int i = 0; i < numChars; i++)
    {
        text.SetIndex(i);
        if (bestSnlp[i] == kint32max)
        {
            // No segmentation reaches this position.
            continue;
        }

        int maxSearchLength = numChars - i;
        if (i + maxWordSize < numChars)
        {
            maxSearchLength = maxWordSize;
        }

        int[] matchCount = new int[1];
        fDictionary.Matches(text, maxSearchLength, lengths, matchCount, maxSearchLength, values);
        int count = matchCount[0];

        // When the dictionary has no single-character match starting here, treat the
        // character as a one-character word with the highest possible cost (i.e. the
        // least likely to occur). Korean characters are excluded from this treatment,
        // since they should stay together by default.
        text.SetIndex(i); // Matches() advances the text position; undo that.
        if ((count == 0 || lengths[0] != 1)
            && CharacterIteration.Current32(text) != CharacterIteration.DONE32
            && !fHangulWordSet.Contains(CharacterIteration.Current32(text)))
        {
            values[count] = maxSnlp;
            lengths[count] = 1;
            count++;
        }

        // Relax every candidate word that starts at i.
        for (int m = 0; m < count; m++)
        {
            int wordEnd = lengths[m] + i;
            int candidateCost = bestSnlp[i] + values[m];
            if (candidateCost < bestSnlp[wordEnd])
            {
                bestSnlp[wordEnd] = candidateCost;
                prev[wordEnd] = i;
            }
        }

        // Single-character Katakana words are rare in Japanese, so every continuous
        // run of Katakana characters is also offered as a candidate word, with a
        // default cost that depends on the run's length.
        bool isKatakanaHere = IsKatakana(CharacterIteration.Current32(text));
        if (!previousWasKatakana && isKatakanaHere)
        {
            int runEnd = i + 1;
            CharacterIteration.Next32(text);
            while (runEnd < numChars
                   && (runEnd - i) < kMaxKatakanaGroupLength
                   && IsKatakana(CharacterIteration.Current32(text)))
            {
                CharacterIteration.Next32(text);
                ++runEnd;
            }

            if ((runEnd - i) < kMaxKatakanaGroupLength)
            {
                int runCost = bestSnlp[i] + GetKatakanaCost(runEnd - i);
                if (runCost < bestSnlp[runEnd])
                {
                    bestSnlp[runEnd] = runCost;
                    prev[runEnd] = i;
                }
            }
        }
        previousWasKatakana = isKatakanaHere;
    }

    // Walk the prev[] chain backwards from the end to collect boundaries (last first).
    int[] boundaries = new int[numChars + 1];
    int numBreaks = 0;
    if (bestSnlp[numChars] == kint32max)
    {
        // The end was never reached: treat the whole range as one piece.
        boundaries[numBreaks++] = numChars;
    }
    else
    {
        for (int k = numChars; k > 0; k = prev[k])
        {
            boundaries[numBreaks++] = k;
        }
        Assert.Assrt(prev[boundaries[numBreaks - 1]] == 0);
    }

    // Also offer the start of the range when no earlier break covers it.
    if (foundBreaks.Count == 0 || foundBreaks.Peek() < startPos)
    {
        boundaries[numBreaks++] = 0;
    }

    // Push the boundaries in ascending order, mapped back through charPositions,
    // skipping duplicates and the range start itself.
    int correctedNumBreaks = 0;
    for (int k = numBreaks - 1; k >= 0; k--)
    {
        int pos = charPositions[boundaries[k]] + startPos;
        if (!foundBreaks.Contains(pos) && pos != startPos)
        {
            foundBreaks.Push(pos);
            correctedNumBreaks++;
        }
    }

    // A break exactly at endPos is removed again.
    if (!foundBreaks.IsEmpty && foundBreaks.Peek() == endPos)
    {
        foundBreaks.Pop();
        correctedNumBreaks--;
    }

    if (!foundBreaks.IsEmpty)
    {
        inText.SetIndex(foundBreaks.Peek());
    }

    return correctedNumBreaks;
}