Пример #1
0
 /// <summary>
 /// Re-reads the tags through a tagger created for <c>filename</c> and stores their
 /// aggregated, upper-cased form in <c>Tags</c>.
 /// </summary>
 public void Reload()
 {
     // The tagger only lives long enough to capture the tag text.
     using (IHaTagger tagger = TaggerFactory.CreateTagger(filename, false))
     {
         var aggregated = Utils.SafeAggregate(tagger.Tags);
         Tags = aggregated.ToUpper();
     }
 }
Пример #2
0
        /// <summary>
        /// Tokenizes a sample sentence with a <see cref="RegexTokenizer"/>, tags it with an
        /// <see cref="NGramTagger"/> configured with NGram = 3 and the corpus returned by
        /// <c>GetTaggedCorpus()</c>, then asserts the part-of-speech tags of the first four tokens.
        /// </summary>
        public void TriGramInCoNLL2000()
        {
            const string sentenceText = "Chancellor of the Exchequer Nigel Lawson's restated commitment";
            string[] expectedPos = { "NNP", "IN", "DT", "NNP" };

            // Tokenization
            var tokenizerOptions = new TokenizationOptions { Pattern = RegexTokenizer.WORD_PUNC };
            var tokenizer = new TokenizerFactory<RegexTokenizer>(tokenizerOptions, SupportedLanguage.English);
            var words = tokenizer.Tokenize(sentenceText);

            // Tagging
            var taggerOptions = new TagOptions
            {
                NGram = 3,
                Tag = "NN",
                Corpus = GetTaggedCorpus()
            };
            var tagger = new TaggerFactory<NGramTagger>(taggerOptions, SupportedLanguage.English);

            var sentence = new Sentence { Words = words };
            tagger.Tag(sentence);

            // Checked in order; the first mismatch fails the test.
            for (int i = 0; i < expectedPos.Length; i++)
            {
                Assert.IsTrue(words[i].Pos == expectedPos[i]);
            }
        }
Пример #3
0
 /// <summary>
 /// Prints the tags of one file when no tag changes are pending; otherwise applies every
 /// pending change (add when the element's flag is set, remove when it is not) and saves.
 /// A <c>HaException</c> is reported on the console as a failure for this path.
 /// </summary>
 /// <param name="path">Path passed to <c>TaggerFactory.CreateTagger</c>.</param>
 static void TagFile(string path)
 {
     try
     {
         // Decided before the tagger is created: an empty change list means display only.
         bool displayOnly = tag_changes.Count == 0;

         using (IHaTagger tagger = TaggerFactory.CreateTagger(path))
         {
             if (displayOnly)
             {
                 var tagText = Utils.SafeAggregate(tagger.Tags).ToUpper();
                 Console.WriteLine(path + ": " + tagText);
                 return;
             }

             foreach (HaSyntax.HaSyntaxElement change in tag_changes)
             {
                 if (!change.Flag)
                 {
                     tagger.RemoveTag(change.Element);
                 }
                 else
                 {
                     tagger.AddTag(change.Element);
                 }
             }

             tagger.Save();
         }
     }
     catch (HaException)
     {
         Console.WriteLine("FAILED ({0})", path);
     }
 }
Пример #4
0
        /// <summary>
        /// Returns the value stored for <paramref name="tagName"/> at position <paramref name="tokenIndex"/>.
        /// </summary>
        /// <param name="tokenIndex">Zero-based index into <c>m_tokens</c>.</param>
        /// <param name="tagName">Name of a tag registered for this locale.</param>
        /// <param name="createIfNotPresent">
        /// When <c>true</c> and no values are stored for the tag yet, <c>Tag(tagName)</c> is invoked first;
        /// when <c>false</c>, a missing tag raises an exception.
        /// </param>
        /// <returns>The tag value at <paramref name="tokenIndex"/>.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="tokenIndex"/> is outside <c>m_tokens</c>.</exception>
        /// <exception cref="ArgumentException"><paramref name="tagName"/> is not registered for this locale.</exception>
        /// <exception cref="InvalidOperationException">
        /// No values exist for the tag and either <paramref name="createIfNotPresent"/> is <c>false</c>
        /// or tagging did not produce them.
        /// </exception>
        private object GetTagAt(int tokenIndex, string tagName, bool createIfNotPresent)
        {
            if (tokenIndex < 0 || tokenIndex >= m_tokens.Length)
            {
                throw new ArgumentOutOfRangeException("tokenIndex");
            }

            if (!TaggerFactory.IsTagRegistered(this.Locale, tagName))
            {
                throw new ArgumentException("No Tagger is associated with this tag name!", "tagName");
            }

            // The writers of m_tags visible in this file (Tag, SetTagAt) store entries under
            // tagName.ToLower(), so the lookup must use the same key form.
            string key = tagName.ToLower();

            object[] tagValues;
            if (!m_tags.TryGetValue(key, out tagValues))
            {
                if (!createIfNotPresent)
                {
                    // InvalidOperationException is still caught by any existing catch (Exception).
                    throw new InvalidOperationException("Tag does not exist!");
                }

                Tag(tagName);

                // The failed TryGetValue above left tagValues null; read the freshly created entry.
                // Previously this path dereferenced null unconditionally.
                if (!m_tags.TryGetValue(key, out tagValues))
                {
                    throw new InvalidOperationException("Tag does not exist!");
                }
            }

            Debug.Assert(tagValues.Length == m_tokens.Length, "The length of provided tags does not match the length of tokens!");
            return tagValues[tokenIndex];
        }
Пример #5
0
        /// <summary>
        /// Applies a set of tag changes to every file in <paramref name="list"/>, saves each file,
        /// and afterwards calls <c>Reload()</c> on every file.
        /// </summary>
        /// <param name="tags">Changes to apply: an element whose <c>Flag</c> is set is added, otherwise it is removed.</param>
        /// <param name="list">Files to modify and then reload.</param>
        private void applyTagsToList(List <HaSyntax.HaSyntaxElement> tags, IEnumerable <LoadedFile> list)
        {
            // Enumerate the incoming sequence exactly once. It is walked twice below, and a lazily
            // evaluated sequence could otherwise yield different objects for the save pass and the reload pass.
            List<LoadedFile> files = new List<LoadedFile>(list);

            // Set tags
            foreach (LoadedFile file in files)
            {
                using (IHaTagger ht = TaggerFactory.CreateTagger(file.Filename, true))
                {
                    foreach (HaSyntax.HaSyntaxElement tag in tags)
                    {
                        if (tag.Flag)
                        {
                            ht.AddTag(tag.Element);
                        }
                        else
                        {
                            ht.RemoveTag(tag.Element);
                        }
                    }
                    ht.Save();
                }
            }

            // Reload views only after every file has been saved.
            foreach (LoadedFile file in files)
            {
                file.Reload();
            }
        }
Пример #6
0
        /// <summary>
        /// Tokenizes a sample sentence with a <see cref="RegexTokenizer"/>, tags it with an
        /// <see cref="NGramTagger"/> configured with NGram = 3 and the corpus directory read from
        /// configuration, then asserts the part-of-speech tags of the first four tokens.
        /// </summary>
        public void TriGramInCoNLL2000()
        {
            const string sentenceText = "Chancellor of the Exchequer Nigel Lawson's restated commitment";
            string[] expectedPos = { "NNP", "IN", "DT", "NNP" };

            // Tokenization
            var tokenizerOptions = new TokenizationOptions { Pattern = RegexTokenizer.WORD_PUNC };
            var tokenizer = new TokenizerFactory(tokenizerOptions, SupportedLanguage.English);
            tokenizer.GetTokenizer<RegexTokenizer>();
            var words = tokenizer.Tokenize(sentenceText);

            // Tagging
            var taggerOptions = new TagOptions
            {
                CorpusDir = Configuration.GetValue<String>("CherubNLP:dataDir"),
                NGram = 3,
                Tag = "NN"
            };
            var tagger = new TaggerFactory(taggerOptions, SupportedLanguage.English);
            tagger.GetTagger<NGramTagger>();

            var sentence = new Sentence { Words = words };
            tagger.Tag(sentence);

            // Checked in order; the first mismatch fails the test.
            for (int i = 0; i < expectedPos.Length; i++)
            {
                Assert.IsTrue(words[i].Pos == expectedPos[i]);
            }
        }
Пример #7
0
        /// <summary>
        /// Provides the tagger associated with an <see cref="ICodeDocument"/>.
        /// </summary>
        /// <typeparam name="T">The kind of <see cref="ITag"/> the requested tagger produces.</typeparam>
        /// <param name="document">The <see cref="ICodeDocument"/> for which a tagger is requested.</param>
        /// <returns>
        /// The document's <see cref="JsonTokenTagger"/> singleton cast to the requested tagger type, or
        /// <c>null</c> when <see cref="JsonTokenTagger"/> cannot be assigned to that type.
        /// </returns>
        public ITagger<T> GetTagger<T>(ICodeDocument document) where T : ITag
        {
            bool supported = typeof(ITagger<T>).IsAssignableFrom(typeof(JsonTokenTagger));
            if (!supported)
            {
                return null;
            }

            var factory = new TaggerFactory(this, document);
            var creator = new ActiproSoftware.Text.Utility.PropertyDictionary.Creator<JsonTokenTagger>(factory.CreateTagger);

            // The singleton is stored in the document's properties under the ITagger<ITokenTag> type key.
            var tagger = document.Properties.GetOrCreateSingleton(typeof(ITagger<ITokenTag>), creator);
            return (ITagger<T>)tagger;
        }
Пример #8
0
        /// <summary>
        /// Creates the tagger factory on first use and selects the tagger named by the
        /// "tagger" configuration value. Does nothing when <c>_tagger</c> is already set.
        /// </summary>
        private void Init()
        {
            if (_tagger != null)
            {
                return;
            }

            // The corpus directory is the "Corpus" folder under the app domain's "DataPath" entry.
            string dataPath = AppDomain.CurrentDomain.GetData("DataPath").ToString();
            var options = new TagOptions { CorpusDir = Path.Combine(dataPath, "Corpus") };
            _tagger = new TaggerFactory(options, SupportedLanguage.English);

            string taggerName = Configuration.GetValue<String>("tagger");
            _tagger.GetTagger(taggerName);
        }
Пример #9
0
        /// <summary>
        /// Runs a <see cref="DefaultTagger"/> configured with Tag = "NN" over the tokens of a short
        /// question. The method contains no assertions; it only exercises the tagging call.
        /// </summary>
        public void TagInCoNLL2000()
        {
            const string sentenceText = "How are you doing?";

            var tokenizerOptions = new TokenizationOptions();
            var tokenizer = new TokenizerFactory<RegexTokenizer>(tokenizerOptions, SupportedLanguage.English);
            var words = tokenizer.Tokenize(sentenceText);

            var taggerOptions = new TagOptions { Tag = "NN" };
            var tagger = new TaggerFactory<DefaultTagger>(taggerOptions, SupportedLanguage.English);

            var sentence = new Sentence { Words = words };
            tagger.Tag(sentence);
        }
Пример #10
0
        /// <summary>
        /// Reads "conll2000_chunking_train.txt" from the Corpus/CoNLL folder under the app domain's
        /// "DataPath" entry and builds an <see cref="NGramTagger"/> factory (NGram = 1) over that data.
        /// </summary>
        public BotSharpTagger()
        {
            string dataRoot = AppDomain.CurrentDomain.GetData("DataPath").ToString();
            string corpusDir = Path.Combine(dataRoot, "Corpus", "CoNLL");

            var reader = new CoNLLReader();
            var readerOptions = new ReaderOptions
            {
                DataDir = corpusDir,
                FileName = "conll2000_chunking_train.txt"
            };
            var corpus = reader.Read(readerOptions);

            var taggerOptions = new TagOptions
            {
                NGram = 1,
                Tag = "NN",
                Corpus = corpus
            };
            _tagger = new TaggerFactory<NGramTagger>(taggerOptions, SupportedLanguage.English);
        }
Пример #11
0
        /// <summary>
        /// Console entry point: reads one line of text, tokenizes it with the "simple" tokenizer,
        /// tags the non-blank tokens with the "simple" tagger against the "brills" corpus and
        /// prints each token followed by its tag in parentheses.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            Console.WriteLine("Please enter text to tag");
            string input = Console.ReadLine();
            var corpus = CorpusFactory.GetCorpus("brills");

            var tokenizer = TokenizerFactory.GetTokenizer("simple");
            var tokens = tokenizer.Tokenize(input);

            var tagger = TaggerFactory.GetTagger("simple");

            // Blank tokens are dropped before tagging.
            var nonBlankTokens = tokens.Where(token => !string.IsNullOrWhiteSpace(token)).ToList();
            var results = tagger.Tag(corpus, nonBlankTokens);

            var formatted = results.Select(result => string.Format("{0}({1})", result.Token, result.Tag));
            Console.WriteLine(String.Join(" ", formatted));

            Console.WriteLine("Press any key to exit");

            // Waits for a full line of input before the process ends.
            Console.ReadLine();
        }
Пример #12
0
        /// <summary>
        /// Tags a sample sentence twice with an <see cref="NGramTagger"/> configured with NGram = 1.
        /// Asserts the part-of-speech tags of the first four tokens after the first run, then asserts
        /// that the first run took more than 100 times as many milliseconds as the second.
        /// </summary>
        public void UniGramInCoNLL2000()
        {
            const string sentenceText = "Chancellor of the Exchequer Nigel Lawson's restated commitment";
            string[] expectedPos = { "NNP", "IN", "DT", "NNP" };

            // Tokenization
            var tokenizerOptions = new TokenizationOptions { Pattern = RegexTokenizer.WORD_PUNC };
            var tokenizer = new TokenizerFactory(tokenizerOptions, SupportedLanguage.English);
            tokenizer.GetTokenizer<RegexTokenizer>();
            var words = tokenizer.Tokenize(sentenceText);

            // Tagging
            var taggerOptions = new TagOptions
            {
                CorpusDir = Configuration.GetValue<String>("CherubNLP:dataDir"),
                NGram = 1,
                Tag = "NN"
            };
            var tagger = new TaggerFactory(taggerOptions, SupportedLanguage.English);
            tagger.GetTagger<NGramTagger>();

            // First run, timed.
            var firstRun = Stopwatch.StartNew();
            tagger.Tag(new Sentence { Words = words });
            firstRun.Stop();
            long firstRunMs = firstRun.ElapsedMilliseconds;

            // Checked in order; the first mismatch fails the test.
            for (int i = 0; i < expectedPos.Length; i++)
            {
                Assert.IsTrue(words[i].Pos == expectedPos[i]);
            }

            // Second run, timed: checks that the model is not loaded again on a repeated call.
            var secondRun = Stopwatch.StartNew();
            tagger.Tag(new Sentence { Words = words });
            secondRun.Stop();
            long secondRunMs = secondRun.ElapsedMilliseconds;

            Assert.IsTrue(firstRunMs > secondRunMs * 100);
        }
Пример #13
0
        /// <summary>
        /// Tags the text of the submitted model and exposes the result through <c>ViewBag.Results</c>.
        /// When the model state is invalid the view is returned without tagging.
        /// </summary>
        /// <param name="model">View model whose <c>Text</c> is tokenized and tagged.</param>
        /// <returns>The view rendered for <paramref name="model"/>.</returns>
        public ActionResult Index(TaggerViewModel model)
        {
            if (ModelState.IsValid)
            {
                var corpus = CorpusFactory.GetCorpus("brills");

                var tokenizer = TokenizerFactory.GetTokenizer("simple");
                var tokens = tokenizer.Tokenize(model.Text);

                var tagger = TaggerFactory.GetTagger("simple");

                // Blank tokens are dropped before tagging.
                var nonBlankTokens = tokens.Where(token => !string.IsNullOrWhiteSpace(token)).ToList();
                var results = tagger.Tag(corpus, nonBlankTokens);

                var formatted = results.Select(result => string.Format("{0}({1})", result.Token, result.Tag));
                ViewBag.Results = String.Join(" ", formatted);
            }

            return View(model);
        }
Пример #14
0
        /// <summary>
        /// Stores <paramref name="tagValue"/> for the tag <paramref name="tagName"/> at position
        /// <paramref name="tokenIndex"/>, creating the tag's value array on first use.
        /// </summary>
        /// <param name="tokenIndex">Zero-based position; must be in the range 0 to <c>Count</c> - 1.</param>
        /// <param name="tagName">Name of a tag registered for this locale; stored under its lower-cased form.</param>
        /// <param name="tagValue">Value to store at that position.</param>
        /// <exception cref="InvalidOperationException"><paramref name="tagName"/> is not registered for this locale.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="tokenIndex"/> is negative or not less than <c>Count</c>.</exception>
        public void SetTagAt(int tokenIndex, string tagName, object tagValue)
        {
            if (!TaggerFactory.IsTagRegistered(this.Locale, tagName))
            {
                throw new InvalidOperationException(string.Format("{0} is not registered", tagName));
            }

            // Reject negative indexes up front. Previously a negative index first inserted an empty
            // array into m_tags and only then failed with IndexOutOfRangeException.
            if (tokenIndex < 0 || tokenIndex >= Count)
            {
                throw new ArgumentOutOfRangeException("tokenIndex");
            }

            string key = tagName.ToLower();

            // Single lookup instead of ContainsKey + Add + indexer.
            object[] tagValues;
            if (!m_tags.TryGetValue(key, out tagValues))
            {
                tagValues = new object[Count];
                m_tags.Add(key, tagValues);
            }

            tagValues[tokenIndex] = tagValue;
        }
Пример #15
0
        /// <summary>
        /// Ensures values exist in <c>m_tags</c> for the given tag. If none are stored under the
        /// lower-cased name, <c>TaggerFactory.Tag</c> is asked to produce them; every returned
        /// key/value pair is stored under its lower-cased key. When the factory returns <c>null</c>,
        /// an empty value array sized to the token count is stored instead.
        /// </summary>
        /// <param name="tagName">Name of a tag registered for this locale.</param>
        /// <returns>Always <c>true</c>; failure is signalled by the exception.</returns>
        /// <exception cref="ArgumentException"><paramref name="tagName"/> is not registered for this locale.</exception>
        public bool Tag(string tagName)
        {
            if (!TaggerFactory.IsTagRegistered(this.Locale, tagName))
            {
                // The unreachable "return false" that used to follow this throw has been removed.
                throw new ArgumentException("No Tagger is associated with this tag name!", "tagName");
            }

            tagName = tagName.ToLower();

            if (m_tags.ContainsKey(tagName))
            {
                // Values were already computed or set earlier; nothing to do.
                return true;
            }

            var tagsValues = TaggerFactory.Tag(tagName, this);

            if (tagsValues == null)
            {
                m_tags.Add(tagName, new object[m_tokens.Length]);
                return true;
            }

            foreach (var tagAndValues in tagsValues)
            {
                // The indexer setter adds a missing key and overwrites an existing one, which is
                // exactly what the former ContainsKey / Add / assign branches did.
                m_tags[tagAndValues.Key.ToLower()] = tagAndValues.Value;
            }

            return true;
        }
Пример #16
0
        /// <summary>
        /// Command-line entry point: parses the options, splits the cue sheet into WAV files,
        /// encodes every split track in parallel, moves the encoded files into a per-file-type
        /// folder under the output path, copies the original files to the output path and prints
        /// the elapsed time.
        /// </summary>
        /// <param name="args">Command-line arguments handed to <c>options.Parse</c>.</param>
        static void Main(string[] args)
        {
            // Receives the parsed option values through the callbacks registered below.
            var optionValues = new OptionValues();
            var options      = new OptionSet
            {
                { "h|help", "Show this message and exit.", s => optionValues.ShowHelp = s != null },
                {
                    // Two-value option: the callback receives the encoder name and a decimal quality.
                    "e={:}|encoder={:}",
                    new StringBuilder().AppendLine("The {0:ENCODER TYPE} and {1:QUALITY} value to use. ")
                    .AppendLine()
                    .AppendLine("Valid {0:ENCODER TYPE} values are: ")
                    .AppendLine()
                    .AppendLine("fhgaacenc")
                    .AppendLine("lame")
                    .AppendLine("nero")
                    .AppendLine("oggvorbis")
                    .AppendLine("qaac")
                    .AppendLine("qaac64")
                    .AppendLine()
                    .Append("Numeric {1:QUALITY} values are specific to each encoder ")
                    .AppendLine("(fhgaacenc uses a value between 1 and 6, qaac uses 0 to 127, etc.) ")
                    .AppendLine()
                    .AppendLine("VBR mode is always used.")
                    .AppendLine()
                    .Append("These encoders are not distributed with this program. ")
                    .Append("They must be installed separately and copied to the executable directory ")
                    .Append("or made accessible via the System PATH environment variable. ")
                    .ToString(),
                    (string encStr, decimal encQual) =>
                    {
                        optionValues.EncoderString  = encStr;
                        optionValues.EncoderQuality = encQual;
                    }
                },
                {
                    "i=|input=",
                    new StringBuilder().AppendLine("The {PATH} to the cue sheet file.")
                    .AppendLine()
                    .Append("FLAC, WavPack and Monkey's Audio* files can be split. ")
                    .Append("Decoders for these files are not distributed with this program. ")
                    .Append("They must be installed separately and copied to the executable directory ")
                    .Append("or made accessible via the System PATH environment variable. ")
                    .AppendLine()
                    .AppendLine()
                    .Append("*Ensure the FLAC decoder is installed if splitting Monkey's Audio files. ")
                    .Append("MAC.exe does not provide any splitting functionality so a transcode to FLAC is required.")
                    .ToString(),
                    s => optionValues.CueFilePath = s
                },
                { "o=|output=", "The output {PATH}.", s => optionValues.OutputPath = s },
                { "c=|cover=", "The {PATH} to a front cover image.", s => optionValues.CoverPath = s }
            };

            // Assigned from options.Parse below but never read afterwards in this method.
            List <string> extra;

            try
            {
                extra = options.Parse(args);

                if (optionValues.ShowHelp)
                {
                    options.WriteOptionDescriptions(Console.Out);
                    return;
                }

                // NOTE(review): presumably throws OptionException for invalid values, which the
                // catch block below reports - confirm against OptionValues.Validate.
                optionValues.Validate();

                var cueSheetParser = new CueSheetParser(optionValues.CueFilePath);

                CueSheet cueSheet = cueSheetParser.Parse().ToTitleCase();

                var splitterFactory = new SplitterFactory(cueSheet, optionValues.CueFilePath);
                var encoderFactory  = new EncoderFactory(optionValues.EncoderQuality);
                var taggerFactory   = new TaggerFactory(cueSheet, optionValues.CoverPath);
                var stopwatch       = new Stopwatch();

                // Encoder and tagger are both built from the same EncoderType value.
                IEncoder encoder = encoderFactory.Build(optionValues.EncoderType);
                ITagger  tagger  = taggerFactory.Build(optionValues.EncoderType);
                // The splitter is disposed once all tracks have been encoded and moved.
                using (ISplitter splitter = splitterFactory.Build())
                {
                    Console.WriteLine("Starting...");
                    Console.WriteLine("Splitting {0} cue sheet into WAV files...", BuildCuesheetTypeStr(cueSheet));

                    // Timing covers splitting, encoding and the copy of the originals further down.
                    stopwatch.Start();

                    splitter.Split();

                    // Encoded files go into a sub-folder of the output path named after the encoder's file type.
                    DirectoryInfo encodedOutputDirInfo = Directory.CreateDirectory(Path.Combine(optionValues.OutputPath, encoder.FileType));

                    // Number of digits in the track count (first file's tracks for a standard cue sheet,
                    // otherwise the number of files); passed on to BuildEncodedFileOutputPath.
                    int trackCountWidth = cueSheet.IsStandard
                        ? cueSheet.Files[0].Tracks.Count.ToString().Length
                        : cueSheet.Files.Count.ToString().Length;

                    // Each split result is encoded on a worker thread. The same encoder and tagger
                    // instances are used by all workers.
                    // NOTE(review): confirm IEncoder.Encode and the tagger are safe for concurrent use.
                    Parallel.ForEach(
                        splitter.Results,
                        trackWavPair =>
                    {
                        Track track        = trackWavPair.Track;
                        string wavFilePath = trackWavPair.FilePath;
                        string title       = track.Title.Trim();

                        Console.WriteLine(
                            "Encoding '{0}' to {1} (Thread {2})...",
                            title,
                            encoder.FileType,
                            Thread.CurrentThread.ManagedThreadId);

                        // Encode returns the path of the encoded file, which is then moved to its final name.
                        string tempEncodedFilePath = encoder.Encode(wavFilePath, track, tagger);

                        string encodedOutputPath = BuildEncodedFileOutputPath(
                            encodedOutputDirInfo.FullName,
                            title,
                            track.TrackNum,
                            trackCountWidth,
                            encoder.FileExtension);

                        IOUtils.FileMove(tempEncodedFilePath, encodedOutputPath);
                    });
                }

                Console.WriteLine("Copying original files to output directory...");

                CopyOriginalsToOutputPath(optionValues.OutputPath, optionValues.CueFilePath, cueSheet, optionValues.CoverPath);

                stopwatch.Stop();

                Console.WriteLine("Done. Time elapsed: {0}", stopwatch.Elapsed);
            }
            catch (OptionException e)
            {
                // Only option errors are handled here; any other exception propagates out of Main.
                Console.WriteLine("{0} {1}", e.OptionName, e.Message);
                Console.WriteLine("Try '--help' for more information.");
            }

            // Keep the console window open when running under a debugger.
            if (Debugger.IsAttached)
            {
                Console.WriteLine("Click any key to exit.");
                Console.ReadKey();
            }
        }