// Runs a part-of-speech cut over the given text and prints the tokens
// on a single line as space-separated "word/flag" pairs.
private static void TestPosSegmenterCut(string text)
 {
     var tagger = new PosSegmenter();

     // Lazily project every token into its "word/flag" rendering.
     var rendered = from pair in tagger.Cut(text)
                    select string.Format("{0}/{1}", pair.Word, pair.Flag);

     Console.WriteLine(string.Join(" ", rendered));
 }
Exemple #2
0
    /// <summary>
    /// Scratch routine for word analysis: registers two company names as custom words,
    /// runs a part-of-speech cut over a sample company name, normalizes that sample,
    /// and finally prints the "word:flag" pairs produced for a short test string.
    /// </summary>
    public static void RunWordAnlayze()
    {
        var sample = "华陆工程(科技)有限责任公司";

        // The custom words are added to this segmenter before it is handed to the POS segmenter.
        var jieba = new JiebaSegmenter();
        jieba.AddWord("华陆工程科技有限责任公司");
        jieba.AddWord("中煤陕西榆林能源化工有限公司");

        var customTagger = new PosSegmenter(jieba);
        // The cut result is not consumed anywhere below.
        var samplePairs = customTagger.Cut(sample);

        // Normalize the sample and strip brackets; the final value is likewise not consumed below.
        sample = sample.NormalizeTextResult();
        sample = RegularTool.TrimBrackets(sample);

        // Disabled experiment, kept verbatim: surround-word analysis over four training documents.
       /*  var SProjectName = new Surround();
        var root = HTMLEngine.Anlayze(Program.DocBase + @"\FDDC_announcements_round1_train_20180518\round1_train_20180518\重大合同\html\1044779.html");
        var Contract = TraningDataset.GetContractById("1044779")[0];
        SProjectName.AnlayzeEntitySurroundWords(root, Contract.ProjectName);

        root = HTMLEngine.Anlayze(Program.DocBase + @"\FDDC_announcements_round1_train_20180518\round1_train_20180518\重大合同\html\1450.html");
        Contract = TraningDataset.GetContractById("1450")[0];
        SProjectName.AnlayzeEntitySurroundWords(root, Contract.ProjectName);

        root = HTMLEngine.Anlayze(Program.DocBase + @"\FDDC_announcements_round1_train_20180518\round1_train_20180518\重大合同\html\1042224.html");
        Contract = TraningDataset.GetContractById("1042224")[0];
        SProjectName.AnlayzeEntitySurroundWords(root, Contract.ProjectName);

        root = HTMLEngine.Anlayze(Program.DocBase + @"\FDDC_announcements_round1_train_20180518\round1_train_20180518\重大合同\html\917362.html");
        Contract = TraningDataset.GetContractById("917362")[0];
        SProjectName.AnlayzeEntitySurroundWords(root, Contract.ProjectName);
        SProjectName.WriteTop(10); */

        // Print the tagging of a short probe string using a default-constructed POS segmenter.
        var probe = "承运市";
        var defaultTagger = new JiebaNet.Segmenter.PosSeg.PosSegmenter();
        foreach (var pair in defaultTagger.Cut(probe))
        {
            Console.WriteLine(string.Concat(pair.Word, ":", pair.Flag));
        }
    }
 // Prints the part-of-speech tagging of a fixed sample sentence
 // as space-separated "word/flag" pairs.
 public void TestCutNames()
 {
     var tagger = new PosSegmenter();

     var rendered = from pair in tagger.Cut("吉林的省会是长春")
                    select string.Format("{0}/{1}", pair.Word, pair.Flag);

     Console.WriteLine(string.Join(" ", rendered));
 }
Exemple #4
0
        /// <summary>
        /// Demonstrates part-of-speech segmentation: cuts a fixed sentence and prints
        /// the tokens as space-separated "word/flag" pairs.
        /// </summary>
        public void PosCutDemo()
        {
            var tagger = new PosSegmenter();
            var sentence = "一团硕大无朋的高能离子云,在遥远而神秘的太空中迅疾地飘移";

            // Render each token lazily, then join everything into one output line.
            var rendered = tagger.Cut(sentence)
                                 .Select(pair => string.Format("{0}/{1}", pair.Word, pair.Flag));
            var outputLine = string.Join(" ", rendered);

            Console.WriteLine(outputLine);
        }
Exemple #5
0
    /// <summary>
    /// Writes every token of <paramref name="OrgString"/> to the console as "word:flag",
    /// one token per line.
    /// </summary>
    /// <param name="OrgString">Text to run through the part-of-speech segmenter.</param>
    public static void ConsoleWritePos(string OrgString)
    {
        var tagger = new JiebaNet.Segmenter.PosSeg.PosSegmenter();

        foreach (var pair in tagger.Cut(OrgString))
        {
            Console.WriteLine(string.Concat(pair.Word, ":", pair.Flag));
        }
    }
        /// <summary>
        /// Creates the extractor: sets <c>Span</c> to 5, builds a <c>JiebaSegmenter</c> and a
        /// <c>PosSegmenter</c> on top of it, then initializes the stop-word set.
        /// </summary>
        public TextRankExtractor()
        {
            Span = 5;

            Segmenter = new JiebaSegmenter();
            // The POS segmenter is constructed over the segmenter instance assigned just above.
            PosSegmenter = new PosSegmenter(Segmenter);
            // NOTE(review): presumably fills StopWords from the configured file — confirm in SetStopWords.
            SetStopWords(ConfigManager.StopWordsFile);
            // When StopWords is still empty after the call above, seed it with DefaultStopWords.
            if (StopWords.IsEmpty())
            {
                StopWords.UnionWith(DefaultStopWords);
            }
        }
Exemple #7
0
        /// <summary>
        /// Creates the extractor: builds a <c>JiebaSegmenter</c> and a <c>PosSegmenter</c> on top of it,
        /// initializes the stop-word set, and copies the IDF data exposed by an <c>IdfLoader</c>.
        /// </summary>
        public TfidfExtractor()
        {
            Segmenter = new JiebaSegmenter();
            // The POS segmenter is constructed over the segmenter instance assigned just above.
            PosSegmenter = new PosSegmenter(Segmenter);
            // NOTE(review): presumably fills StopWords from the configured file — confirm in SetStopWords.
            SetStopWords(ConfigManager.StopWordsFile);
            // When StopWords is still empty after the call above, seed it with DefaultStopWords.
            if (StopWords.IsEmpty())
            {
                StopWords.UnionWith(DefaultStopWords);
            }

            // The loader is constructed from DefaultIdfFile; its IdfFreq and MedianIdf are cached below.
            Loader = new IdfLoader(DefaultIdfFile);

            IdfFreq = Loader.IdfFreq;
            MedianIdf = Loader.MedianIdf;
        }
Exemple #8
0
    /// <summary>
    /// Removes English: drops every token whose part-of-speech flag equals
    /// <c>LTPTrainingNER.英语</c> and concatenates the remaining tokens.
    /// </summary>
    /// <param name="OrgString">Text to segment and filter.</param>
    /// <returns>The kept tokens joined in their original order, with no separator.</returns>
    public static string TrimEnglish(string OrgString)
    {
        var pos = new JiebaNet.Segmenter.PosSeg.PosSegmenter();

        // A builder avoids re-copying the accumulated text on every appended token,
        // which is what repeated string concatenation in a loop does.
        var kept = new System.Text.StringBuilder();

        foreach (var word in pos.Cut(OrgString))
        {
            if (word.Flag != LTPTrainingNER.英语)
            {
                kept.Append(word.Word);
            }
        }
        return kept.ToString();
    }
Exemple #9
0
    /// <summary>
    /// Drops every token whose part-of-speech flag equals <c>英语</c> (the English tag)
    /// and concatenates the remaining tokens.
    /// </summary>
    /// <param name="OrgString">Text to segment and filter.</param>
    /// <returns>The kept tokens joined in their original order, with no separator.</returns>
    public static string TrimEnglish(string OrgString)
    {
        var pos = new JiebaNet.Segmenter.PosSeg.PosSegmenter();

        // A builder avoids re-copying the accumulated text on every appended token,
        // which is what repeated string concatenation in a loop does.
        var kept = new System.Text.StringBuilder();

        foreach (var word in pos.Cut(OrgString))
        {
            // Keep everything except tokens tagged as English.
            // (The previous comment here described adverb/particle removal, which this method does not do.)
            if (word.Flag != 英语)
            {
                kept.Append(word.Word);
            }
        }
        return kept.ToString();
    }
Exemple #10
0
    /// <summary>
    /// Strips the leading run of tokens whose part-of-speech flag equals
    /// <c>LTPTrainingNER.助词</c>; once a token with any other flag is seen, that token
    /// and every later one (whatever its flag) are kept.
    /// </summary>
    /// <param name="OrgString">Text to segment and trim.</param>
    /// <returns>The kept tokens joined in their original order, with no separator.</returns>
    public static string TrimLeadingUL(string OrgString)
    {
        var pos = new JiebaNet.Segmenter.PosSeg.PosSegmenter();

        // A builder avoids re-copying the accumulated text on every appended token,
        // which is what repeated string concatenation in a loop does.
        var kept = new System.Text.StringBuilder();
        var hasStarted = false;

        foreach (var word in pos.Cut(OrgString))
        {
            // Only tokens before the first non-matching one are candidates for removal.
            if (hasStarted || (word.Flag != LTPTrainingNER.助词))
            {
                hasStarted = true;
                kept.Append(word.Word);
            }
        }
        return kept.ToString();
    }
Exemple #11
0
    /// <summary>
    /// Builds the "main word" sentence: drops every token whose part-of-speech flag equals
    /// <c>LTPTrainingNER.助词</c> or <c>LTPTrainingNER.副词</c> and concatenates the rest.
    /// </summary>
    /// <param name="OrgString">Text to segment and filter.</param>
    /// <returns>The kept tokens joined in their original order, with no separator.</returns>
    public static string GetMainWordSentence(string OrgString)
    {
        var pos = new JiebaNet.Segmenter.PosSeg.PosSegmenter();

        // A builder avoids re-copying the accumulated text on every appended token,
        // which is what repeated string concatenation in a loop does.
        var kept = new System.Text.StringBuilder();

        foreach (var word in pos.Cut(OrgString))
        {
            // The sentence that remains after removing adverbs and the particle "了".
            if (word.Flag != LTPTrainingNER.助词 &&
                word.Flag != LTPTrainingNER.副词)
            {
                kept.Append(word.Word);
            }
        }
        return kept.ToString();
    }
Exemple #12
0
    /// <summary>
    /// Builds the "main word" sentence: drops every token whose part-of-speech flag equals
    /// <c>EntityWordAnlayzeTool.助词</c> or <c>EntityWordAnlayzeTool.副词</c> and concatenates the rest.
    /// </summary>
    /// <param name="OrgString">Text to segment and filter.</param>
    /// <returns>The kept tokens joined in their original order, with no separator.</returns>
    public static string GetMainWordSentence(string OrgString)
    {
        var pos = new JiebaNet.Segmenter.PosSeg.PosSegmenter();

        // A builder avoids re-copying the accumulated text on every appended token,
        // which is what repeated string concatenation in a loop does.
        var kept = new System.Text.StringBuilder();

        foreach (var word in pos.Cut(OrgString))
        {
            // The sentence that remains after removing adverbs and the particle "了".
            if (word.Flag != EntityWordAnlayzeTool.助词 &&
                word.Flag != EntityWordAnlayzeTool.副词)
            {
                kept.Append(word.Word);
            }
        }
        return kept.ToString();
    }
 // Runs the shared cut harness with its boolean argument set to false
 // (presumably the HMM switch) against the no-HMM case file.
 public void TestCutWithouHmm()
 {
     const string caseFile = @"Cases\pos_cut_no_hmm.txt";

     var tagger = new PosSegmenter(new JiebaSegmenter());
     TestCutFunction(tagger.Cut, false, caseFile);
 }
 // Runs the shared cut harness with its boolean argument set to true
 // (presumably the HMM switch) against the HMM case file.
 public void TestCut()
 {
     const string caseFile = @"Cases\pos_cut_hmm.txt";

     var tagger = new PosSegmenter(new JiebaSegmenter());
     TestCutFunction(tagger.Cut, true, caseFile);
 }
Exemple #15
0
        /// <summary>
        /// Segments every line of the file named by <c>options.FileName</c> and writes the result
        /// to the console, one output line per input line, with the tokens of each line joined
        /// by the configured delimiter.
        /// </summary>
        /// <param name="options">
        /// Supplies <c>FileName</c>, <c>POS</c> (emit "word/flag" pairs instead of plain words),
        /// <c>CutAll</c>, <c>NoHmm</c> and <c>Delimiter</c> (defaults to "/ " when null or whitespace).
        /// </param>
        private static void SegmentFile(Options options)
        {
            var fileName = Path.GetFullPath(options.FileName);
            var lines = File.ReadAllLines(fileName);

            var segmenter = new JiebaSegmenter();
            Func<string, bool, bool, IEnumerable<string>> cutMethod;
            if (options.POS)
            {
                // Build the POS segmenter once; it was previously re-created for every input line.
                var posSeg = new PosSegmenter(segmenter);

                // cutAll is not used by the POS cut; it is accepted only to match the delegate shape.
                cutMethod = (text, cutAll, hmm) =>
                    posSeg.Cut(text, hmm).Select(token => string.Format("{0}/{1}", token.Word, token.Flag));
            }
            else
            {
                cutMethod = segmenter.Cut;
            }

            var delimiter = string.IsNullOrWhiteSpace(options.Delimiter) ? "/ " : options.Delimiter;

            // The third delegate argument is "hmm" (use the HMM), whereas the option is "NoHmm";
            // passing the option through unchanged inverted the user's choice.
            // NOTE(review): assumes Options.NoHmm == true means "disable HMM" — confirm against Options.
            var useHmm = !options.NoHmm;

            var result = new List<string>(lines.Length);
            foreach (var line in lines)
            {
                result.Add(string.Join(delimiter, cutMethod(line, options.CutAll, useHmm)));
            }
            Console.WriteLine(string.Join(Environment.NewLine, result));
        }