Exemple #1
0
    /// <summary>
    /// Collects every date found in the sentences below <paramref name="root"/>.
    /// </summary>
    /// <param name="root">
    /// Root node whose <c>Children</c> are paragraphs; each paragraph's <c>Children</c> are the sentences scanned.
    /// </param>
    /// <returns>
    /// One entry per parsed date: <c>Loc</c> is the sentence's <c>PositionId</c>, <c>Type</c> is "日期",
    /// and <c>Value</c> is the result of <c>DateUtility.GetWorkDay</c> for the parsed year, month and day.
    /// </returns>
    public static List <LocAndValue <DateTime> > LocateDate(HTMLEngine.MyRootHtmlNode root)
    {
        var list = new List <LocAndValue <DateTime> >();

        foreach (var paragrah in root.Children)
        {
            foreach (var sentence in paragrah.Children)
            {
                // Normalise the sentence before matching: run it through ConvertUpperToLower and drop all spaces.
                var OrgString = DateUtility.ConvertUpperToLower(sentence.Content).Replace(" ", String.Empty);

                foreach (var strDate in DateUtility.GetDate(OrgString))
                {
                    var DateNumberList = RegularTool.GetNumberList(strDate);

                    // A date needs year, month and day; skip matches that do not provide all three
                    // instead of failing on the index access below.
                    if (DateNumberList.Count < 3)
                    {
                        continue;
                    }

                    if (int.TryParse(DateNumberList[0], out int year) &&
                        int.TryParse(DateNumberList[1], out int month) &&
                        int.TryParse(DateNumberList[2], out int day))
                    {
                        list.Add(new LocAndValue <DateTime>()
                        {
                            Loc   = sentence.PositionId,
                            Type  = "日期",
                            Value = DateUtility.GetWorkDay(year, month, day)
                        });
                    }
                }
            }
        }
        return(list);
    }
Exemple #2
0
        /// <summary>
        /// Final extraction run: processes the contract, stock-change and reorganization data sets in turn,
        /// writing one result file per data set under /home/118_4/submit.
        /// </summary>
        /// <param name="args">Command-line arguments; not used by this method.</param>
        static void Main_FINAL(string[] args)
        {
            Logger = new StreamWriter("Log.log");
            // Share the log writer with the entity-property component.
            EntityProperty.Logger = Logger;
            // Register the code-pages encoding provider process-wide.
            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
            // Place-name correction dictionary for the Jieba word segmenter.
            PosNS.ImportNS("Resources" + Path.DirectorySeparatorChar + "ns.dict");

            if (!Directory.Exists("/home/118_4/submit"))
            {
                Directory.CreateDirectory("/home/118_4/submit");
            }
            Console.WriteLine("Start To Extract Info Contract TRAIN");
            // NOTE(review): the result writers created in this method are never closed here —
            // confirm that Run disposes the writer it is given.
            StreamWriter ResultCSV = new StreamWriter(@"/home/118_4/submit/hetong.txt", false, utf8WithoutBom);

            Run <Contract>(@"/home/data/hetong", @"/home/118_4/temp/hetong", ResultCSV);
            Console.WriteLine("Complete Extract Info Contract");

            Console.WriteLine("Start To Extract Info StockChange TRAIN");
            Console.WriteLine("读取增减持信息:" + "/home/data/zengjianchi/zengjianchi_public.csv");

            // Load the announcement date of every stock-change document into PublishTime.
            // The using block releases the file even when a row fails to parse.
            using (var sr = new StreamReader("/home/data/zengjianchi/zengjianchi_public.csv"))
            {
                sr.ReadLine();  // Skip header
                while (!sr.EndOfStream)
                {
                    // Column 0 holds the date text, column 1 the key stored in PublishTime.
                    var line        = sr.ReadLine().Split(",");
                    var numbers     = RegularTool.GetNumberList(line[0]);
                    int year        = int.Parse(numbers[0]);
                    int month       = int.Parse(numbers[1]);
                    int day         = int.Parse(numbers[2]);
                    var AnnouceDate = new DateTime(year, month, day);
                    PublishTime.Add(line[1], AnnouceDate);
                }
            }
            Console.WriteLine("读取增减持信息:" + PublishTime.Count);

            ResultCSV = new StreamWriter(@"/home/118_4/submit/zengjianchi.txt", false, utf8WithoutBom);
            Run <StockChange>(@"/home/data/zengjianchi", @"/home/118_4/temp/zengjianchi", ResultCSV);
            Console.WriteLine("Complete Extract Info StockChange");

            Console.WriteLine("Start To Extract Info Reorganization TRAIN");
            // Hard-coded list used in place of training results.
            // NOTE(review): "现金流折现法" appears twice in the list — confirm whether that is intentional.
            Console.WriteLine("加载替代训练结果");
            ReOrganizationTraning.EvaluateMethodList = new string[] {
                "收益法", "资产基础法", "市场法", "市场比较法", "估值法", "成本法", "现金流折现法", "现金流折现法", "剩余法",
                "内含价值调整法", "可比公司市净率法", "重置成本法", "收益现值法", "基础资产法", "假设清偿法",
                "成本逼近法", "单项资产加和法", "成本加和法", "基准地价修正法", "收益还原法", "现金流量法", "单项资产加总法", "折现现金流量法", "基准地价系数修正法"
            }.ToList();
            Console.WriteLine("加载替代训练结果:" + ReOrganizationTraning.EvaluateMethodList.Count);
            ResultCSV = new StreamWriter(@"/home/118_4/submit/chongzu.txt", false, utf8WithoutBom);
            Run <Reorganization>(@"/home/data/chongzu", "", ResultCSV);
            Console.WriteLine("Complete Extract Info Reorganization");

            Logger.Close();
        }
Exemple #3
0
    /// <summary>
    /// Collects every date range found in the sentences below <paramref name="root"/>.
    /// </summary>
    /// <param name="root">
    /// Root node whose <c>Children</c> are paragraphs; each paragraph's <c>Children</c> are the sentences scanned.
    /// </param>
    /// <returns>
    /// One entry per range whose text yields 4, 5 or 6 numbers: <c>Loc</c> is the sentence's
    /// <c>PositionId</c>, <c>Type</c> is "日期范围" and <c>Value</c> is the (start, end) pair.
    /// A side whose components are not all integers is left as <c>default(DateTime)</c>.
    /// </returns>
    public static List <LocAndValue <(DateTime StartDate, DateTime EndDate)> > LocateDateRange(HTMLEngine.MyRootHtmlNode root)
    {
        // Converts three textual components through GetWorkDay; yields the default DateTime
        // when any component is not an integer.
        DateTime ToWorkDay(string y, string m, string d)
        {
            if (int.TryParse(y, out int yy) && int.TryParse(m, out int mm) && int.TryParse(d, out int dd))
            {
                return DateUtility.GetWorkDay(yy, mm, dd);
            }
            return new DateTime();
        }

        var results = new List <LocAndValue <(DateTime StartDate, DateTime EndDate)> >();

        foreach (var para in root.Children)
        {
            foreach (var sent in para.Children)
            {
                var text = DateUtility.ConvertUpperToLower(sent.Content).Replace(" ", String.Empty);

                foreach (var rangeText in DateUtility.GetRangeDate(text))
                {
                    var nums  = RegularTool.GetNumberList(rangeText);
                    int count = nums.Count;

                    // Only three layouts are handled:
                    //   6 numbers: y m d - y m d
                    //   5 numbers: y m d -   m d   (end year taken from the start)
                    //   4 numbers: y m d -     d   (end year and month taken from the start)
                    if (count < 4 || count > 6)
                    {
                        continue;
                    }

                    // The start is always built from the first three numbers.
                    DateTime start = ToWorkDay(nums[0], nums[1], nums[2]);

                    string endYear  = (count == 6) ? nums[3] : nums[0];
                    string endMonth = (count == 4) ? nums[1] : nums[count - 2];
                    string endDay   = nums[count - 1];
                    DateTime end    = ToWorkDay(endYear, endMonth, endDay);

                    results.Add(new LocAndValue <(DateTime StartDate, DateTime EndDate)>()
                    {
                        Loc   = sent.PositionId,
                        Type  = "日期范围",
                        Value = (start, end)
                    });
                }
            }
        }
        return(results);
    }
Exemple #4
0
    /// <summary>
    /// Splits <paramref name="OrgString"/> into individual company or person names.
    /// </summary>
    /// <param name="OrgString">Delimited list of names; spaces are removed before splitting on <c>Utility.SplitChar</c>.</param>
    /// <returns>
    /// The names accepted by <c>IsCompanyOrPerson</c>. An empty list is returned when the input is null or empty,
    /// or as soon as any item cannot be resolved — in that case nothing collected so far is returned.
    /// </returns>
    public List <String> GetCompanys(string OrgString)
    {
        var Rtn = new List <String>();

        if (String.IsNullOrEmpty(OrgString))
        {
            return(Rtn);
        }

        // Cuts a candidate name at "等" and then at "自然人" when present.
        string StripQualifiers(string word)
        {
            if (word.Contains("等"))
            {
                word = Utility.GetStringBefore(word, "等");
            }
            if (word.Contains("自然人"))
            {
                word = Utility.GetStringBefore(word, "自然人");
            }
            return word;
        }

        OrgString = OrgString.Replace(" ", "");
        var Items = OrgString.Split(Utility.SplitChar);

        // With more than three items, drop a trailing "等" from the last one.
        var LastIdx = Items.Length - 1;
        if (Items.Length > 3 && Items[LastIdx].EndsWith("等"))
        {
            Items[LastIdx] = Items[LastIdx].Substring(0, Items[LastIdx].Length - 1);
        }

        foreach (var SingleItem in Items)
        {
            var ExtractSingleItem = SingleItem;
            if (ExtractSingleItem.Equals("交易对方"))
            {
                continue;
            }

            // An item holding exactly one number together with "名" is cut off just before that number.
            var number = RegularTool.GetNumberList(ExtractSingleItem);
            if (number.Count == 1 && ExtractSingleItem.Contains("名"))
            {
                ExtractSingleItem = Utility.GetStringBefore(ExtractSingleItem, number[0]);
            }

            if (IsCompanyOrPerson(ExtractSingleItem))
            {
                Rtn.Add(ExtractSingleItem);
                continue;
            }

            // The item may join two names with "和" or "及"; try splitting at the first one found
            // ("和" takes precedence over "及").
            var AndIdx = ExtractSingleItem.IndexOf("和");
            if (AndIdx == -1)
            {
                AndIdx = ExtractSingleItem.IndexOf("及");
            }

            // The connector must sit strictly inside the item so that both halves are non-empty.
            if (AndIdx == -1 || AndIdx == 0 || AndIdx == (ExtractSingleItem.Length - 1))
            {
                Console.WriteLine("无法匹配任何公司或者自然人:" + ExtractSingleItem);
                return(new List <String>());
            }

            var FirstWord  = StripQualifiers(ExtractSingleItem.Substring(0, AndIdx));
            var Secondword = StripQualifiers(ExtractSingleItem.Substring(AndIdx + 1));

            if (IsCompanyOrPerson(FirstWord) && IsCompanyOrPerson(Secondword))
            {
                Rtn.Add(FirstWord);
                Rtn.Add(Secondword);
            }
            else
            {
                Console.WriteLine("无法匹配任何公司或者自然人:" + FirstWord + "|" + Secondword);
                return(new List <String>());
            }
        }
        return(Rtn);
    }
Exemple #5
0
    /// <summary>
    /// Normalises a date expression to <c>yyyy-MM-dd</c>.
    /// </summary>
    /// <param name="orgString">Date text; it is trimmed and ASCII commas are removed before parsing.</param>
    /// <param name="keyword">Not used by this method.</param>
    /// <returns>
    /// The formatted date when one of the supported layouts yields a valid calendar date; otherwise the
    /// cleaned input text. Null or empty input is returned as-is.
    /// </returns>
    /// <remarks>
    /// Layouts are tried in this order: six numbers (the last three form the date), five numbers with
    /// 年/月/日 present (year from the first number, month and day from the last two), "Y年M月D[日]",
    /// and finally "Y/M/D", "Y.M.D" or "Y-M-D". Components that do not form a real calendar date are
    /// rejected rather than passed to the <see cref="DateTime"/> constructor, which would throw.
    /// </remarks>
    public static string NormailizeDate(string orgString, string keyword = "")
    {
        if (String.IsNullOrEmpty(orgString))
        {
            return(orgString);
        }

        // Parses and validates the three components; succeeds only for a real calendar date.
        bool TryFormat(string y, string m, string d, out string formatted)
        {
            formatted = null;
            if (!int.TryParse(y, out int yy) || !int.TryParse(m, out int mm) || !int.TryParse(d, out int dd))
            {
                return false;
            }
            if (yy < 1 || yy > 9999 || mm < 1 || mm > 12)
            {
                return false;
            }
            if (dd < 1 || dd > DateTime.DaysInMonth(yy, mm))
            {
                return false;
            }
            formatted = new DateTime(yy, mm, dd).ToString("yyyy-MM-dd");
            return true;
        }

        orgString = orgString.Trim().Replace(",", "");
        var    NumberList = RegularTool.GetNumberList(orgString);
        string result;

        // Six numbers: use the last three.
        if (NumberList.Count == 6 &&
            TryFormat(NumberList[3], NumberList[4], NumberList[5], out result))
        {
            return(result);
        }

        // Five numbers with 年/月/日 present: year from the first number, month and day from the last two.
        if (NumberList.Count == 5 &&
            orgString.IndexOf("年") != -1 && orgString.IndexOf("月") != -1 && orgString.IndexOf("日") != -1 &&
            TryFormat(NumberList[0], NumberList[3], NumberList[4], out result))
        {
            return(result);
        }

        // Single date written as Y年M月D[日]; a trailing "日" is optional because it is stripped below.
        if (orgString.Contains("年") && orgString.Contains("月"))
        {
            String Year  = Utility.GetStringBefore(orgString, "年");
            String Month = RegularTool.GetValueBetweenString(orgString, "年", "月");
            String Day   = Utility.GetStringAfter(orgString, "月").Replace("日", "");
            if (TryFormat(Year, Month, Day, out result))
            {
                return(result);
            }
        }

        // Separator-delimited date: Y/M/D, Y.M.D or Y-M-D.
        var SplitChar = new string[] { "/", ".", "-" };

        foreach (var sc in SplitChar)
        {
            var SplitArray = orgString.Split(sc);
            if (SplitArray.Length == 3 &&
                TryFormat(SplitArray[0], SplitArray[1], SplitArray[2], out result))
            {
                return(result);
            }
        }

        return(orgString);
    }
Exemple #6
0
    /// <summary>
    /// Extracts the end date from a date or date-range expression and formats it.
    /// </summary>
    /// <param name="orgString">Date text; spaces and ASCII commas are removed before parsing.</param>
    /// <param name="BaseDate">
    /// Reference date. It supplies the year for "M月D日" input, and is returned directly when a
    /// "Y年M月" input names the same year and month. <see cref="DateTime.MinValue"/> means "not available".
    /// </param>
    /// <param name="format">Format string applied to the resulting date.</param>
    /// <returns>
    /// The formatted end date when a supported layout matches; otherwise the cleaned input text.
    /// Dates are obtained through <c>DateUtility.GetWorkDay</c>.
    /// </returns>
    public static string GetRangeDateEndDate(string orgString, DateTime BaseDate, string format = "yyyy-MM-dd")
    {
        // Parses the three components and formats the GetWorkDay result; fails when any is not an integer.
        bool TryFormatWorkDay(string y, string m, string d, out string formatted)
        {
            formatted = null;
            if (int.TryParse(y, out int yy) && int.TryParse(m, out int mm) && int.TryParse(d, out int dd))
            {
                formatted = DateUtility.GetWorkDay(yy, mm, dd).ToString(format);
                return true;
            }
            return false;
        }

        orgString = orgString.Replace(" ", "");
        orgString = orgString.Trim().Replace(",", String.Empty);

        var    NumberList = RegularTool.GetNumberList(orgString);
        bool   hasYear    = orgString.IndexOf("年") != -1;
        bool   hasMonth   = orgString.IndexOf("月") != -1;
        bool   hasDay     = orgString.IndexOf("日") != -1;
        string result;

        // yyyy年MM月dd日 - yyyy年MM月dd日
        if (NumberList.Count == 6 &&
            TryFormatWorkDay(NumberList[3], NumberList[4], NumberList[5], out result))
        {
            return(result);
        }

        // yyyy年MM月dd日 - MM月dd日 (end year taken from the start)
        if (NumberList.Count == 5 && hasYear && hasMonth && hasDay &&
            TryFormatWorkDay(NumberList[0], NumberList[3], NumberList[4], out result))
        {
            return(result);
        }

        // yyyy年MM月dd日 - dd日 (end year and month taken from the start)
        if (NumberList.Count == 4 && hasYear && hasMonth && hasDay &&
            TryFormatWorkDay(NumberList[0], NumberList[1], NumberList[3], out result))
        {
            return(result);
        }

        if (NumberList.Count == 2)
        {
            // MM月dd日: the year comes from BaseDate.
            if (hasMonth && hasDay)
            {
                // DateTime.Year is never 0, so an unset base date is detected via MinValue.
                if (BaseDate == DateTime.MinValue)
                {
                    return(orgString);
                }
                if (int.TryParse(NumberList[0], out int month) && int.TryParse(NumberList[1], out int day))
                {
                    return(DateUtility.GetWorkDay(BaseDate.Year, month, day).ToString(format));
                }
            }

            // yyyy年MM月 with no day.
            if (hasYear && hasMonth)
            {
                /*
                 * Mainly used for "shareholder increase/decrease" announcements. A few of them publish only
                 * the month for the "change end date" field. The handling rule is:
                 *   - if that month is earlier than the announcement month, the end date is the last
                 *     trading day of that month;
                 *   - if that month is the announcement month, the end date is the announcement date.
                 */
                if (int.TryParse(NumberList[0], out int year) && int.TryParse(NumberList[1], out int month))
                {
                    if (year == BaseDate.Year && month == BaseDate.Month)
                    {
                        return(BaseDate.ToString(format));
                    }
                    // Day -1 is passed to GetWorkDay for the month-only case
                    // (presumably "last trading day of the month" — confirm in DateUtility).
                    return(DateUtility.GetWorkDay(year, month, -1).ToString(format));
                }
            }

            // Two numbers with "月" only: accept when the first number is a four-digit year.
            if (hasMonth)
            {
                String Year = NumberList[0];
                if (Year.Length != 4)
                {
                    return(orgString);
                }
                if (int.TryParse(Year, out int year) && int.TryParse(NumberList[1], out int month))
                {
                    return(DateUtility.GetWorkDay(year, month, -1).ToString(format));
                }
            }
        }

        // Single date written as Y年M月D[日]; a trailing "日" is optional because it is stripped below.
        if (orgString.Contains("年") && orgString.Contains("月"))
        {
            String Year  = Utility.GetStringBefore(orgString, "年");
            String Month = RegularTool.GetValueBetweenString(orgString, "年", "月");
            String Day   = Utility.GetStringAfter(orgString, "月").Replace("日", String.Empty);
            if (TryFormatWorkDay(Year, Month, Day, out result))
            {
                return(result);
            }
        }

        // Compact yyyyMMdd.
        if (RegularTool.IsInt(orgString) && orgString.Length == 8)
        {
            if (int.TryParse(orgString.Substring(0, 4), out int year) &&
                int.TryParse(orgString.Substring(4, 2), out int month) &&
                int.TryParse(orgString.Substring(6, 2), out int day))
            {
                // Only treat the digits as a date when the year is plausible; the original test was
                // inverted and accepted only years outside 1900-2100.
                if (year >= 1900 && year <= 2100)
                {
                    return(DateUtility.GetWorkDay(year, month, day).ToString(format));
                }
            }
        }

        // Separator-delimited date: Y/M/D, Y.M.D or Y-M-D.
        var SplitChar = new string[] { "/", ".", "-" };

        foreach (var sc in SplitChar)
        {
            var SplitArray = orgString.Split(sc);
            if (SplitArray.Length == 3 &&
                TryFormatWorkDay(SplitArray[0], SplitArray[1], SplitArray[2], out result))
            {
                return(result);
            }
        }
        return(orgString);
    }
Exemple #7
0
    /// <summary>
    /// Normalises a lock-up period expression to a number of months, returned as text.
    /// </summary>
    /// <param name="orgString">Raw lock-up period text; spaces are removed before matching.</param>
    /// <param name="TitleWord">Not used by this method.</param>
    /// <returns>
    /// The month count as a string when the text matches a recognised form; otherwise the
    /// space-stripped, trimmed input.
    /// </returns>
    static string NormalizerFreezeYear(string orgString, string TitleWord)
    {
        orgString = orgString.Replace(" ", String.Empty);

        // Fixed spellings that map straight to a month count.
        if (orgString.Equals("十二") || orgString.Equals("十二个月") || orgString.Equals("1年"))
        {
            return("12");
        }
        if (orgString.Equals("3年"))
        {
            return("36");
        }

        int months;

        // A number following "日起".
        var candidate = Utility.GetStringAfter(orgString, "日起");
        if (int.TryParse(candidate, out months))
        {
            return(months.ToString());
        }

        // A number preceding "个月".
        candidate = Utility.GetStringBefore(orgString, "个月");
        if (int.TryParse(candidate, out months))
        {
            return(months.ToString());
        }

        // Text between "日起" and "个月": either the numeral "十二" or digits.
        candidate = RegularTool.GetValueBetweenString(orgString, "日起", "个月");
        // Constant-first comparison so a null helper result does not throw.
        if ("十二".Equals(candidate))
        {
            return("12");
        }
        if (int.TryParse(candidate, out months))
        {
            return(months.ToString());
        }

        // Explicit range such as "自2007年2月3日至2010年2月2日止": derive the period from the year difference.
        var numbers = RegularTool.GetNumberList(orgString);

        if (numbers.Count == 6)
        {
            if (int.TryParse(numbers[3], out int endYear) && int.TryParse(numbers[0], out int startYear))
            {
                if (endYear - startYear == 1)
                {
                    return("12");
                }
                if (endYear - startYear == 3)
                {
                    return("36");
                }
                // Any other span is logged for manual confirmation; logging is skipped in multi-thread mode.
                if (!Program.IsMultiThreadMode)
                {
                    Program.Logger.WriteLine("限售期确认:" + orgString);
                }
            }
        }

        return(orgString.Trim());
    }