/// <summary>
/// Decides whether a segmented token should be filtered out.
/// </summary>
/// <param name="a">Token to inspect; only its <c>cWord</c> and <c>cType</c> members are read.</param>
/// <returns>
/// <c>true</c> when the token should be dropped: it has no word, its word is in
/// <c>CnStopWord</c>, or its type tag contains none of the letters n, v, i, j, l, s.
/// <c>false</c> when the token should be kept, which includes a token with a word but no type tag.
/// </returns>
public static bool ChineseFilterIt(XunLongCNST a)
{
    // A token without a word carries nothing worth keeping.
    if (a.cWord == null)
    {
        return true;
    }

    // Stop words are always filtered out.
    if (CnStopWord.Contains(a.cWord))
    {
        return true;
    }

    // A word without a type tag is kept as-is.
    string tag = a.cType;
    if (tag == null)
    {
        return false;
    }

    // Keep the token when its tag mentions at least one of the accepted marker letters.
    char[] keptMarkers = { 'n', 'v', 'i', 'j', 'l', 's' };
    bool hasKeptMarker = tag.IndexOfAny(keptMarkers) >= 0;
    return !hasKeptMarker;
}
/// <summary>
/// Chinese word segmentation entry point. Reads all text from the reader, lower-cases it,
/// strips a few separator runs and isolated short numbers, obtains a space-separated
/// <c>word/type</c> string (either the text itself when it already looks space-delimited,
/// or the output of <c>xwordOne.GetXword</c>), and maps each token back to its position
/// in the lower-cased input.
/// </summary>
/// <param name="_incn">Reader supplying the text; it is read to the end.</param>
/// <returns>
/// <c>null</c> when the cleaned text is empty. Otherwise an array with one slot per
/// space-separated token; slots are filled from index 0 for every token that could be
/// located in the input, and any remaining slots are left at their default value.
/// When the segmented text contains no space, a single entry covering the whole input is returned.
/// </returns>
public static XunLongCNST[] ChineseIntface(TextReader _incn)
{
    // All offsets in the result refer to this lower-cased, untrimmed text.
    string aOLD = _incn.ReadToEnd().ToLower();

    string a = aOLD.Trim();
    a = a.Replace("---", " ");
    a = a.Replace("===", " ");
    a = a.Replace("--", "-");
    a = a.Replace("==", "=");

    // Remove short numbers that stand alone between spaces. The replacements are applied
    // one after another in this exact order, so earlier ones can expose matches for later ones.
    // NOTE(review): " 10 " is absent from the list - confirm whether that is intentional.
    string[] isolatedNumbers =
    {
        " 1 ", " 2 ", " 3 ", " 4 ", " 5 ", " 6 ", " 7 ", " 8 ", " 9 ", " 0 ", " 00 ",
        " 11 ", " 12 ", " 13 ", " 14 ", " 15 ", " 16 ", " 17 ", " 18 ", " 19 ", " 20 ",
        " 21 ", " 22 ", " 23 ", " 24 ", " 25 ", " 26 ", " 27 ", " 28 ", " 29 ", " 30 ", " 31 "
    };
    foreach (string number in isolatedNumbers)
    {
        a = a.Replace(number, " ");
    }

    // NOTE(review): as written these two calls replace a space with a space (a no-op);
    // they may have been meant to collapse repeated spaces - confirm against the original file.
    a = a.Replace(" ", " ");
    a = a.Replace(" ", " ");

    if (a.Length == 0)
    {
        return null;
    }

    const int chunkSize = 512;
    string dat;
    if (a.IndexOf(' ') > 0 && a.Split(' ').Length >= a.Length / 5)
    {
        // The text already has at least one space-separated piece per five characters,
        // so it is used as the token string directly instead of calling the segmenter.
        dat = a;
    }
    else if (a.Length < chunkSize)
    {
        dat = xwordOne.GetXword(a);
    }
    else
    {
        // Feed the segmenter fixed-size windows; the last window may be shorter.
        // Stepping by window start guarantees every character is covered, including
        // when the length is an exact multiple of the window size.
        System.Text.StringBuilder segmented = new System.Text.StringBuilder();
        for (int offset = 0; offset < a.Length; offset += chunkSize)
        {
            int length = System.Math.Min(chunkSize, a.Length - offset);
            if (offset > 0)
            {
                segmented.Append(' ');
            }
            segmented.Append(xwordOne.GetXword(a.Substring(offset, length)));
        }
        dat = segmented.ToString();
    }

    dat = dat.Trim();

    XunLongCNST[] x;
    if (dat.IndexOf(' ') == -1)
    {
        // Single token: report the whole input as one entry, typed by the tag after '/'
        // when a non-empty one is present, otherwise "n".
        string wholeType = "n";
        if (dat.IndexOf('/') != -1)
        {
            string[] newtmp = dat.Split('/');
            if (newtmp[1].Length != 0)
            {
                wholeType = newtmp[1];
            }
        }

        x = new XunLongCNST[1];
        x[0].cStart = 0;
        x[0].cLength = aOLD.Length;
        x[0].cType = wholeType;
        x[0].cWord = aOLD;
        return x;
    }

    string[] tmps = dat.Split(' ');
    x = new XunLongCNST[tmps.Length];
    int pX = 0;   // next free slot in x
    int pNow = 0; // position in aOLD from which the next token is searched
    for (int i = 0; i < tmps.Length; i++)
    {
        string token = tmps[i];
        if (token.Length == 0 || pNow > aOLD.Length)
        {
            continue;
        }

        // Split "word/type"; the type defaults to "n" unless there is exactly one '/'
        // followed by a non-empty tag.
        string word = token;
        string type = "n";
        if (token.IndexOf('/') != -1)
        {
            string[] parts = token.Split('/');
            word = parts[0];
            if (parts.Length == 2 && parts[1].Length != 0)
            {
                type = parts[1];
            }
        }

        int nn = aOLD.IndexOf(word, pNow);
        if (nn > -1)
        {
            // The search resumes one character after this match's start, not after its end.
            pNow = nn + 1;
            x[pX].cWord = word;
            x[pX].cType = type;
            x[pX].cStart = nn;
            x[pX].cLength = word.Length;
            pX = pX + 1;
        }
    }

    return x;
}
/// <summary>
/// Legacy Chinese word segmentation. Reads all text from the reader, lower-cases it,
/// splits it on a fixed set of delimiter characters, passes pieces longer than two
/// characters through <c>xwordOne.GetXword</c>, and then tries to locate each resulting
/// token in the text to record its word, type, start and length.
/// </summary>
/// <param name="_incn">Reader supplying the text; it is read to the end.</param>
/// <returns>
/// An array of 12048 entries. One slot is consumed per token (whether or not the token
/// was found in the text); slots that were not written keep their default value.
/// </returns>
public static XunLongCNST[] ChineseIntfaceOLD(TextReader _incn)
{
    string a = _incn.ReadToEnd();
    a = a.ToLower();
    // Delimiter characters used to cut the text into pieces.
    // NOTE(review): the ten two-character literals below appear mis-encoded in this copy of
    // the file; the intended characters cannot be determined from here - restore from the original.
    char[] xx = { '!', '(', ')', '{', '}', ':', ';', '\'', '"', ',', '.', '?', '��', '��', '��', '��', '��', '��', '��', '��', '��', '��', ' ', '\n', '\r', '\t' };
    string[] xa = a.Split(xx);
    // Step 1: flatten xa into xxaa, one token per slot.
    // NOTE(review): fixed capacity with no bounds check; more than 12048 tokens would index past the end.
    string[] xxaa = new string[12048];
    int xxaaLen = 0;
    for (int i = 0; i < xa.Length; i++)
    {
        if (xa[i].Length > 2) // pieces of at most two characters are not sent to the segmenter
        {
            string cca = xwordOne.GetXword(xa[i]);
            if (cca.Length > xa[i].Length) // a longer result is treated as a successful segmentation
            {
                xa[i] = cca; // replace the piece with the segmenter output
            }
        }
        if (xa[i].IndexOf(' ') > -1)
        {
            // The piece holds several space-separated tokens: store each non-empty one.
            string[] tmp = xa[i].Split(' ');
            for (int j = 0; j < tmp.Length; j++)
            {
                if (tmp[j] != null & tmp[j].Length > 0)
                {
                    xxaa[xxaaLen] = tmp[j];
                    xxaaLen = xxaaLen + 1;
                }
            }
        }
        else
        {
            // Stored as-is, including empty pieces produced by adjacent delimiters.
            xxaa[xxaaLen] = xa[i];
            xxaaLen = xxaaLen + 1;
        }
    }
    int pNow = 0; // running position offset added to each match index
    XunLongCNST[] x = new XunLongCNST[12048];
    int pX = 0;
    // a is the text still to be matched; xxaa holds the tokens to locate in it, in order.
    for (int i = 0; i < xxaa.Length; i++)
    {
        if (xxaa[i] != null)
        {
            // Current token text.
            string tmpOne = xxaa[i];
            // Type tag; "n" unless the token carries one after its last '/'.
            string tmpType = "n";
            // Start position recorded for the token.
            int tmpStart = 0;
            // Length recorded for the token.
            // NOTE(review): stays 0 for tokens without a '/', which also affects the
            // Substring and pNow arithmetic below - confirm this is intended.
            int tmpLength = 0;
            if (tmpOne.IndexOf('/') > 0)
            {
                // Token has the form text/type: split at the LAST '/'.
                string[] tmpS = tmpOne.Split('/'); // not used by the active code below
                int new_tmps = tmpOne.LastIndexOf('/');
                string new_1 = tmpOne.Substring(0, new_tmps);
                string new_2 = tmpOne.Substring(new_tmps + 1, tmpOne.Length - new_tmps - 1);
                tmpOne = new_1;
                tmpType = new_2;
                // Length of the text part.
                tmpLength = tmpOne.Length;
                // (An earlier variant that took the text and type from tmpS was disabled here.)
            }
            // Locate the token text in the remaining text.
            int tmpxx = a.IndexOf(tmpOne);
            if (tmpxx > -1)
            {
                tmpStart = tmpxx + pNow;
                // Drop everything up to and including the match from the remaining text.
                a = a.Substring(tmpxx + tmpLength, a.Length - tmpLength - tmpxx);
                // NOTE(review): the "- 1" makes pNow one less than the number of characters
                // removed from a so far for this step - looks like an off-by-one; confirm.
                pNow = pNow + tmpxx + tmpLength - 1;
                x[pX].cWord = tmpOne;
                x[pX].cType = tmpType;
                x[pX].cStart = tmpStart;
                x[pX].cLength = tmpLength;
            }
            // NOTE(review): advanced even when the token was not found, leaving a default entry in x.
            pX = pX + 1;
        }
    }
    return x;
}