using System; |
using System.Collections.Generic; |
using System.Text; |
using System.Text.RegularExpressions; |
namespace test |
{ |
/// 用C#实现汉字转化为拼音 |
/// 实现的原理就是先将汉字转化成为内码,然后通过内码和拼音的对照来查找。 |
public class SplitWord |
{ |
/// <summary> |
///将所有的汉字转换成为拼音(小写) |
///作者: |
///完成于: |
/// </summary> |
static Regex MyRegex = new Regex ( "^[一-龥]$" ); //汉字的正则表达式“[一-龥]”指所有的汉字,从“一”到“龥”(念yu)的汉字,基本上涵盖了所有的汉字 |
private static int [] pyvalue = new int [] {-20319,-20317,-20304,-20295,-20292,-20283,-20265,-20257,-20242,-20230,-20051,-20036,-20032,-20026, |
-20002,-19990,-19986,-19982,-19976,-19805,-19784,-19775,-19774,-19763,-19756,-19751,-19746,-19741,-19739,-19728, |
-19725,-19715,-19540,-19531,-19525,-19515,-19500,-19484,-19479,-19467,-19289,-19288,-19281,-19275,-19270,-19263, |
-19261,-19249,-19243,-19242,-19238,-19235,-19227,-19224,-19218,-19212,-19038,-19023,-19018,-19006,-19003,-18996, |
-18977,-18961,-18952,-18783,-18774,-18773,-18763,-18756,-18741,-18735,-18731,-18722,-18710,-18697,-18696,-18526, |
-18518,-18501,-18490,-18478,-18463,-18448,-18447,-18446,-18239,-18237,-18231,-18220,-18211,-18201,-18184,-18183, |
-18181,-18012,-17997,-17988,-17970,-17964,-17961,-17950,-17947,-17931,-17928,-17922,-17759,-17752,-17733,-17730, |
-17721,-17703,-17701,-17697,-17692,-17683,-17676,-17496,-17487,-17482,-17468,-17454,-17433,-17427,-17417,-17202, |
-17185,-16983,-16970,-16942,-16915,-16733,-16708,-16706,-16689,-16664,-16657,-16647,-16474,-16470,-16465,-16459, |
-16452,-16448,-16433,-16429,-16427,-16423,-16419,-16412,-16407,-16403,-16401,-16393,-16220,-16216,-16212,-16205, |
-16202,-16187,-16180,-16171,-16169,-16158,-16155,-15959,-15958,-15944,-15933,-15920,-15915,-15903,-15889,-15878, |
-15707,-15701,-15681,-15667,-15661,-15659,-15652,-15640,-15631,-15625,-15454,-15448,-15436,-15435,-15419,-15416, |
-15408,-15394,-15385,-15377,-15375,-15369,-15363,-15362,-15183,-15180,-15165,-15158,-15153,-15150,-15149,-15144, |
-15143,-15141,-15140,-15139,-15128,-15121,-15119,-15117,-15110,-15109,-14941,-14937,-14933,-14930,-14929,-14928, |
-14926,-14922,-14921,-14914,-14908,-14902,-14894,-14889,-14882,-14873,-14871,-14857,-14678,-14674,-14670,-14668, |
-14663,-14654,-14645,-14630,-14594,-14429,-14407,-14399,-14384,-14379,-14368,-14355,-14353,-14345,-14170,-14159, |
-14151,-14149,-14145,-14140,-14137,-14135,-14125,-14123,-14122,-14112,-14109,-14099,-14097,-14094,-14092,-14090, |
-14087,-14083,-13917,-13914,-13910,-13907,-13906,-13905,-13896,-13894,-13878,-13870,-13859,-13847,-13831,-13658, |
-13611,-13601,-13406,-13404,-13400,-13398,-13395,-13391,-13387,-13383,-13367,-13359,-13356,-13343,-13340,-13329, |
-13326,-13318,-13147,-13138,-13120,-13107,-13096,-13095,-13091,-13076,-13068,-13063,-13060,-12888,-12875,-12871, |
-12860,-12858,-12852,-12849,-12838,-12831,-12829,-12812,-12802,-12607,-12597,-12594,-12585,-12556,-12359,-12346, |
-12320,-12300,-12120,-12099,-12089,-12074,-12067,-12058,-12039,-11867,-11861,-11847,-11831,-11798,-11781,-11604, |
-11589,-11536,-11358,-11340,-11339,-11324,-11303,-11097,-11077,-11067,-11055,-11052,-11045,-11041,-11038,-11024, |
-11020,-11019,-11018,-11014,-10838,-10832,-10815,-10800,-10790,-10780,-10764,-10587,-10544,-10533,-10519,-10331, |
-10329,-10328,-10322,-10315,-10309,-10307,-10296,-10281,-10274,-10270,-10262,-10260,-10256,-10254 |
}; |
private static string [] pystr = new string [] { "a" , "ai" , "an" , "ang" , "ao" , "ba" , "bai" , "ban" , "bang" , "bao" , "bei" , "ben" , "beng" , "bi" , "bian" , "biao" , |
"bie" , "bin" , "bing" , "bo" , "bu" , "ca" , "cai" , "can" , "cang" , "cao" , "ce" , "ceng" , "cha" , "chai" , "chan" , "chang" , "chao" , "che" , "chen" , |
"cheng" , "chi" , "chong" , "chou" , "chu" , "chuai" , "chuan" , "chuang" , "chui" , "chun" , "chuo" , "ci" , "cong" , "cou" , "cu" , "cuan" , "cui" , |
"cun" , "cuo" , "da" , "dai" , "dan" , "dang" , "dao" , "de" , "deng" , "di" , "dian" , "diao" , "die" , "ding" , "diu" , "dong" , "dou" , "du" , "duan" , |
"dui" , "dun" , "duo" , "e" , "en" , "er" , "fa" , "fan" , "fang" , "fei" , "fen" , "feng" , "fo" , "fou" , "fu" , "ga" , "gai" , "gan" , "gang" , "gao" , |
"ge" , "gei" , "gen" , "geng" , "gong" , "gou" , "gu" , "gua" , "guai" , "guan" , "guang" , "gui" , "gun" , "guo" , "ha" , "hai" , "han" , "hang" , |
"hao" , "he" , "hei" , "hen" , "heng" , "hong" , "hou" , "hu" , "hua" , "huai" , "huan" , "huang" , "hui" , "hun" , "huo" , "ji" , "jia" , "jian" , |
"jiang" , "jiao" , "jie" , "jin" , "jing" , "jiong" , "jiu" , "ju" , "juan" , "jue" , "jun" , "ka" , "kai" , "kan" , "kang" , "kao" , "ke" , "ken" , |
"keng" , "kong" , "kou" , "ku" , "kua" , "kuai" , "kuan" , "kuang" , "kui" , "kun" , "kuo" , "la" , "lai" , "lan" , "lang" , "lao" , "le" , "lei" , |
"leng" , "li" , "lia" , "lian" , "liang" , "liao" , "lie" , "lin" , "ling" , "liu" , "long" , "lou" , "lu" , "lv" , "luan" , "lue" , "lun" , "luo" , |
"ma" , "mai" , "man" , "mang" , "mao" , "me" , "mei" , "men" , "meng" , "mi" , "mian" , "miao" , "mie" , "min" , "ming" , "miu" , "mo" , "mou" , "mu" , |
"na" , "nai" , "nan" , "nang" , "nao" , "ne" , "nei" , "nen" , "neng" , "ni" , "nian" , "niang" , "niao" , "nie" , "nin" , "ning" , "niu" , "nong" , |
"nu" , "nv" , "nuan" , "nue" , "nuo" , "o" , "ou" , "pa" , "pai" , "pan" , "pang" , "pao" , "pei" , "pen" , "peng" , "pi" , "pian" , "piao" , "pie" , |
"pin" , "ping" , "po" , "pu" , "qi" , "qia" , "qian" , "qiang" , "qiao" , "qie" , "qin" , "qing" , "qiong" , "qiu" , "qu" , "quan" , "que" , "qun" , |
"ran" , "rang" , "rao" , "re" , "ren" , "reng" , "ri" , "rong" , "rou" , "ru" , "ruan" , "rui" , "run" , "ruo" , "sa" , "sai" , "san" , "sang" , |
"sao" , "se" , "sen" , "seng" , "sha" , "shai" , "shan" , "shang" , "shao" , "she" , "shen" , "sheng" , "shi" , "shou" , "shu" , "shua" , |
"shuai" , "shuan" , "shuang" , "shui" , "shun" , "shuo" , "si" , "song" , "sou" , "su" , "suan" , "sui" , "sun" , "suo" , "ta" , "tai" , |
"tan" , "tang" , "tao" , "te" , "teng" , "ti" , "tian" , "tiao" , "tie" , "ting" , "tong" , "tou" , "tu" , "tuan" , "tui" , "tun" , "tuo" , |
"wa" , "wai" , "wan" , "wang" , "wei" , "wen" , "weng" , "wo" , "wu" , "xi" , "xia" , "xian" , "xiang" , "xiao" , "xie" , "xin" , "xing" , |
"xiong" , "xiu" , "xu" , "xuan" , "xue" , "xun" , "ya" , "yan" , "yang" , "yao" , "ye" , "yi" , "yin" , "ying" , "yo" , "yong" , "you" , |
"yu" , "yuan" , "yue" , "yun" , "za" , "zai" , "zan" , "zang" , "zao" , "ze" , "zei" , "zen" , "zeng" , "zha" , "zhai" , "zhan" , "zhang" , |
"zhao" , "zhe" , "zhen" , "zheng" , "zhi" , "zhong" , "zhou" , "zhu" , "zhua" , "zhuai" , "zhuan" , "zhuang" , "zhui" , "zhun" , "zhuo" , |
"zi" , "zong" , "zou" , "zu" , "zuan" , "zui" , "zun" , "zuo" |
}; |
/**/ |
/// <summary> |
/// 转化汉字的全部拼音 |
/// </summary> |
/// <param name="chrstr"></param> |
/// <returns></returns> |
public static string ConvertAllSpell ( string chrstr ) |
{ |
Regex MyRegex = new Regex ( "^[一-龥]$" ); |
byte [] array = new byte [2]; |
string returnstr = "" ; |
int chrasc = 0; |
int i1 = 0; |
int i2 = 0; |
char [] nowchar = chrstr.ToCharArray(); |
for ( int j = 0; j < nowchar.Length; j++ ) |
{ |
if ( MyRegex.IsMatch ( nowchar[j].ToString() ) ) |
{ |
array = System.Text.Encoding.Default.GetBytes ( nowchar[j].ToString() ); |
i1 = ( short ) ( array[0] ); |
i2 = ( short ) ( array[1] ); |
chrasc = i1 * 256 + i2 - 65536; |
if ( chrasc > 0 && chrasc < 160 ) |
{ |
returnstr += nowchar[j]; |
} |
else |
{ |
for ( int i = ( pyvalue.Length - 1 ); i >= 0; i-- ) |
{ |
if ( pyvalue[i] <= chrasc ) |
{ |
returnstr += pystr[i]; |
break ; |
} |
} |
} |
} |
else |
{ |
returnstr += nowchar[j].ToString(); |
} |
} |
return returnstr; |
} |
//ChineseToPY |
/**/ |
/// <summary> |
/// 只转换每个汉字首字母(大写) |
/// </summary> |
/// <param name="strText"></param> |
/// <returns></returns> |
public static string GetChineseSpell ( string strText ) |
{ |
int len = strText.Length; |
string myStr = "" ; |
for ( int i = 0; i < len; i++ ) |
{ |
myStr += getSpell ( strText.Substring ( i, 1 ) ); |
} |
return myStr; |
} |
/// <summary> |
/// 获得第一个汉字的首字母(大写); |
/// </summary> |
/// <param name="cnChar"></param> |
/// <returns></returns> |
public static string getSpell ( string cnChar ) |
{ |
byte [] arrCN = Encoding.Default.GetBytes ( cnChar ); |
if ( arrCN.Length > 1 ) |
{ |
int area = ( short ) arrCN[0]; |
int pos = ( short ) arrCN[1]; |
int code = ( area << 8 ) + pos; |
int [] areacode = { 45217, 45253, 45761, 46318, 46826, 47010, 47297, 47614, 48119, 48119, 49062, 49324, 49896, 50371, 50614, 50622, 50906, 51387, 51446, 52218, 52698, 52698, 52698, 52980, 53689, 54481 }; |
for ( int i = 0; i < 26; i++ ) |
{ |
int max = 55290; |
if ( i != 25 ) max = areacode[i + 1]; |
if ( areacode[i] <= code && code < max ) |
{ |
return Encoding.Default.GetString ( new byte [] { ( byte ) ( 65 + i ) } ); |
} |
} |
return "*" ; |
} |
else return cnChar; |
} |
public static string ConvertFirstSpell ( string chrstr ) |
{ |
Regex MyRegex = new Regex ( "^[一-龥]$" ); |
byte [] array = new byte [2]; |
string returnstr = "" ; |
int chrasc = 0; |
int i1 = 0; |
int i2 = 0; |
char [] nowchar = chrstr.ToCharArray(); |
for ( int j = 0; j < 1; j++ ) |
{ |
if ( MyRegex.IsMatch ( nowchar[j].ToString() ) ) |
{ |
array = System.Text.Encoding.Default.GetBytes ( nowchar[j].ToString() ); |
i1 = ( short ) ( array[0] ); |
i2 = ( short ) ( array[1] ); |
chrasc = i1 * 256 + i2 - 65536; |
if ( chrasc > 0 && chrasc < 160 ) |
{ |
returnstr += nowchar[j]; |
} |
else |
{ |
for ( int i = ( pyvalue.Length - 1 ); i >= 0; i-- ) |
{ |
if ( pyvalue[i] <= chrasc ) |
{ |
returnstr += pystr[i]; |
break ; |
} |
} |
} |
} |
else |
{ |
returnstr += nowchar[j].ToString(); |
} |
} |
return returnstr; |
} |
} |
} |
初级程序员
by: 绝影007 发表于:2016-07-18 19:13:46 顶(0) | 踩(0) 回复
主函数呢?
回复评论