<?php
/*TWWY'S ART*/
/**
 * Split a text into paragraphs (lines).
 *
 * Every carriage return or line feed acts as a separator. Empty pieces
 * (blank lines, the gap inside a CRLF pair) are discarded, so only
 * non-empty lines are returned, in their original order.
 *
 * @param string $text Text to split.
 * @return array|false List of non-empty lines; false only if preg_split() itself fails.
 */
function break_passage( $text ){
    // A run of CR/LF characters counts as one separator; PREG_SPLIT_NO_EMPTY
    // drops any empty pieces at the start or end of the text.
    return preg_split( '/[\r\n]+/', $text, -1, PREG_SPLIT_NO_EMPTY );
}
/**
 * Split a paragraph into sentences.
 *
 * The split point is a run of whitespace that directly follows ".", "!" or
 * "?" (optionally followed by a closing quote). Because the split happens on
 * whitespace, an English full stop must be followed by a space to end a
 * sentence. Whitespace that follows one of the listed abbreviations
 * (Mr., Mrs., Ms., Jr., Dr., Prof., Sr.) is not a split point.
 *
 * @param string $text Paragraph to split.
 * @return array|false List of non-empty sentences; false only if preg_split() itself fails.
 */
function break_sentence( $text ){
    // Extended (x) mode: whitespace and "#" comments inside the pattern are
    // ignored. Each abbreviation is anchored with \b so that, under the "i"
    // flag, ordinary words that merely end in the same letters (for example
    // "items." ending in "ms.") still terminate a sentence.
    $re = '/# Split sentences on whitespace between them.
    (?<=            # Begin positive lookbehind.
      [.!?]         # Either an end of sentence punct,
    | [.!?][\'"]    # or end of sentence punct and quote.
    )               # End positive lookbehind.
    (?<!            # Begin negative lookbehind.
      \bMr\.        # Skip either "Mr."
    | \bMrs\.       # or "Mrs." ,
    | \bMs\.        # or "Ms." ,
    | \bJr\.        # or "Jr." ,
    | \bDr\.        # or "Dr." ,
    | \bProf\.      # or "Prof." ,
    | \bSr\.        # or "Sr." ,
                    # or ... (you get the idea).
    )               # End negative lookbehind.
    \s+             # Split on whitespace between sentences.
    /ix';
    return preg_split( $re, $text, -1, PREG_SPLIT_NO_EMPTY );
}
/**
 * Split a text into sentences: first into paragraphs, then each paragraph
 * into sentences. [Recommended entry point]
 *
 * @param string $text Text to split.
 * @return array Flat, sequentially indexed list of sentences in reading order.
 */
function get_sentence( $text ){
    $return = array();
    foreach ( break_passage( $text ) as $paragraph ) {
        // Append sentence by sentence instead of array_merge() per paragraph,
        // which would recopy the whole accumulated list on every iteration.
        foreach ( break_sentence( $paragraph ) as $sentence ) {
            $return[] = $sentence;
        }
    }
    return $return;
}
// This snippet comes from http://yuncode.net