分詞是將一段 string 根據語法切分成若干個短語/詞/字,多用於搜索功能。
/**
 * Splits a string into tokens with Core Foundation's string tokenizer, using
 * the word-boundary unit and the current locale.
 *
 * @param originalString The text to tokenize.
 * @return A mutable array holding one substring of originalString per token
 *         reported by the tokenizer, in order of appearance. Returns nil when
 *         originalString is nil or empty, or when the tokenizer cannot be
 *         created.
 */
- (NSMutableArray *)participleWithString:(NSString *)originalString {
    // Messaging nil yields 0, so this single check covers nil and @"".
    if (originalString.length == 0) {
        return nil;
    }

    // Both objects below come from Copy/Create functions, so this method owns
    // them and must CFRelease each one on every exit path.
    CFLocaleRef locale = CFLocaleCopyCurrent();
    CFRange fullRange = CFRangeMake(0, (CFIndex)originalString.length);

    // __bridge: borrow the NSString as a CFStringRef without transferring
    // ownership (required for this cast to compile under ARC).
    CFStringTokenizerRef tokenizer =
        CFStringTokenizerCreate(kCFAllocatorDefault,
                                (__bridge CFStringRef)originalString,
                                fullRange,
                                kCFStringTokenizerUnitWordBoundary,
                                locale);
    if (tokenizer == NULL) {
        // Defensive: CFRelease(NULL) and tokenizer calls on NULL would crash.
        if (locale != NULL) {
            CFRelease(locale);
        }
        return nil;
    }

    NSMutableArray *participleArray = [NSMutableArray array];

    // Start at the token containing index 0, then walk forward until the
    // tokenizer reports that no token is left.
    CFStringTokenizerTokenType tokenType = CFStringTokenizerGoToTokenAtIndex(tokenizer, 0);
    while (tokenType != kCFStringTokenizerTokenNone) {
        // Only the token's range is needed. No token attribute is copied here:
        // CFStringTokenizerCopyCurrentTokenAttribute may return NULL, and
        // passing NULL to CFRelease crashes.
        CFRange tokenRange = CFStringTokenizerGetCurrentTokenRange(tokenizer);
        NSRange substringRange = NSMakeRange((NSUInteger)tokenRange.location,
                                             (NSUInteger)tokenRange.length);
        [participleArray addObject:[originalString substringWithRange:substringRange]];

        tokenType = CFStringTokenizerAdvanceToNextToken(tokenizer);
    }

    CFRelease(tokenizer);
    if (locale != NULL) {
        CFRelease(locale);
    }
    return participleArray;
}