1
  
2
  
3
  
4
  
5
  
6
  
7
  
8
  
9
  
10
  
11
  
12
  
13
  
14
  
15
  
16
  
17
  
18
  
19
  
20
  
21
  
22
  
23
  
24
  
25
  
26
  
27
  
28
  
29
  
// This file is part of Roxen Search 
// Copyright © 2001 Roxen IS. All rights reserved. 
// 
// $Id: Utils.pmod,v 1.5 2001/06/22 01:28:35 nilsson Exp $ 
 
public array(string) tokenize_and_normalize( string what )
//! Lowercases @[what] and hands the result to
//! @[Unicode.split_words_and_normalize], returning the array of
//! strings it produces.
//!
//! Meant as a single-call counterpart to tokenize( normalize( x ) ).
//! At present it is only somewhat faster than that combination, but
//! it leaves room for significant optimization in the future.
{
  // Case folding is applied to the raw input, before the Unicode
  // module sees it.
  string lowered = lower_case( what );
  return Unicode.split_words_and_normalize( lowered );
}
 
public array(string) tokenize(string in)
//! Splits @[in] into words using @[Unicode.split_words] and returns
//! them as an array of strings.
//!
//! @note
//!   No normalization or case folding is done here; callers should
//!   run the string through normalize first.
{
  array(string) words = Unicode.split_words( in );
  return words;
}
 
 
public string normalize(string in)
//! Normalizes the input string: @[in] is lowercased first, and the
//! lowercased string is then put through Unicode normalization with
//! the "KD" form (NFKD).
{
  // Order matters here: case folding happens before decomposition.
  string lowered = lower_case( in );
  return Unicode.normalize( lowered, "KD" );
}