// This file is part of Roxen Search
// Copyright © 2001 Roxen IS. All rights reserved.
//
// $Id: Utils.pmod,v 1.5 2001/06/22 01:28:35 nilsson Exp $
a0fc192001-06-10Johan Schön public array(string) tokenize_and_normalize( string what )
4779302001-06-05Per Hedbor //! This can be optimized quite significantly when compared to //! tokenize( normalize( x ) ) in the future, currently it's not all //! that much faster, but still faster. { return Unicode.split_words_and_normalize( lower_case(what) ); }
90f5642001-05-17Johan Schön 
a0fc192001-06-10Johan Schön public array(string) tokenize(string in)
4779302001-06-05Per Hedbor //! Tokenize the input string (Note: You should first call normalize //! on it)
90f5642001-05-17Johan Schön {
4779302001-06-05Per Hedbor  return Unicode.split_words( in );
90f5642001-05-17Johan Schön }
a0fc192001-06-10Johan Schön public string normalize(string in)
4779302001-06-05Per Hedbor //! Normalize the input string. Performs unicode NFKD normalization //! and then lowercases the whole string
90f5642001-05-17Johan Schön {
4779302001-06-05Per Hedbor  return Unicode.normalize( lower_case(in), "KD" );
90f5642001-05-17Johan Schön }