4779302001-06-05Per Hedbor array(string) tokenize_and_normalize( string what ) //! This can be optimized quite significantly when compared to //! tokenize( normalize( x ) ) in the future, currently it's not all //! that much faster, but still faster. { return Unicode.split_words_and_normalize( lower_case(what) ); }
90f5642001-05-17Johan Schön  array(string) tokenize(string in)
4779302001-06-05Per Hedbor //! Tokenize the input string (Note: You should first call normalize //! on it)
90f5642001-05-17Johan Schön {
4779302001-06-05Per Hedbor  return Unicode.split_words( in );
90f5642001-05-17Johan Schön } string normalize(string in)
4779302001-06-05Per Hedbor //! Normalize the input string. Performs unicode NFKD normalization //! and then lowercases the whole string
90f5642001-05-17Johan Schön {
4779302001-06-05Per Hedbor  return Unicode.normalize( lower_case(in), "KD" );
90f5642001-05-17Johan Schön }