author | Ting Li <ting.li@amlogic.com> | 2016-11-05 05:53:49 (GMT) |
---|---|---|
committer | Ting Li <ting.li@amlogic.com> | 2016-11-05 05:58:23 (GMT) |
commit | c6b0b1840badcde2458119ad9ef4932aa9d8ba4f (patch) | |
tree | 622cfcfec5eb74d6db53069e6674937bb762dbd5 | |
parent | f95211795aede0f87fb8610ca55b075f77255382 (diff) | |
download | RemoteIME-c6b0b1840badcde2458119ad9ef4932aa9d8ba4f.zip RemoteIME-c6b0b1840badcde2458119ad9ef4932aa9d8ba4f.tar.gz RemoteIME-c6b0b1840badcde2458119ad9ef4932aa9d8ba4f.tar.bz2 |
pd#133647 fix bug of load dict at 64 bit env
Change-Id: Iaefb53d0ff7719723e7d42fc509c1733b237471d
36 files changed, 640 insertions, 637 deletions
diff --git a/jni/android/com_droidlogic_inputmethod_remote_PinyinDecoderService.cpp b/jni/android/com_droidlogic_inputmethod_remote_PinyinDecoderService.cpp index ec71725..80789fb 100644 --- a/jni/android/com_droidlogic_inputmethod_remote_PinyinDecoderService.cpp +++ b/jni/android/com_droidlogic_inputmethod_remote_PinyinDecoderService.cpp @@ -35,7 +35,7 @@ extern "C" { static char16 retbuf[RET_BUF_LEN]; static char16 ( *predict_buf ) [kMaxPredictSize + 1] = NULL; - static size_t predict_len; + static Size_t predict_len; static Sync sync_worker; @@ -76,8 +76,8 @@ extern "C" { JNIEXPORT void JNICALL nativeImSetMaxLens ( JNIEnv *env, jclass jclazz, jint max_sps_len, jint max_hzs_len ) { - im_set_max_lens ( static_cast<size_t> ( max_sps_len ), - static_cast<size_t> ( max_hzs_len ) ); + im_set_max_lens ( static_cast<Size_t> ( max_sps_len ), + static_cast<Size_t> ( max_hzs_len ) ); return; } @@ -114,15 +114,15 @@ extern "C" { JNIEXPORT jstring JNICALL nativeImGetPyStr ( JNIEnv *env, jclass jclazz, jboolean decoded ) { - size_t py_len; + Size_t py_len; const char *py = im_get_sps_str ( &py_len ); // py_len gets decoded length assert ( NULL != py ); if ( !decoded ) { py_len = strlen ( py ); } const unsigned short *spl_start; - size_t len; + Size_t len; len = im_get_spl_start_pos ( spl_start ); - size_t i; + Size_t i; for ( i = 0; i < py_len; i++ ) { retbuf[i] = py[i]; } retbuf[i] = ( char16 ) '\0'; @@ -132,7 +132,7 @@ extern "C" { JNIEXPORT jint JNICALL nativeImGetPyStrLen ( JNIEnv *env, jclass jclazz, jboolean decoded ) { - size_t py_len; + Size_t py_len; const char *py = im_get_sps_str ( &py_len ); // py_len gets decoded length assert ( NULL != py ); if ( !decoded ) @@ -142,14 +142,14 @@ extern "C" { JNIEXPORT jintArray JNICALL nativeImGetSplStart ( JNIEnv *env, jclass jclazz ) { const unsigned short *spl_start; - size_t len; + Size_t len; // There will be len + 1 elements in the buffer when len > 0. len = im_get_spl_start_pos ( spl_start ); jintArray arr = ( *env ).NewIntArray ( len + 2 ); jint *arr_body = ( *env ).GetIntArrayElements ( arr, 0 ); assert ( NULL != arr_body ); arr_body[0] = len; // element 0 is used to store the length of buffer. - for ( size_t i = 0; i <= len; i++ ) + for ( Size_t i = 0; i <= len; i++ ) { arr_body[i + 1] = spl_start[i]; } ( *env ).ReleaseIntArrayElements ( arr, arr_body, 0 ); return arr; @@ -194,7 +194,7 @@ extern "C" { JNIEXPORT jint JNICALL nativeImGetPredictsNum ( JNIEnv *env, jclass clazz, jstring fixed_str ) { char16 *fixed_ptr = ( char16 * ) ( *env ).GetStringChars ( fixed_str, NULL ); - size_t fixed_len = ( size_t ) ( *env ).GetStringLength ( fixed_str ); + Size_t fixed_len = ( Size_t ) ( *env ).GetStringLength ( fixed_str ); char16 fixed_buf[kMaxPredictSize + 1]; if ( fixed_len > kMaxPredictSize ) { fixed_ptr += fixed_len - kMaxPredictSize; @@ -210,7 +210,7 @@ extern "C" { JNIEXPORT jstring JNICALL nativeImGetPredictItem ( JNIEnv *env, jclass clazz, jint predict_no ) { jstring retstr; - if ( predict_no < 0 || ( size_t ) predict_no >= predict_len ) { + if ( predict_no < 0 || ( Size_t ) predict_no >= predict_len ) { retstr = ( *env ).NewString ( ( unsigned short * ) predict_buf[0], 0 ); } else { retstr = ( *env ).NewString ( ( unsigned short * ) predict_buf[predict_no], @@ -241,7 +241,7 @@ extern "C" { JNIEXPORT jint JNICALL nativeSyncPutLemmas ( JNIEnv *env, jclass clazz, jstring tomerge ) { char16 *ptr = ( char16 * ) ( *env ).GetStringChars ( tomerge, NULL ); - int len = ( size_t ) ( *env ).GetStringLength ( tomerge ); + int len = ( Size_t ) ( *env ).GetStringLength ( tomerge ); int added = sync_worker.put_lemmas ( ptr, len ); ( *env ).ReleaseStringChars ( tomerge, ptr ); return added; diff --git a/jni/command/pinyinime_dictbuilder.cpp b/jni/command/pinyinime_dictbuilder.cpp index 40fd7d3..40fd7d3 100755..100644 --- a/jni/command/pinyinime_dictbuilder.cpp +++ b/jni/command/pinyinime_dictbuilder.cpp diff --git a/jni/include/atomdictbase.h b/jni/include/atomdictbase.h index 27a39be..5fc7a40 100755..100644 --- a/jni/include/atomdictbase.h +++ b/jni/include/atomdictbase.h @@ -76,7 +76,7 @@ namespace ime_pinyin { * * @return The total number of lemmas. */ - virtual size_t number_of_lemmas() = 0; + virtual Size_t number_of_lemmas() = 0; /** * This function is called by the decoder when user deletes a character from @@ -124,7 +124,7 @@ namespace ime_pinyin { virtual MileStoneHandle extend_dict ( MileStoneHandle from_handle, const DictExtPara *dep, LmaPsbItem *lpi_items, - size_t lpi_max, size_t *lpi_num ) = 0; + Size_t lpi_max, Size_t *lpi_num ) = 0; /** * Get lemma items with scores according to a spelling id stream. @@ -136,8 +136,8 @@ namespace ime_pinyin { * @param lpi_max The maximum size of the buffer to return result. * @return The number of matched items which have been filled in to lpi_items. */ - virtual size_t get_lpis ( const uint16 *splid_str, uint16 splid_str_len, - LmaPsbItem *lpi_items, size_t lpi_max ) = 0; + virtual Size_t get_lpis ( const uint16 *splid_str, uint16 splid_str_len, + LmaPsbItem *lpi_items, Size_t lpi_max ) = 0; /** * Get a lemma string (The Chinese string) by the given lemma id. @@ -178,9 +178,9 @@ namespace ime_pinyin { * from other atom dictionaries. A atom ditionary can just ignore it. * @return The number of prediction result from this atom dictionary. */ - virtual size_t predict ( const char16 last_hzs[], uint16 hzs_len, - NPredictItem *npre_items, size_t npre_max, - size_t b4_used ) = 0; + virtual Size_t predict ( const char16 last_hzs[], uint16 hzs_len, + NPredictItem *npre_items, Size_t npre_max, + Size_t b4_used ) = 0; /** * Add a lemma to the dictionary. If the dictionary allows to add new @@ -249,14 +249,14 @@ namespace ime_pinyin { * * @return The total occuring count of this atom dictionary. */ - virtual size_t get_total_lemma_count() = 0; + virtual Size_t get_total_lemma_count() = 0; /** * Set the total occuring count of other atom dictionaries. * * @param count The total occuring count of other atom dictionaies. */ - virtual void set_total_lemma_count_of_others ( size_t count ) = 0; + virtual void set_total_lemma_count_of_others ( Size_t count ) = 0; /** * Notify this atom dictionary to flush the cached data to persistent storage diff --git a/jni/include/dictbuilder.h b/jni/include/dictbuilder.h index aa7d4e4..c867db1 100755..100644 --- a/jni/include/dictbuilder.h +++ b/jni/include/dictbuilder.h @@ -37,12 +37,12 @@ namespace ime_pinyin { private: // The raw lemma array buffer. LemmaEntry *lemma_arr_; - size_t lemma_num_; + Size_t lemma_num_; // Used to store all possible single char items. // Two items may have the same Hanzi while their spelling ids are different. SingleCharItem *scis_; - size_t scis_num_; + Size_t scis_num_; // In the tree, root's level is -1. // Lemma nodes for root, and level 0 @@ -52,38 +52,38 @@ namespace ime_pinyin { LmaNodeGE1 *lma_nodes_ge1_; // Number of used lemma nodes - size_t lma_nds_used_num_le0_; - size_t lma_nds_used_num_ge1_; + Size_t lma_nds_used_num_le0_; + Size_t lma_nds_used_num_ge1_; // Used to store homophonies' ids. LemmaIdType *homo_idx_buf_; // Number of homophonies each of which only contains one Chinese character. - size_t homo_idx_num_eq1_; + Size_t homo_idx_num_eq1_; // Number of homophonies each of which contains more than one character. - size_t homo_idx_num_gt1_; + Size_t homo_idx_num_gt1_; // The items with highest scores. LemmaEntry *top_lmas_; - size_t top_lmas_num_; + Size_t top_lmas_num_; SpellingTable *spl_table_; SpellingParser *spl_parser_; #ifdef ___DO_STATISTICS___ - size_t max_sonbuf_len_[kMaxLemmaSize]; - size_t max_homobuf_len_[kMaxLemmaSize]; + Size_t max_sonbuf_len_[kMaxLemmaSize]; + Size_t max_homobuf_len_[kMaxLemmaSize]; - size_t total_son_num_[kMaxLemmaSize]; - size_t total_node_hasson_[kMaxLemmaSize]; - size_t total_sonbuf_num_[kMaxLemmaSize]; - size_t total_sonbuf_allnoson_[kMaxLemmaSize]; - size_t total_node_in_sonbuf_allnoson_[kMaxLemmaSize]; - size_t total_homo_num_[kMaxLemmaSize]; + Size_t total_son_num_[kMaxLemmaSize]; + Size_t total_node_hasson_[kMaxLemmaSize]; + Size_t total_sonbuf_num_[kMaxLemmaSize]; + Size_t total_sonbuf_allnoson_[kMaxLemmaSize]; + Size_t total_node_in_sonbuf_allnoson_[kMaxLemmaSize]; + Size_t total_homo_num_[kMaxLemmaSize]; - size_t sonbufs_num1_; // Number of son buffer with only 1 son - size_t sonbufs_numgt1_; // Number of son buffer with more 1 son; + Size_t sonbufs_num1_; // Number of son buffer with only 1 son + Size_t sonbufs_numgt1_; // Number of son buffer with more 1 son; - size_t total_lma_node_num_; + Size_t total_lma_node_num_; void stat_init(); void stat_print(); @@ -106,10 +106,10 @@ namespace ime_pinyin { void id_to_charbuf ( unsigned char *buf, LemmaIdType id ); // Update the offset of sons for a node. - void set_son_offset ( LmaNodeGE1 *node, size_t offset ); + void set_son_offset ( LmaNodeGE1 *node, Size_t offset ); // Update the offset of homophonies' ids for a node. - void set_homo_id_buf_offset ( LmaNodeGE1 *node, size_t offset ); + void set_homo_id_buf_offset ( LmaNodeGE1 *node, Size_t offset ); // Format a speling string. void format_spelling_str ( char *spl_str ); @@ -126,41 +126,41 @@ namespace ime_pinyin { // lemma buffer lemma_arr_. // This function should be called after the lemma array is ready. // Return the number of unique SingleCharItem elements. - size_t build_scis(); + Size_t build_scis(); // Construct a subtree using a subset of the spelling array (from // item_star to item_end) // parent is the parent node to update the necessary information // parent can be a member of LmaNodeLE0 or LmaNodeGE1 bool construct_subset ( void *parent, LemmaEntry *lemma_arr, - size_t item_start, size_t item_end, size_t level ); + Size_t item_start, Size_t item_end, Size_t level ); // Read valid Chinese Hanzis from the given file. // num is used to return number of chars. // The return buffer is sorted and caller needs to free the returned buffer. - char16 *read_valid_hanzis ( const char *fn_validhzs, size_t *num ); + char16 *read_valid_hanzis ( const char *fn_validhzs, Size_t *num ); // Read a raw dictionary. max_item is the maximum number of items. If there // are more items in the ditionary, only the first max_item will be read. // Returned value is the number of items successfully read from the file. - size_t read_raw_dict ( const char *fn_raw, const char *fn_validhzs, - size_t max_item ); + Size_t read_raw_dict ( const char *fn_raw, const char *fn_validhzs, + Size_t max_item ); // Try to find if a character is in hzs buffer. - bool hz_in_hanzis_list ( const char16 *hzs, size_t hzs_len, char16 hz ); + bool hz_in_hanzis_list ( const char16 *hzs, Size_t hzs_len, char16 hz ); // Try to find if all characters in str are in hzs buffer. - bool str_in_hanzis_list ( const char16 *hzs, size_t hzs_len, - const char16 *str, size_t str_len ); + bool str_in_hanzis_list ( const char16 *hzs, Size_t hzs_len, + const char16 *str, Size_t str_len ); // Get these lemmas with toppest scores. void get_top_lemmas(); // Allocate resource to build dictionary. // lma_num is the number of items to be loaded - bool alloc_resource ( size_t lma_num ); + bool alloc_resource ( Size_t lma_num ); // Free resource. void free_resource(); diff --git a/jni/include/dictdef.h b/jni/include/dictdef.h index 25b1e39..7ea8e76 100755..100644 --- a/jni/include/dictdef.h +++ b/jni/include/dictdef.h @@ -40,14 +40,14 @@ namespace ime_pinyin { const bool kPrintDebug2 = false; // The max length of a lemma. - const size_t kMaxLemmaSize = 8; + const Size_t kMaxLemmaSize = 8; // The max length of a Pinyin (spelling). - const size_t kMaxPinyinSize = 6; + const Size_t kMaxPinyinSize = 6; // The number of half spelling ids. For Chinese Pinyin, there 30 half ids. // See SpellingTrie.h for details. - const size_t kHalfSpellingIdNum = 29; + const Size_t kHalfSpellingIdNum = 29; // The maximum number of full spellings. For Chinese Pinyin, there are only // about 410 spellings. @@ -55,26 +55,26 @@ namespace ime_pinyin { // other structures like SpellingNode, to make sure than a spelling id can be // stored. // -1 is because that 0 is never used. - const size_t kMaxSpellingNum = 512 - kHalfSpellingIdNum - 1; - const size_t kMaxSearchSteps = 40; + const Size_t kMaxSpellingNum = 512 - kHalfSpellingIdNum - 1; + const Size_t kMaxSearchSteps = 40; // One character predicts its following characters. - const size_t kMaxPredictSize = ( kMaxLemmaSize - 1 ); + const Size_t kMaxPredictSize = ( kMaxLemmaSize - 1 ); - // LemmaIdType must always be size_t. - typedef size_t LemmaIdType; - const size_t kLemmaIdSize = 3; // Actually, a Id occupies 3 bytes in storage. - const size_t kLemmaIdComposing = 0xffffff; + // LemmaIdType must always be Size_t. + typedef Size_t LemmaIdType; + const Size_t kLemmaIdSize = 3; // Actually, a Id occupies 3 bytes in storage. + const Size_t kLemmaIdComposing = 0xffffff; typedef uint16 LmaScoreType; typedef uint16 KeyScoreType; // Number of items with highest score are kept for prediction purpose. - const size_t kTopScoreLemmaNum = 10; + const Size_t kTopScoreLemmaNum = 10; - const size_t kMaxPredictNumByGt3 = 1; - const size_t kMaxPredictNumBy3 = 2; - const size_t kMaxPredictNumBy2 = 2; + const Size_t kMaxPredictNumByGt3 = 1; + const Size_t kMaxPredictNumBy3 = 2; + const Size_t kMaxPredictNumBy2 = 2; // The last lemma id (included) for the system dictionary. The system // dictionary's ids always start from 1. @@ -109,8 +109,8 @@ namespace ime_pinyin { * A node occupies 16 bytes. so, totallly less than 16 * 500 = 8K */ struct LmaNodeLE0 { - size_t son_1st_off; - size_t homo_idx_buf_off; + Size_t son_1st_off; + Size_t homo_idx_buf_off; uint16 spl_idx; uint16 num_of_son; uint16 num_of_homo; diff --git a/jni/include/dictlist.h b/jni/include/dictlist.h index a2d78ac..f283a05 100755..100644 --- a/jni/include/dictlist.h +++ b/jni/include/dictlist.h @@ -33,7 +33,7 @@ namespace ime_pinyin { const SpellingTrie *spl_trie_; // Number of SingCharItem. The first is blank, because id 0 is invalid. - size_t scis_num_; + Size_t scis_num_; char16 *scis_hz_; SpellingId *scis_splid_; @@ -42,25 +42,25 @@ namespace ime_pinyin { // Starting position of those words whose lengths are i+1, counted in // char16 - size_t start_pos_[kMaxLemmaSize + 1]; + Size_t start_pos_[kMaxLemmaSize + 1]; - size_t start_id_[kMaxLemmaSize + 1]; + Size_t start_id_[kMaxLemmaSize + 1]; int ( *cmp_func_[kMaxLemmaSize] ) ( const void *, const void * ); - bool alloc_resource ( size_t buf_size, size_t scim_num ); + bool alloc_resource ( Size_t buf_size, Size_t scim_num ); void free_resource(); #ifdef ___BUILD_MODEL___ // Calculate the requsted memory, including the start_pos[] buffer. - size_t calculate_size ( const LemmaEntry *lemma_arr, size_t lemma_num ); + Size_t calculate_size ( const LemmaEntry *lemma_arr, Size_t lemma_num ); - void fill_scis ( const SingleCharItem *scis, size_t scis_num ); + void fill_scis ( const SingleCharItem *scis, Size_t scis_num ); // Copy the related content to the inner buffer // It should be called after calculate_size() - void fill_list ( const LemmaEntry *lemma_arr, size_t lemma_num ); + void fill_list ( const LemmaEntry *lemma_arr, Size_t lemma_num ); // Find the starting position for the buffer of those 2-character Chinese word // whose first character is the given Chinese character. @@ -71,7 +71,7 @@ namespace ime_pinyin { // word_len. The given parameter cmp_func decides how many characters from // beginning will be used to compare. char16 *find_pos_startedbyhzs ( const char16 last_hzs[], - size_t word_Len, + Size_t word_Len, int ( *cmp_func ) ( const void *, const void * ) ); public: @@ -86,8 +86,8 @@ namespace ime_pinyin { // Init the list from the LemmaEntry array. // lemma_arr should have been sorted by the hanzi_str, and have been given // ids from 1 - bool init_list ( const SingleCharItem *scis, size_t scis_num, - const LemmaEntry *lemma_arr, size_t lemma_num ); + bool init_list ( const SingleCharItem *scis, Size_t scis_num, + const LemmaEntry *lemma_arr, Size_t lemma_num ); #endif // Get the hanzi string for the given id @@ -104,9 +104,9 @@ namespace ime_pinyin { // buf_len specifies the buffer length. // b4_used specifies how many items before predict_buf have been used. // Returned value is the number of newly added items. - size_t predict ( const char16 last_hzs[], uint16 hzs_len, - NPredictItem *npre_items, size_t npre_max, - size_t b4_used ); + Size_t predict ( const char16 last_hzs[], uint16 hzs_len, + NPredictItem *npre_items, Size_t npre_max, + Size_t b4_used ); // If half_splid is a valid half spelling id, return those full spelling // ids which share this half id. diff --git a/jni/include/dicttrie.h b/jni/include/dicttrie.h index 2886d22..481e0ac 100755..100644 --- a/jni/include/dicttrie.h +++ b/jni/include/dicttrie.h @@ -28,8 +28,8 @@ namespace ime_pinyin { class DictTrie : AtomDictBase { private: typedef struct ParsingMark { - size_t node_offset: 24; - size_t node_num: 8; // Number of nodes with this spelling id given + Size_t node_offset: 24; + Size_t node_num: 8; // Number of nodes with this spelling id given // by spl_id. If spl_id is a Shengmu, for nodes // in the first layer of DictTrie, it equals to // SpellingTrie::shm2full_num(); but for those @@ -73,15 +73,15 @@ namespace ime_pinyin { // root_[splid_le0_index_[splid - kFullSplIdStart]] uint16 *splid_le0_index_; - size_t lma_node_num_le0_; - size_t lma_node_num_ge1_; + Size_t lma_node_num_le0_; + Size_t lma_node_num_ge1_; // The first part is for homophnies, and the last top_lma_num_ items are // lemmas with highest scores. unsigned char *lma_idx_buf_; - size_t lma_idx_buf_len_; // The total size of lma_idx_buf_ in byte. - size_t total_lma_num_; // Total number of lemmas in this dictionary. - size_t top_lmas_num_; // Number of lemma with highest scores. + Size_t lma_idx_buf_len_; // The total size of lma_idx_buf_ in byte. + Size_t total_lma_num_; // Total number of lemmas in this dictionary. + Size_t top_lmas_num_; // Number of lemma with highest scores. // Parsing mark list used to mark the detailed extended statuses. ParsingMark *parsing_marks_; @@ -95,13 +95,13 @@ namespace ime_pinyin { MileStoneHandle mile_stones_pos_; // Get the offset of sons for a node. - inline size_t get_son_offset ( const LmaNodeGE1 *node ); + inline Size_t get_son_offset ( const LmaNodeGE1 *node ); // Get the offset of homonious ids for a node. - inline size_t get_homo_idx_buf_offset ( const LmaNodeGE1 *node ); + inline Size_t get_homo_idx_buf_offset ( const LmaNodeGE1 *node ); // Get the lemma id by the offset. - inline LemmaIdType get_lemma_id ( size_t id_offset ); + inline LemmaIdType get_lemma_id ( Size_t id_offset ); void free_resource ( bool free_dict_list ); @@ -110,31 +110,31 @@ namespace ime_pinyin { // Given a LmaNodeLE0 node, extract the lemmas specified by it, and fill // them into the lpi_items buffer. // This function is called by the search engine. - size_t fill_lpi_buffer ( LmaPsbItem lpi_items[], size_t max_size, + Size_t fill_lpi_buffer ( LmaPsbItem lpi_items[], Size_t max_size, LmaNodeLE0 *node ); // Given a LmaNodeGE1 node, extract the lemmas specified by it, and fill // them into the lpi_items buffer. // This function is called by inner functions extend_dict0(), extend_dict1() // and extend_dict2(). - size_t fill_lpi_buffer ( LmaPsbItem lpi_items[], size_t max_size, - size_t homo_buf_off, LmaNodeGE1 *node, + Size_t fill_lpi_buffer ( LmaPsbItem lpi_items[], Size_t max_size, + Size_t homo_buf_off, LmaNodeGE1 *node, uint16 lma_len ); // Extend in the trie from level 0. MileStoneHandle extend_dict0 ( MileStoneHandle from_handle, const DictExtPara *dep, LmaPsbItem *lpi_items, - size_t lpi_max, size_t *lpi_num ); + Size_t lpi_max, Size_t *lpi_num ); // Extend in the trie from level 1. MileStoneHandle extend_dict1 ( MileStoneHandle from_handle, const DictExtPara *dep, LmaPsbItem *lpi_items, - size_t lpi_max, size_t *lpi_num ); + Size_t lpi_max, Size_t *lpi_num ); // Extend in the trie from level 2. MileStoneHandle extend_dict2 ( MileStoneHandle from_handle, const DictExtPara *dep, LmaPsbItem *lpi_items, - size_t lpi_max, size_t *lpi_num ); + Size_t lpi_max, Size_t *lpi_num ); // Try to extend the given spelling id buffer, and if the given id_lemma can // be successfully gotten, return true; @@ -179,26 +179,26 @@ namespace ime_pinyin { bool load_dict_fd ( int sys_fd, long start_offset, long length, LemmaIdType start_id, LemmaIdType end_id ); bool close_dict() {return true;} - size_t number_of_lemmas() {return 0;} + Size_t number_of_lemmas() {return 0;} void reset_milestones ( uint16 from_step, MileStoneHandle from_handle ); MileStoneHandle extend_dict ( MileStoneHandle from_handle, const DictExtPara *dep, LmaPsbItem *lpi_items, - size_t lpi_max, size_t *lpi_num ); + Size_t lpi_max, Size_t *lpi_num ); - size_t get_lpis ( const uint16 *splid_str, uint16 splid_str_len, - LmaPsbItem *lpi_items, size_t lpi_max ); + Size_t get_lpis ( const uint16 *splid_str, uint16 splid_str_len, + LmaPsbItem *lpi_items, Size_t lpi_max ); uint16 get_lemma_str ( LemmaIdType id_lemma, char16 *str_buf, uint16 str_max ); uint16 get_lemma_splids ( LemmaIdType id_lemma, uint16 *splids, uint16 splids_max, bool arg_valid ); - size_t predict ( const char16 *last_hzs, uint16 hzs_len, - NPredictItem *npre_items, size_t npre_max, - size_t b4_used ); + Size_t predict ( const char16 *last_hzs, uint16 hzs_len, + NPredictItem *npre_items, Size_t npre_max, + Size_t b4_used ); LemmaIdType put_lemma ( char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count ) {return 0;} @@ -216,8 +216,8 @@ namespace ime_pinyin { bool remove_lemma ( LemmaIdType lemma_id ) {return false;} - size_t get_total_lemma_count() {return 0;} - void set_total_lemma_count_of_others ( size_t count ); + Size_t get_total_lemma_count() {return 0;} + void set_total_lemma_count_of_others ( Size_t count ); void flush_cache() {} @@ -225,8 +225,8 @@ namespace ime_pinyin { // Fill the lemmas with highest scores to the prediction buffer. // his_len is the history length to fill in the prediction buffer. - size_t predict_top_lmas ( size_t his_len, NPredictItem *npre_items, - size_t npre_max, size_t b4_used ); + Size_t predict_top_lmas ( Size_t his_len, NPredictItem *npre_items, + Size_t npre_max, Size_t b4_used ); }; } diff --git a/jni/include/lpicache.h b/jni/include/lpicache.h index 6c90bf1..4a0ecf0 100755..100644 --- a/jni/include/lpicache.h +++ b/jni/include/lpicache.h @@ -48,13 +48,13 @@ namespace ime_pinyin { // maximum length of the cache buffer. // Note: splid must be a half id, and lpi_items must be not NULL. The // caller of this function should guarantee this. - size_t put_cache ( uint16 splid, LmaPsbItem lpi_items[], size_t lpi_num ); + Size_t put_cache ( uint16 splid, LmaPsbItem lpi_items[], Size_t lpi_num ); // Get the cached list for the given half id. // Return the length of the cached buffer. // Note: splid must be a half id, and lpi_items must be not NULL. The // caller of this function should guarantee this. - size_t get_cache ( uint16 splid, LmaPsbItem lpi_items[], size_t lpi_max ); + Size_t get_cache ( uint16 splid, LmaPsbItem lpi_items[], Size_t lpi_max ); }; } // namespace diff --git a/jni/include/matrixsearch.h b/jni/include/matrixsearch.h index 4e882f8..bd4d02f 100755..100644 --- a/jni/include/matrixsearch.h +++ b/jni/include/matrixsearch.h @@ -26,7 +26,7 @@ namespace ime_pinyin { - static const size_t kMaxRowNum = kMaxSearchSteps; + static const Size_t kMaxRowNum = kMaxSearchSteps; typedef struct { // MileStoneHandle objects for the system and user dictionaries. @@ -118,7 +118,7 @@ namespace ime_pinyin { uint16 spl_start[kMaxRowNum]; char16 chn_str[kMaxRowNum]; // Chinese string. uint16 sublma_start[kMaxRowNum]; // Counted in Chinese characters. - size_t sublma_num; + Size_t sublma_num; uint16 length; // Counted in Chinese characters. } ComposingPhrase, *TComposingPhrase; @@ -138,20 +138,20 @@ namespace ime_pinyin { static const bool kOnlyUserDictPredict = false; // The maximum buffer to store LmaPsbItems. - static const size_t kMaxLmaPsbItems = 1450; + static const Size_t kMaxLmaPsbItems = 1450; // How many rows for each step. - static const size_t kMaxNodeARow = 5; + static const Size_t kMaxNodeARow = 5; // The maximum length of the sentence candidates counted in chinese // characters - static const size_t kMaxSentenceLength = 16; + static const Size_t kMaxSentenceLength = 16; // The size of the matrix node pool. - static const size_t kMtrxNdPoolSize = 200; + static const Size_t kMtrxNdPoolSize = 200; // The size of the DMI node pool. - static const size_t kDmiPoolSize = 800; + static const Size_t kDmiPoolSize = 800; // Used to indicate whether this object has been initialized. bool inited_; @@ -177,19 +177,19 @@ namespace ime_pinyin { SpellingParser *spl_parser_; // The maximum allowed length of spelling string (such as a Pinyin string). - size_t max_sps_len_; + Size_t max_sps_len_; // The maximum allowed length of a result Chinese string. - size_t max_hzs_len_; + Size_t max_hzs_len_; // Pinyin string. Max length: kMaxRowNum - 1 char pys_[kMaxRowNum]; // The length of the string that has been decoded successfully. - size_t pys_decoded_len_; + Size_t pys_decoded_len_; // Shared buffer for multiple purposes. - size_t *share_buf_; + Size_t *share_buf_; MatrixNode *mtrx_nd_pool_; PoolPosType mtrx_nd_pool_used_; // How many nodes used in the pool @@ -201,13 +201,13 @@ namespace ime_pinyin { DictExtPara *dep_; // Parameter used to extend DMI nodes. NPredictItem *npre_items_; // Used to do prediction - size_t npre_items_len_; + Size_t npre_items_len_; // The starting positions and lemma ids for the full sentence candidate. - size_t lma_id_num_; + Size_t lma_id_num_; uint16 lma_start_[kMaxRowNum]; // Counted in spelling ids. LemmaIdType lma_id_[kMaxRowNum]; - size_t fixed_lmas_; + Size_t fixed_lmas_; // If fixed_lmas_ is bigger than i, Element i is used to indicate whether // the i'th lemma id in lma_id_ is the first candidate for that step. @@ -227,11 +227,11 @@ namespace ime_pinyin { // The starting positions and spelling ids for the first full sentence // candidate. - size_t spl_id_num_; // Number of splling ids + Size_t spl_id_num_; // Number of splling ids uint16 spl_start_[kMaxRowNum]; // Starting positions uint16 spl_id_[kMaxRowNum]; // Spelling ids // Used to remember the last fixed position, counted in Hanzi. - size_t fixed_hzs_; + Size_t fixed_hzs_; // Lemma Items with possibility score, two purposes: // 1. In Viterbi decoding, this buffer is used to get all possible candidates @@ -239,7 +239,7 @@ namespace ime_pinyin { // 2. When the search is done, this buffer is used to get candiates from the // first un-fixed step and show them to the user. LmaPsbItem lpi_items_[kMaxLmaPsbItems]; - size_t lpi_total_; + Size_t lpi_total_; // Assign the pointers with NULL. The caller makes sure that all pointers are // not valid before calling it. This function only will be called in the @@ -262,11 +262,11 @@ namespace ime_pinyin { // The DMI nodes will be kept. // // Note: this function should not destroy content of pys_. - bool reset_search ( size_t ch_pos, bool clear_fixed_this_step, + bool reset_search ( Size_t ch_pos, bool clear_fixed_this_step, bool clear_dmi_this_step, bool clear_mtrx_this_step ); // Delete a part of the content in pys_. - void del_in_pys ( size_t start, size_t len ); + void del_in_pys ( Size_t start, Size_t len ); // Delete a spelling id and its corresponding Chinese character, and merge // the fixed lemmas into the composing phrase. @@ -274,7 +274,7 @@ namespace ime_pinyin { // This function will update the lemma and spelling segmentation information. // The caller guarantees that fixed_lmas_ > 0 and del_spl_pos is within // the fixed lemmas. - void merge_fixed_lmas ( size_t del_spl_pos ); + void merge_fixed_lmas ( Size_t del_spl_pos ); // Get spelling start posistions and ids. The result will be stored in // spl_id_num_, spl_start_[], spl_id_[]. @@ -286,8 +286,8 @@ namespace ime_pinyin { // If pfullsent is not NULL, means the full sentence candidate may be the // same with the coming lemma string, if so, remove that lemma. // The result is sorted in descendant order by the frequency score. - size_t get_lpis ( const uint16 *splid_str, size_t splid_str_len, - LmaPsbItem *lma_buf, size_t max_lma_buf, + Size_t get_lpis ( const uint16 *splid_str, Size_t splid_str_len, + LmaPsbItem *lma_buf, Size_t max_lma_buf, const char16 *pfullsent, bool sort_by_psb ); uint16 get_lemma_str ( LemmaIdType id_lemma, char16 *str_buf, uint16 str_max ); @@ -315,23 +315,23 @@ namespace ime_pinyin { // calling this function if necessary. // // The caller should guarantees that NULL != dep. - size_t extend_dmi ( DictExtPara *dep, DictMatchInfo *dmi_s ); + Size_t extend_dmi ( DictExtPara *dep, DictMatchInfo *dmi_s ); // Extend dmi for the composing phrase. - size_t extend_dmi_c ( DictExtPara *dep, DictMatchInfo *dmi_s ); + Size_t extend_dmi_c ( DictExtPara *dep, DictMatchInfo *dmi_s ); // Extend a MatrixNode with the give LmaPsbItem list. // res_row is the destination row number. // This function does not change mtrx_nd_pool_used_. Please change it after // calling this function if necessary. // return 0 always. - size_t extend_mtrx_nd ( MatrixNode *mtrx_nd, LmaPsbItem lpi_items[], - size_t lpi_num, PoolPosType dmi_fr, size_t res_row ); + Size_t extend_mtrx_nd ( MatrixNode *mtrx_nd, LmaPsbItem lpi_items[], + Size_t lpi_num, PoolPosType dmi_fr, Size_t res_row ); // Try to find a dmi node at step_to position, and the found dmi node should // match the given spelling id strings. - PoolPosType match_dmi ( size_t step_to, uint16 spl_ids[], uint16 spl_id_num ); + PoolPosType match_dmi ( Size_t step_to, uint16 spl_ids[], uint16 spl_id_num ); bool add_char ( char ch ); bool prepare_add_char ( char ch ); @@ -352,9 +352,9 @@ namespace ime_pinyin { bool splid_end_split, unsigned char splstr_len, unsigned char all_full_id ); - size_t inner_predict ( const char16 fixed_scis_ids[], uint16 scis_num, + Size_t inner_predict ( const char16 fixed_scis_ids[], uint16 scis_num, char16 predict_buf[][kMaxPredictSize + 1], - size_t buf_len ); + Size_t buf_len ); // Add the first candidate to the user dictionary. bool try_add_cand0_to_userdict(); @@ -380,7 +380,7 @@ namespace ime_pinyin { bool init_fd ( int sys_fd, long start_offset, long length, const char *fn_usr_dict ); - void set_max_lens ( size_t max_sps_len, size_t max_hzs_len ); + void set_max_lens ( Size_t max_sps_len, Size_t max_hzs_len ); void close(); @@ -396,7 +396,7 @@ namespace ime_pinyin { // Search a Pinyin string. // Return value is the position successfully parsed. - size_t search ( const char *py, size_t py_len ); + Size_t search ( const char *py, Size_t py_len ); // Used to delete something in the Pinyin string kept by the engine, and do // a re-search. @@ -411,45 +411,45 @@ namespace ime_pinyin { // If is_pos_in_splid is false, and pos-th character is in the range for the // fixed lemmas or composing string, this function will do nothing and just // return the result of the previous search. - size_t delsearch ( size_t pos, bool is_pos_in_splid, + Size_t delsearch ( Size_t pos, bool is_pos_in_splid, bool clear_fixed_this_step ); // Get the number of candiates, called after search(). - size_t get_candidate_num(); + Size_t get_candidate_num(); // Get the Pinyin string stored by the engine. // *decoded_len returns the length of the successfully decoded string. - const char *get_pystr ( size_t *decoded_len ); + const char *get_pystr ( Size_t *decoded_len ); // Get the spelling boundaries for the first sentence candidate. // Number of spellings will be returned. The number of valid elements in // spl_start is one more than the return value because the last one is used // to indicate the beginning of the next un-input speling. // For a Pinyin "women", the returned value is 2, spl_start is [0, 2, 5] . - size_t get_spl_start ( const uint16 *&spl_start ); + Size_t get_spl_start ( const uint16 *&spl_start ); // Get one candiate string. If full sentence candidate is available, it will // be the first one. - char16 *get_candidate ( size_t cand_id, char16 *cand_str, size_t max_len ); + char16 *get_candidate ( Size_t cand_id, char16 *cand_str, Size_t max_len ); // Get the first candiate, which is a "full sentence". // retstr_len is not NULL, it will be used to return the string length. // If only_unfixed is true, only unfixed part will be fetched. - char16 *get_candidate0 ( char16 *cand_str, size_t max_len, + char16 *get_candidate0 ( char16 *cand_str, Size_t max_len, uint16 *retstr_len, bool only_unfixed ); // Choose a candidate. The decoder will do a search after the fixed position. - size_t choose ( size_t cand_id ); + Size_t choose ( Size_t cand_id ); // Cancel the last choosing operation, and return the new number of choices. - size_t cancel_last_choice(); + Size_t cancel_last_choice(); // Get the length of fixed Hanzis. - size_t get_fixedlen(); + Size_t get_fixedlen(); - size_t get_predicts ( const char16 fixed_buf[], + Size_t get_predicts ( const char16 fixed_buf[], char16 predict_buf[][kMaxPredictSize + 1], - size_t buf_len ); + Size_t buf_len ); }; } diff --git a/jni/include/mystdlib.h b/jni/include/mystdlib.h index 976d208..ed08283 100755..100644 --- a/jni/include/mystdlib.h +++ b/jni/include/mystdlib.h @@ -18,14 +18,14 @@ #define PINYINIME_INCLUDE_MYSTDLIB_H__ #include <stdlib.h> - +#include "./utf16char.h" namespace ime_pinyin { - void myqsort ( void *p, size_t n, size_t es, + void myqsort ( void *p, Size_t n, Size_t es, int ( *cmp ) ( const void *, const void * ) ); void *mybsearch ( const void *key, const void *base, - size_t nmemb, size_t size, + Size_t nmemb, Size_t size, int ( *compar ) ( const void *, const void * ) ); } diff --git a/jni/include/ngram.h b/jni/include/ngram.h index a740b7e..825b3a9 100755..100644 --- a/jni/include/ngram.h +++ b/jni/include/ngram.h @@ -25,7 +25,7 @@ namespace ime_pinyin { typedef unsigned char CODEBOOK_TYPE; - static const size_t kCodeBookSize = 256; + static const Size_t kCodeBookSize = 256; class NGram { public: @@ -42,16 +42,16 @@ namespace ime_pinyin { // total frequency changes. // In this version, frequencies of system lemmas are fixed. We are considering // to make them changable in next version. - static const size_t kSysDictTotalFreq = 100000000; + static const Size_t kSysDictTotalFreq = 100000000; private: static NGram *instance_; bool initialized_; - size_t idx_num_; + Size_t idx_num_; - size_t total_freq_none_sys_; + Size_t total_freq_none_sys_; // Score compensation for system dictionary lemmas. // Because after user adds some user lemmas, the total frequency changes, and @@ -74,7 +74,7 @@ namespace ime_pinyin { bool load_ngram ( FILE *fp ); // Set the total frequency of all none system dictionaries. - void set_total_freq_none_sys ( size_t freq_none_sys ); + void set_total_freq_none_sys ( Size_t freq_none_sys ); float get_uni_psb ( LemmaIdType lma_id ); @@ -87,7 +87,7 @@ namespace ime_pinyin { #ifdef ___BUILD_MODEL___ // For constructing the unigram mode model. - bool build_unigram ( LemmaEntry *lemma_arr, size_t num, + bool build_unigram ( LemmaEntry *lemma_arr, Size_t num, LemmaIdType next_idx_unused ); #endif }; diff --git a/jni/include/pinyinime.h b/jni/include/pinyinime.h index bc2844b..f6562e4 100755..100644 --- a/jni/include/pinyinime.h +++ b/jni/include/pinyinime.h @@ -66,7 +66,7 @@ extern "C" { * @param max_sps_len Maximum length of the spelling string(Pinyin string). * @max_hzs_len Maximum length of the decoded Chinese character string. */ - void im_set_max_lens ( size_t max_sps_len, size_t max_hzs_len ); + void im_set_max_lens ( Size_t max_sps_len, Size_t max_hzs_len ); /** * Flush cached data to persistent memory. Because at runtime, in order to @@ -87,7 +87,7 @@ extern "C" { * @param sps_len The length of the spelling string buffer. * @return The number of candidates. */ - size_t im_search ( const char *sps_buf, size_t sps_len ); + Size_t im_search ( const char *sps_buf, Size_t sps_len ); /** * Make a delete operation in the current search result, and make research if @@ -99,7 +99,7 @@ extern "C" { * in the spelling string, or the position in the result spelling id string. * @return The number of candidates. */ - size_t im_delsearch ( size_t pos, bool is_pos_in_splid, + Size_t im_delsearch ( Size_t pos, bool is_pos_in_splid, bool clear_fixed_this_step ); /** @@ -115,7 +115,7 @@ extern "C" { * @param ch The letter to add. * @return The number of candidates. */ - size_t im_add_letter ( char ch ); + Size_t im_add_letter ( char ch ); /** * Get the spelling string kept by the decoder. @@ -124,7 +124,7 @@ extern "C" { * string is successfully parsed. * @return The spelling string kept by the decoder. */ - const char *im_get_sps_str ( size_t *decoded_len ); + const char *im_get_sps_str ( Size_t *decoded_len ); /** * Get a candidate(or choice) string. @@ -135,8 +135,8 @@ extern "C" { * @param max_len The maximum length of the buffer. * @return cand_str if succeeds, otherwise NULL. */ - char16 *im_get_candidate ( size_t cand_id, char16 *cand_str, - size_t max_len ); + char16 *im_get_candidate ( Size_t cand_id, char16 *cand_str, + Size_t max_len ); /** * Get the segmentation information(the starting positions) of the spelling @@ -147,7 +147,7 @@ extern "C" { * elements in spl_start, and spl_start[L] is the posistion after the end of * the last spelling id. */ - size_t im_get_spl_start_pos ( const uint16 *&spl_start ); + Size_t im_get_spl_start_pos ( const uint16 *&spl_start ); /** * Choose a candidate and make it fixed. If the candidate does not match @@ -160,21 +160,21 @@ extern "C" { * @return The number of candidates. If after the selection, the whole result * string has been fixed, there will be only one candidate. */ - size_t im_choose ( size_t cand_id ); + Size_t im_choose ( Size_t cand_id ); /** * Cancel the last selection, or revert the last operation of im_choose(). * * @return The number of candidates. */ - size_t im_cancel_last_choice(); + Size_t im_cancel_last_choice(); /** * Get the number of fixed spelling ids, or Chinese characters. * * @return The number of fixed spelling ids, of Chinese characters. */ - size_t im_get_fixed_len(); + Size_t im_get_fixed_len(); /** * Cancel the input state and reset the search workspace. @@ -190,7 +190,7 @@ extern "C" { * @param pre_buf Used to return prediction result list. * @return The number of predicted result string. */ - size_t im_get_predicts ( const char16 *his_buf, + Size_t im_get_predicts ( const char16 *his_buf, char16 ( *&pre_buf ) [kMaxPredictSize + 1] ); /** diff --git a/jni/include/searchutility.h b/jni/include/searchutility.h index e33b2b0..d218242 100755..100644 --- a/jni/include/searchutility.h +++ b/jni/include/searchutility.h @@ -30,8 +30,8 @@ namespace ime_pinyin { // Type used to express a lemma and its probability score. typedef struct { - size_t id: ( kLemmaIdSize * 8 ); - size_t lma_len: 4; + Size_t id: ( kLemmaIdSize * 8 ); + Size_t lma_len: 4; uint16 psb; // The score, the lower psb, the higher possibility. // For single character items, we may also need Hanzi. // For multiple characer items, ignore it. @@ -133,9 +133,9 @@ namespace ime_pinyin { int cmp_npre_by_hanzi_score ( const void *p1, const void *p2 ); - size_t remove_duplicate_npre ( NPredictItem *npre_items, size_t npre_num ); + Size_t remove_duplicate_npre ( NPredictItem *npre_items, Size_t npre_num ); - size_t align_to_size_t ( size_t size ); + Size_t align_to_Size_t ( Size_t size ); } // namespace diff --git a/jni/include/spellingtable.h b/jni/include/spellingtable.h index b137a8e..35eeaab 100644 --- a/jni/include/spellingtable.h +++ b/jni/include/spellingtable.h @@ -24,7 +24,7 @@ namespace ime_pinyin { #ifdef ___BUILD_MODEL___ - const size_t kMaxSpellingSize = kMaxPinyinSize; + const Size_t kMaxSpellingSize = kMaxPinyinSize; typedef struct { char str[kMaxSpellingSize + 1]; @@ -37,12 +37,12 @@ namespace ime_pinyin { // we only keep its first spelling_size_ chars. class SpellingTable { private: - static const size_t kNotSupportNum = 3; + static const Size_t kNotSupportNum = 3; static const char kNotSupportList[kNotSupportNum][kMaxSpellingSize + 1]; bool need_score_; - size_t spelling_max_num_; + Size_t spelling_max_num_; RawSpelling *raw_spellings_; @@ -54,7 +54,7 @@ namespace ime_pinyin { double total_freq_; - size_t spelling_num_; + Size_t spelling_num_; double score_amplifier_; @@ -63,8 +63,8 @@ namespace ime_pinyin { // If frozen is true, put_spelling() and contain() are not allowed to call. bool frozen_; - size_t get_hash_pos ( const char *spelling_str ); - size_t hash_pos_next ( size_t hash_pos ); + Size_t get_hash_pos ( const char *spelling_str ); + Size_t hash_pos_next ( Size_t hash_pos ); void free_resource(); public: SpellingTable(); @@ -75,7 +75,7 @@ namespace ime_pinyin { // spl_max_num is the maximum number of spelling strings to store. // need_score is used to indicate whether the caller needs to calculate a // score for each spelling. - bool init_table ( size_t pure_spl_size, size_t spl_max_num, bool need_score ); + bool init_table ( Size_t pure_spl_size, Size_t spl_max_num, bool need_score ); // Put a spelling string to the table. // It always returns false if called after arrange() withtout a new @@ -99,7 +99,7 @@ namespace ime_pinyin { // unsinged char score. // An item with a lower score has a higher probability. // Do not call put_spelling() and contains() after arrange(). - const char *arrange ( size_t *item_size, size_t *spl_num ); + const char *arrange ( Size_t *item_size, Size_t *spl_num ); float get_score_amplifier(); diff --git a/jni/include/spellingtrie.h b/jni/include/spellingtrie.h index 77f27a1..600a3a6 100644 --- a/jni/include/spellingtrie.h +++ b/jni/include/spellingtrie.h @@ -39,7 +39,7 @@ namespace ime_pinyin { class SpellingTrie { private: static const int kMaxYmNum = 64; - static const size_t kValidSplCharNum = 26; + static const Size_t kValidSplCharNum = 26; static const uint16 kHalfIdShengmuMask = 0x01; static const uint16 kHalfIdYunmuMask = 0x02; @@ -80,8 +80,8 @@ namespace ime_pinyin { // The Yunmu table. // Each Yunmu will be assigned with Yunmu id from 1. char *ym_buf_; - size_t ym_size_; // The size of longest Yunmu string, '\0'included. - size_t ym_num_; + Size_t ym_size_; // The size of longest Yunmu string, '\0'included. + Size_t ym_num_; // The spelling string just queried char *splstr_queried_; @@ -116,7 +116,7 @@ namespace ime_pinyin { #ifdef ___BUILD_MODEL___ // How many node used to build the trie. - size_t node_num_; + Size_t node_num_; #endif SpellingTrie(); @@ -127,8 +127,8 @@ namespace ime_pinyin { // item_star to item_end). // Member spelliing_buf_ and spelling_size_ should be valid. // parent is used to update its num_of_son and score. - SpellingNode *construct_spellings_subset ( size_t item_start, size_t item_end, - size_t level, SpellingNode *parent ); + SpellingNode *construct_spellings_subset ( Size_t item_start, Size_t item_end, + Size_t level, SpellingNode *parent ); bool build_f2h(); // The caller should guarantee ch >= 'A' && ch <= 'Z' @@ -168,7 +168,7 @@ namespace ime_pinyin { // score_amplifier is used to convert a possibility value into score. // average_score is the average_score of all spellings. The dumb node is // assigned with this score. - bool construct ( const char *spelling_arr, size_t item_size, size_t item_num, + bool construct ( const char *spelling_arr, Size_t item_size, Size_t item_num, float score_amplifier, unsigned char average_score ); // Test if the given id is a valid spelling id. @@ -236,7 +236,7 @@ namespace ime_pinyin { bool load_spl_trie ( FILE *fp ); // Get the number of spellings - size_t get_spelling_num(); + Size_t get_spelling_num(); // Return the Yunmu id for the given Yunmu string. // If the string is not valid, return 0; @@ -250,8 +250,8 @@ namespace ime_pinyin { // Get Pinyin string for a given spelling id. Return the length of the // string, and fill-in '\0' at the end. - size_t get_spelling_str16 ( uint16 splid, char16 *splstr16, - size_t splstr16_len ); + Size_t get_spelling_str16 ( uint16 splid, char16 *splstr16, + Size_t splstr16_len ); }; } diff --git a/jni/include/splparser.h b/jni/include/splparser.h index 9df41ea..9df41ea 100755..100644 --- a/jni/include/splparser.h +++ b/jni/include/splparser.h diff --git a/jni/include/sync.h b/jni/include/sync.h index d123b62..d123b62 100755..100644 --- a/jni/include/sync.h +++ b/jni/include/sync.h diff --git a/jni/include/userdict.h b/jni/include/userdict.h index 22048d4..b3712bd 100755..100644 --- a/jni/include/userdict.h +++ b/jni/include/userdict.h @@ -39,16 +39,16 @@ namespace ime_pinyin { bool close_dict(); - size_t number_of_lemmas(); + Size_t number_of_lemmas(); void reset_milestones ( uint16 from_step, MileStoneHandle from_handle ); MileStoneHandle extend_dict ( MileStoneHandle from_handle, const DictExtPara *dep, LmaPsbItem *lpi_items, - size_t lpi_max, size_t *lpi_num ); + Size_t lpi_max, Size_t *lpi_num ); - size_t get_lpis ( const uint16 *splid_str, uint16 splid_str_len, - LmaPsbItem *lpi_items, size_t lpi_max ); + Size_t get_lpis ( const uint16 *splid_str, uint16 splid_str_len, + LmaPsbItem *lpi_items, Size_t lpi_max ); uint16 get_lemma_str ( LemmaIdType id_lemma, char16 *str_buf, uint16 str_max ); @@ -56,9 +56,9 @@ namespace ime_pinyin { uint16 get_lemma_splids ( LemmaIdType id_lemma, uint16 *splids, uint16 splids_max, bool arg_valid ); - size_t predict ( const char16 last_hzs[], uint16 hzs_len, - NPredictItem *npre_items, size_t npre_max, - size_t b4_used ); + Size_t predict ( const char16 last_hzs[], uint16 hzs_len, + NPredictItem *npre_items, Size_t npre_max, + Size_t b4_used ); // Full spelling ids are required LemmaIdType put_lemma ( char16 lemma_str[], uint16 splids[], @@ -77,8 +77,8 @@ namespace ime_pinyin { bool remove_lemma ( LemmaIdType lemma_id ); - size_t get_total_lemma_count(); - void set_total_lemma_count_of_others ( size_t count ); + Size_t get_total_lemma_count(); + void set_total_lemma_count_of_others ( Size_t count ); void flush_cache(); @@ -182,12 +182,12 @@ namespace ime_pinyin { #endif #ifdef ___SYNC_ENABLED___ uint32 *syncs_; - size_t sync_count_size_; + Size_t sync_count_size_; #endif uint32 *offsets_by_id_; - size_t lemma_count_left_; - size_t lemma_size_left_; + Size_t lemma_count_left_; + Size_t lemma_size_left_; const char *dict_file_; @@ -304,8 +304,8 @@ namespace ime_pinyin { LemmaIdType _put_lemma ( char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count, uint64 lmt ); - size_t _get_lpis ( const uint16 *splid_str, uint16 splid_str_len, - LmaPsbItem *lpi_items, size_t lpi_max, bool *need_extend ); + Size_t _get_lpis ( const uint16 *splid_str, uint16 splid_str_len, + LmaPsbItem *lpi_items, Size_t lpi_max, bool *need_extend ); int _get_lemma_score ( char16 lemma_str[], uint16 splids[], uint16 lemma_len ); diff --git a/jni/include/utf16char.h b/jni/include/utf16char.h index f466d41..dec64e1 100755..100644 --- a/jni/include/utf16char.h +++ b/jni/include/utf16char.h @@ -24,26 +24,26 @@ namespace ime_pinyin { #ifdef __cplusplus extern "C" { #endif - + typedef unsigned int Size_t; typedef unsigned short char16; // Get a token from utf16_str, // Returned pointer is a '\0'-terminated utf16 string, or NULL // *utf16_str_next returns the next part of the string for further tokenizing - char16 *utf16_strtok ( char16 *utf16_str, size_t *token_size, + char16 *utf16_strtok ( char16 *utf16_str, Size_t *token_size, char16 **utf16_str_next ); int utf16_atoi ( const char16 *utf16_str ); float utf16_atof ( const char16 *utf16_str ); - size_t utf16_strlen ( const char16 *utf16_str ); + Size_t utf16_strlen ( const char16 *utf16_str ); int utf16_strcmp ( const char16 *str1, const char16 *str2 ); - int utf16_strncmp ( const char16 *str1, const char16 *str2, size_t size ); + int utf16_strncmp ( const char16 *str1, const char16 *str2, Size_t size ); char16 *utf16_strcpy ( char16 *dst, const char16 *src ); - char16 *utf16_strncpy ( char16 *dst, const char16 *src, size_t size ); + char16 *utf16_strncpy ( char16 *dst, const char16 *src, Size_t size ); char *utf16_strcpy_tochar ( char *dst, const char16 *src ); diff --git a/jni/include/utf16reader.h b/jni/include/utf16reader.h index 02de634..ad64cd0 100755..100644 --- a/jni/include/utf16reader.h +++ b/jni/include/utf16reader.h @@ -26,11 +26,11 @@ namespace ime_pinyin { private: FILE *fp_; char16 *buffer_; - size_t buffer_total_len_; - size_t buffer_next_pos_; + Size_t buffer_total_len_; + Size_t buffer_next_pos_; // Always less than buffer_total_len_ - buffer_next_pos_ - size_t buffer_valid_len_; + Size_t buffer_valid_len_; public: Utf16Reader(); @@ -39,8 +39,8 @@ namespace ime_pinyin { // filename is the name of the file to open. // buffer_len specifies how long buffer should be allocated to speed up the // future reading - bool open ( const char *filename, size_t buffer_len ); - char16 *readline ( char16 *read_buf, size_t max_len ); + bool open ( const char *filename, Size_t buffer_len ); + char16 *readline ( char16 *read_buf, Size_t max_len ); bool close(); }; } diff --git a/jni/share/dictbuilder.cpp b/jni/share/dictbuilder.cpp index 8dadd7c..847052a 100755..100644 --- a/jni/share/dictbuilder.cpp +++ b/jni/share/dictbuilder.cpp @@ -33,8 +33,8 @@ namespace ime_pinyin { #ifdef ___BUILD_MODEL___ - static const size_t kReadBufLen = 512; - static const size_t kSplTableHashLen = 2000; + static const Size_t kReadBufLen = 512; + static const Size_t kSplTableHashLen = 2000; // Compare a SingleCharItem, first by Hanzis, then by spelling ids, then by // frequencies. @@ -81,8 +81,8 @@ namespace ime_pinyin { } int cmp_lemma_entry_hzs ( const void *p1, const void *p2 ) { - size_t size1 = utf16_strlen ( ( ( const LemmaEntry * ) p1 )->hanzi_str ); - size_t size2 = utf16_strlen ( ( ( const LemmaEntry * ) p2 )->hanzi_str ); + Size_t size1 = utf16_strlen ( ( ( const LemmaEntry * ) p1 )->hanzi_str ); + Size_t size2 = utf16_strlen ( ( ( const LemmaEntry * ) p2 )->hanzi_str ); if ( size1 < size2 ) { return -1; } else if ( size1 > size2 ) @@ -110,8 +110,8 @@ namespace ime_pinyin { // First hanzi, if the same, then Pinyin int cmp_lemma_entry_hzspys ( const void *p1, const void *p2 ) { - size_t size1 = utf16_strlen ( ( ( const LemmaEntry * ) p1 )->hanzi_str ); - size_t size2 = utf16_strlen ( ( ( const LemmaEntry * ) p2 )->hanzi_str ); + Size_t size1 = utf16_strlen ( ( ( const LemmaEntry * ) p1 )->hanzi_str ); + Size_t size2 = utf16_strlen ( ( ( const LemmaEntry * ) p2 )->hanzi_str ); if ( size1 < size2 ) { return -1; } else if ( size1 > size2 ) @@ -153,7 +153,7 @@ namespace ime_pinyin { free_resource(); } - bool DictBuilder::alloc_resource ( size_t lma_num ) { + bool DictBuilder::alloc_resource ( Size_t lma_num ) { if ( 0 == lma_num ) { return false; } free_resource(); @@ -189,7 +189,7 @@ namespace ime_pinyin { return true; } - char16 *DictBuilder::read_valid_hanzis ( const char *fn_validhzs, size_t *num ) { + char16 *DictBuilder::read_valid_hanzis ( const char *fn_validhzs, Size_t *num ) { if ( NULL == fn_validhzs || NULL == num ) { return NULL; } *num = 0; @@ -222,7 +222,7 @@ namespace ime_pinyin { return hzs; } - bool DictBuilder::hz_in_hanzis_list ( const char16 *hzs, size_t hzs_len, + bool DictBuilder::hz_in_hanzis_list ( const char16 *hzs, Size_t hzs_len, char16 hz ) { if ( NULL == hzs ) { return false; } @@ -236,11 +236,11 @@ namespace ime_pinyin { } // The caller makes sure that the parameters are valid. - bool DictBuilder::str_in_hanzis_list ( const char16 *hzs, size_t hzs_len, - const char16 *str, size_t str_len ) { + bool DictBuilder::str_in_hanzis_list ( const char16 *hzs, Size_t hzs_len, + const char16 *str, Size_t str_len ) { if ( NULL == hzs || NULL == str ) { return false; } - for ( size_t pos = 0; pos < str_len; pos++ ) { + for ( Size_t pos = 0; pos < str_len; pos++ ) { if ( !hz_in_hanzis_list ( hzs, hzs_len, str[pos] ) ) { return false; } } @@ -251,7 +251,7 @@ namespace ime_pinyin { top_lmas_num_ = 0; if ( NULL == lemma_arr_ ) { return; } - for ( size_t pos = 0; pos < lemma_num_; pos++ ) { + for ( Size_t pos = 0; pos < lemma_num_; pos++ ) { if ( 0 == top_lmas_num_ ) { top_lmas_[0] = lemma_arr_[pos]; top_lmas_num_ = 1; @@ -260,7 +260,7 @@ namespace ime_pinyin { if ( lemma_arr_[pos].freq > top_lmas_[top_lmas_num_ - 1].freq ) { if ( kTopScoreLemmaNum > top_lmas_num_ ) { top_lmas_num_ += 1; } - size_t move_pos; + Size_t move_pos; for ( move_pos = top_lmas_num_ - 1; move_pos > 0; move_pos-- ) { top_lmas_[move_pos] = top_lmas_[move_pos - 1]; if ( 0 == move_pos - 1 || @@ -278,7 +278,7 @@ namespace ime_pinyin { } if ( kPrintDebug0 ) { printf ( "\n------Top Lemmas------------------\n" ); - for ( size_t pos = 0; pos < top_lmas_num_; pos++ ) { + for ( Size_t pos = 0; pos < top_lmas_num_; pos++ ) { printf ( "--%d, idx:%06d, score:%.5f\n", pos, top_lmas_[pos].idx_by_hz, top_lmas_[pos].freq ); } @@ -314,32 +314,32 @@ namespace ime_pinyin { homo_idx_num_gt1_ = 0; } - size_t DictBuilder::read_raw_dict ( const char *fn_raw, + Size_t DictBuilder::read_raw_dict ( const char *fn_raw, const char *fn_validhzs, - size_t max_item ) { + Size_t max_item ) { if ( NULL == fn_raw ) { return 0; } Utf16Reader utf16_reader; if ( !utf16_reader.open ( fn_raw, kReadBufLen * 10 ) ) { return false; } char16 read_buf[kReadBufLen]; // Read the number of lemmas in the file - size_t lemma_num = 240000; + Size_t lemma_num = 240000; // allocate resource required if ( !alloc_resource ( lemma_num ) ) { utf16_reader.close(); } // Read the valid Hanzi list. char16 *valid_hzs = NULL; - size_t valid_hzs_num = 0; + Size_t valid_hzs_num = 0; valid_hzs = read_valid_hanzis ( fn_validhzs, &valid_hzs_num ); // Begin reading the lemma entries - for ( size_t i = 0; i < max_item; i++ ) { + for ( Size_t i = 0; i < max_item; i++ ) { // read next entry if ( !utf16_reader.readline ( read_buf, kReadBufLen ) ) { lemma_num = i; break; } - size_t token_size; + Size_t token_size; char16 *token; char16 *to_tokenize = read_buf; // Get the Hanzi string @@ -349,7 +349,7 @@ namespace ime_pinyin { utf16_reader.close(); return false; } - size_t lemma_size = utf16_strlen ( token ); + Size_t lemma_size = utf16_strlen ( token ); if ( lemma_size > kMaxLemmaSize ) { i--; continue; @@ -393,7 +393,7 @@ namespace ime_pinyin { } // Get spelling String bool spelling_not_support = false; - for ( size_t hz_pos = 0; hz_pos < ( size_t ) lemma_arr_[i].hz_str_len; + for ( Size_t hz_pos = 0; hz_pos < ( Size_t ) lemma_arr_[i].hz_str_len; hz_pos++ ) { // Get a Pinyin token = utf16_strtok ( to_tokenize, &token_size, &to_tokenize ); @@ -437,8 +437,8 @@ namespace ime_pinyin { // The size of an spelling. '\0' is included. If the spelling table is // initialized to calculate the spelling scores, the last char in the // spelling string will be score, and it is also included in spl_item_size. - size_t spl_item_size; - size_t spl_num; + Size_t spl_item_size; + Size_t spl_num; const char *spl_buf; spl_buf = spl_table_->arrange ( &spl_item_size, &spl_num ); if ( NULL == spl_buf ) { @@ -454,8 +454,8 @@ namespace ime_pinyin { } printf ( "spelling tree construct successfully.\n" ); // Convert the spelling string to idxs - for ( size_t i = 0; i < lemma_num_; i++ ) { - for ( size_t hz_pos = 0; hz_pos < ( size_t ) lemma_arr_[i].hz_str_len; + for ( Size_t i = 0; i < lemma_num_; i++ ) { + for ( Size_t hz_pos = 0; hz_pos < ( Size_t ) lemma_arr_[i].hz_str_len; hz_pos++ ) { uint16 spl_idxs[2]; uint16 spl_start_pos[3]; @@ -504,7 +504,7 @@ namespace ime_pinyin { // Move the node data and homo data to the DictTrie dict_trie->root_ = new LmaNodeLE0[lma_nds_used_num_le0_]; dict_trie->nodes_ge1_ = new LmaNodeGE1[lma_nds_used_num_ge1_]; - size_t lma_idx_num = homo_idx_num_eq1_ + homo_idx_num_gt1_ + top_lmas_num_; + Size_t lma_idx_num = homo_idx_num_eq1_ + homo_idx_num_gt1_ + top_lmas_num_; dict_trie->lma_idx_buf_ = new unsigned char[lma_idx_num * kLemmaIdSize]; assert ( NULL != dict_trie->root_ ); assert ( NULL != dict_trie->lma_idx_buf_ ); @@ -516,11 +516,11 @@ namespace ime_pinyin { sizeof ( LmaNodeLE0 ) * lma_nds_used_num_le0_ ); memcpy ( dict_trie->nodes_ge1_, lma_nodes_ge1_, sizeof ( LmaNodeGE1 ) * lma_nds_used_num_ge1_ ); - for ( size_t pos = 0; pos < homo_idx_num_eq1_ + homo_idx_num_gt1_; pos++ ) { + for ( Size_t pos = 0; pos < homo_idx_num_eq1_ + homo_idx_num_gt1_; pos++ ) { id_to_charbuf ( dict_trie->lma_idx_buf_ + pos * kLemmaIdSize, homo_idx_buf_[pos] ); } - for ( size_t pos = homo_idx_num_eq1_ + homo_idx_num_gt1_; + for ( Size_t pos = homo_idx_num_eq1_ + homo_idx_num_gt1_; pos < lma_idx_num; pos++ ) { LemmaIdType idx = top_lmas_[pos - homo_idx_num_eq1_ - homo_idx_num_gt1_].idx_by_hz; @@ -540,17 +540,17 @@ namespace ime_pinyin { void DictBuilder::id_to_charbuf ( unsigned char *buf, LemmaIdType id ) { if ( NULL == buf ) { return; } - for ( size_t pos = 0; pos < kLemmaIdSize; pos++ ) { + for ( Size_t pos = 0; pos < kLemmaIdSize; pos++ ) { ( buf ) [pos] = ( unsigned char ) ( id >> ( pos * 8 ) ); } } - void DictBuilder::set_son_offset ( LmaNodeGE1 *node, size_t offset ) { + void DictBuilder::set_son_offset ( LmaNodeGE1 *node, Size_t offset ) { node->son_1st_off_l = static_cast<uint16> ( offset ); node->son_1st_off_h = static_cast<unsigned char> ( offset >> 16 ); } - void DictBuilder:: set_homo_id_buf_offset ( LmaNodeGE1 *node, size_t offset ) { + void DictBuilder:: set_homo_id_buf_offset ( LmaNodeGE1 *node, Size_t offset ) { node->homo_idx_buf_off_l = static_cast<uint16> ( offset ); node->homo_idx_buf_off_h = static_cast<unsigned char> ( offset >> 16 ); } @@ -580,7 +580,7 @@ namespace ime_pinyin { myqsort ( lemma_arr_, lemma_num_, sizeof ( LemmaEntry ), cmp_lemma_entry_hzs ); lemma_arr_[0].idx_by_hz = 1; LemmaIdType idx_max = 1; - for ( size_t i = 1; i < lemma_num_; i++ ) { + for ( Size_t i = 1; i < lemma_num_; i++ ) { if ( utf16_strcmp ( lemma_arr_[i].hanzi_str, lemma_arr_[i - 1].hanzi_str ) ) { idx_max++; lemma_arr_[i].idx_by_hz = idx_max; @@ -592,7 +592,7 @@ namespace ime_pinyin { return idx_max + 1; } - size_t DictBuilder::build_scis() { + Size_t DictBuilder::build_scis() { if ( NULL == scis_ || lemma_num_ * kMaxLemmaSize > scis_num_ ) { return 0; } SpellingTrie &spl_trie = SpellingTrie::get_instance(); @@ -603,9 +603,9 @@ namespace ime_pinyin { scis_[0].splid.half_splid = 0; scis_num_ = 1; // Copy the hanzis to the buffer - for ( size_t pos = 0; pos < lemma_num_; pos++ ) { - size_t hz_num = lemma_arr_[pos].hz_str_len; - for ( size_t hzpos = 0; hzpos < hz_num; hzpos++ ) { + for ( Size_t pos = 0; pos < lemma_num_; pos++ ) { + Size_t hz_num = lemma_arr_[pos].hz_str_len; + for ( Size_t hzpos = 0; hzpos < hz_num; hzpos++ ) { scis_[scis_num_].hz = lemma_arr_[pos].hanzi_str[hzpos]; scis_[scis_num_].splid.full_splid = lemma_arr_[pos].spl_idx_arr[hzpos]; scis_[scis_num_].splid.half_splid = @@ -619,8 +619,8 @@ namespace ime_pinyin { } myqsort ( scis_, scis_num_, sizeof ( SingleCharItem ), cmp_scis_hz_splid_freq ); // Remove repeated items - size_t unique_scis_num = 1; - for ( size_t pos = 1; pos < scis_num_; pos++ ) { + Size_t unique_scis_num = 1; + for ( Size_t pos = 1; pos < scis_num_; pos++ ) { if ( scis_[pos].hz == scis_[pos - 1].hz && scis_[pos].splid.full_splid == scis_[pos - 1].splid.full_splid ) { continue; } @@ -631,9 +631,9 @@ namespace ime_pinyin { } scis_num_ = unique_scis_num; // Update the lemma list. - for ( size_t pos = 0; pos < lemma_num_; pos++ ) { - size_t hz_num = lemma_arr_[pos].hz_str_len; - for ( size_t hzpos = 0; hzpos < hz_num; hzpos++ ) { + for ( Size_t pos = 0; pos < lemma_num_; pos++ ) { + Size_t hz_num = lemma_arr_[pos].hz_str_len; + for ( Size_t hzpos = 0; hzpos < hz_num; hzpos++ ) { SingleCharItem key; key.hz = lemma_arr_[pos].hanzi_str[hzpos]; key.splid.full_splid = lemma_arr_[pos].spl_idx_arr[hzpos]; @@ -653,18 +653,18 @@ namespace ime_pinyin { } bool DictBuilder::construct_subset ( void *parent, LemmaEntry *lemma_arr, - size_t item_start, size_t item_end, - size_t level ) { + Size_t item_start, Size_t item_end, + Size_t level ) { if ( level >= kMaxLemmaSize || item_end <= item_start ) { return false; } // 1. Scan for how many sons - size_t parent_son_num = 0; + Size_t parent_son_num = 0; // LemmaNode *son_1st = NULL; // parent.num_of_son = 0; LemmaEntry *lma_last_start = lemma_arr_ + item_start; uint16 spl_idx_node = lma_last_start->spl_idx_arr[level]; // Scan for how many sons to be allocaed - for ( size_t i = item_start + 1; i < item_end; i++ ) { + for ( Size_t i = item_start + 1; i < item_end; i++ ) { LemmaEntry *lma_current = lemma_arr + i; uint16 spl_idx_current = lma_current->spl_idx_arr[level]; if ( spl_idx_current != spl_idx_node ) { @@ -717,14 +717,14 @@ namespace ime_pinyin { ( unsigned char ) parent_son_num; } // 3. Now begin to construct the son one by one - size_t son_pos = 0; + Size_t son_pos = 0; lma_last_start = lemma_arr_ + item_start; spl_idx_node = lma_last_start->spl_idx_arr[level]; - size_t homo_num = 0; + Size_t homo_num = 0; if ( lma_last_start->spl_idx_arr[level + 1] == 0 ) { homo_num = 1; } - size_t item_start_next = item_start; - for ( size_t i = item_start + 1; i < item_end; i++ ) { + Size_t item_start_next = item_start; + for ( Size_t i = item_start + 1; i < item_end; i++ ) { LemmaEntry *lma_current = lemma_arr_ + i; uint16 spl_idx_current = lma_current->spl_idx_arr[level]; if ( spl_idx_current == spl_idx_node ) { @@ -758,7 +758,7 @@ namespace ime_pinyin { assert ( homo_num <= 255 ); node_cur_ge1->num_of_homo = ( unsigned char ) homo_num; } - for ( size_t homo_pos = 0; homo_pos < homo_num; homo_pos++ ) { + for ( Size_t homo_pos = 0; homo_pos < homo_num; homo_pos++ ) { idx_buf[homo_pos] = lemma_arr_[item_start_next + homo_pos].idx_by_hz; } #ifdef ___DO_STATISTICS___ @@ -817,7 +817,7 @@ namespace ime_pinyin { assert ( homo_num <= 255 ); node_cur_ge1->num_of_homo = ( unsigned char ) homo_num; } - for ( size_t homo_pos = 0; homo_pos < homo_num; homo_pos++ ) { + for ( Size_t homo_pos = 0; homo_pos < homo_num; homo_pos++ ) { idx_buf[homo_pos] = lemma_arr[item_start_next + homo_pos].idx_by_hz; } #ifdef ___DO_STATISTICS___ @@ -851,14 +851,14 @@ namespace ime_pinyin { #ifdef ___DO_STATISTICS___ void DictBuilder::stat_init() { - memset ( max_sonbuf_len_, 0, sizeof ( size_t ) * kMaxLemmaSize ); - memset ( max_homobuf_len_, 0, sizeof ( size_t ) * kMaxLemmaSize ); - memset ( total_son_num_, 0, sizeof ( size_t ) * kMaxLemmaSize ); - memset ( total_node_hasson_, 0, sizeof ( size_t ) * kMaxLemmaSize ); - memset ( total_sonbuf_num_, 0, sizeof ( size_t ) * kMaxLemmaSize ); - memset ( total_sonbuf_allnoson_, 0, sizeof ( size_t ) * kMaxLemmaSize ); - memset ( total_node_in_sonbuf_allnoson_, 0, sizeof ( size_t ) * kMaxLemmaSize ); - memset ( total_homo_num_, 0, sizeof ( size_t ) * kMaxLemmaSize ); + memset ( max_sonbuf_len_, 0, sizeof ( Size_t ) * kMaxLemmaSize ); + memset ( max_homobuf_len_, 0, sizeof ( Size_t ) * kMaxLemmaSize ); + memset ( total_son_num_, 0, sizeof ( Size_t ) * kMaxLemmaSize ); + memset ( total_node_hasson_, 0, sizeof ( Size_t ) * kMaxLemmaSize ); + memset ( total_sonbuf_num_, 0, sizeof ( Size_t ) * kMaxLemmaSize ); + memset ( total_sonbuf_allnoson_, 0, sizeof ( Size_t ) * kMaxLemmaSize ); + memset ( total_node_in_sonbuf_allnoson_, 0, sizeof ( Size_t ) * kMaxLemmaSize ); + memset ( total_homo_num_, 0, sizeof ( Size_t ) * kMaxLemmaSize ); sonbufs_num1_ = 0; sonbufs_numgt1_ = 0; total_lma_node_num_ = 0; @@ -868,35 +868,35 @@ namespace ime_pinyin { printf ( "\n------------STAT INFO-------------\n" ); printf ( "[root is layer -1]\n" ); printf ( ".. max_sonbuf_len per layer(from layer 0):\n " ); - for ( size_t i = 0; i < kMaxLemmaSize; i++ ) + for ( Size_t i = 0; i < kMaxLemmaSize; i++ ) { printf ( "%d, ", max_sonbuf_len_[i] ); } printf ( "-, \n" ); printf ( ".. max_homobuf_len per layer:\n -, " ); - for ( size_t i = 0; i < kMaxLemmaSize; i++ ) + for ( Size_t i = 0; i < kMaxLemmaSize; i++ ) { printf ( "%d, ", max_homobuf_len_[i] ); } printf ( "\n" ); printf ( ".. total_son_num per layer:\n " ); - for ( size_t i = 0; i < kMaxLemmaSize; i++ ) + for ( Size_t i = 0; i < kMaxLemmaSize; i++ ) { printf ( "%d, ", total_son_num_[i] ); } printf ( "-, \n" ); printf ( ".. total_node_hasson per layer:\n 1, " ); - for ( size_t i = 0; i < kMaxLemmaSize; i++ ) + for ( Size_t i = 0; i < kMaxLemmaSize; i++ ) { printf ( "%d, ", total_node_hasson_[i] ); } printf ( "\n" ); printf ( ".. total_sonbuf_num per layer:\n " ); - for ( size_t i = 0; i < kMaxLemmaSize; i++ ) + for ( Size_t i = 0; i < kMaxLemmaSize; i++ ) { printf ( "%d, ", total_sonbuf_num_[i] ); } printf ( "-, \n" ); printf ( ".. total_sonbuf_allnoson per layer:\n " ); - for ( size_t i = 0; i < kMaxLemmaSize; i++ ) + for ( Size_t i = 0; i < kMaxLemmaSize; i++ ) { printf ( "%d, ", total_sonbuf_allnoson_[i] ); } printf ( "-, \n" ); printf ( ".. total_node_in_sonbuf_allnoson per layer:\n " ); - for ( size_t i = 0; i < kMaxLemmaSize; i++ ) + for ( Size_t i = 0; i < kMaxLemmaSize; i++ ) { printf ( "%d, ", total_node_in_sonbuf_allnoson_[i] ); } printf ( "-, \n" ); printf ( ".. total_homo_num per layer:\n 0, " ); - for ( size_t i = 0; i < kMaxLemmaSize; i++ ) + for ( Size_t i = 0; i < kMaxLemmaSize; i++ ) { printf ( "%d, ", total_homo_num_[i] ); } printf ( "\n" ); printf ( ".. son buf allocation number with only 1 son: %d\n", sonbufs_num1_ ); diff --git a/jni/share/dictlist.cpp b/jni/share/dictlist.cpp index 0c3fea3..6304e08 100755..100644 --- a/jni/share/dictlist.cpp +++ b/jni/share/dictlist.cpp @@ -46,7 +46,7 @@ namespace ime_pinyin { free_resource(); } - bool DictList::alloc_resource ( size_t buf_size, size_t scis_num ) { + bool DictList::alloc_resource ( Size_t buf_size, Size_t scis_num ) { // Allocate memory buf_ = static_cast<char16 *> ( malloc ( buf_size * sizeof ( char16 ) ) ); if ( NULL == buf_ ) @@ -75,15 +75,15 @@ namespace ime_pinyin { } #ifdef ___BUILD_MODEL___ - bool DictList::init_list ( const SingleCharItem *scis, size_t scis_num, - const LemmaEntry *lemma_arr, size_t lemma_num ) { + bool DictList::init_list ( const SingleCharItem *scis, Size_t scis_num, + const LemmaEntry *lemma_arr, Size_t lemma_num ) { if ( NULL == scis || 0 == scis_num || NULL == lemma_arr || 0 == lemma_num ) { return false; } initialized_ = false; if ( NULL != buf_ ) { free ( buf_ ); } // calculate the size - size_t buf_size = calculate_size ( lemma_arr, lemma_num ); + Size_t buf_size = calculate_size ( lemma_arr, lemma_num ); if ( 0 == buf_size ) { return false; } if ( !alloc_resource ( buf_size, scis_num ) ) @@ -95,11 +95,11 @@ namespace ime_pinyin { return true; } - size_t DictList::calculate_size ( const LemmaEntry *lemma_arr, size_t lemma_num ) { - size_t last_hz_len = 0; - size_t list_size = 0; - size_t id_num = 0; - for ( size_t i = 0; i < lemma_num; i++ ) { + Size_t DictList::calculate_size ( const LemmaEntry *lemma_arr, Size_t lemma_num ) { + Size_t last_hz_len = 0; + Size_t list_size = 0; + Size_t id_num = 0; + for ( Size_t i = 0; i < lemma_num; i++ ) { if ( 0 == i ) { last_hz_len = lemma_arr[i].hz_str_len; assert ( last_hz_len > 0 ); @@ -110,13 +110,13 @@ namespace ime_pinyin { last_hz_len = 1; list_size += last_hz_len; } else { - size_t current_hz_len = lemma_arr[i].hz_str_len; + Size_t current_hz_len = lemma_arr[i].hz_str_len; assert ( current_hz_len >= last_hz_len ); if ( current_hz_len == last_hz_len ) { list_size += current_hz_len; id_num++; } else { - for ( size_t len = last_hz_len; len < current_hz_len - 1; len++ ) { + for ( Size_t len = last_hz_len; len < current_hz_len - 1; len++ ) { start_pos_[len] = start_pos_[len - 1]; start_id_[len] = start_id_[len - 1]; } @@ -128,7 +128,7 @@ namespace ime_pinyin { } } } - for ( size_t i = last_hz_len; i <= kMaxLemmaSize; i++ ) { + for ( Size_t i = last_hz_len; i <= kMaxLemmaSize; i++ ) { if ( 0 == i ) { start_pos_[0] = 0; start_id_[0] = 1; @@ -140,21 +140,21 @@ namespace ime_pinyin { return start_pos_[kMaxLemmaSize]; } - void DictList::fill_scis ( const SingleCharItem *scis, size_t scis_num ) { + void DictList::fill_scis ( const SingleCharItem *scis, Size_t scis_num ) { assert ( scis_num_ == scis_num ); - for ( size_t pos = 0; pos < scis_num_; pos++ ) { + for ( Size_t pos = 0; pos < scis_num_; pos++ ) { scis_hz_[pos] = scis[pos].hz; scis_splid_[pos] = scis[pos].splid; } } - void DictList::fill_list ( const LemmaEntry *lemma_arr, size_t lemma_num ) { - size_t current_pos = 0; + void DictList::fill_list ( const LemmaEntry *lemma_arr, Size_t lemma_num ) { + Size_t current_pos = 0; utf16_strncpy ( buf_, lemma_arr[0].hanzi_str, lemma_arr[0].hz_str_len ); current_pos = lemma_arr[0].hz_str_len; - size_t id_num = 1; - for ( size_t i = 1; i < lemma_num; i++ ) { + Size_t id_num = 1; + for ( Size_t i = 1; i < lemma_num; i++ ) { utf16_strncpy ( buf_ + current_pos, lemma_arr[i].hanzi_str, lemma_arr[i].hz_str_len ); id_num++; @@ -178,7 +178,7 @@ namespace ime_pinyin { #endif // ___BUILD_MODEL___ char16 *DictList::find_pos_startedbyhzs ( const char16 last_hzs[], - size_t word_len, int ( *cmp_func ) ( const void *, const void * ) ) { + Size_t word_len, int ( *cmp_func ) ( const void *, const void * ) ) { char16 *found_w = static_cast<char16 *> ( mybsearch ( last_hzs, buf_ + start_pos_[word_len - 1], ( start_pos_[word_len] - start_pos_[word_len - 1] ) @@ -192,14 +192,14 @@ namespace ime_pinyin { return found_w; } - size_t DictList::predict ( const char16 last_hzs[], uint16 hzs_len, - NPredictItem *npre_items, size_t npre_max, - size_t b4_used ) { + Size_t DictList::predict ( const char16 last_hzs[], uint16 hzs_len, + NPredictItem *npre_items, Size_t npre_max, + Size_t b4_used ) { assert ( hzs_len <= kMaxPredictSize && hzs_len > 0 ); // 1. Prepare work int ( *cmp_func ) ( const void *, const void * ) = cmp_func_[hzs_len - 1]; NGram &ngram = NGram::get_instance(); - size_t item_num = 0; + Size_t item_num = 0; // 2. Do prediction for ( uint16 pre_len = 1; pre_len <= kMaxPredictSize + 1 - hzs_len; pre_len++ ) { @@ -213,17 +213,17 @@ namespace ime_pinyin { memset ( npre_items + item_num, 0, sizeof ( NPredictItem ) ); utf16_strncpy ( npre_items[item_num].pre_hzs, w_buf + hzs_len, pre_len ); npre_items[item_num].psb = - ngram.get_uni_psb ( ( size_t ) ( w_buf - buf_ - start_pos_[word_len - 1] ) + ngram.get_uni_psb ( ( Size_t ) ( w_buf - buf_ - start_pos_[word_len - 1] ) / word_len + start_id_[word_len - 1] ); npre_items[item_num].his_len = hzs_len; item_num++; w_buf += word_len; } } - size_t new_num = 0; - for ( size_t i = 0; i < item_num; i++ ) { + Size_t new_num = 0; + for ( Size_t i = 0; i < item_num; i++ ) { // Try to find it in the existing items - size_t e_pos; + Size_t e_pos; for ( e_pos = 1; e_pos <= b4_used; e_pos++ ) { if ( utf16_strncmp ( ( * ( npre_items - e_pos ) ).pre_hzs, npre_items[i].pre_hzs, kMaxPredictSize ) == 0 ) @@ -248,7 +248,7 @@ namespace ime_pinyin { if ( i + 1 > str_max - 1 ) { return 0; } if ( start_id_[i] <= id_lemma && start_id_[i + 1] > id_lemma ) { - size_t id_span = id_lemma - start_id_[i]; + Size_t id_span = id_lemma - start_id_[i]; uint16 *buf = buf_ + start_pos_[i] + id_span * ( i + 1 ); for ( uint16 len = 0; len <= i; len++ ) { str_buf[len] = buf[len]; @@ -301,7 +301,7 @@ namespace ime_pinyin { if ( NULL == found ) { return 0; } assert ( found > buf_ ); - assert ( static_cast<size_t> ( found - buf_ ) >= start_pos_[str_len - 1] ); + assert ( static_cast<Size_t> ( found - buf_ ) >= start_pos_[str_len - 1] ); return static_cast<LemmaIdType> ( start_id_[str_len - 1] + ( found - buf_ - start_pos_[str_len - 1] ) / str_len ); @@ -327,12 +327,12 @@ namespace ime_pinyin { if ( NULL == buf_ || 0 == start_pos_[kMaxLemmaSize] || NULL == scis_hz_ || NULL == scis_splid_ || 0 == scis_num_ ) { return false; } - if ( fwrite ( &scis_num_, sizeof ( size_t ), 1, fp ) != 1 ) + if ( fwrite ( &scis_num_, sizeof ( Size_t ), 1, fp ) != 1 ) { return false; } - if ( fwrite ( start_pos_, sizeof ( size_t ), kMaxLemmaSize + 1, fp ) != + if ( fwrite ( start_pos_, sizeof ( Size_t ), kMaxLemmaSize + 1, fp ) != kMaxLemmaSize + 1 ) { return false; } - if ( fwrite ( start_id_, sizeof ( size_t ), kMaxLemmaSize + 1, fp ) != + if ( fwrite ( start_id_, sizeof ( Size_t ), kMaxLemmaSize + 1, fp ) != kMaxLemmaSize + 1 ) { return false; } if ( fwrite ( scis_hz_, sizeof ( char16 ), scis_num_, fp ) != scis_num_ ) @@ -349,12 +349,12 @@ namespace ime_pinyin { if ( NULL == fp ) { return false; } initialized_ = false; - if ( fread ( &scis_num_, sizeof ( size_t ), 1, fp ) != 1 ) + if ( fread ( &scis_num_, sizeof ( Size_t ), 1, fp ) != 1 ) { return false; } - if ( fread ( start_pos_, sizeof ( size_t ), kMaxLemmaSize + 1, fp ) != + if ( fread ( start_pos_, sizeof ( Size_t ), kMaxLemmaSize + 1, fp ) != kMaxLemmaSize + 1 ) { return false; } - if ( fread ( start_id_, sizeof ( size_t ), kMaxLemmaSize + 1, fp ) != + if ( fread ( start_id_, sizeof ( Size_t ), kMaxLemmaSize + 1, fp ) != kMaxLemmaSize + 1 ) { return false; } free_resource(); diff --git a/jni/share/dicttrie.cpp b/jni/share/dicttrie.cpp index 4566e8e..0034c41 100755..100644 --- a/jni/share/dicttrie.cpp +++ b/jni/share/dicttrie.cpp @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include <assert.h> #include <stdio.h> #include <string.h> @@ -74,16 +73,16 @@ namespace ime_pinyin { reset_milestones ( 0, kFirstValidMileStoneHandle ); } - inline size_t DictTrie::get_son_offset ( const LmaNodeGE1 *node ) { - return ( ( size_t ) node->son_1st_off_l + ( ( size_t ) node->son_1st_off_h << 16 ) ); + inline Size_t DictTrie::get_son_offset ( const LmaNodeGE1 *node ) { + return ( ( Size_t ) node->son_1st_off_l + ( ( Size_t ) node->son_1st_off_h << 16 ) ); } - inline size_t DictTrie::get_homo_idx_buf_offset ( const LmaNodeGE1 *node ) { - return ( ( size_t ) node->homo_idx_buf_off_l + - ( ( size_t ) node->homo_idx_buf_off_h << 16 ) ); + inline Size_t DictTrie::get_homo_idx_buf_offset ( const LmaNodeGE1 *node ) { + return ( ( Size_t ) node->homo_idx_buf_off_l + + ( ( Size_t ) node->homo_idx_buf_off_h << 16 ) ); } - inline LemmaIdType DictTrie::get_lemma_id ( size_t id_offset ) { + inline LemmaIdType DictTrie::get_lemma_id ( Size_t id_offset ) { LemmaIdType id = 0; for ( uint16 pos = kLemmaIdSize - 1; pos > 0; pos-- ) { id = ( id << 8 ) + lma_idx_buf_[id_offset * kLemmaIdSize + pos]; } @@ -101,13 +100,13 @@ namespace ime_pinyin { bool DictTrie::save_dict ( FILE *fp ) { if ( NULL == fp ) { return false; } - if ( fwrite ( &lma_node_num_le0_, sizeof ( size_t ), 1, fp ) != 1 ) + if ( fwrite ( &lma_node_num_le0_, sizeof ( Size_t ), 1, fp ) != 1 ) { return false; } - if ( fwrite ( &lma_node_num_ge1_, sizeof ( size_t ), 1, fp ) != 1 ) + if ( fwrite ( &lma_node_num_ge1_, sizeof ( Size_t ), 1, fp ) != 1 ) { return false; } - if ( fwrite ( &lma_idx_buf_len_, sizeof ( size_t ), 1, fp ) != 1 ) + if ( fwrite ( &lma_idx_buf_len_, sizeof ( Size_t ), 1, fp ) != 1 ) { return false; } - if ( fwrite ( &top_lmas_num_, sizeof ( size_t ), 1, fp ) != 1 ) + if ( fwrite ( &top_lmas_num_, sizeof ( Size_t ), 1, fp ) != 1 ) { return false; } if ( fwrite ( root_, sizeof ( LmaNodeLE0 ), lma_node_num_le0_, fp ) != lma_node_num_le0_ ) @@ -144,13 +143,13 @@ namespace ime_pinyin { bool DictTrie::load_dict ( FILE *fp ) { if ( NULL == fp ) { return false; } - if ( fread ( &lma_node_num_le0_, sizeof ( size_t ), 1, fp ) != 1 ) + if ( fread ( &lma_node_num_le0_, sizeof ( Size_t ), 1, fp ) != 1 ) { return false; } - if ( fread ( &lma_node_num_ge1_, sizeof ( size_t ), 1, fp ) != 1 ) + if ( fread ( &lma_node_num_ge1_, sizeof ( Size_t ), 1, fp ) != 1 ) { return false; } - if ( fread ( &lma_idx_buf_len_, sizeof ( size_t ), 1, fp ) != 1 ) + if ( fread ( &lma_idx_buf_len_, sizeof ( Size_t ), 1, fp ) != 1 ) { return false; } - if ( fread ( &top_lmas_num_, sizeof ( size_t ), 1, fp ) != 1 || + if ( fread ( &top_lmas_num_, sizeof ( Size_t ), 1, fp ) != 1 || top_lmas_num_ >= lma_idx_buf_len_ ) { return false; } free_resource ( false ); @@ -160,7 +159,7 @@ namespace ime_pinyin { ( malloc ( lma_node_num_ge1_ * sizeof ( LmaNodeGE1 ) ) ); lma_idx_buf_ = ( unsigned char * ) malloc ( lma_idx_buf_len_ ); total_lma_num_ = lma_idx_buf_len_ / kLemmaIdSize; - size_t buf_size = SpellingTrie::get_instance().get_spelling_num() + 1; + Size_t buf_size = SpellingTrie::get_instance().get_spelling_num() + 1; assert ( lma_node_num_le0_ <= buf_size ); splid_le0_index_ = static_cast<uint16 *> ( malloc ( buf_size * sizeof ( uint16 ) ) ); // Init the space for parsing. @@ -184,8 +183,8 @@ namespace ime_pinyin { { return false; } // The quick index for the first level sons uint16 last_splid = kFullSplIdStart; - size_t last_pos = 0; - for ( size_t i = 1; i < lma_node_num_le0_; i++ ) { + Size_t last_pos = 0; + for ( Size_t i = 1; i < lma_node_num_le0_; i++ ) { for ( uint16 splid = last_splid; splid < root_[i].spl_idx; splid++ ) { splid_le0_index_[splid - kFullSplIdStart] = last_pos; } splid_le0_index_[root_[i].spl_idx - kFullSplIdStart] = @@ -195,7 +194,7 @@ namespace ime_pinyin { } for ( uint16 splid = last_splid + 1; splid < buf_size + kFullSplIdStart; splid++ ) { - assert ( static_cast<size_t> ( splid - kFullSplIdStart ) < buf_size ); + assert ( static_cast<Size_t> ( splid - kFullSplIdStart ) < buf_size ); splid_le0_index_[splid - kFullSplIdStart] = last_pos + 1; } return true; @@ -234,7 +233,8 @@ namespace ime_pinyin { { return false; } FILE *fp = fdopen ( sys_fd, "rb" ); if ( NULL == fp ) - { return false; } + { + return false; } if ( -1 == fseek ( fp, start_offset, SEEK_SET ) ) { fclose ( fp ); return false; @@ -259,11 +259,11 @@ namespace ime_pinyin { return true; } - size_t DictTrie::fill_lpi_buffer ( LmaPsbItem lpi_items[], size_t lpi_max, + Size_t DictTrie::fill_lpi_buffer ( LmaPsbItem lpi_items[], Size_t lpi_max, LmaNodeLE0 *node ) { - size_t lpi_num = 0; + Size_t lpi_num = 0; NGram &ngram = NGram::get_instance(); - for ( size_t homo = 0; homo < ( size_t ) node->num_of_homo; homo++ ) { + for ( Size_t homo = 0; homo < ( Size_t ) node->num_of_homo; homo++ ) { lpi_items[lpi_num].id = get_lemma_id ( node->homo_idx_buf_off + homo ); lpi_items[lpi_num].lma_len = 1; @@ -276,12 +276,12 @@ namespace ime_pinyin { return lpi_num; } - size_t DictTrie::fill_lpi_buffer ( LmaPsbItem lpi_items[], size_t lpi_max, - size_t homo_buf_off, LmaNodeGE1 *node, + Size_t DictTrie::fill_lpi_buffer ( LmaPsbItem lpi_items[], Size_t lpi_max, + Size_t homo_buf_off, LmaNodeGE1 *node, uint16 lma_len ) { - size_t lpi_num = 0; + Size_t lpi_num = 0; NGram &ngram = NGram::get_instance(); - for ( size_t homo = 0; homo < ( size_t ) node->num_of_homo; homo++ ) { + for ( Size_t homo = 0; homo < ( Size_t ) node->num_of_homo; homo++ ) { lpi_items[lpi_num].id = get_lemma_id ( homo_buf_off + homo ); lpi_items[lpi_num].lma_len = lma_len; lpi_items[lpi_num].psb = @@ -308,8 +308,8 @@ namespace ime_pinyin { MileStoneHandle DictTrie::extend_dict ( MileStoneHandle from_handle, const DictExtPara *dep, - LmaPsbItem *lpi_items, size_t lpi_max, - size_t *lpi_num ) { + LmaPsbItem *lpi_items, Size_t lpi_max, + Size_t *lpi_num ) { if ( NULL == dep ) { return 0; } // from LmaNodeLE0 (root) to LmaNodeLE0 @@ -327,7 +327,7 @@ namespace ime_pinyin { MileStoneHandle DictTrie::extend_dict0 ( MileStoneHandle from_handle, const DictExtPara *dep, LmaPsbItem *lpi_items, - size_t lpi_max, size_t *lpi_num ) { + Size_t lpi_max, Size_t *lpi_num ) { assert ( NULL != dep && 0 == from_handle ); *lpi_num = 0; MileStoneHandle ret_handle = 0; @@ -339,9 +339,9 @@ namespace ime_pinyin { // 2. Begin exgtending // 2.1 Get the LmaPsbItem list LmaNodeLE0 *node = root_; - size_t son_start = splid_le0_index_[id_start - kFullSplIdStart]; - size_t son_end = splid_le0_index_[id_start + id_num - kFullSplIdStart]; - for ( size_t son_pos = son_start; son_pos < son_end; son_pos++ ) { + Size_t son_start = splid_le0_index_[id_start - kFullSplIdStart]; + Size_t son_end = splid_le0_index_[id_start + id_num - kFullSplIdStart]; + for ( Size_t son_pos = son_start; son_pos < son_end; son_pos++ ) { assert ( 1 == node->son_1st_off ); LmaNodeLE0 *son = root_ + son_pos; assert ( son->spl_idx >= id_start && son->spl_idx < id_start + id_num ); @@ -377,12 +377,12 @@ namespace ime_pinyin { MileStoneHandle DictTrie::extend_dict1 ( MileStoneHandle from_handle, const DictExtPara *dep, LmaPsbItem *lpi_items, - size_t lpi_max, size_t *lpi_num ) { + Size_t lpi_max, Size_t *lpi_num ) { assert ( NULL != dep && from_handle > 0 && from_handle < mile_stones_pos_ ); MileStoneHandle ret_handle = 0; // 1. If this is a half Id, get its corresponding full starting Id and // number of full Id. - size_t ret_val = 0; + Size_t ret_val = 0; uint16 id_start = dep->id_start; uint16 id_num = dep->id_num; // 2. Begin extending. @@ -392,15 +392,15 @@ namespace ime_pinyin { uint16 ext_num = p_mark.node_num; for ( uint16 ext_pos = 0; ext_pos < ext_num; ext_pos++ ) { LmaNodeLE0 *node = root_ + p_mark.node_offset + ext_pos; - size_t found_start = 0; - size_t found_num = 0; - for ( size_t son_pos = 0; son_pos < ( size_t ) node->num_of_son; son_pos++ ) { + Size_t found_start = 0; + Size_t found_num = 0; + for ( Size_t son_pos = 0; son_pos < ( Size_t ) node->num_of_son; son_pos++ ) { assert ( node->son_1st_off <= lma_node_num_ge1_ ); LmaNodeGE1 *son = nodes_ge1_ + node->son_1st_off + son_pos; if ( son->spl_idx >= id_start && son->spl_idx < id_start + id_num ) { if ( *lpi_num < lpi_max ) { - size_t homo_buf_off = get_homo_idx_buf_offset ( son ); + Size_t homo_buf_off = get_homo_idx_buf_offset ( son ); *lpi_num += fill_lpi_buffer ( lpi_items + ( *lpi_num ), lpi_max - *lpi_num, homo_buf_off, son, 2 ); @@ -412,7 +412,7 @@ namespace ime_pinyin { found_num++; } if ( son->spl_idx >= id_start + id_num - 1 || son_pos == - ( size_t ) node->num_of_son - 1 ) { + ( Size_t ) node->num_of_son - 1 ) { if ( found_num > 0 ) { if ( mile_stones_pos_ < kMaxMileStone && parsing_marks_pos_ < kMaxParsingMark ) { @@ -445,12 +445,12 @@ namespace ime_pinyin { MileStoneHandle DictTrie::extend_dict2 ( MileStoneHandle from_handle, const DictExtPara *dep, LmaPsbItem *lpi_items, - size_t lpi_max, size_t *lpi_num ) { + Size_t lpi_max, Size_t *lpi_num ) { assert ( NULL != dep && from_handle > 0 && from_handle < mile_stones_pos_ ); MileStoneHandle ret_handle = 0; // 1. If this is a half Id, get its corresponding full starting Id and // number of full Id. - size_t ret_val = 0; + Size_t ret_val = 0; uint16 id_start = dep->id_start; uint16 id_num = dep->id_num; // 2. Begin extending. @@ -460,15 +460,15 @@ namespace ime_pinyin { uint16 ext_num = p_mark.node_num; for ( uint16 ext_pos = 0; ext_pos < ext_num; ext_pos++ ) { LmaNodeGE1 *node = nodes_ge1_ + p_mark.node_offset + ext_pos; - size_t found_start = 0; - size_t found_num = 0; - for ( size_t son_pos = 0; son_pos < ( size_t ) node->num_of_son; son_pos++ ) { + Size_t found_start = 0; + Size_t found_num = 0; + for ( Size_t son_pos = 0; son_pos < ( Size_t ) node->num_of_son; son_pos++ ) { assert ( node->son_1st_off_l > 0 || node->son_1st_off_h > 0 ); LmaNodeGE1 *son = nodes_ge1_ + get_son_offset ( node ) + son_pos; if ( son->spl_idx >= id_start && son->spl_idx < id_start + id_num ) { if ( *lpi_num < lpi_max ) { - size_t homo_buf_off = get_homo_idx_buf_offset ( son ); + Size_t homo_buf_off = get_homo_idx_buf_offset ( son ); *lpi_num += fill_lpi_buffer ( lpi_items + ( *lpi_num ), lpi_max - *lpi_num, homo_buf_off, son, dep->splids_extended + 1 ); @@ -480,7 +480,7 @@ namespace ime_pinyin { found_num++; } if ( son->spl_idx >= id_start + id_num - 1 || son_pos == - ( size_t ) node->num_of_son - 1 ) { + ( Size_t ) node->num_of_son - 1 ) { if ( found_num > 0 ) { if ( mile_stones_pos_ < kMaxMileStone && parsing_marks_pos_ < kMaxParsingMark ) { @@ -550,8 +550,8 @@ namespace ime_pinyin { } if ( 1 == splid_num ) { LmaNodeLE0 *node_le0 = reinterpret_cast<LmaNodeLE0 *> ( node ); - size_t num_of_homo = ( size_t ) node_le0->num_of_homo; - for ( size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++ ) { + Size_t num_of_homo = ( Size_t ) node_le0->num_of_homo; + for ( Size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++ ) { LemmaIdType id_this = get_lemma_id ( node_le0->homo_idx_buf_off + homo_pos ); char16 str[2]; get_lemma_str ( id_this, str, 2 ); @@ -560,9 +560,9 @@ namespace ime_pinyin { } } else { LmaNodeGE1 *node_ge1 = reinterpret_cast<LmaNodeGE1 *> ( node ); - size_t num_of_homo = ( size_t ) node_ge1->num_of_homo; - for ( size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++ ) { - size_t node_homo_off = get_homo_idx_buf_offset ( node_ge1 ); + Size_t num_of_homo = ( Size_t ) node_ge1->num_of_homo; + for ( Size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++ ) { + Size_t node_homo_off = get_homo_idx_buf_offset ( node_ge1 ); if ( get_lemma_id ( node_homo_off + homo_pos ) == id_lemma ) { return true; } } @@ -570,25 +570,25 @@ namespace ime_pinyin { return false; } - size_t DictTrie::get_lpis ( const uint16 *splid_str, uint16 splid_str_len, - LmaPsbItem *lma_buf, size_t max_lma_buf ) { + Size_t DictTrie::get_lpis ( const uint16 *splid_str, uint16 splid_str_len, + LmaPsbItem *lma_buf, Size_t max_lma_buf ) { if ( splid_str_len > kMaxLemmaSize ) { return 0; } #define MAX_EXTENDBUF_LEN 200 - size_t *node_buf1[MAX_EXTENDBUF_LEN]; // use size_t for data alignment - size_t *node_buf2[MAX_EXTENDBUF_LEN]; + Size_t *node_buf1[MAX_EXTENDBUF_LEN]; // use Size_t for data alignment + Size_t *node_buf2[MAX_EXTENDBUF_LEN]; LmaNodeLE0 **node_fr_le0 = reinterpret_cast<LmaNodeLE0 **> ( node_buf1 ); // Nodes from. LmaNodeLE0 **node_to_le0 = reinterpret_cast<LmaNodeLE0 **> ( node_buf2 ); // Nodes to. LmaNodeGE1 **node_fr_ge1 = NULL; LmaNodeGE1 **node_to_ge1 = NULL; - size_t node_fr_num = 1; - size_t node_to_num = 0; + Size_t node_fr_num = 1; + Size_t node_to_num = 0; node_fr_le0[0] = root_; if ( NULL == node_fr_le0[0] ) { return 0; } - size_t spl_pos = 0; + Size_t spl_pos = 0; while ( spl_pos < splid_str_len ) { uint16 id_num = 1; uint16 id_start = splid_str[spl_pos]; @@ -599,13 +599,13 @@ namespace ime_pinyin { } // Extend the nodes if ( 0 == spl_pos ) { // From LmaNodeLE0 (root) to LmaNodeLE0 nodes - for ( size_t node_fr_pos = 0; node_fr_pos < node_fr_num; node_fr_pos++ ) { + for ( Size_t node_fr_pos = 0; node_fr_pos < node_fr_num; node_fr_pos++ ) { LmaNodeLE0 *node = node_fr_le0[node_fr_pos]; assert ( node == root_ && 1 == node_fr_num ); - size_t son_start = splid_le0_index_[id_start - kFullSplIdStart]; - size_t son_end = + Size_t son_start = splid_le0_index_[id_start - kFullSplIdStart]; + Size_t son_end = splid_le0_index_[id_start + id_num - kFullSplIdStart]; - for ( size_t son_pos = son_start; son_pos < son_end; son_pos++ ) { + for ( Size_t son_pos = son_start; son_pos < son_end; son_pos++ ) { assert ( 1 == node->son_1st_off ); LmaNodeLE0 *node_son = root_ + son_pos; assert ( node_son->spl_idx >= id_start @@ -630,9 +630,9 @@ namespace ime_pinyin { node_to_le0 = NULL; node_to_ge1 = reinterpret_cast<LmaNodeGE1 **> ( node_tmp ); } else if ( 1 == spl_pos ) { // From LmaNodeLE0 to LmaNodeGE1 nodes - for ( size_t node_fr_pos = 0; node_fr_pos < node_fr_num; node_fr_pos++ ) { + for ( Size_t node_fr_pos = 0; node_fr_pos < node_fr_num; node_fr_pos++ ) { LmaNodeLE0 *node = node_fr_le0[node_fr_pos]; - for ( size_t son_pos = 0; son_pos < ( size_t ) node->num_of_son; + for ( Size_t son_pos = 0; son_pos < ( Size_t ) node->num_of_son; son_pos++ ) { assert ( node->son_1st_off <= lma_node_num_ge1_ ); LmaNodeGE1 *node_son = nodes_ge1_ + node->son_1st_off @@ -660,9 +660,9 @@ namespace ime_pinyin { node_fr_le0 = NULL; node_to_le0 = NULL; } else { // From LmaNodeGE1 to LmaNodeGE1 nodes - for ( size_t node_fr_pos = 0; node_fr_pos < node_fr_num; node_fr_pos++ ) { + for ( Size_t node_fr_pos = 0; node_fr_pos < node_fr_num; node_fr_pos++ ) { LmaNodeGE1 *node = node_fr_ge1[node_fr_pos]; - for ( size_t son_pos = 0; son_pos < ( size_t ) node->num_of_son; + for ( Size_t son_pos = 0; son_pos < ( Size_t ) node->num_of_son; son_pos++ ) { assert ( node->son_1st_off_l > 0 || node->son_1st_off_h > 0 ); LmaNodeGE1 *node_son = nodes_ge1_ @@ -696,18 +696,18 @@ namespace ime_pinyin { if ( 0 == node_to_num ) { return 0; } NGram &ngram = NGram::get_instance(); - size_t lma_num = 0; + Size_t lma_num = 0; // If the length is 1, and the splid is a one-char Yunmu like 'a', 'o', 'e', // only those candidates for the full matched one-char id will be returned. if ( 1 == splid_str_len && spl_trie_->is_half_id_yunmu ( splid_str[0] ) ) { node_to_num = node_to_num > 0 ? 1 : 0; } - for ( size_t node_pos = 0; node_pos < node_to_num; node_pos++ ) { - size_t num_of_homo = 0; + for ( Size_t node_pos = 0; node_pos < node_to_num; node_pos++ ) { + Size_t num_of_homo = 0; if ( spl_pos <= 1 ) { // Get from LmaNodeLE0 nodes LmaNodeLE0 *node_le0 = node_to_le0[node_pos]; - num_of_homo = ( size_t ) node_le0->num_of_homo; - for ( size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++ ) { - size_t ch_pos = lma_num + homo_pos; + num_of_homo = ( Size_t ) node_le0->num_of_homo; + for ( Size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++ ) { + Size_t ch_pos = lma_num + homo_pos; lma_buf[ch_pos].id = get_lemma_id ( node_le0->homo_idx_buf_off + homo_pos ); lma_buf[ch_pos].lma_len = 1; @@ -718,10 +718,10 @@ namespace ime_pinyin { } } else { // Get from LmaNodeGE1 nodes LmaNodeGE1 *node_ge1 = node_to_ge1[node_pos]; - num_of_homo = ( size_t ) node_ge1->num_of_homo; - for ( size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++ ) { - size_t ch_pos = lma_num + homo_pos; - size_t node_homo_off = get_homo_idx_buf_offset ( node_ge1 ); + num_of_homo = ( Size_t ) node_ge1->num_of_homo; + for ( Size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++ ) { + Size_t ch_pos = lma_num + homo_pos; + Size_t node_homo_off = get_homo_idx_buf_offset ( node_ge1 ); lma_buf[ch_pos].id = get_lemma_id ( node_homo_off + homo_pos ); lma_buf[ch_pos].lma_len = splid_str_len; lma_buf[ch_pos].psb = @@ -780,7 +780,7 @@ namespace ime_pinyin { return 0; } - void DictTrie::set_total_lemma_count_of_others ( size_t count ) { + void DictTrie::set_total_lemma_count_of_others ( Size_t count ) { NGram &ngram = NGram::get_instance(); ngram.set_total_freq_none_sys ( count ); } @@ -799,12 +799,12 @@ namespace ime_pinyin { return dict_list_->get_lemma_id ( lemma_str, lemma_len ); } - size_t DictTrie::predict_top_lmas ( size_t his_len, NPredictItem *npre_items, - size_t npre_max, size_t b4_used ) { + Size_t DictTrie::predict_top_lmas ( Size_t his_len, NPredictItem *npre_items, + Size_t npre_max, Size_t b4_used ) { NGram &ngram = NGram::get_instance(); - size_t item_num = 0; - size_t top_lmas_id_offset = lma_idx_buf_len_ / kLemmaIdSize - top_lmas_num_; - size_t top_lmas_pos = 0; + Size_t item_num = 0; + Size_t top_lmas_id_offset = lma_idx_buf_len_ / kLemmaIdSize - top_lmas_num_; + Size_t top_lmas_pos = 0; while ( item_num < npre_max && top_lmas_pos < top_lmas_num_ ) { memset ( npre_items + item_num, 0, sizeof ( NPredictItem ) ); LemmaIdType top_lma_id = get_lemma_id ( top_lmas_id_offset + top_lmas_pos ); @@ -821,9 +821,9 @@ namespace ime_pinyin { return item_num; } - size_t DictTrie::predict ( const char16 *last_hzs, uint16 hzs_len, - NPredictItem *npre_items, size_t npre_max, - size_t b4_used ) { + Size_t DictTrie::predict ( const char16 *last_hzs, uint16 hzs_len, + NPredictItem *npre_items, Size_t npre_max, + Size_t b4_used ) { return dict_list_->predict ( last_hzs, hzs_len, npre_items, npre_max, b4_used ); } } // namespace ime_pinyin diff --git a/jni/share/lpicache.cpp b/jni/share/lpicache.cpp index d95879c..10c7bd9 100755..100644 --- a/jni/share/lpicache.cpp +++ b/jni/share/lpicache.cpp @@ -51,8 +51,8 @@ namespace ime_pinyin { return lpi_cache_len_[splid] != 0; } - size_t LpiCache::put_cache ( uint16 splid, LmaPsbItem lpi_items[], - size_t lpi_num ) { + Size_t LpiCache::put_cache ( uint16 splid, LmaPsbItem lpi_items[], + Size_t lpi_num ) { uint16 num = kMaxLpiCachePerId; if ( num > lpi_num ) { num = static_cast<uint16> ( lpi_num ); } @@ -63,8 +63,8 @@ namespace ime_pinyin { return num; } - size_t LpiCache::get_cache ( uint16 splid, LmaPsbItem lpi_items[], - size_t lpi_max ) { + Size_t LpiCache::get_cache ( uint16 splid, LmaPsbItem lpi_items[], + Size_t lpi_max ) { if ( lpi_max > lpi_cache_len_[splid] ) { lpi_max = lpi_cache_len_[splid]; } LmaPsbItem *lpi_cache_this = lpi_cache_ + splid * kMaxLpiCachePerId; diff --git a/jni/share/matrixsearch.cpp b/jni/share/matrixsearch.cpp index 5df8527..3c44315 100755..100644 --- a/jni/share/matrixsearch.cpp +++ b/jni/share/matrixsearch.cpp @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include <assert.h> #include <math.h> #include <stdio.h> @@ -66,16 +65,16 @@ namespace ime_pinyin { dict_trie_ = new DictTrie(); user_dict_ = static_cast<AtomDictBase *> ( new UserDict() ); spl_parser_ = new SpellingParser(); - size_t mtrx_nd_size = sizeof ( MatrixNode ) * kMtrxNdPoolSize; - mtrx_nd_size = align_to_size_t ( mtrx_nd_size ) / sizeof ( size_t ); - size_t dmi_size = sizeof ( DictMatchInfo ) * kDmiPoolSize; - dmi_size = align_to_size_t ( dmi_size ) / sizeof ( size_t ); - size_t matrix_size = sizeof ( MatrixRow ) * kMaxRowNum; - matrix_size = align_to_size_t ( matrix_size ) / sizeof ( size_t ); - size_t dep_size = sizeof ( DictExtPara ); - dep_size = align_to_size_t ( dep_size ) / sizeof ( size_t ); + Size_t mtrx_nd_size = sizeof ( MatrixNode ) * kMtrxNdPoolSize; + mtrx_nd_size = align_to_Size_t ( mtrx_nd_size ) / sizeof ( Size_t ); + Size_t dmi_size = sizeof ( DictMatchInfo ) * kDmiPoolSize; + dmi_size = align_to_Size_t ( dmi_size ) / sizeof ( Size_t ); + Size_t matrix_size = sizeof ( MatrixRow ) * kMaxRowNum; + matrix_size = align_to_Size_t ( matrix_size ) / sizeof ( Size_t ); + Size_t dep_size = sizeof ( DictExtPara ); + dep_size = align_to_Size_t ( dep_size ) / sizeof ( Size_t ); // share_buf's size is determined by the buffers for search. - share_buf_ = new size_t[mtrx_nd_size + dmi_size + matrix_size + dep_size]; + share_buf_ = new Size_t[mtrx_nd_size + dmi_size + matrix_size + dep_size]; if ( NULL == dict_trie_ || NULL == user_dict_ || NULL == spl_parser_ || NULL == share_buf_ ) { return false; } @@ -88,7 +87,7 @@ namespace ime_pinyin { // The prediction buffer is also based on the share buffer. npre_items_ = reinterpret_cast<NPredictItem *> ( share_buf_ ); npre_items_len_ = ( mtrx_nd_size + dmi_size + matrix_size + dep_size ) * - sizeof ( size_t ) / sizeof ( NPredictItem ); + sizeof ( Size_t ) / sizeof ( NPredictItem ); return true; } @@ -106,11 +105,11 @@ namespace ime_pinyin { bool MatrixSearch::init ( const char *fn_sys_dict, const char *fn_usr_dict ) { if ( NULL == fn_sys_dict || NULL == fn_usr_dict ) - { return false; } + { return false; } if ( !alloc_resource() ) - { return false; } + { return false; } if ( !dict_trie_->load_dict ( fn_sys_dict, 1, kSysDictIdEnd ) ) - { return false; } + { return false; } // If engine fails to load the user dictionary, reset the user dictionary // to NULL. if ( !user_dict_->load_dict ( fn_usr_dict, kUserDictIdStart, kUserDictIdEnd ) ) { @@ -143,7 +142,7 @@ namespace ime_pinyin { return true; } - void MatrixSearch::set_max_lens ( size_t max_sps_len, size_t max_hzs_len ) { + void MatrixSearch::set_max_lens ( Size_t max_sps_len, Size_t max_hzs_len ) { if ( 0 != max_sps_len ) { max_sps_len_ = max_sps_len; } if ( 0 != max_hzs_len ) @@ -206,7 +205,7 @@ namespace ime_pinyin { return true; } - bool MatrixSearch::reset_search ( size_t ch_pos, bool clear_fixed_this_step, + bool MatrixSearch::reset_search ( Size_t ch_pos, bool clear_fixed_this_step, bool clear_dmi_this_step, bool clear_mtrx_this_step ) { if ( !inited_ || ch_pos > pys_decoded_len_ || ch_pos >= kMaxRowNum ) @@ -254,7 +253,7 @@ namespace ime_pinyin { ( ( kLemmaIdComposing != lma_id_[0] ) || ( kLemmaIdComposing == lma_id_[0] && spl_start_[c_phrase_.length] <= ch_pos ) ) ) { - size_t fixed_ch_pos = ch_pos; + Size_t fixed_ch_pos = ch_pos; if ( clear_fixed_this_step ) { fixed_ch_pos = fixed_ch_pos > 0 ? fixed_ch_pos - 1 : 0; } while ( NULL == matrix_[fixed_ch_pos].mtrx_nd_fixed && fixed_ch_pos > 0 ) @@ -355,14 +354,14 @@ namespace ime_pinyin { return true; } - void MatrixSearch::del_in_pys ( size_t start, size_t len ) { + void MatrixSearch::del_in_pys ( Size_t start, Size_t len ) { while ( start < kMaxRowNum - len && '\0' != pys_[start] ) { pys_[start] = pys_[start + len]; start++; } } - size_t MatrixSearch::search ( const char *py, size_t py_len ) { + Size_t MatrixSearch::search ( const char *py, Size_t py_len ) { if ( !inited_ || NULL == py ) { return 0; } // If the search Pinyin string is too long, it will be truncated. @@ -370,7 +369,7 @@ namespace ime_pinyin { { py_len = kMaxRowNum - 1; } // Compare the new string with the previous one. Find their prefix to // increase search efficiency. - size_t ch_pos = 0; + Size_t ch_pos = 0; for ( ch_pos = 0; ch_pos < pys_decoded_len_; ch_pos++ ) { if ( '\0' == py[ch_pos] || py[ch_pos] != pys_[ch_pos] ) { break; } @@ -411,11 +410,11 @@ namespace ime_pinyin { return ch_pos; } - size_t MatrixSearch::delsearch ( size_t pos, bool is_pos_in_splid, + Size_t MatrixSearch::delsearch ( Size_t pos, bool is_pos_in_splid, bool clear_fixed_this_step ) { if ( !inited_ ) { return 0; } - size_t reset_pos = pos; + Size_t reset_pos = pos; // Out of range for both Pinyin mode and Spelling id mode. if ( pys_decoded_len_ <= pos ) { del_in_pys ( pos, 1 ); @@ -437,8 +436,8 @@ namespace ime_pinyin { { return pys_decoded_len_; } // Begin to handle two modes respectively. // Pinyin mode by default - size_t c_py_len = 0; // The length of composing phrase's Pinyin - size_t del_py_len = 1; + Size_t c_py_len = 0; // The length of composing phrase's Pinyin + Size_t del_py_len = 1; if ( !is_pos_in_splid ) { // Pinyin mode is only allowed to delete beyond the fixed lemmas. if ( fixed_lmas_ > 0 && pos < spl_start_[lma_start_[fixed_lmas_]] ) @@ -513,15 +512,15 @@ namespace ime_pinyin { return pys_decoded_len_; } - size_t MatrixSearch::get_candidate_num() { + Size_t MatrixSearch::get_candidate_num() { if ( !inited_ || 0 == pys_decoded_len_ || 0 == matrix_[pys_decoded_len_].mtrx_nd_num ) { return 0; } return 1 + lpi_total_; } - char16 *MatrixSearch::get_candidate ( size_t cand_id, char16 *cand_str, - size_t max_len ) { + char16 *MatrixSearch::get_candidate ( Size_t cand_id, char16 *cand_str, + Size_t max_len ) { if ( !inited_ || 0 == pys_decoded_len_ || NULL == cand_str ) { return NULL; } if ( 0 == cand_id ) { @@ -557,7 +556,7 @@ namespace ime_pinyin { if ( NULL != user_dict_ ) { // Update the total frequency of all lemmas, including system lemmas and // user dictionary lemmas. - size_t total_freq = user_dict_->get_total_lemma_count(); + Size_t total_freq = user_dict_->get_total_lemma_count(); dict_trie_->set_total_lemma_count_of_others ( total_freq ); } } @@ -610,7 +609,7 @@ namespace ime_pinyin { } bool MatrixSearch::try_add_cand0_to_userdict() { - size_t new_cand_num = get_candidate_num(); + Size_t new_cand_num = get_candidate_num(); if ( fixed_hzs_ > 0 && 1 == new_cand_num ) { float score_from = 0; uint16 lma_id_from = 0; @@ -671,14 +670,14 @@ namespace ime_pinyin { // 1.2.1. The whole sentence will be added as a user lemma. If the // sentence contains user lemmas, -> hit, and add occuring count // by 1. - size_t MatrixSearch::choose ( size_t cand_id ) { + Size_t MatrixSearch::choose ( Size_t cand_id ) { if ( !inited_ || 0 == pys_decoded_len_ ) { return 0; } if ( 0 == cand_id ) { fixed_hzs_ = spl_id_num_; matrix_[spl_start_[fixed_hzs_]].mtrx_nd_fixed = mtrx_nd_pool_ + matrix_[spl_start_[fixed_hzs_]].mtrx_nd_pos; - for ( size_t pos = fixed_lmas_; pos < lma_id_num_; pos++ ) { + for ( Size_t pos = fixed_lmas_; pos < lma_id_num_; pos++ ) { fixed_lmas_no1_[pos] = 1; } fixed_lmas_ = lma_id_num_; @@ -711,7 +710,7 @@ namespace ime_pinyin { // Find the length of the candidate. LemmaIdType id_chosen = lpi_items_[cand_id].id; LmaScoreType score_chosen = lpi_items_[cand_id].psb; - size_t cand_len = lpi_items_[cand_id].lma_len; + Size_t cand_len = lpi_items_[cand_id].lma_len; assert ( cand_len > 0 ); // Notify the atom dictionary that this item is hit. if ( is_user_lemma ( id_chosen ) ) { @@ -722,10 +721,10 @@ namespace ime_pinyin { } // 3. Fixed the chosen item. // 3.1 Get the steps number. - size_t step_fr = spl_start_[fixed_hzs_]; - size_t step_to = spl_start_[fixed_hzs_ + cand_len]; + Size_t step_fr = spl_start_[fixed_hzs_]; + Size_t step_to = spl_start_[fixed_hzs_ + cand_len]; // 3.2 Save the length of the original string. - size_t pys_decoded_len = pys_decoded_len_; + Size_t pys_decoded_len = pys_decoded_len_; // 3.2 Reset the space of the fixed part. reset_search ( step_to, false, false, true ); // 3.3 For the last character of the fixed part, the previous DMI @@ -767,12 +766,12 @@ namespace ime_pinyin { return get_candidate_num(); } - size_t MatrixSearch::cancel_last_choice() { + Size_t MatrixSearch::cancel_last_choice() { if ( !inited_ || 0 == pys_decoded_len_ ) { return 0; } - size_t step_start = 0; + Size_t step_start = 0; if ( fixed_hzs_ > 0 ) { - size_t step_end = spl_start_[fixed_hzs_]; + Size_t step_end = spl_start_[fixed_hzs_]; MatrixNode *end_node = matrix_[step_end].mtrx_nd_fixed; assert ( NULL != end_node ); step_start = end_node->from->step; @@ -793,7 +792,7 @@ namespace ime_pinyin { return get_candidate_num(); } - size_t MatrixSearch::get_fixedlen() { + Size_t MatrixSearch::get_fixedlen() { if ( !inited_ || 0 == pys_decoded_len_ ) { return 0; } return fixed_hzs_; @@ -997,12 +996,12 @@ namespace ime_pinyin { if ( sent_len > kMaxLemmaSize ) { pfullsent = NULL; } lpi_total_ = 0; - size_t lpi_num_full_match = 0; // Number of items which are fully-matched. + Size_t lpi_num_full_match = 0; // Number of items which are fully-matched. while ( lma_size > 0 ) { - size_t lma_num; + Size_t lma_num; lma_num = get_lpis ( spl_id_ + fixed_hzs_, lma_size, lpi_items_ + lpi_total_, - size_t ( kMaxLmaPsbItems - lpi_total_ ), + Size_t ( kMaxLmaPsbItems - lpi_total_ ), pfullsent, lma_size == lma_size_max ); if ( lma_num > 0 ) { lpi_total_ += lma_num; @@ -1020,7 +1019,7 @@ namespace ime_pinyin { sizeof ( LmaPsbItem ), cmp_lpi_with_unified_psb ); if ( kPrintDebug0 ) { printf ( "-----Prepare candidates, score:\n" ); - for ( size_t a = 0; a < lpi_total_; a++ ) { + for ( Size_t a = 0; a < lpi_total_; a++ ) { printf ( "[%03d]%d ", a, lpi_items_[a].psb ); if ( ( a + 1 ) % 6 == 0 ) { printf ( "\n" ); } } @@ -1031,20 +1030,20 @@ namespace ime_pinyin { } } - const char *MatrixSearch::get_pystr ( size_t *decoded_len ) { + const char *MatrixSearch::get_pystr ( Size_t *decoded_len ) { if ( !inited_ || NULL == decoded_len ) - { return NULL; } + { return NULL; } *decoded_len = pys_decoded_len_; return pys_; } - void MatrixSearch::merge_fixed_lmas ( size_t del_spl_pos ) { + void MatrixSearch::merge_fixed_lmas ( Size_t del_spl_pos ) { if ( fixed_lmas_ == 0 ) { return; } // Update spelling segmentation information first. spl_id_num_ -= 1; uint16 del_py_len = spl_start_[del_spl_pos + 1] - spl_start_[del_spl_pos]; - for ( size_t pos = del_spl_pos; pos <= spl_id_num_; pos++ ) { + for ( Size_t pos = del_spl_pos; pos <= spl_id_num_; pos++ ) { spl_start_[pos] = spl_start_[pos + 1] - del_py_len; if ( pos == spl_id_num_ ) { break; } @@ -1164,7 +1163,7 @@ namespace ime_pinyin { mtrx_nd = mtrx_nd->from; } // Reverse the result of spelling info - for ( size_t pos = fixed_hzs_; + for ( Size_t pos = fixed_hzs_; pos < fixed_hzs_ + ( spl_id_num_ - fixed_hzs_ + 1 ) / 2; pos++ ) { if ( spl_id_num_ + fixed_hzs_ - pos != pos + 1 ) { spl_start_[pos + 1] ^= spl_start_[spl_id_num_ - pos + fixed_hzs_]; @@ -1176,7 +1175,7 @@ namespace ime_pinyin { } } // Reverse the result of lemma info - for ( size_t pos = fixed_lmas_; + for ( Size_t pos = fixed_lmas_; pos < fixed_lmas_ + ( lma_id_num_ - fixed_lmas_ + 1 ) / 2; pos++ ) { assert ( lma_id_num_ + fixed_lmas_ - pos - 1 >= pos ); if ( lma_id_num_ + fixed_lmas_ - pos > pos + 1 ) { @@ -1188,7 +1187,7 @@ namespace ime_pinyin { lma_id_[pos] ^= lma_id_[lma_id_num_ - 1 - pos + fixed_lmas_]; } } - for ( size_t pos = fixed_lmas_ + 1; pos <= lma_id_num_; pos++ ) { + for ( Size_t pos = fixed_lmas_ + 1; pos <= lma_id_num_; pos++ ) { if ( pos < lma_id_num_ ) lma_start_[pos] = lma_start_[pos - 1] + ( lma_start_[pos] - lma_start_[pos + 1] ); @@ -1198,7 +1197,7 @@ namespace ime_pinyin { } // Find the last fixed position fixed_hzs_ = 0; - for ( size_t pos = spl_id_num_; pos > 0; pos-- ) { + for ( Size_t pos = spl_id_num_; pos > 0; pos-- ) { if ( NULL != matrix_[spl_start_[pos]].mtrx_nd_fixed ) { fixed_hzs_ = pos; break; @@ -1207,13 +1206,13 @@ namespace ime_pinyin { return; } - size_t MatrixSearch::get_spl_start ( const uint16 *&spl_start ) { + Size_t MatrixSearch::get_spl_start ( const uint16 *&spl_start ) { get_spl_start_id(); spl_start = spl_start_; return spl_id_num_; } - size_t MatrixSearch::extend_dmi ( DictExtPara *dep, DictMatchInfo *dmi_s ) { + Size_t MatrixSearch::extend_dmi ( DictExtPara *dep, DictMatchInfo *dmi_s ) { if ( dmi_pool_used_ >= kDmiPoolSize ) { return 0; } if ( dmi_c_phrase_ ) { return extend_dmi_c ( dep, dmi_s ); } @@ -1224,7 +1223,7 @@ namespace ime_pinyin { { cached = lpi_cache.is_cached ( splid ); } // 1. If this is a half Id, get its corresponding full starting Id and // number of full Id. - size_t ret_val = 0; + Size_t ret_val = 0; PoolPosType mtrx_dmi_fr = ( PoolPosType ) - 1; // From which dmi node lpi_total_ = 0; MileStoneHandle from_h[3]; @@ -1235,7 +1234,7 @@ namespace ime_pinyin { from_h[1] = dmi_s->dict_handles[1]; } // 2. Begin exgtending in the system dictionary - size_t lpi_num = 0; + Size_t lpi_num = 0; MileStoneHandle handles[2]; handles[0] = handles[1] = 0; if ( from_h[0] > 0 || NULL == dmi_s ) { @@ -1256,7 +1255,7 @@ namespace ime_pinyin { &lpi_num ); if ( handles[1] > 0 ) { if ( kPrintDebug0 ) { - for ( size_t t = 0; t < lpi_num; t++ ) { + for ( Size_t t = 0; t < lpi_num; t++ ) { printf ( "--Extend in user dict: uid:%d uscore:%d\n", lpi_items_[lpi_total_ + t].id, lpi_items_[lpi_total_ + t].psb ); } @@ -1297,7 +1296,7 @@ namespace ime_pinyin { return ret_val; } - size_t MatrixSearch::extend_dmi_c ( DictExtPara *dep, DictMatchInfo *dmi_s ) { + Size_t MatrixSearch::extend_dmi_c ( DictExtPara *dep, DictMatchInfo *dmi_s ) { lpi_total_ = 0; uint16 pos = dep->splids_extended; assert ( dmi_c_phrase_ ); @@ -1328,9 +1327,9 @@ namespace ime_pinyin { return 0; } - size_t MatrixSearch::extend_mtrx_nd ( MatrixNode *mtrx_nd, LmaPsbItem lpi_items[], - size_t lpi_num, PoolPosType dmi_fr, - size_t res_row ) { + Size_t MatrixSearch::extend_mtrx_nd ( MatrixNode *mtrx_nd, LmaPsbItem lpi_items[], + Size_t lpi_num, PoolPosType dmi_fr, + Size_t res_row ) { assert ( NULL != mtrx_nd ); matrix_[res_row].mtrx_nd_fixed = NULL; if ( mtrx_nd_pool_used_ >= kMtrxNdPoolSize - kMaxNodeARow ) @@ -1342,17 +1341,17 @@ namespace ime_pinyin { { lpi_num = kMaxNodeARow; } } MatrixNode *mtrx_nd_res_min = mtrx_nd_pool_ + matrix_[res_row].mtrx_nd_pos; - for ( size_t pos = 0; pos < lpi_num; pos++ ) { + for ( Size_t pos = 0; pos < lpi_num; pos++ ) { float score = mtrx_nd->score + lpi_items[pos].psb; if ( pos > 0 && score - PRUMING_SCORE > mtrx_nd_res_min->score ) { break; } // Try to add a new node - size_t mtrx_nd_num = matrix_[res_row].mtrx_nd_num; + Size_t mtrx_nd_num = matrix_[res_row].mtrx_nd_num; MatrixNode *mtrx_nd_res = mtrx_nd_res_min + mtrx_nd_num; bool replace = false; // Find its position while ( mtrx_nd_res > mtrx_nd_res_min && score < ( mtrx_nd_res - 1 )->score ) { - if ( static_cast<size_t> ( mtrx_nd_res - mtrx_nd_res_min ) < kMaxNodeARow ) + if ( static_cast<Size_t> ( mtrx_nd_res - mtrx_nd_res_min ) < kMaxNodeARow ) { *mtrx_nd_res = * ( mtrx_nd_res - 1 ); } mtrx_nd_res--; replace = true; @@ -1371,7 +1370,7 @@ namespace ime_pinyin { return matrix_[res_row].mtrx_nd_num; } - PoolPosType MatrixSearch::match_dmi ( size_t step_to, uint16 spl_ids[], + PoolPosType MatrixSearch::match_dmi ( Size_t step_to, uint16 spl_ids[], uint16 spl_id_num ) { if ( pys_decoded_len_ < step_to || 0 == matrix_[step_to].dmi_num ) { return static_cast<PoolPosType> ( -1 ); @@ -1395,14 +1394,14 @@ namespace ime_pinyin { return static_cast<PoolPosType> ( -1 ); } - char16 *MatrixSearch::get_candidate0 ( char16 *cand_str, size_t max_len, + char16 *MatrixSearch::get_candidate0 ( char16 *cand_str, Size_t max_len, uint16 *retstr_len, bool only_unfixed ) { if ( pys_decoded_len_ == 0 || matrix_[pys_decoded_len_].mtrx_nd_num == 0 ) { return NULL; } LemmaIdType idxs[kMaxRowNum]; - size_t id_num = 0; + Size_t id_num = 0; MatrixNode *mtrx_nd = mtrx_nd_pool_ + matrix_[pys_decoded_len_].mtrx_nd_pos; if ( kPrintDebug0 ) { printf ( "--- sentence score: %f\n", mtrx_nd->score ); @@ -1423,7 +1422,7 @@ namespace ime_pinyin { if ( kPrintDebug1 ) { printf ( "<<==============Sentence DMI (reverse order) end=============\n" ); } - size_t ret_pos = 0; + Size_t ret_pos = 0; do { id_num--; if ( 0 == idxs[id_num] ) @@ -1453,36 +1452,36 @@ namespace ime_pinyin { return cand_str; } - size_t MatrixSearch::get_lpis ( const uint16 *splid_str, size_t splid_str_len, - LmaPsbItem *lma_buf, size_t max_lma_buf, + Size_t MatrixSearch::get_lpis ( const uint16 *splid_str, Size_t splid_str_len, + LmaPsbItem *lma_buf, Size_t max_lma_buf, const char16 *pfullsent, bool sort_by_psb ) { if ( splid_str_len > kMaxLemmaSize ) { return 0; } - size_t num1 = dict_trie_->get_lpis ( splid_str, splid_str_len, + Size_t num1 = dict_trie_->get_lpis ( splid_str, splid_str_len, lma_buf, max_lma_buf ); - size_t num2 = 0; + Size_t num2 = 0; if ( NULL != user_dict_ ) { num2 = user_dict_->get_lpis ( splid_str, splid_str_len, lma_buf + num1, max_lma_buf - num1 ); } - size_t num = num1 + num2; + Size_t num = num1 + num2; if ( 0 == num ) { return 0; } // Remove repeated items. if ( splid_str_len > 1 ) { LmaPsbStrItem *lpsis = reinterpret_cast<LmaPsbStrItem *> ( lma_buf + num ); - size_t lpsi_num = ( max_lma_buf - num ) * sizeof ( LmaPsbItem ) / + Size_t lpsi_num = ( max_lma_buf - num ) * sizeof ( LmaPsbItem ) / sizeof ( LmaPsbStrItem ); assert ( lpsi_num > num ); if ( num > lpsi_num ) { num = lpsi_num; } lpsi_num = num; - for ( size_t pos = 0; pos < lpsi_num; pos++ ) { + for ( Size_t pos = 0; pos < lpsi_num; pos++ ) { lpsis[pos].lpi = lma_buf[pos]; get_lemma_str ( lma_buf[pos].id, lpsis[pos].str, kMaxLemmaSize + 1 ); } myqsort ( lpsis, lpsi_num, sizeof ( LmaPsbStrItem ), cmp_lpsi_with_str ); - size_t remain_num = 0; - for ( size_t pos = 0; pos < lpsi_num; pos++ ) { + Size_t remain_num = 0; + for ( Size_t pos = 0; pos < lpsi_num; pos++ ) { if ( pos > 0 && utf16_strcmp ( lpsis[pos].str, lpsis[pos - 1].str ) == 0 ) { if ( lpsis[pos].lpi.psb < lpsis[pos - 1].lpi.psb ) { assert ( remain_num > 0 ); @@ -1502,14 +1501,14 @@ namespace ime_pinyin { // example, "de" and "di" are all valid for a Chinese character, so when // the user input "d", repeated items are generated. // For single character lemmas, Hanzis will be gotten - for ( size_t pos = 0; pos < num; pos++ ) { + for ( Size_t pos = 0; pos < num; pos++ ) { char16 hanzis[2]; get_lemma_str ( lma_buf[pos].id, hanzis, 2 ); lma_buf[pos].hanzi = hanzis[0]; } myqsort ( lma_buf, num, sizeof ( LmaPsbItem ), cmp_lpi_with_hanzi ); - size_t remain_num = 0; - for ( size_t pos = 0; pos < num; pos++ ) { + Size_t remain_num = 0; + for ( Size_t pos = 0; pos < num; pos++ ) { if ( pos > 0 && lma_buf[pos].hanzi == lma_buf[pos - 1].hanzi ) { if ( NULL != pfullsent && static_cast<char16> ( 0 ) == pfullsent[1] && @@ -1597,20 +1596,20 @@ namespace ime_pinyin { return splid_num; } - size_t MatrixSearch::inner_predict ( const char16 *fixed_buf, uint16 fixed_len, + Size_t MatrixSearch::inner_predict ( const char16 *fixed_buf, uint16 fixed_len, char16 predict_buf[][kMaxPredictSize + 1], - size_t buf_len ) { - size_t res_total = 0; + Size_t buf_len ) { + Size_t res_total = 0; memset ( npre_items_, 0, sizeof ( NPredictItem ) * npre_items_len_ ); // In order to shorten the comments, j-character candidates predicted by // i-character prefix are called P(i,j). All candiates predicted by // i-character prefix are called P(i,*) // Step 1. Get P(kMaxPredictSize, *) and sort them, here // P(kMaxPredictSize, *) == P(kMaxPredictSize, 1) - for ( size_t len = fixed_len; len > 0; len-- ) { + for ( Size_t len = fixed_len; len > 0; len-- ) { // How many blank items are available - size_t this_max = npre_items_len_ - res_total; - size_t res_this; + Size_t this_max = npre_items_len_ - res_total; + Size_t res_this; // If the history is longer than 1, and we can not get prediction from // lemmas longer than 2, in this case, we will add lemmas with // highest scores as the prediction result. @@ -1618,7 +1617,7 @@ namespace ime_pinyin { // Try to find if recent n (n>1) characters can be a valid lemma in system // dictionary. bool nearest_n_word = false; - for ( size_t nlen = 2; nlen <= fixed_len; nlen++ ) { + for ( Size_t nlen = 2; nlen <= fixed_len; nlen++ ) { if ( dict_trie_->get_lemma_id ( fixed_buf + fixed_len - nlen, nlen ) > 0 ) { nearest_n_word = true; break; @@ -1673,16 +1672,16 @@ namespace ime_pinyin { } if ( kPrintDebug2 ) { printf ( "/////////////////Predicted Items Begin////////////////////>>\n" ); - for ( size_t i = 0; i < res_total; i++ ) { + for ( Size_t i = 0; i < res_total; i++ ) { printf ( "---" ); - for ( size_t j = 0; j < kMaxPredictSize; j++ ) { + for ( Size_t j = 0; j < kMaxPredictSize; j++ ) { printf ( "%d ", npre_items_[i].pre_hzs[j] ); } printf ( "\n" ); } printf ( "<<///////////////Predicted Items End////////////////////////\n" ); } - for ( size_t i = 0; i < res_total; i++ ) { + for ( Size_t i = 0; i < res_total; i++ ) { utf16_strncpy ( predict_buf[i], npre_items_[i].pre_hzs, kMaxPredictSize ); predict_buf[i][kMaxPredictSize] = '\0'; @@ -1690,10 +1689,10 @@ namespace ime_pinyin { return res_total; } - size_t MatrixSearch::get_predicts ( const char16 fixed_buf[], + Size_t MatrixSearch::get_predicts ( const char16 fixed_buf[], char16 predict_buf[][kMaxPredictSize + 1], - size_t buf_len ) { - size_t fixed_len = utf16_strlen ( fixed_buf ); + Size_t buf_len ) { + Size_t fixed_len = utf16_strlen ( fixed_buf ); if ( 0 == fixed_len || fixed_len > kMaxPredictSize || 0 == buf_len ) { return 0; } return inner_predict ( fixed_buf, fixed_len, predict_buf, buf_len ); diff --git a/jni/share/mystdlib.cpp b/jni/share/mystdlib.cpp index 86e069c..6b47c17 100755..100644 --- a/jni/share/mystdlib.cpp +++ b/jni/share/mystdlib.cpp @@ -15,19 +15,19 @@ */ #include <stdlib.h> - +#include "../include/utf16char.h" namespace ime_pinyin { // For debug purpose. You can add a fixed version of qsort and bsearch functions // here so that the output will be totally the same under different platforms. - void myqsort ( void *p, size_t n, size_t es, + void myqsort ( void *p, Size_t n, Size_t es, int ( *cmp ) ( const void *, const void * ) ) { qsort ( p, n, es, cmp ); } void *mybsearch ( const void *k, const void *b, - size_t n, size_t es, + Size_t n, Size_t es, int ( *cmp ) ( const void *, const void * ) ) { return bsearch ( k, b, n, es, cmp ); } diff --git a/jni/share/ngram.cpp b/jni/share/ngram.cpp index 1ccbf34..8771cfc 100755..100644 --- a/jni/share/ngram.cpp +++ b/jni/share/ngram.cpp @@ -55,10 +55,10 @@ namespace ime_pinyin { { return qsearch_nearest ( code_book, freq, mid, end ); } } - size_t update_code_idx ( double freqs[], size_t num, double code_book[], + Size_t update_code_idx ( double freqs[], Size_t num, double code_book[], CODEBOOK_TYPE *code_idx ) { - size_t changed = 0; - for ( size_t pos = 0; pos < num; pos++ ) { + Size_t changed = 0; + for ( Size_t pos = 0; pos < num; pos++ ) { CODEBOOK_TYPE idx; idx = qsearch_nearest ( code_book, freqs[pos], 0, kCodeBookSize - 1 ); if ( idx != code_idx[pos] ) @@ -68,21 +68,21 @@ namespace ime_pinyin { return changed; } - double recalculate_kernel ( double freqs[], size_t num, double code_book[], + double recalculate_kernel ( double freqs[], Size_t num, double code_book[], CODEBOOK_TYPE *code_idx ) { double ret = 0; - size_t *item_num = new size_t[kCodeBookSize]; + Size_t *item_num = new Size_t[kCodeBookSize]; assert ( item_num ); - memset ( item_num, 0, sizeof ( size_t ) * kCodeBookSize ); + memset ( item_num, 0, sizeof ( Size_t ) * kCodeBookSize ); double *cb_new = new double[kCodeBookSize]; assert ( cb_new ); memset ( cb_new, 0, sizeof ( double ) * kCodeBookSize ); - for ( size_t pos = 0; pos < num; pos++ ) { + for ( Size_t pos = 0; pos < num; pos++ ) { ret += distance ( freqs[pos], code_book[code_idx[pos]] ); cb_new[code_idx[pos]] += freqs[pos]; item_num[code_idx[pos]] += 1; } - for ( size_t code = 0; code < kCodeBookSize; code++ ) { + for ( Size_t code = 0; code < kCodeBookSize; code++ ) { assert ( item_num[code] > 0 ); code_book[code] = cb_new[code] / item_num[code]; } @@ -91,12 +91,12 @@ namespace ime_pinyin { return ret; } - void iterate_codes ( double freqs[], size_t num, double code_book[], + void iterate_codes ( double freqs[], Size_t num, double code_book[], CODEBOOK_TYPE *code_idx ) { - size_t iter_num = 0; + Size_t iter_num = 0; double delta_last = 0; do { - size_t changed = update_code_idx ( freqs, num, code_book, code_idx ); + Size_t changed = update_code_idx ( freqs, num, code_book, code_idx ); double delta = recalculate_kernel ( freqs, num, code_book, code_idx ); if ( kPrintDebug0 ) { printf ( "---Unigram codebook iteration: %d : %d, %.9f\n", @@ -146,7 +146,7 @@ namespace ime_pinyin { { return false; } if ( 0 == idx_num_ || NULL == freq_codes_ || NULL == lma_freq_idx_ ) { return false; } - if ( fwrite ( &idx_num_, sizeof ( size_t ), 1, fp ) != 1 ) + if ( fwrite ( &idx_num_, sizeof ( Size_t ), 1, fp ) != 1 ) { return false; } if ( fwrite ( freq_codes_, sizeof ( LmaScoreType ), kCodeBookSize, fp ) != kCodeBookSize ) @@ -160,7 +160,7 @@ namespace ime_pinyin { if ( NULL == fp ) { return false; } initialized_ = false; - if ( fread ( &idx_num_, sizeof ( size_t ), 1, fp ) != 1 ) + if ( fread ( &idx_num_, sizeof ( Size_t ), 1, fp ) != 1 ) { return false; } if ( NULL != lma_freq_idx_ ) { free ( lma_freq_idx_ ); } @@ -182,7 +182,7 @@ namespace ime_pinyin { return true; } - void NGram::set_total_freq_none_sys ( size_t freq_none_sys ) { + void NGram::set_total_freq_none_sys ( Size_t freq_none_sys ) { total_freq_none_sys_ = freq_none_sys; if ( 0 == total_freq_none_sys_ ) { sys_score_compensation_ = 0; @@ -210,7 +210,7 @@ namespace ime_pinyin { } #ifdef ___BUILD_MODEL___ - bool NGram::build_unigram ( LemmaEntry *lemma_arr, size_t lemma_num, + bool NGram::build_unigram ( LemmaEntry *lemma_arr, Size_t lemma_num, LemmaIdType next_idx_unused ) { if ( NULL == lemma_arr || 0 == lemma_num || next_idx_unused <= 1 ) { return false; } @@ -221,7 +221,7 @@ namespace ime_pinyin { freqs[0] = ADD_COUNT; total_freq += freqs[0]; LemmaIdType idx_now = 0; - for ( size_t pos = 0; pos < lemma_num; pos++ ) { + for ( Size_t pos = 0; pos < lemma_num; pos++ ) { if ( lemma_arr[pos].idx_by_hz == idx_now ) { continue; } idx_now++; @@ -234,7 +234,7 @@ namespace ime_pinyin { double max_freq = 0; idx_num_ = idx_now + 1; assert ( idx_now + 1 == next_idx_unused ); - for ( size_t pos = 0; pos < idx_num_; pos++ ) { + for ( Size_t pos = 0; pos < idx_num_; pos++ ) { freqs[pos] = freqs[pos] / total_freq; assert ( freqs[pos] > 0 ); if ( freqs[pos] > max_freq ) @@ -249,13 +249,13 @@ namespace ime_pinyin { { freq_codes_ = new LmaScoreType[kCodeBookSize]; } assert ( freq_codes_ ); memset ( freq_codes_, 0, sizeof ( LmaScoreType ) * kCodeBookSize ); - size_t freq_pos = 0; - for ( size_t code_pos = 0; code_pos < kCodeBookSize; code_pos++ ) { + Size_t freq_pos = 0; + for ( Size_t code_pos = 0; code_pos < kCodeBookSize; code_pos++ ) { bool found = true; while ( found ) { found = false; double cand = freqs[freq_pos]; - for ( size_t i = 0; i < code_pos; i++ ) + for ( Size_t i = 0; i < code_pos; i++ ) if ( freq_codes_df_[i] == cand ) { found = true; break; @@ -275,7 +275,7 @@ namespace ime_pinyin { if ( kPrintDebug0 ) { printf ( "\n------Language Model Unigram Codebook------\n" ); } - for ( size_t code_pos = 0; code_pos < kCodeBookSize; code_pos++ ) { + for ( Size_t code_pos = 0; code_pos < kCodeBookSize; code_pos++ ) { double log_score = log ( freq_codes_df_[code_pos] ); float final_score = convert_psb_to_score ( freq_codes_df_[code_pos] ); if ( kPrintDebug0 ) { diff --git a/jni/share/pinyinime.cpp b/jni/share/pinyinime.cpp index a72f3b4..5589d6a 100755..100644 --- a/jni/share/pinyinime.cpp +++ b/jni/share/pinyinime.cpp @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include <stdlib.h> #include "../include/pinyinime.h" #include "../include/dicttrie.h" @@ -27,7 +26,7 @@ extern "C" { using namespace ime_pinyin; // The maximum number of the prediction items. - static const size_t kMaxPredictNum = 500; + static const Size_t kMaxPredictNum = 500; // Used to search Pinyin string and give the best candidate. MatrixSearch *matrix_search = NULL; @@ -62,7 +61,7 @@ extern "C" { matrix_search = NULL; } - void im_set_max_lens ( size_t max_sps_len, size_t max_hzs_len ) { + void im_set_max_lens ( Size_t max_sps_len, Size_t max_hzs_len ) { if ( NULL != matrix_search ) { matrix_search->set_max_lens ( max_sps_len, max_hzs_len ); } @@ -74,14 +73,14 @@ extern "C" { } // To be updated. - size_t im_search ( const char *pybuf, size_t pylen ) { + Size_t im_search ( const char *pybuf, Size_t pylen ) { if ( NULL == matrix_search ) { return 0; } matrix_search->search ( pybuf, pylen ); return matrix_search->get_candidate_num(); } - size_t im_delsearch ( size_t pos, bool is_pos_in_splid, + Size_t im_delsearch ( Size_t pos, bool is_pos_in_splid, bool clear_fixed_this_step ) { if ( NULL == matrix_search ) { return 0; } @@ -96,42 +95,45 @@ extern "C" { } // To be removed - size_t im_add_letter ( char ch ) { + Size_t im_add_letter ( char ch ) { return 0; } - const char *im_get_sps_str ( size_t *decoded_len ) { + const char *im_get_sps_str ( Size_t *decoded_len ) + { if ( NULL == matrix_search ) - { return NULL; } + { + return NULL; + } return matrix_search->get_pystr ( decoded_len ); } - char16 *im_get_candidate ( size_t cand_id, char16 *cand_str, - size_t max_len ) { + char16 *im_get_candidate ( Size_t cand_id, char16 *cand_str, + Size_t max_len ) { if ( NULL == matrix_search ) { return NULL; } return matrix_search->get_candidate ( cand_id, cand_str, max_len ); } - size_t im_get_spl_start_pos ( const uint16 *&spl_start ) { + Size_t im_get_spl_start_pos ( const uint16 *&spl_start ) { if ( NULL == matrix_search ) { return 0; } return matrix_search->get_spl_start ( spl_start ); } - size_t im_choose ( size_t choice_id ) { + Size_t im_choose ( Size_t choice_id ) { if ( NULL == matrix_search ) { return 0; } return matrix_search->choose ( choice_id ); } - size_t im_cancel_last_choice() { + Size_t im_cancel_last_choice() { if ( NULL == matrix_search ) { return 0; } return matrix_search->cancel_last_choice(); } - size_t im_get_fixed_len() { + Size_t im_get_fixed_len() { if ( NULL == matrix_search ) { return 0; } return matrix_search->get_fixedlen(); @@ -143,11 +145,11 @@ extern "C" { } - size_t im_get_predicts ( const char16 *his_buf, + Size_t im_get_predicts ( const char16 *his_buf, char16 ( *&pre_buf ) [kMaxPredictSize + 1] ) { if ( NULL == his_buf ) { return 0; } - size_t fixed_len = utf16_strlen ( his_buf ); + Size_t fixed_len = utf16_strlen ( his_buf ); const char16 *fixed_ptr = his_buf; if ( fixed_len > kMaxPredictSize ) { fixed_ptr += fixed_len - kMaxPredictSize; diff --git a/jni/share/searchutility.cpp b/jni/share/searchutility.cpp index e1d46d5..2708992 100755..100644 --- a/jni/share/searchutility.cpp +++ b/jni/share/searchutility.cpp @@ -48,8 +48,8 @@ namespace ime_pinyin { // The real unified psb is psb1 / lma_len1 and psb2 * lma_len2 // But we use psb1 * lma_len2 and psb2 * lma_len1 to get better // precision. - size_t up1 = item1->psb * ( item2->lma_len ); - size_t up2 = item2->psb * ( item1->lma_len ); + Size_t up1 = item1->psb * ( item2->lma_len ); + Size_t up2 = item2->psb * ( item1->lma_len ); if ( up1 < up2 ) { return -1; } @@ -170,12 +170,12 @@ namespace ime_pinyin { return 0; } - size_t remove_duplicate_npre ( NPredictItem *npre_items, size_t npre_num ) { + Size_t remove_duplicate_npre ( NPredictItem *npre_items, Size_t npre_num ) { if ( NULL == npre_items || 0 == npre_num ) { return 0; } myqsort ( npre_items, npre_num, sizeof ( NPredictItem ), cmp_npre_by_hanzi_score ); - size_t remain_num = 1; // The first one is reserved. - for ( size_t pos = 1; pos < npre_num; pos++ ) { + Size_t remain_num = 1; // The first one is reserved. + for ( Size_t pos = 1; pos < npre_num; pos++ ) { if ( utf16_strncmp ( npre_items[pos].pre_hzs, npre_items[remain_num - 1].pre_hzs, kMaxPredictSize ) != 0 ) { @@ -188,8 +188,8 @@ namespace ime_pinyin { return remain_num; } - size_t align_to_size_t ( size_t size ) { - size_t s = sizeof ( size_t ); + Size_t align_to_Size_t ( Size_t size ) { + Size_t s = sizeof ( Size_t ); return ( size + s - 1 ) / s * s; } diff --git a/jni/share/spellingtable.cpp b/jni/share/spellingtable.cpp index 29b8d3d..5d497fb 100755..100644 --- a/jni/share/spellingtable.cpp +++ b/jni/share/spellingtable.cpp @@ -39,12 +39,12 @@ namespace ime_pinyin { ( static_cast<const RawSpelling *> ( p2 ) )->str ); } - size_t get_odd_next ( size_t value ) { - size_t v_next = value; + Size_t get_odd_next ( Size_t value ) { + Size_t v_next = value; while ( true ) { - size_t v_next_sqrt = ( size_t ) sqrt ( v_next ); + Size_t v_next_sqrt = ( Size_t ) sqrt ( v_next ); bool is_odd = true; - for ( size_t v_dv = 2; v_dv < v_next_sqrt + 1; v_dv++ ) { + for ( Size_t v_dv = 2; v_dv < v_next_sqrt + 1; v_dv++ ) { if ( v_next % v_dv == 0 ) { is_odd = false; break; @@ -71,18 +71,18 @@ namespace ime_pinyin { free_resource(); } - size_t SpellingTable::get_hash_pos ( const char *spelling_str ) { - size_t hash_pos = 0; - for ( size_t pos = 0; pos < spelling_size_; pos++ ) { + Size_t SpellingTable::get_hash_pos ( const char *spelling_str ) { + Size_t hash_pos = 0; + for ( Size_t pos = 0; pos < spelling_size_; pos++ ) { if ( '\0' == spelling_str[pos] ) { break; } - hash_pos += ( size_t ) spelling_str[pos]; + hash_pos += ( Size_t ) spelling_str[pos]; } hash_pos = hash_pos % spelling_max_num_; return hash_pos; } - size_t SpellingTable::hash_pos_next ( size_t hash_pos ) { + Size_t SpellingTable::hash_pos_next ( Size_t hash_pos ) { hash_pos += 123; hash_pos = hash_pos % spelling_max_num_; return hash_pos; @@ -97,7 +97,7 @@ namespace ime_pinyin { spelling_buf_ = NULL; } - bool SpellingTable::init_table ( size_t pure_spl_size, size_t spl_max_num, + bool SpellingTable::init_table ( Size_t pure_spl_size, Size_t spl_max_num, bool need_score ) { if ( pure_spl_size == 0 || spl_max_num == 0 ) { return false; } @@ -124,20 +124,20 @@ namespace ime_pinyin { bool SpellingTable::put_spelling ( const char *spelling_str, double freq ) { if ( frozen_ || NULL == spelling_str ) { return false; } - for ( size_t pos = 0; pos < kNotSupportNum; pos++ ) { + for ( Size_t pos = 0; pos < kNotSupportNum; pos++ ) { if ( strcmp ( spelling_str, kNotSupportList[pos] ) == 0 ) { return false; } } total_freq_ += freq; - size_t hash_pos = get_hash_pos ( spelling_str ); + Size_t hash_pos = get_hash_pos ( spelling_str ); raw_spellings_[hash_pos].str[spelling_size_ - 1] = '\0'; if ( strncmp ( raw_spellings_[hash_pos].str, spelling_str, spelling_size_ - 1 ) == 0 ) { raw_spellings_[hash_pos].freq += freq; return true; } - size_t hash_pos_ori = hash_pos; + Size_t hash_pos_ori = hash_pos; while ( true ) { if ( strncmp ( raw_spellings_[hash_pos].str, spelling_str, spelling_size_ - 1 ) == 0 ) { @@ -162,13 +162,13 @@ namespace ime_pinyin { bool SpellingTable::contain ( const char *spelling_str ) { if ( NULL == spelling_str || NULL == spelling_buf_ || frozen_ ) { return false; } - size_t hash_pos = get_hash_pos ( spelling_str ); + Size_t hash_pos = get_hash_pos ( spelling_str ); if ( '\0' == raw_spellings_[hash_pos].str[0] ) { return false; } if ( strncmp ( raw_spellings_[hash_pos].str, spelling_str, spelling_size_ - 1 ) == 0 ) { return true; } - size_t hash_pos_ori = hash_pos; + Size_t hash_pos_ori = hash_pos; while ( true ) { hash_pos = hash_pos_next ( hash_pos ); if ( hash_pos_ori == hash_pos ) @@ -183,7 +183,7 @@ namespace ime_pinyin { return false; } - const char *SpellingTable::arrange ( size_t *item_size, size_t *spl_num ) { + const char *SpellingTable::arrange ( Size_t *item_size, Size_t *spl_num ) { if ( NULL == raw_spellings_ || NULL == spelling_buf_ || NULL == item_size || NULL == spl_num ) { return NULL; } @@ -191,7 +191,7 @@ namespace ime_pinyin { compare_raw_spl_eb ); // After sorting, only the first spelling_num_ items are valid. // Copy them to the destination buffer. - for ( size_t pos = 0; pos < spelling_num_; pos++ ) { + for ( Size_t pos = 0; pos < spelling_num_; pos++ ) { strncpy ( spelling_buf_ + pos * spelling_size_, raw_spellings_[pos].str, spelling_size_ ); } @@ -201,7 +201,7 @@ namespace ime_pinyin { double max_score = 0; double min_score = 0; // After sorting, only the first spelling_num_ items are valid. - for ( size_t pos = 0; pos < spelling_num_; pos++ ) { + for ( Size_t pos = 0; pos < spelling_num_; pos++ ) { raw_spellings_[pos].freq /= total_freq_; if ( need_score_ ) { if ( 0 == pos ) { @@ -226,7 +226,7 @@ namespace ime_pinyin { // both of them are negative after log function. score_amplifier_ = 1.0 * 255 / min_score; double average_score = 0; - for ( size_t pos = 0; pos < spelling_num_; pos++ ) { + for ( Size_t pos = 0; pos < spelling_num_; pos++ ) { double score = log ( raw_spellings_[pos].freq ) * score_amplifier_; assert ( score >= 0 ); average_score += score; diff --git a/jni/share/spellingtrie.cpp b/jni/share/spellingtrie.cpp index 0c0dbc1..92977e1 100644 --- a/jni/share/spellingtrie.cpp +++ b/jni/share/spellingtrie.cpp @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include <stdio.h> #include <string.h> #include <assert.h> @@ -239,15 +238,15 @@ namespace ime_pinyin { void SpellingTrie::free_son_trie ( SpellingNode *node ) { if ( NULL == node ) { return; } - for ( size_t pos = 0; pos < node->num_of_son; pos++ ) { + for ( Size_t pos = 0; pos < node->num_of_son; pos++ ) { free_son_trie ( node->first_son + pos ); } if ( NULL != node->first_son ) { delete [] node->first_son; } } - bool SpellingTrie::construct ( const char *spelling_arr, size_t item_size, - size_t item_num, float score_amplifier, + bool SpellingTrie::construct ( const char *spelling_arr, Size_t item_size, + Size_t item_num, float score_amplifier, unsigned char average_score ) { if ( spelling_arr == NULL ) { return false; } @@ -336,8 +335,8 @@ namespace ime_pinyin { assert ( sucess ); } } - size_t ym_item_size; // '\0' is included - size_t ym_num; + Size_t ym_item_size; // '\0' is included + Size_t ym_num; const char *ym_buf; ym_buf = spl_table->arrange ( &ym_item_size, &ym_num ); if ( NULL != ym_buf_ ) @@ -374,7 +373,7 @@ namespace ime_pinyin { #endif SpellingNode *SpellingTrie::construct_spellings_subset ( - size_t item_start, size_t item_end, size_t level, SpellingNode *parent ) { + Size_t item_start, Size_t item_end, Size_t level, SpellingNode *parent ) { if ( level >= spelling_size_ || item_end <= item_start || NULL == parent ) { return NULL; } SpellingNode *first_son = NULL; @@ -385,7 +384,7 @@ namespace ime_pinyin { assert ( char_for_node >= 'A' && char_for_node <= 'Z' || 'h' == char_for_node ); // Scan the array to find how many sons - for ( size_t i = item_start + 1; i < item_end; i++ ) { + for ( Size_t i = item_start + 1; i < item_end; i++ ) { const char *spelling_current = spelling_buf_ + spelling_size_ * i; char char_current = spelling_current[level]; if ( char_current != char_for_node ) { @@ -401,14 +400,14 @@ namespace ime_pinyin { first_son = new SpellingNode[num_of_son]; memset ( first_son, 0, sizeof ( SpellingNode ) *num_of_son ); // Now begin construct tree - size_t son_pos = 0; + Size_t son_pos = 0; spelling_last_start = spelling_buf_ + spelling_size_ * item_start; char_for_node = spelling_last_start[level]; bool spelling_endable = true; if ( spelling_last_start[level + 1] != '\0' ) { spelling_endable = false; } - size_t item_start_next = item_start; - for ( size_t i = item_start + 1; i < item_end; i++ ) { + Size_t item_start_next = item_start; + for ( Size_t i = item_start + 1; i < item_end; i++ ) { const char *spelling_current = spelling_buf_ + spelling_size_ * i; char char_current = spelling_current[level]; assert ( is_valid_spl_char ( char_current ) ); @@ -423,7 +422,7 @@ namespace ime_pinyin { node_current->spelling_idx = kFullSplIdStart + item_start_next; } if ( spelling_last_start[level + 1] != '\0' || i - item_start_next > 1 ) { - size_t real_start = item_start_next; + Size_t real_start = item_start_next; if ( spelling_last_start[level + 1] == '\0' ) { real_start++; } node_current->first_son = @@ -495,7 +494,7 @@ namespace ime_pinyin { } if ( spelling_last_start[level + 1] != '\0' || item_end - item_start_next > 1 ) { - size_t real_start = item_start_next; + Size_t real_start = item_start_next; if ( spelling_last_start[level + 1] == '\0' ) { real_start++; } node_current->first_son = @@ -569,12 +568,14 @@ namespace ime_pinyin { } bool SpellingTrie::load_spl_trie ( FILE *fp ) { + spelling_num_=0; if ( NULL == fp ) { return false; } - if ( fread ( &spelling_size_, sizeof ( size_t ), 1, fp ) != 1 ) - { return false; } - if ( fread ( &spelling_num_, sizeof ( size_t ), 1, fp ) != 1 ) + if ( fread ( &spelling_size_, sizeof ( Size_t ), 1, fp ) != 1 ) { return false; } + if ( fread ( &spelling_num_, sizeof ( Size_t ), 1, fp ) != 1 ) + {return false; } + if ( fread ( &score_amplifier_, sizeof ( float ), 1, fp ) != 1 ) { return false; } if ( fread ( &average_score_, sizeof ( unsigned char ), 1, fp ) != 1 ) @@ -583,10 +584,11 @@ namespace ime_pinyin { { delete [] spelling_buf_; } spelling_buf_ = new char[spelling_size_ * spelling_num_]; if ( NULL == spelling_buf_ ) - { return false; } + return false; if ( fread ( spelling_buf_, sizeof ( char ) * spelling_size_, - spelling_num_, fp ) != spelling_num_ ) - { return false; } + spelling_num_, fp ) != spelling_num_ ){ + return false; + } return construct ( spelling_buf_, spelling_size_, spelling_num_, score_amplifier_, average_score_ ); } @@ -605,7 +607,7 @@ namespace ime_pinyin { return true; } - size_t SpellingTrie::get_spelling_num() { + Size_t SpellingTrie::get_spelling_num() { return spelling_num_; } @@ -647,7 +649,7 @@ namespace ime_pinyin { splstr16_queried_[0] = '\0'; if ( splid >= kFullSplIdStart ) { splid -= kFullSplIdStart; - for ( size_t pos = 0; pos < spelling_size_; pos++ ) { + for ( Size_t pos = 0; pos < spelling_size_; pos++ ) { splstr16_queried_[pos] = static_cast<char16> ( spelling_buf_[splid * spelling_size_ + pos] ); } @@ -676,12 +678,12 @@ namespace ime_pinyin { return splstr16_queried_; } - size_t SpellingTrie::get_spelling_str16 ( uint16 splid, char16 *splstr16, - size_t splstr16_len ) { + Size_t SpellingTrie::get_spelling_str16 ( uint16 splid, char16 *splstr16, + Size_t splstr16_len ) { if ( NULL == splstr16 || splstr16_len < kMaxPinyinSize + 1 ) { return 0; } if ( splid >= kFullSplIdStart ) { splid -= kFullSplIdStart; - for ( size_t pos = 0; pos <= kMaxPinyinSize; pos++ ) { + for ( Size_t pos = 0; pos <= kMaxPinyinSize; pos++ ) { splstr16[pos] = static_cast<char16> ( spelling_buf_[splid * spelling_size_ + pos] ); if ( static_cast<char16> ( '\0' ) == splstr16[pos] ) { diff --git a/jni/share/splparser.cpp b/jni/share/splparser.cpp index b9ccf22..b9ccf22 100755..100644 --- a/jni/share/splparser.cpp +++ b/jni/share/splparser.cpp diff --git a/jni/share/sync.cpp b/jni/share/sync.cpp index 00f61da..00f61da 100755..100644 --- a/jni/share/sync.cpp +++ b/jni/share/sync.cpp diff --git a/jni/share/userdict.cpp b/jni/share/userdict.cpp index a762a3f..02f8491 100755..100644 --- a/jni/share/userdict.cpp +++ b/jni/share/userdict.cpp @@ -317,7 +317,7 @@ namespace ime_pinyin { return true; } - size_t UserDict::number_of_lemmas() { + Size_t UserDict::number_of_lemmas() { return dict_info_.lemma_count; } @@ -328,7 +328,7 @@ namespace ime_pinyin { MileStoneHandle UserDict::extend_dict ( MileStoneHandle from_handle, const DictExtPara *dep, LmaPsbItem *lpi_items, - size_t lpi_max, size_t *lpi_num ) { + Size_t lpi_max, Size_t *lpi_num ) { if ( is_valid_state() == false ) { return 0; } bool need_extend = false; @@ -464,14 +464,14 @@ namespace ime_pinyin { } } - size_t UserDict::get_lpis ( const uint16 *splid_str, uint16 splid_str_len, - LmaPsbItem *lpi_items, size_t lpi_max ) { + Size_t UserDict::get_lpis ( const uint16 *splid_str, uint16 splid_str_len, + LmaPsbItem *lpi_items, Size_t lpi_max ) { return _get_lpis ( splid_str, splid_str_len, lpi_items, lpi_max, NULL ); } - size_t UserDict::_get_lpis ( const uint16 *splid_str, + Size_t UserDict::_get_lpis ( const uint16 *splid_str, uint16 splid_str_len, LmaPsbItem *lpi_items, - size_t lpi_max, bool *need_extend ) { + Size_t lpi_max, bool *need_extend ) { bool tmp_extend; if ( !need_extend ) { need_extend = &tmp_extend; } @@ -516,10 +516,10 @@ namespace ime_pinyin { #endif return 0; } - size_t lpi_current = 0; + Size_t lpi_current = 0; bool fuzzy_break = false; bool prefix_break = false; - while ( ( size_t ) middle < max_off && !fuzzy_break && !prefix_break ) { + while ( ( Size_t ) middle < max_off && !fuzzy_break && !prefix_break ) { if ( lpi_current >= lpi_max ) { break; } uint32 offset = offsets_[middle]; @@ -595,9 +595,9 @@ namespace ime_pinyin { return i; } - size_t UserDict::predict ( const char16 last_hzs[], uint16 hzs_len, - NPredictItem *npre_items, size_t npre_max, - size_t b4_used ) { + Size_t UserDict::predict ( const char16 last_hzs[], uint16 hzs_len, + NPredictItem *npre_items, Size_t npre_max, + Size_t b4_used ) { uint32 new_added = 0; #ifdef ___PREDICT_ENABLED___ int32 end = dict_info_.lemma_count - 1; @@ -929,7 +929,7 @@ namespace ime_pinyin { return false; } uint32 version = kUserDictVersion; - size_t wred = fwrite ( &version, 1, 4, fp ); + Size_t wred = fwrite ( &version, 1, 4, fp ); UserDictInfo info; memset ( &info, 0, sizeof ( info ) ); // By default, no limitation for lemma count and size @@ -951,8 +951,8 @@ namespace ime_pinyin { if ( !fp ) { return false; } - size_t size; - size_t readed; + Size_t size; + Size_t readed; uint32 version; UserDictInfo dict_info; // validate @@ -1004,7 +1004,7 @@ namespace ime_pinyin { pthread_mutex_unlock ( &g_mutex_ ); return false; } - size_t readed, toread; + Size_t readed, toread; UserDictInfo dict_info; uint8 *lemmas = NULL; uint32 *offsets = NULL; @@ -1017,7 +1017,7 @@ namespace ime_pinyin { #ifdef ___PREDICT_ENABLED___ uint32 *predicts = NULL; #endif - size_t i; + Size_t i; int err; err = fseek ( fp, -1 * sizeof ( dict_info ), SEEK_END ); if ( err ) { goto error; } @@ -1210,7 +1210,7 @@ namespace ime_pinyin { if ( err == -1 ) { return; } // New lemmas are always appended, no need to write whole lemma block - size_t need_write = kUserDictPreAlloc * + Size_t need_write = kUserDictPreAlloc * ( 2 + ( kUserDictAverageNchar << 2 ) ) - lemma_size_left_; err = lseek ( fd, dict_info_.lemma_size - need_write, SEEK_CUR ); if ( err == -1 ) @@ -1395,8 +1395,8 @@ namespace ime_pinyin { if ( is_valid_state() == false ) { return; } // Fixup offsets_, set REMOVE flag to lemma's flag if needed - size_t first_freed = 0; - size_t first_inuse = 0; + Size_t first_freed = 0; + Size_t first_inuse = 0; while ( first_freed < dict_info_.lemma_count ) { // Find first freed offset while ( ( offsets_[first_freed] & kUserDictOffsetFlagRemove ) == 0 && @@ -1468,12 +1468,12 @@ namespace ime_pinyin { #endif dict_info_.lemma_count = first_freed; // Fixup lemmas_ - size_t begin = 0; - size_t end = 0; - size_t dst = 0; + Size_t begin = 0; + Size_t end = 0; + Size_t dst = 0; int total_size = dict_info_.lemma_size + lemma_size_left_; int total_count = dict_info_.lemma_count + lemma_count_left_; - size_t real_size = total_size - lemma_size_left_; + Size_t real_size = total_size - lemma_size_left_; while ( dst < real_size ) { unsigned char flag = get_lemma_flag ( dst ); unsigned char nchr = get_lemma_nchar ( dst ); @@ -1509,7 +1509,7 @@ namespace ime_pinyin { break; } memmove ( lemmas_ + dst, lemmas_ + begin, end - begin ); - for ( size_t j = 0; j < dict_info_.lemma_count; j++ ) { + for ( Size_t j = 0; j < dict_info_.lemma_count; j++ ) { if ( offsets_[j] >= begin && offsets_[j] < end ) { offsets_[j] -= ( begin - dst ); offsets_by_id_[ids_[j] - start_id_] = offsets_[j]; @@ -1521,7 +1521,7 @@ namespace ime_pinyin { #endif } #ifdef ___SYNC_ENABLED___ - for ( size_t j = 0; j < dict_info_.sync_count; j++ ) { + for ( Size_t j = 0; j < dict_info_.sync_count; j++ ) { if ( syncs_[j] >= begin && syncs_[j] < end ) { syncs_[j] -= ( begin - dst ); } @@ -1933,7 +1933,7 @@ namespace ime_pinyin { } int flushed = 0; if ( lemma_count_left_ == 0 || - lemma_size_left_ < ( size_t ) ( 2 + ( lemma_len << 2 ) ) ) { + lemma_size_left_ < ( Size_t ) ( 2 + ( lemma_len << 2 ) ) ) { // XXX When there is no space for new lemma, we flush to disk // flush_cache() may be called by upper user // and better place shoule be found instead of here @@ -2015,23 +2015,23 @@ namespace ime_pinyin { return 0; } - size_t UserDict::get_total_lemma_count() { + Size_t UserDict::get_total_lemma_count() { return dict_info_.total_nfreq; } - void UserDict::set_total_lemma_count_of_others ( size_t count ) { + void UserDict::set_total_lemma_count_of_others ( Size_t count ) { total_other_nfreq_ = count; } LemmaIdType UserDict::append_a_lemma ( char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count, uint64 lmt ) { LemmaIdType id = get_max_lemma_id() + 1; - size_t offset = dict_info_.lemma_size; + Size_t offset = dict_info_.lemma_size; if ( offset > kUserDictOffsetMask ) { return 0; } lemmas_[offset] = 0; lemmas_[offset + 1] = ( uint8 ) lemma_len; - for ( size_t i = 0; i < lemma_len; i++ ) { + for ( Size_t i = 0; i < lemma_len; i++ ) { * ( ( uint16 * ) &lemmas_[offset + 2 + ( i << 1 )] ) = splids[i]; * ( ( char16 * ) &lemmas_[offset + 2 + ( lemma_len << 1 ) + ( i << 1 )] ) = lemma_str[i]; @@ -2051,7 +2051,7 @@ namespace ime_pinyin { // Sort UserDictSearchable searchable; prepare_locate ( &searchable, splids, lemma_len ); - size_t i = 0; + Size_t i = 0; while ( i < off ) { offset = offsets_[i]; uint32 nchar = get_lemma_nchar ( offset ); diff --git a/jni/share/utf16char.cpp b/jni/share/utf16char.cpp index a295a18..9a6bd74 100755..100644 --- a/jni/share/utf16char.cpp +++ b/jni/share/utf16char.cpp @@ -23,13 +23,13 @@ namespace ime_pinyin { extern "C" { #endif - char16 *utf16_strtok ( char16 *utf16_str, size_t *token_size, + char16 *utf16_strtok ( char16 *utf16_str, Size_t *token_size, char16 **utf16_str_next ) { if ( NULL == utf16_str || NULL == token_size || NULL == utf16_str_next ) { return NULL; } // Skip the splitters - size_t pos = 0; + Size_t pos = 0; while ( ( char16 ) ' ' == utf16_str[pos] || ( char16 ) '\n' == utf16_str[pos] || ( char16 ) '\t' == utf16_str[pos] ) { pos++; } @@ -58,7 +58,7 @@ namespace ime_pinyin { { return 0; } int value = 0; int sign = 1; - size_t pos = 0; + Size_t pos = 0; if ( ( char16 ) '-' == utf16_str[pos] ) { sign = -1; pos++; @@ -79,24 +79,24 @@ namespace ime_pinyin { return atof ( char8 ); } - size_t utf16_strlen ( const char16 *utf16_str ) { + Size_t utf16_strlen ( const char16 *utf16_str ) { if ( NULL == utf16_str ) { return 0; } - size_t size = 0; + Size_t size = 0; while ( ( char16 ) '\0' != utf16_str[size] ) { size++; } return size; } int utf16_strcmp ( const char16 *str1, const char16 *str2 ) { - size_t pos = 0; + Size_t pos = 0; while ( str1[pos] == str2[pos] && ( char16 ) '\0' != str1[pos] ) { pos++; } return static_cast<int> ( str1[pos] ) - static_cast<int> ( str2[pos] ); } - int utf16_strncmp ( const char16 *str1, const char16 *str2, size_t size ) { - size_t pos = 0; + int utf16_strncmp ( const char16 *str1, const char16 *str2, Size_t size ) { + Size_t pos = 0; while ( pos < size && str1[pos] == str2[pos] && ( char16 ) '\0' != str1[pos] ) { pos++; } if ( pos == size ) @@ -118,7 +118,7 @@ namespace ime_pinyin { return dst; } - char16 *utf16_strncpy ( char16 *dst, const char16 *src, size_t size ) { + char16 *utf16_strncpy ( char16 *dst, const char16 *src, Size_t size ) { if ( NULL == src || NULL == dst || 0 == size ) { return NULL; } if ( src == dst ) diff --git a/jni/share/utf16reader.cpp b/jni/share/utf16reader.cpp index 3f4e0ca..0b460b6 100755..100644 --- a/jni/share/utf16reader.cpp +++ b/jni/share/utf16reader.cpp @@ -37,7 +37,7 @@ namespace ime_pinyin { } - bool Utf16Reader::open ( const char *filename, size_t buffer_len ) { + bool Utf16Reader::open ( const char *filename, Size_t buffer_len ) { if ( filename == NULL ) { return false; } if ( buffer_len < MIN_BUF_LEN ) @@ -62,10 +62,10 @@ namespace ime_pinyin { return true; } - char16 *Utf16Reader::readline ( char16 *read_buf, size_t max_len ) { + char16 *Utf16Reader::readline ( char16 *read_buf, Size_t max_len ) { if ( NULL == fp_ || NULL == read_buf || 0 == max_len ) { return NULL; } - size_t ret_len = 0; + Size_t ret_len = 0; do { if ( buffer_valid_len_ == 0 ) { buffer_next_pos_ = 0; @@ -78,7 +78,7 @@ namespace ime_pinyin { return read_buf; } } - for ( size_t i = 0; i < buffer_valid_len_; i++ ) { + for ( Size_t i = 0; i < buffer_valid_len_; i++ ) { if ( i == max_len - 1 || buffer_[buffer_next_pos_ + i] == ( char16 ) '\n' ) { if ( ret_len + i > 0 && read_buf[ret_len + i - 1] == ( char16 ) '\r' ) { |