summaryrefslogtreecommitdiff
authorTing Li <ting.li@amlogic.com>2016-11-05 05:53:49 (GMT)
committer Ting Li <ting.li@amlogic.com>2016-11-05 05:58:23 (GMT)
commitc6b0b1840badcde2458119ad9ef4932aa9d8ba4f (patch)
tree622cfcfec5eb74d6db53069e6674937bb762dbd5
parentf95211795aede0f87fb8610ca55b075f77255382 (diff)
downloadRemoteIME-c6b0b1840badcde2458119ad9ef4932aa9d8ba4f.zip
RemoteIME-c6b0b1840badcde2458119ad9ef4932aa9d8ba4f.tar.gz
RemoteIME-c6b0b1840badcde2458119ad9ef4932aa9d8ba4f.tar.bz2
pd#133647 fix bug of load dict at 64 bit env
Change-Id: Iaefb53d0ff7719723e7d42fc509c1733b237471d
Diffstat
-rw-r--r--jni/android/com_droidlogic_inputmethod_remote_PinyinDecoderService.cpp24
-rw-r--r--[-rwxr-xr-x]jni/command/pinyinime_dictbuilder.cpp0
-rw-r--r--[-rwxr-xr-x]jni/include/atomdictbase.h18
-rw-r--r--[-rwxr-xr-x]jni/include/dictbuilder.h58
-rw-r--r--[-rwxr-xr-x]jni/include/dictdef.h32
-rw-r--r--[-rwxr-xr-x]jni/include/dictlist.h26
-rw-r--r--[-rwxr-xr-x]jni/include/dicttrie.h54
-rw-r--r--[-rwxr-xr-x]jni/include/lpicache.h4
-rw-r--r--[-rwxr-xr-x]jni/include/matrixsearch.h84
-rw-r--r--[-rwxr-xr-x]jni/include/mystdlib.h6
-rw-r--r--[-rwxr-xr-x]jni/include/ngram.h12
-rw-r--r--[-rwxr-xr-x]jni/include/pinyinime.h24
-rw-r--r--[-rwxr-xr-x]jni/include/searchutility.h8
-rw-r--r--jni/include/spellingtable.h16
-rw-r--r--jni/include/spellingtrie.h20
-rw-r--r--[-rwxr-xr-x]jni/include/splparser.h0
-rw-r--r--[-rwxr-xr-x]jni/include/sync.h0
-rw-r--r--[-rwxr-xr-x]jni/include/userdict.h28
-rw-r--r--[-rwxr-xr-x]jni/include/utf16char.h10
-rw-r--r--[-rwxr-xr-x]jni/include/utf16reader.h10
-rw-r--r--[-rwxr-xr-x]jni/share/dictbuilder.cpp138
-rw-r--r--[-rwxr-xr-x]jni/share/dictlist.cpp70
-rw-r--r--[-rwxr-xr-x]jni/share/dicttrie.cpp172
-rw-r--r--[-rwxr-xr-x]jni/share/lpicache.cpp8
-rw-r--r--[-rwxr-xr-x]jni/share/matrixsearch.cpp183
-rw-r--r--[-rwxr-xr-x]jni/share/mystdlib.cpp6
-rw-r--r--[-rwxr-xr-x]jni/share/ngram.cpp42
-rw-r--r--[-rwxr-xr-x]jni/share/pinyinime.cpp34
-rw-r--r--[-rwxr-xr-x]jni/share/searchutility.cpp14
-rw-r--r--[-rwxr-xr-x]jni/share/spellingtable.cpp38
-rw-r--r--jni/share/spellingtrie.cpp50
-rw-r--r--[-rwxr-xr-x]jni/share/splparser.cpp0
-rw-r--r--[-rwxr-xr-x]jni/share/sync.cpp0
-rw-r--r--[-rwxr-xr-x]jni/share/userdict.cpp62
-rw-r--r--[-rwxr-xr-x]jni/share/utf16char.cpp18
-rw-r--r--[-rwxr-xr-x]jni/share/utf16reader.cpp8
36 files changed, 640 insertions, 637 deletions
diff --git a/jni/android/com_droidlogic_inputmethod_remote_PinyinDecoderService.cpp b/jni/android/com_droidlogic_inputmethod_remote_PinyinDecoderService.cpp
index ec71725..80789fb 100644
--- a/jni/android/com_droidlogic_inputmethod_remote_PinyinDecoderService.cpp
+++ b/jni/android/com_droidlogic_inputmethod_remote_PinyinDecoderService.cpp
@@ -35,7 +35,7 @@ extern "C" {
static char16 retbuf[RET_BUF_LEN];
static char16 ( *predict_buf ) [kMaxPredictSize + 1] = NULL;
- static size_t predict_len;
+ static Size_t predict_len;
static Sync sync_worker;
@@ -76,8 +76,8 @@ extern "C" {
JNIEXPORT void JNICALL nativeImSetMaxLens ( JNIEnv *env, jclass jclazz,
jint max_sps_len,
jint max_hzs_len ) {
- im_set_max_lens ( static_cast<size_t> ( max_sps_len ),
- static_cast<size_t> ( max_hzs_len ) );
+ im_set_max_lens ( static_cast<Size_t> ( max_sps_len ),
+ static_cast<Size_t> ( max_hzs_len ) );
return;
}
@@ -114,15 +114,15 @@ extern "C" {
JNIEXPORT jstring JNICALL nativeImGetPyStr ( JNIEnv *env, jclass jclazz,
jboolean decoded ) {
- size_t py_len;
+ Size_t py_len;
const char *py = im_get_sps_str ( &py_len ); // py_len gets decoded length
assert ( NULL != py );
if ( !decoded )
{ py_len = strlen ( py ); }
const unsigned short *spl_start;
- size_t len;
+ Size_t len;
len = im_get_spl_start_pos ( spl_start );
- size_t i;
+ Size_t i;
for ( i = 0; i < py_len; i++ )
{ retbuf[i] = py[i]; }
retbuf[i] = ( char16 ) '\0';
@@ -132,7 +132,7 @@ extern "C" {
JNIEXPORT jint JNICALL nativeImGetPyStrLen ( JNIEnv *env, jclass jclazz,
jboolean decoded ) {
- size_t py_len;
+ Size_t py_len;
const char *py = im_get_sps_str ( &py_len ); // py_len gets decoded length
assert ( NULL != py );
if ( !decoded )
@@ -142,14 +142,14 @@ extern "C" {
JNIEXPORT jintArray JNICALL nativeImGetSplStart ( JNIEnv *env, jclass jclazz ) {
const unsigned short *spl_start;
- size_t len;
+ Size_t len;
// There will be len + 1 elements in the buffer when len > 0.
len = im_get_spl_start_pos ( spl_start );
jintArray arr = ( *env ).NewIntArray ( len + 2 );
jint *arr_body = ( *env ).GetIntArrayElements ( arr, 0 );
assert ( NULL != arr_body );
arr_body[0] = len; // element 0 is used to store the length of buffer.
- for ( size_t i = 0; i <= len; i++ )
+ for ( Size_t i = 0; i <= len; i++ )
{ arr_body[i + 1] = spl_start[i]; }
( *env ).ReleaseIntArrayElements ( arr, arr_body, 0 );
return arr;
@@ -194,7 +194,7 @@ extern "C" {
JNIEXPORT jint JNICALL nativeImGetPredictsNum ( JNIEnv *env, jclass clazz,
jstring fixed_str ) {
char16 *fixed_ptr = ( char16 * ) ( *env ).GetStringChars ( fixed_str, NULL );
- size_t fixed_len = ( size_t ) ( *env ).GetStringLength ( fixed_str );
+ Size_t fixed_len = ( Size_t ) ( *env ).GetStringLength ( fixed_str );
char16 fixed_buf[kMaxPredictSize + 1];
if ( fixed_len > kMaxPredictSize ) {
fixed_ptr += fixed_len - kMaxPredictSize;
@@ -210,7 +210,7 @@ extern "C" {
JNIEXPORT jstring JNICALL nativeImGetPredictItem ( JNIEnv *env, jclass clazz,
jint predict_no ) {
jstring retstr;
- if ( predict_no < 0 || ( size_t ) predict_no >= predict_len ) {
+ if ( predict_no < 0 || ( Size_t ) predict_no >= predict_len ) {
retstr = ( *env ).NewString ( ( unsigned short * ) predict_buf[0], 0 );
} else {
retstr = ( *env ).NewString ( ( unsigned short * ) predict_buf[predict_no],
@@ -241,7 +241,7 @@ extern "C" {
JNIEXPORT jint JNICALL nativeSyncPutLemmas ( JNIEnv *env, jclass clazz,
jstring tomerge ) {
char16 *ptr = ( char16 * ) ( *env ).GetStringChars ( tomerge, NULL );
- int len = ( size_t ) ( *env ).GetStringLength ( tomerge );
+ int len = ( Size_t ) ( *env ).GetStringLength ( tomerge );
int added = sync_worker.put_lemmas ( ptr, len );
( *env ).ReleaseStringChars ( tomerge, ptr );
return added;
diff --git a/jni/command/pinyinime_dictbuilder.cpp b/jni/command/pinyinime_dictbuilder.cpp
index 40fd7d3..40fd7d3 100755..100644
--- a/jni/command/pinyinime_dictbuilder.cpp
+++ b/jni/command/pinyinime_dictbuilder.cpp
diff --git a/jni/include/atomdictbase.h b/jni/include/atomdictbase.h
index 27a39be..5fc7a40 100755..100644
--- a/jni/include/atomdictbase.h
+++ b/jni/include/atomdictbase.h
@@ -76,7 +76,7 @@ namespace ime_pinyin {
*
* @return The total number of lemmas.
*/
- virtual size_t number_of_lemmas() = 0;
+ virtual Size_t number_of_lemmas() = 0;
/**
* This function is called by the decoder when user deletes a character from
@@ -124,7 +124,7 @@ namespace ime_pinyin {
virtual MileStoneHandle extend_dict ( MileStoneHandle from_handle,
const DictExtPara *dep,
LmaPsbItem *lpi_items,
- size_t lpi_max, size_t *lpi_num ) = 0;
+ Size_t lpi_max, Size_t *lpi_num ) = 0;
/**
* Get lemma items with scores according to a spelling id stream.
@@ -136,8 +136,8 @@ namespace ime_pinyin {
* @param lpi_max The maximum size of the buffer to return result.
* @return The number of matched items which have been filled in to lpi_items.
*/
- virtual size_t get_lpis ( const uint16 *splid_str, uint16 splid_str_len,
- LmaPsbItem *lpi_items, size_t lpi_max ) = 0;
+ virtual Size_t get_lpis ( const uint16 *splid_str, uint16 splid_str_len,
+ LmaPsbItem *lpi_items, Size_t lpi_max ) = 0;
/**
* Get a lemma string (The Chinese string) by the given lemma id.
@@ -178,9 +178,9 @@ namespace ime_pinyin {
* from other atom dictionaries. An atom dictionary can just ignore it.
* @return The number of prediction result from this atom dictionary.
*/
- virtual size_t predict ( const char16 last_hzs[], uint16 hzs_len,
- NPredictItem *npre_items, size_t npre_max,
- size_t b4_used ) = 0;
+ virtual Size_t predict ( const char16 last_hzs[], uint16 hzs_len,
+ NPredictItem *npre_items, Size_t npre_max,
+ Size_t b4_used ) = 0;
/**
* Add a lemma to the dictionary. If the dictionary allows to add new
@@ -249,14 +249,14 @@ namespace ime_pinyin {
*
* @return The total occurring count of this atom dictionary.
*/
- virtual size_t get_total_lemma_count() = 0;
+ virtual Size_t get_total_lemma_count() = 0;
/**
* Set the total occurring count of other atom dictionaries.
*
* @param count The total occurring count of other atom dictionaries.
*/
- virtual void set_total_lemma_count_of_others ( size_t count ) = 0;
+ virtual void set_total_lemma_count_of_others ( Size_t count ) = 0;
/**
* Notify this atom dictionary to flush the cached data to persistent storage
diff --git a/jni/include/dictbuilder.h b/jni/include/dictbuilder.h
index aa7d4e4..c867db1 100755..100644
--- a/jni/include/dictbuilder.h
+++ b/jni/include/dictbuilder.h
@@ -37,12 +37,12 @@ namespace ime_pinyin {
private:
// The raw lemma array buffer.
LemmaEntry *lemma_arr_;
- size_t lemma_num_;
+ Size_t lemma_num_;
// Used to store all possible single char items.
// Two items may have the same Hanzi while their spelling ids are different.
SingleCharItem *scis_;
- size_t scis_num_;
+ Size_t scis_num_;
// In the tree, root's level is -1.
// Lemma nodes for root, and level 0
@@ -52,38 +52,38 @@ namespace ime_pinyin {
LmaNodeGE1 *lma_nodes_ge1_;
// Number of used lemma nodes
- size_t lma_nds_used_num_le0_;
- size_t lma_nds_used_num_ge1_;
+ Size_t lma_nds_used_num_le0_;
+ Size_t lma_nds_used_num_ge1_;
// Used to store homophonies' ids.
LemmaIdType *homo_idx_buf_;
// Number of homophonies each of which only contains one Chinese character.
- size_t homo_idx_num_eq1_;
+ Size_t homo_idx_num_eq1_;
// Number of homophonies each of which contains more than one character.
- size_t homo_idx_num_gt1_;
+ Size_t homo_idx_num_gt1_;
// The items with highest scores.
LemmaEntry *top_lmas_;
- size_t top_lmas_num_;
+ Size_t top_lmas_num_;
SpellingTable *spl_table_;
SpellingParser *spl_parser_;
#ifdef ___DO_STATISTICS___
- size_t max_sonbuf_len_[kMaxLemmaSize];
- size_t max_homobuf_len_[kMaxLemmaSize];
+ Size_t max_sonbuf_len_[kMaxLemmaSize];
+ Size_t max_homobuf_len_[kMaxLemmaSize];
- size_t total_son_num_[kMaxLemmaSize];
- size_t total_node_hasson_[kMaxLemmaSize];
- size_t total_sonbuf_num_[kMaxLemmaSize];
- size_t total_sonbuf_allnoson_[kMaxLemmaSize];
- size_t total_node_in_sonbuf_allnoson_[kMaxLemmaSize];
- size_t total_homo_num_[kMaxLemmaSize];
+ Size_t total_son_num_[kMaxLemmaSize];
+ Size_t total_node_hasson_[kMaxLemmaSize];
+ Size_t total_sonbuf_num_[kMaxLemmaSize];
+ Size_t total_sonbuf_allnoson_[kMaxLemmaSize];
+ Size_t total_node_in_sonbuf_allnoson_[kMaxLemmaSize];
+ Size_t total_homo_num_[kMaxLemmaSize];
- size_t sonbufs_num1_; // Number of son buffer with only 1 son
- size_t sonbufs_numgt1_; // Number of son buffer with more 1 son;
+ Size_t sonbufs_num1_; // Number of son buffer with only 1 son
+ Size_t sonbufs_numgt1_; // Number of son buffer with more 1 son;
- size_t total_lma_node_num_;
+ Size_t total_lma_node_num_;
void stat_init();
void stat_print();
@@ -106,10 +106,10 @@ namespace ime_pinyin {
void id_to_charbuf ( unsigned char *buf, LemmaIdType id );
// Update the offset of sons for a node.
- void set_son_offset ( LmaNodeGE1 *node, size_t offset );
+ void set_son_offset ( LmaNodeGE1 *node, Size_t offset );
// Update the offset of homophonies' ids for a node.
- void set_homo_id_buf_offset ( LmaNodeGE1 *node, size_t offset );
+ void set_homo_id_buf_offset ( LmaNodeGE1 *node, Size_t offset );
// Format a spelling string.
void format_spelling_str ( char *spl_str );
@@ -126,41 +126,41 @@ namespace ime_pinyin {
// lemma buffer lemma_arr_.
// This function should be called after the lemma array is ready.
// Return the number of unique SingleCharItem elements.
- size_t build_scis();
+ Size_t build_scis();
// Construct a subtree using a subset of the spelling array (from
// item_start to item_end)
// parent is the parent node to update the necessary information
// parent can be a member of LmaNodeLE0 or LmaNodeGE1
bool construct_subset ( void *parent, LemmaEntry *lemma_arr,
- size_t item_start, size_t item_end, size_t level );
+ Size_t item_start, Size_t item_end, Size_t level );
// Read valid Chinese Hanzis from the given file.
// num is used to return number of chars.
// The return buffer is sorted and caller needs to free the returned buffer.
- char16 *read_valid_hanzis ( const char *fn_validhzs, size_t *num );
+ char16 *read_valid_hanzis ( const char *fn_validhzs, Size_t *num );
// Read a raw dictionary. max_item is the maximum number of items. If there
// are more items in the dictionary, only the first max_item will be read.
// Returned value is the number of items successfully read from the file.
- size_t read_raw_dict ( const char *fn_raw, const char *fn_validhzs,
- size_t max_item );
+ Size_t read_raw_dict ( const char *fn_raw, const char *fn_validhzs,
+ Size_t max_item );
// Try to find if a character is in hzs buffer.
- bool hz_in_hanzis_list ( const char16 *hzs, size_t hzs_len, char16 hz );
+ bool hz_in_hanzis_list ( const char16 *hzs, Size_t hzs_len, char16 hz );
// Try to find if all characters in str are in hzs buffer.
- bool str_in_hanzis_list ( const char16 *hzs, size_t hzs_len,
- const char16 *str, size_t str_len );
+ bool str_in_hanzis_list ( const char16 *hzs, Size_t hzs_len,
+ const char16 *str, Size_t str_len );
// Get the lemmas with the highest scores.
void get_top_lemmas();
// Allocate resource to build dictionary.
// lma_num is the number of items to be loaded
- bool alloc_resource ( size_t lma_num );
+ bool alloc_resource ( Size_t lma_num );
// Free resource.
void free_resource();
diff --git a/jni/include/dictdef.h b/jni/include/dictdef.h
index 25b1e39..7ea8e76 100755..100644
--- a/jni/include/dictdef.h
+++ b/jni/include/dictdef.h
@@ -40,14 +40,14 @@ namespace ime_pinyin {
const bool kPrintDebug2 = false;
// The max length of a lemma.
- const size_t kMaxLemmaSize = 8;
+ const Size_t kMaxLemmaSize = 8;
// The max length of a Pinyin (spelling).
- const size_t kMaxPinyinSize = 6;
+ const Size_t kMaxPinyinSize = 6;
// The number of half spelling ids. For Chinese Pinyin, there are 30 half ids.
// See SpellingTrie.h for details.
- const size_t kHalfSpellingIdNum = 29;
+ const Size_t kHalfSpellingIdNum = 29;
// The maximum number of full spellings. For Chinese Pinyin, there are only
// about 410 spellings.
@@ -55,26 +55,26 @@ namespace ime_pinyin {
// other structures like SpellingNode, to make sure that a spelling id can be
// stored.
// -1 is because that 0 is never used.
- const size_t kMaxSpellingNum = 512 - kHalfSpellingIdNum - 1;
- const size_t kMaxSearchSteps = 40;
+ const Size_t kMaxSpellingNum = 512 - kHalfSpellingIdNum - 1;
+ const Size_t kMaxSearchSteps = 40;
// One character predicts its following characters.
- const size_t kMaxPredictSize = ( kMaxLemmaSize - 1 );
+ const Size_t kMaxPredictSize = ( kMaxLemmaSize - 1 );
- // LemmaIdType must always be size_t.
- typedef size_t LemmaIdType;
- const size_t kLemmaIdSize = 3; // Actually, a Id occupies 3 bytes in storage.
- const size_t kLemmaIdComposing = 0xffffff;
+ // LemmaIdType must always be Size_t.
+ typedef Size_t LemmaIdType;
+ const Size_t kLemmaIdSize = 3; // Actually, a Id occupies 3 bytes in storage.
+ const Size_t kLemmaIdComposing = 0xffffff;
typedef uint16 LmaScoreType;
typedef uint16 KeyScoreType;
// Number of items with highest score are kept for prediction purpose.
- const size_t kTopScoreLemmaNum = 10;
+ const Size_t kTopScoreLemmaNum = 10;
- const size_t kMaxPredictNumByGt3 = 1;
- const size_t kMaxPredictNumBy3 = 2;
- const size_t kMaxPredictNumBy2 = 2;
+ const Size_t kMaxPredictNumByGt3 = 1;
+ const Size_t kMaxPredictNumBy3 = 2;
+ const Size_t kMaxPredictNumBy2 = 2;
// The last lemma id (included) for the system dictionary. The system
// dictionary's ids always start from 1.
@@ -109,8 +109,8 @@ namespace ime_pinyin {
* A node occupies 16 bytes. So, in total, less than 16 * 500 = 8K
*/
struct LmaNodeLE0 {
- size_t son_1st_off;
- size_t homo_idx_buf_off;
+ Size_t son_1st_off;
+ Size_t homo_idx_buf_off;
uint16 spl_idx;
uint16 num_of_son;
uint16 num_of_homo;
diff --git a/jni/include/dictlist.h b/jni/include/dictlist.h
index a2d78ac..f283a05 100755..100644
--- a/jni/include/dictlist.h
+++ b/jni/include/dictlist.h
@@ -33,7 +33,7 @@ namespace ime_pinyin {
const SpellingTrie *spl_trie_;
// Number of SingleCharItem. The first is blank, because id 0 is invalid.
- size_t scis_num_;
+ Size_t scis_num_;
char16 *scis_hz_;
SpellingId *scis_splid_;
@@ -42,25 +42,25 @@ namespace ime_pinyin {
// Starting position of those words whose lengths are i+1, counted in
// char16
- size_t start_pos_[kMaxLemmaSize + 1];
+ Size_t start_pos_[kMaxLemmaSize + 1];
- size_t start_id_[kMaxLemmaSize + 1];
+ Size_t start_id_[kMaxLemmaSize + 1];
int ( *cmp_func_[kMaxLemmaSize] ) ( const void *, const void * );
- bool alloc_resource ( size_t buf_size, size_t scim_num );
+ bool alloc_resource ( Size_t buf_size, Size_t scim_num );
void free_resource();
#ifdef ___BUILD_MODEL___
// Calculate the requested memory, including the start_pos[] buffer.
- size_t calculate_size ( const LemmaEntry *lemma_arr, size_t lemma_num );
+ Size_t calculate_size ( const LemmaEntry *lemma_arr, Size_t lemma_num );
- void fill_scis ( const SingleCharItem *scis, size_t scis_num );
+ void fill_scis ( const SingleCharItem *scis, Size_t scis_num );
// Copy the related content to the inner buffer
// It should be called after calculate_size()
- void fill_list ( const LemmaEntry *lemma_arr, size_t lemma_num );
+ void fill_list ( const LemmaEntry *lemma_arr, Size_t lemma_num );
// Find the starting position for the buffer of those 2-character Chinese word
// whose first character is the given Chinese character.
@@ -71,7 +71,7 @@ namespace ime_pinyin {
// word_len. The given parameter cmp_func decides how many characters from
// beginning will be used to compare.
char16 *find_pos_startedbyhzs ( const char16 last_hzs[],
- size_t word_Len,
+ Size_t word_Len,
int ( *cmp_func ) ( const void *, const void * ) );
public:
@@ -86,8 +86,8 @@ namespace ime_pinyin {
// Init the list from the LemmaEntry array.
// lemma_arr should have been sorted by the hanzi_str, and have been given
// ids from 1
- bool init_list ( const SingleCharItem *scis, size_t scis_num,
- const LemmaEntry *lemma_arr, size_t lemma_num );
+ bool init_list ( const SingleCharItem *scis, Size_t scis_num,
+ const LemmaEntry *lemma_arr, Size_t lemma_num );
#endif
// Get the hanzi string for the given id
@@ -104,9 +104,9 @@ namespace ime_pinyin {
// buf_len specifies the buffer length.
// b4_used specifies how many items before predict_buf have been used.
// Returned value is the number of newly added items.
- size_t predict ( const char16 last_hzs[], uint16 hzs_len,
- NPredictItem *npre_items, size_t npre_max,
- size_t b4_used );
+ Size_t predict ( const char16 last_hzs[], uint16 hzs_len,
+ NPredictItem *npre_items, Size_t npre_max,
+ Size_t b4_used );
// If half_splid is a valid half spelling id, return those full spelling
// ids which share this half id.
diff --git a/jni/include/dicttrie.h b/jni/include/dicttrie.h
index 2886d22..481e0ac 100755..100644
--- a/jni/include/dicttrie.h
+++ b/jni/include/dicttrie.h
@@ -28,8 +28,8 @@ namespace ime_pinyin {
class DictTrie : AtomDictBase {
private:
typedef struct ParsingMark {
- size_t node_offset: 24;
- size_t node_num: 8; // Number of nodes with this spelling id given
+ Size_t node_offset: 24;
+ Size_t node_num: 8; // Number of nodes with this spelling id given
// by spl_id. If spl_id is a Shengmu, for nodes
// in the first layer of DictTrie, it equals to
// SpellingTrie::shm2full_num(); but for those
@@ -73,15 +73,15 @@ namespace ime_pinyin {
// root_[splid_le0_index_[splid - kFullSplIdStart]]
uint16 *splid_le0_index_;
- size_t lma_node_num_le0_;
- size_t lma_node_num_ge1_;
+ Size_t lma_node_num_le0_;
+ Size_t lma_node_num_ge1_;
// The first part is for homophonies, and the last top_lma_num_ items are
// lemmas with highest scores.
unsigned char *lma_idx_buf_;
- size_t lma_idx_buf_len_; // The total size of lma_idx_buf_ in byte.
- size_t total_lma_num_; // Total number of lemmas in this dictionary.
- size_t top_lmas_num_; // Number of lemma with highest scores.
+ Size_t lma_idx_buf_len_; // The total size of lma_idx_buf_ in byte.
+ Size_t total_lma_num_; // Total number of lemmas in this dictionary.
+ Size_t top_lmas_num_; // Number of lemma with highest scores.
// Parsing mark list used to mark the detailed extended statuses.
ParsingMark *parsing_marks_;
@@ -95,13 +95,13 @@ namespace ime_pinyin {
MileStoneHandle mile_stones_pos_;
// Get the offset of sons for a node.
- inline size_t get_son_offset ( const LmaNodeGE1 *node );
+ inline Size_t get_son_offset ( const LmaNodeGE1 *node );
// Get the offset of homophonies' ids for a node.
- inline size_t get_homo_idx_buf_offset ( const LmaNodeGE1 *node );
+ inline Size_t get_homo_idx_buf_offset ( const LmaNodeGE1 *node );
// Get the lemma id by the offset.
- inline LemmaIdType get_lemma_id ( size_t id_offset );
+ inline LemmaIdType get_lemma_id ( Size_t id_offset );
void free_resource ( bool free_dict_list );
@@ -110,31 +110,31 @@ namespace ime_pinyin {
// Given a LmaNodeLE0 node, extract the lemmas specified by it, and fill
// them into the lpi_items buffer.
// This function is called by the search engine.
- size_t fill_lpi_buffer ( LmaPsbItem lpi_items[], size_t max_size,
+ Size_t fill_lpi_buffer ( LmaPsbItem lpi_items[], Size_t max_size,
LmaNodeLE0 *node );
// Given a LmaNodeGE1 node, extract the lemmas specified by it, and fill
// them into the lpi_items buffer.
// This function is called by inner functions extend_dict0(), extend_dict1()
// and extend_dict2().
- size_t fill_lpi_buffer ( LmaPsbItem lpi_items[], size_t max_size,
- size_t homo_buf_off, LmaNodeGE1 *node,
+ Size_t fill_lpi_buffer ( LmaPsbItem lpi_items[], Size_t max_size,
+ Size_t homo_buf_off, LmaNodeGE1 *node,
uint16 lma_len );
// Extend in the trie from level 0.
MileStoneHandle extend_dict0 ( MileStoneHandle from_handle,
const DictExtPara *dep, LmaPsbItem *lpi_items,
- size_t lpi_max, size_t *lpi_num );
+ Size_t lpi_max, Size_t *lpi_num );
// Extend in the trie from level 1.
MileStoneHandle extend_dict1 ( MileStoneHandle from_handle,
const DictExtPara *dep, LmaPsbItem *lpi_items,
- size_t lpi_max, size_t *lpi_num );
+ Size_t lpi_max, Size_t *lpi_num );
// Extend in the trie from level 2.
MileStoneHandle extend_dict2 ( MileStoneHandle from_handle,
const DictExtPara *dep, LmaPsbItem *lpi_items,
- size_t lpi_max, size_t *lpi_num );
+ Size_t lpi_max, Size_t *lpi_num );
// Try to extend the given spelling id buffer, and if the given id_lemma can
// be successfully gotten, return true;
@@ -179,26 +179,26 @@ namespace ime_pinyin {
bool load_dict_fd ( int sys_fd, long start_offset, long length,
LemmaIdType start_id, LemmaIdType end_id );
bool close_dict() {return true;}
- size_t number_of_lemmas() {return 0;}
+ Size_t number_of_lemmas() {return 0;}
void reset_milestones ( uint16 from_step, MileStoneHandle from_handle );
MileStoneHandle extend_dict ( MileStoneHandle from_handle,
const DictExtPara *dep,
LmaPsbItem *lpi_items,
- size_t lpi_max, size_t *lpi_num );
+ Size_t lpi_max, Size_t *lpi_num );
- size_t get_lpis ( const uint16 *splid_str, uint16 splid_str_len,
- LmaPsbItem *lpi_items, size_t lpi_max );
+ Size_t get_lpis ( const uint16 *splid_str, uint16 splid_str_len,
+ LmaPsbItem *lpi_items, Size_t lpi_max );
uint16 get_lemma_str ( LemmaIdType id_lemma, char16 *str_buf, uint16 str_max );
uint16 get_lemma_splids ( LemmaIdType id_lemma, uint16 *splids,
uint16 splids_max, bool arg_valid );
- size_t predict ( const char16 *last_hzs, uint16 hzs_len,
- NPredictItem *npre_items, size_t npre_max,
- size_t b4_used );
+ Size_t predict ( const char16 *last_hzs, uint16 hzs_len,
+ NPredictItem *npre_items, Size_t npre_max,
+ Size_t b4_used );
LemmaIdType put_lemma ( char16 lemma_str[], uint16 splids[],
uint16 lemma_len, uint16 count ) {return 0;}
@@ -216,8 +216,8 @@ namespace ime_pinyin {
bool remove_lemma ( LemmaIdType lemma_id ) {return false;}
- size_t get_total_lemma_count() {return 0;}
- void set_total_lemma_count_of_others ( size_t count );
+ Size_t get_total_lemma_count() {return 0;}
+ void set_total_lemma_count_of_others ( Size_t count );
void flush_cache() {}
@@ -225,8 +225,8 @@ namespace ime_pinyin {
// Fill the lemmas with highest scores to the prediction buffer.
// his_len is the history length to fill in the prediction buffer.
- size_t predict_top_lmas ( size_t his_len, NPredictItem *npre_items,
- size_t npre_max, size_t b4_used );
+ Size_t predict_top_lmas ( Size_t his_len, NPredictItem *npre_items,
+ Size_t npre_max, Size_t b4_used );
};
}
diff --git a/jni/include/lpicache.h b/jni/include/lpicache.h
index 6c90bf1..4a0ecf0 100755..100644
--- a/jni/include/lpicache.h
+++ b/jni/include/lpicache.h
@@ -48,13 +48,13 @@ namespace ime_pinyin {
// maximum length of the cache buffer.
// Note: splid must be a half id, and lpi_items must be not NULL. The
// caller of this function should guarantee this.
- size_t put_cache ( uint16 splid, LmaPsbItem lpi_items[], size_t lpi_num );
+ Size_t put_cache ( uint16 splid, LmaPsbItem lpi_items[], Size_t lpi_num );
// Get the cached list for the given half id.
// Return the length of the cached buffer.
// Note: splid must be a half id, and lpi_items must be not NULL. The
// caller of this function should guarantee this.
- size_t get_cache ( uint16 splid, LmaPsbItem lpi_items[], size_t lpi_max );
+ Size_t get_cache ( uint16 splid, LmaPsbItem lpi_items[], Size_t lpi_max );
};
} // namespace
diff --git a/jni/include/matrixsearch.h b/jni/include/matrixsearch.h
index 4e882f8..bd4d02f 100755..100644
--- a/jni/include/matrixsearch.h
+++ b/jni/include/matrixsearch.h
@@ -26,7 +26,7 @@
namespace ime_pinyin {
- static const size_t kMaxRowNum = kMaxSearchSteps;
+ static const Size_t kMaxRowNum = kMaxSearchSteps;
typedef struct {
// MileStoneHandle objects for the system and user dictionaries.
@@ -118,7 +118,7 @@ namespace ime_pinyin {
uint16 spl_start[kMaxRowNum];
char16 chn_str[kMaxRowNum]; // Chinese string.
uint16 sublma_start[kMaxRowNum]; // Counted in Chinese characters.
- size_t sublma_num;
+ Size_t sublma_num;
uint16 length; // Counted in Chinese characters.
} ComposingPhrase, *TComposingPhrase;
@@ -138,20 +138,20 @@ namespace ime_pinyin {
static const bool kOnlyUserDictPredict = false;
// The maximum buffer to store LmaPsbItems.
- static const size_t kMaxLmaPsbItems = 1450;
+ static const Size_t kMaxLmaPsbItems = 1450;
// How many rows for each step.
- static const size_t kMaxNodeARow = 5;
+ static const Size_t kMaxNodeARow = 5;
// The maximum length of the sentence candidates counted in chinese
// characters
- static const size_t kMaxSentenceLength = 16;
+ static const Size_t kMaxSentenceLength = 16;
// The size of the matrix node pool.
- static const size_t kMtrxNdPoolSize = 200;
+ static const Size_t kMtrxNdPoolSize = 200;
// The size of the DMI node pool.
- static const size_t kDmiPoolSize = 800;
+ static const Size_t kDmiPoolSize = 800;
// Used to indicate whether this object has been initialized.
bool inited_;
@@ -177,19 +177,19 @@ namespace ime_pinyin {
SpellingParser *spl_parser_;
// The maximum allowed length of spelling string (such as a Pinyin string).
- size_t max_sps_len_;
+ Size_t max_sps_len_;
// The maximum allowed length of a result Chinese string.
- size_t max_hzs_len_;
+ Size_t max_hzs_len_;
// Pinyin string. Max length: kMaxRowNum - 1
char pys_[kMaxRowNum];
// The length of the string that has been decoded successfully.
- size_t pys_decoded_len_;
+ Size_t pys_decoded_len_;
// Shared buffer for multiple purposes.
- size_t *share_buf_;
+ Size_t *share_buf_;
MatrixNode *mtrx_nd_pool_;
PoolPosType mtrx_nd_pool_used_; // How many nodes used in the pool
@@ -201,13 +201,13 @@ namespace ime_pinyin {
DictExtPara *dep_; // Parameter used to extend DMI nodes.
NPredictItem *npre_items_; // Used to do prediction
- size_t npre_items_len_;
+ Size_t npre_items_len_;
// The starting positions and lemma ids for the full sentence candidate.
- size_t lma_id_num_;
+ Size_t lma_id_num_;
uint16 lma_start_[kMaxRowNum]; // Counted in spelling ids.
LemmaIdType lma_id_[kMaxRowNum];
- size_t fixed_lmas_;
+ Size_t fixed_lmas_;
// If fixed_lmas_ is bigger than i, Element i is used to indicate whether
// the i'th lemma id in lma_id_ is the first candidate for that step.
@@ -227,11 +227,11 @@ namespace ime_pinyin {
// The starting positions and spelling ids for the first full sentence
// candidate.
- size_t spl_id_num_; // Number of splling ids
+ Size_t spl_id_num_; // Number of splling ids
uint16 spl_start_[kMaxRowNum]; // Starting positions
uint16 spl_id_[kMaxRowNum]; // Spelling ids
// Used to remember the last fixed position, counted in Hanzi.
- size_t fixed_hzs_;
+ Size_t fixed_hzs_;
// Lemma Items with possibility score, two purposes:
// 1. In Viterbi decoding, this buffer is used to get all possible candidates
@@ -239,7 +239,7 @@ namespace ime_pinyin {
// 2. When the search is done, this buffer is used to get candidates from the
// first un-fixed step and show them to the user.
LmaPsbItem lpi_items_[kMaxLmaPsbItems];
- size_t lpi_total_;
+ Size_t lpi_total_;
// Assign the pointers with NULL. The caller makes sure that all pointers are
// not valid before calling it. This function only will be called in the
@@ -262,11 +262,11 @@ namespace ime_pinyin {
// The DMI nodes will be kept.
//
// Note: this function should not destroy content of pys_.
- bool reset_search ( size_t ch_pos, bool clear_fixed_this_step,
+ bool reset_search ( Size_t ch_pos, bool clear_fixed_this_step,
bool clear_dmi_this_step, bool clear_mtrx_this_step );
// Delete a part of the content in pys_.
- void del_in_pys ( size_t start, size_t len );
+ void del_in_pys ( Size_t start, Size_t len );
// Delete a spelling id and its corresponding Chinese character, and merge
// the fixed lemmas into the composing phrase.
@@ -274,7 +274,7 @@ namespace ime_pinyin {
// This function will update the lemma and spelling segmentation information.
// The caller guarantees that fixed_lmas_ > 0 and del_spl_pos is within
// the fixed lemmas.
- void merge_fixed_lmas ( size_t del_spl_pos );
+ void merge_fixed_lmas ( Size_t del_spl_pos );
// Get spelling start positions and ids. The result will be stored in
// spl_id_num_, spl_start_[], spl_id_[].
@@ -286,8 +286,8 @@ namespace ime_pinyin {
// If pfullsent is not NULL, means the full sentence candidate may be the
// same with the coming lemma string, if so, remove that lemma.
// The result is sorted in descendant order by the frequency score.
- size_t get_lpis ( const uint16 *splid_str, size_t splid_str_len,
- LmaPsbItem *lma_buf, size_t max_lma_buf,
+ Size_t get_lpis ( const uint16 *splid_str, Size_t splid_str_len,
+ LmaPsbItem *lma_buf, Size_t max_lma_buf,
const char16 *pfullsent, bool sort_by_psb );
uint16 get_lemma_str ( LemmaIdType id_lemma, char16 *str_buf, uint16 str_max );
@@ -315,23 +315,23 @@ namespace ime_pinyin {
// calling this function if necessary.
//
// The caller should guarantees that NULL != dep.
- size_t extend_dmi ( DictExtPara *dep, DictMatchInfo *dmi_s );
+ Size_t extend_dmi ( DictExtPara *dep, DictMatchInfo *dmi_s );
// Extend dmi for the composing phrase.
- size_t extend_dmi_c ( DictExtPara *dep, DictMatchInfo *dmi_s );
+ Size_t extend_dmi_c ( DictExtPara *dep, DictMatchInfo *dmi_s );
// Extend a MatrixNode with the given LmaPsbItem list.
// res_row is the destination row number.
// This function does not change mtrx_nd_pool_used_. Please change it after
// calling this function if necessary.
// return 0 always.
- size_t extend_mtrx_nd ( MatrixNode *mtrx_nd, LmaPsbItem lpi_items[],
- size_t lpi_num, PoolPosType dmi_fr, size_t res_row );
+ Size_t extend_mtrx_nd ( MatrixNode *mtrx_nd, LmaPsbItem lpi_items[],
+ Size_t lpi_num, PoolPosType dmi_fr, Size_t res_row );
// Try to find a dmi node at step_to position, and the found dmi node should
// match the given spelling id strings.
- PoolPosType match_dmi ( size_t step_to, uint16 spl_ids[], uint16 spl_id_num );
+ PoolPosType match_dmi ( Size_t step_to, uint16 spl_ids[], uint16 spl_id_num );
bool add_char ( char ch );
bool prepare_add_char ( char ch );
@@ -352,9 +352,9 @@ namespace ime_pinyin {
bool splid_end_split, unsigned char splstr_len,
unsigned char all_full_id );
- size_t inner_predict ( const char16 fixed_scis_ids[], uint16 scis_num,
+ Size_t inner_predict ( const char16 fixed_scis_ids[], uint16 scis_num,
char16 predict_buf[][kMaxPredictSize + 1],
- size_t buf_len );
+ Size_t buf_len );
// Add the first candidate to the user dictionary.
bool try_add_cand0_to_userdict();
@@ -380,7 +380,7 @@ namespace ime_pinyin {
bool init_fd ( int sys_fd, long start_offset, long length,
const char *fn_usr_dict );
- void set_max_lens ( size_t max_sps_len, size_t max_hzs_len );
+ void set_max_lens ( Size_t max_sps_len, Size_t max_hzs_len );
void close();
@@ -396,7 +396,7 @@ namespace ime_pinyin {
// Search a Pinyin string.
// Return value is the position successfully parsed.
- size_t search ( const char *py, size_t py_len );
+ Size_t search ( const char *py, Size_t py_len );
// Used to delete something in the Pinyin string kept by the engine, and do
// a re-search.
@@ -411,45 +411,45 @@ namespace ime_pinyin {
// If is_pos_in_splid is false, and pos-th character is in the range for the
// fixed lemmas or composing string, this function will do nothing and just
// return the result of the previous search.
- size_t delsearch ( size_t pos, bool is_pos_in_splid,
+ Size_t delsearch ( Size_t pos, bool is_pos_in_splid,
bool clear_fixed_this_step );
// Get the number of candidates, called after search().
- size_t get_candidate_num();
+ Size_t get_candidate_num();
// Get the Pinyin string stored by the engine.
// *decoded_len returns the length of the successfully decoded string.
- const char *get_pystr ( size_t *decoded_len );
+ const char *get_pystr ( Size_t *decoded_len );
// Get the spelling boundaries for the first sentence candidate.
// Number of spellings will be returned. The number of valid elements in
// spl_start is one more than the return value because the last one is used
// to indicate the beginning of the next un-input spelling.
// For a Pinyin "women", the returned value is 2, spl_start is [0, 2, 5] .
- size_t get_spl_start ( const uint16 *&spl_start );
+ Size_t get_spl_start ( const uint16 *&spl_start );
// Get one candidate string. If full sentence candidate is available, it will
// be the first one.
- char16 *get_candidate ( size_t cand_id, char16 *cand_str, size_t max_len );
+ char16 *get_candidate ( Size_t cand_id, char16 *cand_str, Size_t max_len );
// Get the first candidate, which is a "full sentence".
// If retstr_len is not NULL, it will be used to return the string length.
// If only_unfixed is true, only unfixed part will be fetched.
- char16 *get_candidate0 ( char16 *cand_str, size_t max_len,
+ char16 *get_candidate0 ( char16 *cand_str, Size_t max_len,
uint16 *retstr_len, bool only_unfixed );
// Choose a candidate. The decoder will do a search after the fixed position.
- size_t choose ( size_t cand_id );
+ Size_t choose ( Size_t cand_id );
// Cancel the last choosing operation, and return the new number of choices.
- size_t cancel_last_choice();
+ Size_t cancel_last_choice();
// Get the length of fixed Hanzis.
- size_t get_fixedlen();
+ Size_t get_fixedlen();
- size_t get_predicts ( const char16 fixed_buf[],
+ Size_t get_predicts ( const char16 fixed_buf[],
char16 predict_buf[][kMaxPredictSize + 1],
- size_t buf_len );
+ Size_t buf_len );
};
}
diff --git a/jni/include/mystdlib.h b/jni/include/mystdlib.h
index 976d208..ed08283 100755..100644
--- a/jni/include/mystdlib.h
+++ b/jni/include/mystdlib.h
@@ -18,14 +18,14 @@
#define PINYINIME_INCLUDE_MYSTDLIB_H__
#include <stdlib.h>
-
+#include "./utf16char.h"
namespace ime_pinyin {
- void myqsort ( void *p, size_t n, size_t es,
+ void myqsort ( void *p, Size_t n, Size_t es,
int ( *cmp ) ( const void *, const void * ) );
void *mybsearch ( const void *key, const void *base,
- size_t nmemb, size_t size,
+ Size_t nmemb, Size_t size,
int ( *compar ) ( const void *, const void * ) );
}
diff --git a/jni/include/ngram.h b/jni/include/ngram.h
index a740b7e..825b3a9 100755..100644
--- a/jni/include/ngram.h
+++ b/jni/include/ngram.h
@@ -25,7 +25,7 @@ namespace ime_pinyin {
typedef unsigned char CODEBOOK_TYPE;
- static const size_t kCodeBookSize = 256;
+ static const Size_t kCodeBookSize = 256;
class NGram {
public:
@@ -42,16 +42,16 @@ namespace ime_pinyin {
// total frequency changes.
// In this version, frequencies of system lemmas are fixed. We are considering
// making them changeable in the next version.
- static const size_t kSysDictTotalFreq = 100000000;
+ static const Size_t kSysDictTotalFreq = 100000000;
private:
static NGram *instance_;
bool initialized_;
- size_t idx_num_;
+ Size_t idx_num_;
- size_t total_freq_none_sys_;
+ Size_t total_freq_none_sys_;
// Score compensation for system dictionary lemmas.
// Because after user adds some user lemmas, the total frequency changes, and
@@ -74,7 +74,7 @@ namespace ime_pinyin {
bool load_ngram ( FILE *fp );
// Set the total frequency of all none system dictionaries.
- void set_total_freq_none_sys ( size_t freq_none_sys );
+ void set_total_freq_none_sys ( Size_t freq_none_sys );
float get_uni_psb ( LemmaIdType lma_id );
@@ -87,7 +87,7 @@ namespace ime_pinyin {
#ifdef ___BUILD_MODEL___
// For constructing the unigram mode model.
- bool build_unigram ( LemmaEntry *lemma_arr, size_t num,
+ bool build_unigram ( LemmaEntry *lemma_arr, Size_t num,
LemmaIdType next_idx_unused );
#endif
};
diff --git a/jni/include/pinyinime.h b/jni/include/pinyinime.h
index bc2844b..f6562e4 100755..100644
--- a/jni/include/pinyinime.h
+++ b/jni/include/pinyinime.h
@@ -66,7 +66,7 @@ extern "C" {
* @param max_sps_len Maximum length of the spelling string(Pinyin string).
* @param max_hzs_len Maximum length of the decoded Chinese character string.
*/
- void im_set_max_lens ( size_t max_sps_len, size_t max_hzs_len );
+ void im_set_max_lens ( Size_t max_sps_len, Size_t max_hzs_len );
/**
* Flush cached data to persistent memory. Because at runtime, in order to
@@ -87,7 +87,7 @@ extern "C" {
* @param sps_len The length of the spelling string buffer.
* @return The number of candidates.
*/
- size_t im_search ( const char *sps_buf, size_t sps_len );
+ Size_t im_search ( const char *sps_buf, Size_t sps_len );
/**
* Make a delete operation in the current search result, and make research if
@@ -99,7 +99,7 @@ extern "C" {
* in the spelling string, or the position in the result spelling id string.
* @return The number of candidates.
*/
- size_t im_delsearch ( size_t pos, bool is_pos_in_splid,
+ Size_t im_delsearch ( Size_t pos, bool is_pos_in_splid,
bool clear_fixed_this_step );
/**
@@ -115,7 +115,7 @@ extern "C" {
* @param ch The letter to add.
* @return The number of candidates.
*/
- size_t im_add_letter ( char ch );
+ Size_t im_add_letter ( char ch );
/**
* Get the spelling string kept by the decoder.
@@ -124,7 +124,7 @@ extern "C" {
* string is successfully parsed.
* @return The spelling string kept by the decoder.
*/
- const char *im_get_sps_str ( size_t *decoded_len );
+ const char *im_get_sps_str ( Size_t *decoded_len );
/**
* Get a candidate(or choice) string.
@@ -135,8 +135,8 @@ extern "C" {
* @param max_len The maximum length of the buffer.
* @return cand_str if succeeds, otherwise NULL.
*/
- char16 *im_get_candidate ( size_t cand_id, char16 *cand_str,
- size_t max_len );
+ char16 *im_get_candidate ( Size_t cand_id, char16 *cand_str,
+ Size_t max_len );
/**
* Get the segmentation information(the starting positions) of the spelling
@@ -147,7 +147,7 @@ extern "C" {
* elements in spl_start, and spl_start[L] is the position after the end of
* the last spelling id.
*/
- size_t im_get_spl_start_pos ( const uint16 *&spl_start );
+ Size_t im_get_spl_start_pos ( const uint16 *&spl_start );
/**
* Choose a candidate and make it fixed. If the candidate does not match
@@ -160,21 +160,21 @@ extern "C" {
* @return The number of candidates. If after the selection, the whole result
* string has been fixed, there will be only one candidate.
*/
- size_t im_choose ( size_t cand_id );
+ Size_t im_choose ( Size_t cand_id );
/**
* Cancel the last selection, or revert the last operation of im_choose().
*
* @return The number of candidates.
*/
- size_t im_cancel_last_choice();
+ Size_t im_cancel_last_choice();
/**
* Get the number of fixed spelling ids, or Chinese characters.
*
* @return The number of fixed spelling ids, or Chinese characters.
*/
- size_t im_get_fixed_len();
+ Size_t im_get_fixed_len();
/**
* Cancel the input state and reset the search workspace.
@@ -190,7 +190,7 @@ extern "C" {
* @param pre_buf Used to return prediction result list.
* @return The number of predicted result string.
*/
- size_t im_get_predicts ( const char16 *his_buf,
+ Size_t im_get_predicts ( const char16 *his_buf,
char16 ( *&pre_buf ) [kMaxPredictSize + 1] );
/**
diff --git a/jni/include/searchutility.h b/jni/include/searchutility.h
index e33b2b0..d218242 100755..100644
--- a/jni/include/searchutility.h
+++ b/jni/include/searchutility.h
@@ -30,8 +30,8 @@ namespace ime_pinyin {
// Type used to express a lemma and its probability score.
typedef struct {
- size_t id: ( kLemmaIdSize * 8 );
- size_t lma_len: 4;
+ Size_t id: ( kLemmaIdSize * 8 );
+ Size_t lma_len: 4;
uint16 psb; // The score, the lower psb, the higher possibility.
// For single character items, we may also need Hanzi.
// For multiple character items, ignore it.
@@ -133,9 +133,9 @@ namespace ime_pinyin {
int cmp_npre_by_hanzi_score ( const void *p1, const void *p2 );
- size_t remove_duplicate_npre ( NPredictItem *npre_items, size_t npre_num );
+ Size_t remove_duplicate_npre ( NPredictItem *npre_items, Size_t npre_num );
- size_t align_to_size_t ( size_t size );
+ Size_t align_to_Size_t ( Size_t size );
} // namespace
diff --git a/jni/include/spellingtable.h b/jni/include/spellingtable.h
index b137a8e..35eeaab 100644
--- a/jni/include/spellingtable.h
+++ b/jni/include/spellingtable.h
@@ -24,7 +24,7 @@ namespace ime_pinyin {
#ifdef ___BUILD_MODEL___
- const size_t kMaxSpellingSize = kMaxPinyinSize;
+ const Size_t kMaxSpellingSize = kMaxPinyinSize;
typedef struct {
char str[kMaxSpellingSize + 1];
@@ -37,12 +37,12 @@ namespace ime_pinyin {
// we only keep its first spelling_size_ chars.
class SpellingTable {
private:
- static const size_t kNotSupportNum = 3;
+ static const Size_t kNotSupportNum = 3;
static const char kNotSupportList[kNotSupportNum][kMaxSpellingSize + 1];
bool need_score_;
- size_t spelling_max_num_;
+ Size_t spelling_max_num_;
RawSpelling *raw_spellings_;
@@ -54,7 +54,7 @@ namespace ime_pinyin {
double total_freq_;
- size_t spelling_num_;
+ Size_t spelling_num_;
double score_amplifier_;
@@ -63,8 +63,8 @@ namespace ime_pinyin {
// If frozen is true, put_spelling() and contain() are not allowed to call.
bool frozen_;
- size_t get_hash_pos ( const char *spelling_str );
- size_t hash_pos_next ( size_t hash_pos );
+ Size_t get_hash_pos ( const char *spelling_str );
+ Size_t hash_pos_next ( Size_t hash_pos );
void free_resource();
public:
SpellingTable();
@@ -75,7 +75,7 @@ namespace ime_pinyin {
// spl_max_num is the maximum number of spelling strings to store.
// need_score is used to indicate whether the caller needs to calculate a
// score for each spelling.
- bool init_table ( size_t pure_spl_size, size_t spl_max_num, bool need_score );
+ bool init_table ( Size_t pure_spl_size, Size_t spl_max_num, bool need_score );
// Put a spelling string to the table.
// It always returns false if called after arrange() without a new
@@ -99,7 +99,7 @@ namespace ime_pinyin {
// unsigned char score.
// An item with a lower score has a higher probability.
// Do not call put_spelling() and contains() after arrange().
- const char *arrange ( size_t *item_size, size_t *spl_num );
+ const char *arrange ( Size_t *item_size, Size_t *spl_num );
float get_score_amplifier();
diff --git a/jni/include/spellingtrie.h b/jni/include/spellingtrie.h
index 77f27a1..600a3a6 100644
--- a/jni/include/spellingtrie.h
+++ b/jni/include/spellingtrie.h
@@ -39,7 +39,7 @@ namespace ime_pinyin {
class SpellingTrie {
private:
static const int kMaxYmNum = 64;
- static const size_t kValidSplCharNum = 26;
+ static const Size_t kValidSplCharNum = 26;
static const uint16 kHalfIdShengmuMask = 0x01;
static const uint16 kHalfIdYunmuMask = 0x02;
@@ -80,8 +80,8 @@ namespace ime_pinyin {
// The Yunmu table.
// Each Yunmu will be assigned with Yunmu id from 1.
char *ym_buf_;
- size_t ym_size_; // The size of longest Yunmu string, '\0'included.
- size_t ym_num_;
+ Size_t ym_size_; // The size of longest Yunmu string, '\0'included.
+ Size_t ym_num_;
// The spelling string just queried
char *splstr_queried_;
@@ -116,7 +116,7 @@ namespace ime_pinyin {
#ifdef ___BUILD_MODEL___
// How many node used to build the trie.
- size_t node_num_;
+ Size_t node_num_;
#endif
SpellingTrie();
@@ -127,8 +127,8 @@ namespace ime_pinyin {
// item_start to item_end).
// Member spelling_buf_ and spelling_size_ should be valid.
// parent is used to update its num_of_son and score.
- SpellingNode *construct_spellings_subset ( size_t item_start, size_t item_end,
- size_t level, SpellingNode *parent );
+ SpellingNode *construct_spellings_subset ( Size_t item_start, Size_t item_end,
+ Size_t level, SpellingNode *parent );
bool build_f2h();
// The caller should guarantee ch >= 'A' && ch <= 'Z'
@@ -168,7 +168,7 @@ namespace ime_pinyin {
// score_amplifier is used to convert a possibility value into score.
// average_score is the average_score of all spellings. The dumb node is
// assigned with this score.
- bool construct ( const char *spelling_arr, size_t item_size, size_t item_num,
+ bool construct ( const char *spelling_arr, Size_t item_size, Size_t item_num,
float score_amplifier, unsigned char average_score );
// Test if the given id is a valid spelling id.
@@ -236,7 +236,7 @@ namespace ime_pinyin {
bool load_spl_trie ( FILE *fp );
// Get the number of spellings
- size_t get_spelling_num();
+ Size_t get_spelling_num();
// Return the Yunmu id for the given Yunmu string.
// If the string is not valid, return 0;
@@ -250,8 +250,8 @@ namespace ime_pinyin {
// Get Pinyin string for a given spelling id. Return the length of the
// string, and fill-in '\0' at the end.
- size_t get_spelling_str16 ( uint16 splid, char16 *splstr16,
- size_t splstr16_len );
+ Size_t get_spelling_str16 ( uint16 splid, char16 *splstr16,
+ Size_t splstr16_len );
};
}
diff --git a/jni/include/splparser.h b/jni/include/splparser.h
index 9df41ea..9df41ea 100755..100644
--- a/jni/include/splparser.h
+++ b/jni/include/splparser.h
diff --git a/jni/include/sync.h b/jni/include/sync.h
index d123b62..d123b62 100755..100644
--- a/jni/include/sync.h
+++ b/jni/include/sync.h
diff --git a/jni/include/userdict.h b/jni/include/userdict.h
index 22048d4..b3712bd 100755..100644
--- a/jni/include/userdict.h
+++ b/jni/include/userdict.h
@@ -39,16 +39,16 @@ namespace ime_pinyin {
bool close_dict();
- size_t number_of_lemmas();
+ Size_t number_of_lemmas();
void reset_milestones ( uint16 from_step, MileStoneHandle from_handle );
MileStoneHandle extend_dict ( MileStoneHandle from_handle,
const DictExtPara *dep, LmaPsbItem *lpi_items,
- size_t lpi_max, size_t *lpi_num );
+ Size_t lpi_max, Size_t *lpi_num );
- size_t get_lpis ( const uint16 *splid_str, uint16 splid_str_len,
- LmaPsbItem *lpi_items, size_t lpi_max );
+ Size_t get_lpis ( const uint16 *splid_str, uint16 splid_str_len,
+ LmaPsbItem *lpi_items, Size_t lpi_max );
uint16 get_lemma_str ( LemmaIdType id_lemma, char16 *str_buf,
uint16 str_max );
@@ -56,9 +56,9 @@ namespace ime_pinyin {
uint16 get_lemma_splids ( LemmaIdType id_lemma, uint16 *splids,
uint16 splids_max, bool arg_valid );
- size_t predict ( const char16 last_hzs[], uint16 hzs_len,
- NPredictItem *npre_items, size_t npre_max,
- size_t b4_used );
+ Size_t predict ( const char16 last_hzs[], uint16 hzs_len,
+ NPredictItem *npre_items, Size_t npre_max,
+ Size_t b4_used );
// Full spelling ids are required
LemmaIdType put_lemma ( char16 lemma_str[], uint16 splids[],
@@ -77,8 +77,8 @@ namespace ime_pinyin {
bool remove_lemma ( LemmaIdType lemma_id );
- size_t get_total_lemma_count();
- void set_total_lemma_count_of_others ( size_t count );
+ Size_t get_total_lemma_count();
+ void set_total_lemma_count_of_others ( Size_t count );
void flush_cache();
@@ -182,12 +182,12 @@ namespace ime_pinyin {
#endif
#ifdef ___SYNC_ENABLED___
uint32 *syncs_;
- size_t sync_count_size_;
+ Size_t sync_count_size_;
#endif
uint32 *offsets_by_id_;
- size_t lemma_count_left_;
- size_t lemma_size_left_;
+ Size_t lemma_count_left_;
+ Size_t lemma_size_left_;
const char *dict_file_;
@@ -304,8 +304,8 @@ namespace ime_pinyin {
LemmaIdType _put_lemma ( char16 lemma_str[], uint16 splids[],
uint16 lemma_len, uint16 count, uint64 lmt );
- size_t _get_lpis ( const uint16 *splid_str, uint16 splid_str_len,
- LmaPsbItem *lpi_items, size_t lpi_max, bool *need_extend );
+ Size_t _get_lpis ( const uint16 *splid_str, uint16 splid_str_len,
+ LmaPsbItem *lpi_items, Size_t lpi_max, bool *need_extend );
int _get_lemma_score ( char16 lemma_str[], uint16 splids[], uint16 lemma_len );
diff --git a/jni/include/utf16char.h b/jni/include/utf16char.h
index f466d41..dec64e1 100755..100644
--- a/jni/include/utf16char.h
+++ b/jni/include/utf16char.h
@@ -24,26 +24,26 @@ namespace ime_pinyin {
#ifdef __cplusplus
extern "C" {
#endif
-
+ typedef unsigned int Size_t;
typedef unsigned short char16;
// Get a token from utf16_str,
// Returned pointer is a '\0'-terminated utf16 string, or NULL
// *utf16_str_next returns the next part of the string for further tokenizing
- char16 *utf16_strtok ( char16 *utf16_str, size_t *token_size,
+ char16 *utf16_strtok ( char16 *utf16_str, Size_t *token_size,
char16 **utf16_str_next );
int utf16_atoi ( const char16 *utf16_str );
float utf16_atof ( const char16 *utf16_str );
- size_t utf16_strlen ( const char16 *utf16_str );
+ Size_t utf16_strlen ( const char16 *utf16_str );
int utf16_strcmp ( const char16 *str1, const char16 *str2 );
- int utf16_strncmp ( const char16 *str1, const char16 *str2, size_t size );
+ int utf16_strncmp ( const char16 *str1, const char16 *str2, Size_t size );
char16 *utf16_strcpy ( char16 *dst, const char16 *src );
- char16 *utf16_strncpy ( char16 *dst, const char16 *src, size_t size );
+ char16 *utf16_strncpy ( char16 *dst, const char16 *src, Size_t size );
char *utf16_strcpy_tochar ( char *dst, const char16 *src );
diff --git a/jni/include/utf16reader.h b/jni/include/utf16reader.h
index 02de634..ad64cd0 100755..100644
--- a/jni/include/utf16reader.h
+++ b/jni/include/utf16reader.h
@@ -26,11 +26,11 @@ namespace ime_pinyin {
private:
FILE *fp_;
char16 *buffer_;
- size_t buffer_total_len_;
- size_t buffer_next_pos_;
+ Size_t buffer_total_len_;
+ Size_t buffer_next_pos_;
// Always less than buffer_total_len_ - buffer_next_pos_
- size_t buffer_valid_len_;
+ Size_t buffer_valid_len_;
public:
Utf16Reader();
@@ -39,8 +39,8 @@ namespace ime_pinyin {
// filename is the name of the file to open.
// buffer_len specifies how long buffer should be allocated to speed up the
// future reading
- bool open ( const char *filename, size_t buffer_len );
- char16 *readline ( char16 *read_buf, size_t max_len );
+ bool open ( const char *filename, Size_t buffer_len );
+ char16 *readline ( char16 *read_buf, Size_t max_len );
bool close();
};
}
diff --git a/jni/share/dictbuilder.cpp b/jni/share/dictbuilder.cpp
index 8dadd7c..847052a 100755..100644
--- a/jni/share/dictbuilder.cpp
+++ b/jni/share/dictbuilder.cpp
@@ -33,8 +33,8 @@ namespace ime_pinyin {
#ifdef ___BUILD_MODEL___
- static const size_t kReadBufLen = 512;
- static const size_t kSplTableHashLen = 2000;
+ static const Size_t kReadBufLen = 512;
+ static const Size_t kSplTableHashLen = 2000;
// Compare a SingleCharItem, first by Hanzis, then by spelling ids, then by
// frequencies.
@@ -81,8 +81,8 @@ namespace ime_pinyin {
}
int cmp_lemma_entry_hzs ( const void *p1, const void *p2 ) {
- size_t size1 = utf16_strlen ( ( ( const LemmaEntry * ) p1 )->hanzi_str );
- size_t size2 = utf16_strlen ( ( ( const LemmaEntry * ) p2 )->hanzi_str );
+ Size_t size1 = utf16_strlen ( ( ( const LemmaEntry * ) p1 )->hanzi_str );
+ Size_t size2 = utf16_strlen ( ( ( const LemmaEntry * ) p2 )->hanzi_str );
if ( size1 < size2 )
{ return -1; }
else if ( size1 > size2 )
@@ -110,8 +110,8 @@ namespace ime_pinyin {
// First hanzi, if the same, then Pinyin
int cmp_lemma_entry_hzspys ( const void *p1, const void *p2 ) {
- size_t size1 = utf16_strlen ( ( ( const LemmaEntry * ) p1 )->hanzi_str );
- size_t size2 = utf16_strlen ( ( ( const LemmaEntry * ) p2 )->hanzi_str );
+ Size_t size1 = utf16_strlen ( ( ( const LemmaEntry * ) p1 )->hanzi_str );
+ Size_t size2 = utf16_strlen ( ( ( const LemmaEntry * ) p2 )->hanzi_str );
if ( size1 < size2 )
{ return -1; }
else if ( size1 > size2 )
@@ -153,7 +153,7 @@ namespace ime_pinyin {
free_resource();
}
- bool DictBuilder::alloc_resource ( size_t lma_num ) {
+ bool DictBuilder::alloc_resource ( Size_t lma_num ) {
if ( 0 == lma_num )
{ return false; }
free_resource();
@@ -189,7 +189,7 @@ namespace ime_pinyin {
return true;
}
- char16 *DictBuilder::read_valid_hanzis ( const char *fn_validhzs, size_t *num ) {
+ char16 *DictBuilder::read_valid_hanzis ( const char *fn_validhzs, Size_t *num ) {
if ( NULL == fn_validhzs || NULL == num )
{ return NULL; }
*num = 0;
@@ -222,7 +222,7 @@ namespace ime_pinyin {
return hzs;
}
- bool DictBuilder::hz_in_hanzis_list ( const char16 *hzs, size_t hzs_len,
+ bool DictBuilder::hz_in_hanzis_list ( const char16 *hzs, Size_t hzs_len,
char16 hz ) {
if ( NULL == hzs )
{ return false; }
@@ -236,11 +236,11 @@ namespace ime_pinyin {
}
// The caller makes sure that the parameters are valid.
- bool DictBuilder::str_in_hanzis_list ( const char16 *hzs, size_t hzs_len,
- const char16 *str, size_t str_len ) {
+ bool DictBuilder::str_in_hanzis_list ( const char16 *hzs, Size_t hzs_len,
+ const char16 *str, Size_t str_len ) {
if ( NULL == hzs || NULL == str )
{ return false; }
- for ( size_t pos = 0; pos < str_len; pos++ ) {
+ for ( Size_t pos = 0; pos < str_len; pos++ ) {
if ( !hz_in_hanzis_list ( hzs, hzs_len, str[pos] ) )
{ return false; }
}
@@ -251,7 +251,7 @@ namespace ime_pinyin {
top_lmas_num_ = 0;
if ( NULL == lemma_arr_ )
{ return; }
- for ( size_t pos = 0; pos < lemma_num_; pos++ ) {
+ for ( Size_t pos = 0; pos < lemma_num_; pos++ ) {
if ( 0 == top_lmas_num_ ) {
top_lmas_[0] = lemma_arr_[pos];
top_lmas_num_ = 1;
@@ -260,7 +260,7 @@ namespace ime_pinyin {
if ( lemma_arr_[pos].freq > top_lmas_[top_lmas_num_ - 1].freq ) {
if ( kTopScoreLemmaNum > top_lmas_num_ )
{ top_lmas_num_ += 1; }
- size_t move_pos;
+ Size_t move_pos;
for ( move_pos = top_lmas_num_ - 1; move_pos > 0; move_pos-- ) {
top_lmas_[move_pos] = top_lmas_[move_pos - 1];
if ( 0 == move_pos - 1 ||
@@ -278,7 +278,7 @@ namespace ime_pinyin {
}
if ( kPrintDebug0 ) {
printf ( "\n------Top Lemmas------------------\n" );
- for ( size_t pos = 0; pos < top_lmas_num_; pos++ ) {
+ for ( Size_t pos = 0; pos < top_lmas_num_; pos++ ) {
printf ( "--%d, idx:%06d, score:%.5f\n", pos, top_lmas_[pos].idx_by_hz,
top_lmas_[pos].freq );
}
@@ -314,32 +314,32 @@ namespace ime_pinyin {
homo_idx_num_gt1_ = 0;
}
- size_t DictBuilder::read_raw_dict ( const char *fn_raw,
+ Size_t DictBuilder::read_raw_dict ( const char *fn_raw,
const char *fn_validhzs,
- size_t max_item ) {
+ Size_t max_item ) {
if ( NULL == fn_raw ) { return 0; }
Utf16Reader utf16_reader;
if ( !utf16_reader.open ( fn_raw, kReadBufLen * 10 ) )
{ return false; }
char16 read_buf[kReadBufLen];
// Read the number of lemmas in the file
- size_t lemma_num = 240000;
+ Size_t lemma_num = 240000;
// allocate resource required
if ( !alloc_resource ( lemma_num ) ) {
utf16_reader.close();
}
// Read the valid Hanzi list.
char16 *valid_hzs = NULL;
- size_t valid_hzs_num = 0;
+ Size_t valid_hzs_num = 0;
valid_hzs = read_valid_hanzis ( fn_validhzs, &valid_hzs_num );
// Begin reading the lemma entries
- for ( size_t i = 0; i < max_item; i++ ) {
+ for ( Size_t i = 0; i < max_item; i++ ) {
// read next entry
if ( !utf16_reader.readline ( read_buf, kReadBufLen ) ) {
lemma_num = i;
break;
}
- size_t token_size;
+ Size_t token_size;
char16 *token;
char16 *to_tokenize = read_buf;
// Get the Hanzi string
@@ -349,7 +349,7 @@ namespace ime_pinyin {
utf16_reader.close();
return false;
}
- size_t lemma_size = utf16_strlen ( token );
+ Size_t lemma_size = utf16_strlen ( token );
if ( lemma_size > kMaxLemmaSize ) {
i--;
continue;
@@ -393,7 +393,7 @@ namespace ime_pinyin {
}
// Get spelling String
bool spelling_not_support = false;
- for ( size_t hz_pos = 0; hz_pos < ( size_t ) lemma_arr_[i].hz_str_len;
+ for ( Size_t hz_pos = 0; hz_pos < ( Size_t ) lemma_arr_[i].hz_str_len;
hz_pos++ ) {
// Get a Pinyin
token = utf16_strtok ( to_tokenize, &token_size, &to_tokenize );
@@ -437,8 +437,8 @@ namespace ime_pinyin {
// The size of a spelling. '\0' is included. If the spelling table is
// initialized to calculate the spelling scores, the last char in the
// spelling string will be score, and it is also included in spl_item_size.
- size_t spl_item_size;
- size_t spl_num;
+ Size_t spl_item_size;
+ Size_t spl_num;
const char *spl_buf;
spl_buf = spl_table_->arrange ( &spl_item_size, &spl_num );
if ( NULL == spl_buf ) {
@@ -454,8 +454,8 @@ namespace ime_pinyin {
}
printf ( "spelling tree construct successfully.\n" );
// Convert the spelling string to idxs
- for ( size_t i = 0; i < lemma_num_; i++ ) {
- for ( size_t hz_pos = 0; hz_pos < ( size_t ) lemma_arr_[i].hz_str_len;
+ for ( Size_t i = 0; i < lemma_num_; i++ ) {
+ for ( Size_t hz_pos = 0; hz_pos < ( Size_t ) lemma_arr_[i].hz_str_len;
hz_pos++ ) {
uint16 spl_idxs[2];
uint16 spl_start_pos[3];
@@ -504,7 +504,7 @@ namespace ime_pinyin {
// Move the node data and homo data to the DictTrie
dict_trie->root_ = new LmaNodeLE0[lma_nds_used_num_le0_];
dict_trie->nodes_ge1_ = new LmaNodeGE1[lma_nds_used_num_ge1_];
- size_t lma_idx_num = homo_idx_num_eq1_ + homo_idx_num_gt1_ + top_lmas_num_;
+ Size_t lma_idx_num = homo_idx_num_eq1_ + homo_idx_num_gt1_ + top_lmas_num_;
dict_trie->lma_idx_buf_ = new unsigned char[lma_idx_num * kLemmaIdSize];
assert ( NULL != dict_trie->root_ );
assert ( NULL != dict_trie->lma_idx_buf_ );
@@ -516,11 +516,11 @@ namespace ime_pinyin {
sizeof ( LmaNodeLE0 ) * lma_nds_used_num_le0_ );
memcpy ( dict_trie->nodes_ge1_, lma_nodes_ge1_,
sizeof ( LmaNodeGE1 ) * lma_nds_used_num_ge1_ );
- for ( size_t pos = 0; pos < homo_idx_num_eq1_ + homo_idx_num_gt1_; pos++ ) {
+ for ( Size_t pos = 0; pos < homo_idx_num_eq1_ + homo_idx_num_gt1_; pos++ ) {
id_to_charbuf ( dict_trie->lma_idx_buf_ + pos * kLemmaIdSize,
homo_idx_buf_[pos] );
}
- for ( size_t pos = homo_idx_num_eq1_ + homo_idx_num_gt1_;
+ for ( Size_t pos = homo_idx_num_eq1_ + homo_idx_num_gt1_;
pos < lma_idx_num; pos++ ) {
LemmaIdType idx =
top_lmas_[pos - homo_idx_num_eq1_ - homo_idx_num_gt1_].idx_by_hz;
@@ -540,17 +540,17 @@ namespace ime_pinyin {
void DictBuilder::id_to_charbuf ( unsigned char *buf, LemmaIdType id ) {
if ( NULL == buf ) { return; }
- for ( size_t pos = 0; pos < kLemmaIdSize; pos++ ) {
+ for ( Size_t pos = 0; pos < kLemmaIdSize; pos++ ) {
( buf ) [pos] = ( unsigned char ) ( id >> ( pos * 8 ) );
}
}
- void DictBuilder::set_son_offset ( LmaNodeGE1 *node, size_t offset ) {
+ void DictBuilder::set_son_offset ( LmaNodeGE1 *node, Size_t offset ) {
node->son_1st_off_l = static_cast<uint16> ( offset );
node->son_1st_off_h = static_cast<unsigned char> ( offset >> 16 );
}
- void DictBuilder:: set_homo_id_buf_offset ( LmaNodeGE1 *node, size_t offset ) {
+ void DictBuilder:: set_homo_id_buf_offset ( LmaNodeGE1 *node, Size_t offset ) {
node->homo_idx_buf_off_l = static_cast<uint16> ( offset );
node->homo_idx_buf_off_h = static_cast<unsigned char> ( offset >> 16 );
}
@@ -580,7 +580,7 @@ namespace ime_pinyin {
myqsort ( lemma_arr_, lemma_num_, sizeof ( LemmaEntry ), cmp_lemma_entry_hzs );
lemma_arr_[0].idx_by_hz = 1;
LemmaIdType idx_max = 1;
- for ( size_t i = 1; i < lemma_num_; i++ ) {
+ for ( Size_t i = 1; i < lemma_num_; i++ ) {
if ( utf16_strcmp ( lemma_arr_[i].hanzi_str, lemma_arr_[i - 1].hanzi_str ) ) {
idx_max++;
lemma_arr_[i].idx_by_hz = idx_max;
@@ -592,7 +592,7 @@ namespace ime_pinyin {
return idx_max + 1;
}
- size_t DictBuilder::build_scis() {
+ Size_t DictBuilder::build_scis() {
if ( NULL == scis_ || lemma_num_ * kMaxLemmaSize > scis_num_ )
{ return 0; }
SpellingTrie &spl_trie = SpellingTrie::get_instance();
@@ -603,9 +603,9 @@ namespace ime_pinyin {
scis_[0].splid.half_splid = 0;
scis_num_ = 1;
// Copy the hanzis to the buffer
- for ( size_t pos = 0; pos < lemma_num_; pos++ ) {
- size_t hz_num = lemma_arr_[pos].hz_str_len;
- for ( size_t hzpos = 0; hzpos < hz_num; hzpos++ ) {
+ for ( Size_t pos = 0; pos < lemma_num_; pos++ ) {
+ Size_t hz_num = lemma_arr_[pos].hz_str_len;
+ for ( Size_t hzpos = 0; hzpos < hz_num; hzpos++ ) {
scis_[scis_num_].hz = lemma_arr_[pos].hanzi_str[hzpos];
scis_[scis_num_].splid.full_splid = lemma_arr_[pos].spl_idx_arr[hzpos];
scis_[scis_num_].splid.half_splid =
@@ -619,8 +619,8 @@ namespace ime_pinyin {
}
myqsort ( scis_, scis_num_, sizeof ( SingleCharItem ), cmp_scis_hz_splid_freq );
// Remove repeated items
- size_t unique_scis_num = 1;
- for ( size_t pos = 1; pos < scis_num_; pos++ ) {
+ Size_t unique_scis_num = 1;
+ for ( Size_t pos = 1; pos < scis_num_; pos++ ) {
if ( scis_[pos].hz == scis_[pos - 1].hz &&
scis_[pos].splid.full_splid == scis_[pos - 1].splid.full_splid )
{ continue; }
@@ -631,9 +631,9 @@ namespace ime_pinyin {
}
scis_num_ = unique_scis_num;
// Update the lemma list.
- for ( size_t pos = 0; pos < lemma_num_; pos++ ) {
- size_t hz_num = lemma_arr_[pos].hz_str_len;
- for ( size_t hzpos = 0; hzpos < hz_num; hzpos++ ) {
+ for ( Size_t pos = 0; pos < lemma_num_; pos++ ) {
+ Size_t hz_num = lemma_arr_[pos].hz_str_len;
+ for ( Size_t hzpos = 0; hzpos < hz_num; hzpos++ ) {
SingleCharItem key;
key.hz = lemma_arr_[pos].hanzi_str[hzpos];
key.splid.full_splid = lemma_arr_[pos].spl_idx_arr[hzpos];
@@ -653,18 +653,18 @@ namespace ime_pinyin {
}
bool DictBuilder::construct_subset ( void *parent, LemmaEntry *lemma_arr,
- size_t item_start, size_t item_end,
- size_t level ) {
+ Size_t item_start, Size_t item_end,
+ Size_t level ) {
if ( level >= kMaxLemmaSize || item_end <= item_start )
{ return false; }
// 1. Scan for how many sons
- size_t parent_son_num = 0;
+ Size_t parent_son_num = 0;
// LemmaNode *son_1st = NULL;
// parent.num_of_son = 0;
LemmaEntry *lma_last_start = lemma_arr_ + item_start;
uint16 spl_idx_node = lma_last_start->spl_idx_arr[level];
// Scan for how many sons to be allocated
- for ( size_t i = item_start + 1; i < item_end; i++ ) {
+ for ( Size_t i = item_start + 1; i < item_end; i++ ) {
LemmaEntry *lma_current = lemma_arr + i;
uint16 spl_idx_current = lma_current->spl_idx_arr[level];
if ( spl_idx_current != spl_idx_node ) {
@@ -717,14 +717,14 @@ namespace ime_pinyin {
( unsigned char ) parent_son_num;
}
// 3. Now begin to construct the son one by one
- size_t son_pos = 0;
+ Size_t son_pos = 0;
lma_last_start = lemma_arr_ + item_start;
spl_idx_node = lma_last_start->spl_idx_arr[level];
- size_t homo_num = 0;
+ Size_t homo_num = 0;
if ( lma_last_start->spl_idx_arr[level + 1] == 0 )
{ homo_num = 1; }
- size_t item_start_next = item_start;
- for ( size_t i = item_start + 1; i < item_end; i++ ) {
+ Size_t item_start_next = item_start;
+ for ( Size_t i = item_start + 1; i < item_end; i++ ) {
LemmaEntry *lma_current = lemma_arr_ + i;
uint16 spl_idx_current = lma_current->spl_idx_arr[level];
if ( spl_idx_current == spl_idx_node ) {
@@ -758,7 +758,7 @@ namespace ime_pinyin {
assert ( homo_num <= 255 );
node_cur_ge1->num_of_homo = ( unsigned char ) homo_num;
}
- for ( size_t homo_pos = 0; homo_pos < homo_num; homo_pos++ ) {
+ for ( Size_t homo_pos = 0; homo_pos < homo_num; homo_pos++ ) {
idx_buf[homo_pos] = lemma_arr_[item_start_next + homo_pos].idx_by_hz;
}
#ifdef ___DO_STATISTICS___
@@ -817,7 +817,7 @@ namespace ime_pinyin {
assert ( homo_num <= 255 );
node_cur_ge1->num_of_homo = ( unsigned char ) homo_num;
}
- for ( size_t homo_pos = 0; homo_pos < homo_num; homo_pos++ ) {
+ for ( Size_t homo_pos = 0; homo_pos < homo_num; homo_pos++ ) {
idx_buf[homo_pos] = lemma_arr[item_start_next + homo_pos].idx_by_hz;
}
#ifdef ___DO_STATISTICS___
@@ -851,14 +851,14 @@ namespace ime_pinyin {
#ifdef ___DO_STATISTICS___
void DictBuilder::stat_init() {
- memset ( max_sonbuf_len_, 0, sizeof ( size_t ) * kMaxLemmaSize );
- memset ( max_homobuf_len_, 0, sizeof ( size_t ) * kMaxLemmaSize );
- memset ( total_son_num_, 0, sizeof ( size_t ) * kMaxLemmaSize );
- memset ( total_node_hasson_, 0, sizeof ( size_t ) * kMaxLemmaSize );
- memset ( total_sonbuf_num_, 0, sizeof ( size_t ) * kMaxLemmaSize );
- memset ( total_sonbuf_allnoson_, 0, sizeof ( size_t ) * kMaxLemmaSize );
- memset ( total_node_in_sonbuf_allnoson_, 0, sizeof ( size_t ) * kMaxLemmaSize );
- memset ( total_homo_num_, 0, sizeof ( size_t ) * kMaxLemmaSize );
+ memset ( max_sonbuf_len_, 0, sizeof ( Size_t ) * kMaxLemmaSize );
+ memset ( max_homobuf_len_, 0, sizeof ( Size_t ) * kMaxLemmaSize );
+ memset ( total_son_num_, 0, sizeof ( Size_t ) * kMaxLemmaSize );
+ memset ( total_node_hasson_, 0, sizeof ( Size_t ) * kMaxLemmaSize );
+ memset ( total_sonbuf_num_, 0, sizeof ( Size_t ) * kMaxLemmaSize );
+ memset ( total_sonbuf_allnoson_, 0, sizeof ( Size_t ) * kMaxLemmaSize );
+ memset ( total_node_in_sonbuf_allnoson_, 0, sizeof ( Size_t ) * kMaxLemmaSize );
+ memset ( total_homo_num_, 0, sizeof ( Size_t ) * kMaxLemmaSize );
sonbufs_num1_ = 0;
sonbufs_numgt1_ = 0;
total_lma_node_num_ = 0;
@@ -868,35 +868,35 @@ namespace ime_pinyin {
printf ( "\n------------STAT INFO-------------\n" );
printf ( "[root is layer -1]\n" );
printf ( ".. max_sonbuf_len per layer(from layer 0):\n " );
- for ( size_t i = 0; i < kMaxLemmaSize; i++ )
+ for ( Size_t i = 0; i < kMaxLemmaSize; i++ )
{ printf ( "%d, ", max_sonbuf_len_[i] ); }
printf ( "-, \n" );
printf ( ".. max_homobuf_len per layer:\n -, " );
- for ( size_t i = 0; i < kMaxLemmaSize; i++ )
+ for ( Size_t i = 0; i < kMaxLemmaSize; i++ )
{ printf ( "%d, ", max_homobuf_len_[i] ); }
printf ( "\n" );
printf ( ".. total_son_num per layer:\n " );
- for ( size_t i = 0; i < kMaxLemmaSize; i++ )
+ for ( Size_t i = 0; i < kMaxLemmaSize; i++ )
{ printf ( "%d, ", total_son_num_[i] ); }
printf ( "-, \n" );
printf ( ".. total_node_hasson per layer:\n 1, " );
- for ( size_t i = 0; i < kMaxLemmaSize; i++ )
+ for ( Size_t i = 0; i < kMaxLemmaSize; i++ )
{ printf ( "%d, ", total_node_hasson_[i] ); }
printf ( "\n" );
printf ( ".. total_sonbuf_num per layer:\n " );
- for ( size_t i = 0; i < kMaxLemmaSize; i++ )
+ for ( Size_t i = 0; i < kMaxLemmaSize; i++ )
{ printf ( "%d, ", total_sonbuf_num_[i] ); }
printf ( "-, \n" );
printf ( ".. total_sonbuf_allnoson per layer:\n " );
- for ( size_t i = 0; i < kMaxLemmaSize; i++ )
+ for ( Size_t i = 0; i < kMaxLemmaSize; i++ )
{ printf ( "%d, ", total_sonbuf_allnoson_[i] ); }
printf ( "-, \n" );
printf ( ".. total_node_in_sonbuf_allnoson per layer:\n " );
- for ( size_t i = 0; i < kMaxLemmaSize; i++ )
+ for ( Size_t i = 0; i < kMaxLemmaSize; i++ )
{ printf ( "%d, ", total_node_in_sonbuf_allnoson_[i] ); }
printf ( "-, \n" );
printf ( ".. total_homo_num per layer:\n 0, " );
- for ( size_t i = 0; i < kMaxLemmaSize; i++ )
+ for ( Size_t i = 0; i < kMaxLemmaSize; i++ )
{ printf ( "%d, ", total_homo_num_[i] ); }
printf ( "\n" );
printf ( ".. son buf allocation number with only 1 son: %d\n", sonbufs_num1_ );
diff --git a/jni/share/dictlist.cpp b/jni/share/dictlist.cpp
index 0c3fea3..6304e08 100755..100644
--- a/jni/share/dictlist.cpp
+++ b/jni/share/dictlist.cpp
@@ -46,7 +46,7 @@ namespace ime_pinyin {
free_resource();
}
- bool DictList::alloc_resource ( size_t buf_size, size_t scis_num ) {
+ bool DictList::alloc_resource ( Size_t buf_size, Size_t scis_num ) {
// Allocate memory
buf_ = static_cast<char16 *> ( malloc ( buf_size * sizeof ( char16 ) ) );
if ( NULL == buf_ )
@@ -75,15 +75,15 @@ namespace ime_pinyin {
}
#ifdef ___BUILD_MODEL___
- bool DictList::init_list ( const SingleCharItem *scis, size_t scis_num,
- const LemmaEntry *lemma_arr, size_t lemma_num ) {
+ bool DictList::init_list ( const SingleCharItem *scis, Size_t scis_num,
+ const LemmaEntry *lemma_arr, Size_t lemma_num ) {
if ( NULL == scis || 0 == scis_num || NULL == lemma_arr || 0 == lemma_num )
{ return false; }
initialized_ = false;
if ( NULL != buf_ )
{ free ( buf_ ); }
// calculate the size
- size_t buf_size = calculate_size ( lemma_arr, lemma_num );
+ Size_t buf_size = calculate_size ( lemma_arr, lemma_num );
if ( 0 == buf_size )
{ return false; }
if ( !alloc_resource ( buf_size, scis_num ) )
@@ -95,11 +95,11 @@ namespace ime_pinyin {
return true;
}
- size_t DictList::calculate_size ( const LemmaEntry *lemma_arr, size_t lemma_num ) {
- size_t last_hz_len = 0;
- size_t list_size = 0;
- size_t id_num = 0;
- for ( size_t i = 0; i < lemma_num; i++ ) {
+ Size_t DictList::calculate_size ( const LemmaEntry *lemma_arr, Size_t lemma_num ) {
+ Size_t last_hz_len = 0;
+ Size_t list_size = 0;
+ Size_t id_num = 0;
+ for ( Size_t i = 0; i < lemma_num; i++ ) {
if ( 0 == i ) {
last_hz_len = lemma_arr[i].hz_str_len;
assert ( last_hz_len > 0 );
@@ -110,13 +110,13 @@ namespace ime_pinyin {
last_hz_len = 1;
list_size += last_hz_len;
} else {
- size_t current_hz_len = lemma_arr[i].hz_str_len;
+ Size_t current_hz_len = lemma_arr[i].hz_str_len;
assert ( current_hz_len >= last_hz_len );
if ( current_hz_len == last_hz_len ) {
list_size += current_hz_len;
id_num++;
} else {
- for ( size_t len = last_hz_len; len < current_hz_len - 1; len++ ) {
+ for ( Size_t len = last_hz_len; len < current_hz_len - 1; len++ ) {
start_pos_[len] = start_pos_[len - 1];
start_id_[len] = start_id_[len - 1];
}
@@ -128,7 +128,7 @@ namespace ime_pinyin {
}
}
}
- for ( size_t i = last_hz_len; i <= kMaxLemmaSize; i++ ) {
+ for ( Size_t i = last_hz_len; i <= kMaxLemmaSize; i++ ) {
if ( 0 == i ) {
start_pos_[0] = 0;
start_id_[0] = 1;
@@ -140,21 +140,21 @@ namespace ime_pinyin {
return start_pos_[kMaxLemmaSize];
}
- void DictList::fill_scis ( const SingleCharItem *scis, size_t scis_num ) {
+ void DictList::fill_scis ( const SingleCharItem *scis, Size_t scis_num ) {
assert ( scis_num_ == scis_num );
- for ( size_t pos = 0; pos < scis_num_; pos++ ) {
+ for ( Size_t pos = 0; pos < scis_num_; pos++ ) {
scis_hz_[pos] = scis[pos].hz;
scis_splid_[pos] = scis[pos].splid;
}
}
- void DictList::fill_list ( const LemmaEntry *lemma_arr, size_t lemma_num ) {
- size_t current_pos = 0;
+ void DictList::fill_list ( const LemmaEntry *lemma_arr, Size_t lemma_num ) {
+ Size_t current_pos = 0;
utf16_strncpy ( buf_, lemma_arr[0].hanzi_str,
lemma_arr[0].hz_str_len );
current_pos = lemma_arr[0].hz_str_len;
- size_t id_num = 1;
- for ( size_t i = 1; i < lemma_num; i++ ) {
+ Size_t id_num = 1;
+ for ( Size_t i = 1; i < lemma_num; i++ ) {
utf16_strncpy ( buf_ + current_pos, lemma_arr[i].hanzi_str,
lemma_arr[i].hz_str_len );
id_num++;
@@ -178,7 +178,7 @@ namespace ime_pinyin {
#endif // ___BUILD_MODEL___
char16 *DictList::find_pos_startedbyhzs ( const char16 last_hzs[],
- size_t word_len, int ( *cmp_func ) ( const void *, const void * ) ) {
+ Size_t word_len, int ( *cmp_func ) ( const void *, const void * ) ) {
char16 *found_w = static_cast<char16 *>
( mybsearch ( last_hzs, buf_ + start_pos_[word_len - 1],
( start_pos_[word_len] - start_pos_[word_len - 1] )
@@ -192,14 +192,14 @@ namespace ime_pinyin {
return found_w;
}
- size_t DictList::predict ( const char16 last_hzs[], uint16 hzs_len,
- NPredictItem *npre_items, size_t npre_max,
- size_t b4_used ) {
+ Size_t DictList::predict ( const char16 last_hzs[], uint16 hzs_len,
+ NPredictItem *npre_items, Size_t npre_max,
+ Size_t b4_used ) {
assert ( hzs_len <= kMaxPredictSize && hzs_len > 0 );
// 1. Prepare work
int ( *cmp_func ) ( const void *, const void * ) = cmp_func_[hzs_len - 1];
NGram &ngram = NGram::get_instance();
- size_t item_num = 0;
+ Size_t item_num = 0;
// 2. Do prediction
for ( uint16 pre_len = 1; pre_len <= kMaxPredictSize + 1 - hzs_len;
pre_len++ ) {
@@ -213,17 +213,17 @@ namespace ime_pinyin {
memset ( npre_items + item_num, 0, sizeof ( NPredictItem ) );
utf16_strncpy ( npre_items[item_num].pre_hzs, w_buf + hzs_len, pre_len );
npre_items[item_num].psb =
- ngram.get_uni_psb ( ( size_t ) ( w_buf - buf_ - start_pos_[word_len - 1] )
+ ngram.get_uni_psb ( ( Size_t ) ( w_buf - buf_ - start_pos_[word_len - 1] )
/ word_len + start_id_[word_len - 1] );
npre_items[item_num].his_len = hzs_len;
item_num++;
w_buf += word_len;
}
}
- size_t new_num = 0;
- for ( size_t i = 0; i < item_num; i++ ) {
+ Size_t new_num = 0;
+ for ( Size_t i = 0; i < item_num; i++ ) {
// Try to find it in the existing items
- size_t e_pos;
+ Size_t e_pos;
for ( e_pos = 1; e_pos <= b4_used; e_pos++ ) {
if ( utf16_strncmp ( ( * ( npre_items - e_pos ) ).pre_hzs, npre_items[i].pre_hzs,
kMaxPredictSize ) == 0 )
@@ -248,7 +248,7 @@ namespace ime_pinyin {
if ( i + 1 > str_max - 1 )
{ return 0; }
if ( start_id_[i] <= id_lemma && start_id_[i + 1] > id_lemma ) {
- size_t id_span = id_lemma - start_id_[i];
+ Size_t id_span = id_lemma - start_id_[i];
uint16 *buf = buf_ + start_pos_[i] + id_span * ( i + 1 );
for ( uint16 len = 0; len <= i; len++ ) {
str_buf[len] = buf[len];
@@ -301,7 +301,7 @@ namespace ime_pinyin {
if ( NULL == found )
{ return 0; }
assert ( found > buf_ );
- assert ( static_cast<size_t> ( found - buf_ ) >= start_pos_[str_len - 1] );
+ assert ( static_cast<Size_t> ( found - buf_ ) >= start_pos_[str_len - 1] );
return static_cast<LemmaIdType>
( start_id_[str_len - 1] +
( found - buf_ - start_pos_[str_len - 1] ) / str_len );
@@ -327,12 +327,12 @@ namespace ime_pinyin {
if ( NULL == buf_ || 0 == start_pos_[kMaxLemmaSize] ||
NULL == scis_hz_ || NULL == scis_splid_ || 0 == scis_num_ )
{ return false; }
- if ( fwrite ( &scis_num_, sizeof ( size_t ), 1, fp ) != 1 )
+ if ( fwrite ( &scis_num_, sizeof ( Size_t ), 1, fp ) != 1 )
{ return false; }
- if ( fwrite ( start_pos_, sizeof ( size_t ), kMaxLemmaSize + 1, fp ) !=
+ if ( fwrite ( start_pos_, sizeof ( Size_t ), kMaxLemmaSize + 1, fp ) !=
kMaxLemmaSize + 1 )
{ return false; }
- if ( fwrite ( start_id_, sizeof ( size_t ), kMaxLemmaSize + 1, fp ) !=
+ if ( fwrite ( start_id_, sizeof ( Size_t ), kMaxLemmaSize + 1, fp ) !=
kMaxLemmaSize + 1 )
{ return false; }
if ( fwrite ( scis_hz_, sizeof ( char16 ), scis_num_, fp ) != scis_num_ )
@@ -349,12 +349,12 @@ namespace ime_pinyin {
if ( NULL == fp )
{ return false; }
initialized_ = false;
- if ( fread ( &scis_num_, sizeof ( size_t ), 1, fp ) != 1 )
+ if ( fread ( &scis_num_, sizeof ( Size_t ), 1, fp ) != 1 )
{ return false; }
- if ( fread ( start_pos_, sizeof ( size_t ), kMaxLemmaSize + 1, fp ) !=
+ if ( fread ( start_pos_, sizeof ( Size_t ), kMaxLemmaSize + 1, fp ) !=
kMaxLemmaSize + 1 )
{ return false; }
- if ( fread ( start_id_, sizeof ( size_t ), kMaxLemmaSize + 1, fp ) !=
+ if ( fread ( start_id_, sizeof ( Size_t ), kMaxLemmaSize + 1, fp ) !=
kMaxLemmaSize + 1 )
{ return false; }
free_resource();
diff --git a/jni/share/dicttrie.cpp b/jni/share/dicttrie.cpp
index 4566e8e..0034c41 100755..100644
--- a/jni/share/dicttrie.cpp
+++ b/jni/share/dicttrie.cpp
@@ -13,7 +13,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
#include <assert.h>
#include <stdio.h>
#include <string.h>
@@ -74,16 +73,16 @@ namespace ime_pinyin {
reset_milestones ( 0, kFirstValidMileStoneHandle );
}
- inline size_t DictTrie::get_son_offset ( const LmaNodeGE1 *node ) {
- return ( ( size_t ) node->son_1st_off_l + ( ( size_t ) node->son_1st_off_h << 16 ) );
+ inline Size_t DictTrie::get_son_offset ( const LmaNodeGE1 *node ) {
+ return ( ( Size_t ) node->son_1st_off_l + ( ( Size_t ) node->son_1st_off_h << 16 ) );
}
- inline size_t DictTrie::get_homo_idx_buf_offset ( const LmaNodeGE1 *node ) {
- return ( ( size_t ) node->homo_idx_buf_off_l +
- ( ( size_t ) node->homo_idx_buf_off_h << 16 ) );
+ inline Size_t DictTrie::get_homo_idx_buf_offset ( const LmaNodeGE1 *node ) {
+ return ( ( Size_t ) node->homo_idx_buf_off_l +
+ ( ( Size_t ) node->homo_idx_buf_off_h << 16 ) );
}
- inline LemmaIdType DictTrie::get_lemma_id ( size_t id_offset ) {
+ inline LemmaIdType DictTrie::get_lemma_id ( Size_t id_offset ) {
LemmaIdType id = 0;
for ( uint16 pos = kLemmaIdSize - 1; pos > 0; pos-- )
{ id = ( id << 8 ) + lma_idx_buf_[id_offset * kLemmaIdSize + pos]; }
@@ -101,13 +100,13 @@ namespace ime_pinyin {
bool DictTrie::save_dict ( FILE *fp ) {
if ( NULL == fp )
{ return false; }
- if ( fwrite ( &lma_node_num_le0_, sizeof ( size_t ), 1, fp ) != 1 )
+ if ( fwrite ( &lma_node_num_le0_, sizeof ( Size_t ), 1, fp ) != 1 )
{ return false; }
- if ( fwrite ( &lma_node_num_ge1_, sizeof ( size_t ), 1, fp ) != 1 )
+ if ( fwrite ( &lma_node_num_ge1_, sizeof ( Size_t ), 1, fp ) != 1 )
{ return false; }
- if ( fwrite ( &lma_idx_buf_len_, sizeof ( size_t ), 1, fp ) != 1 )
+ if ( fwrite ( &lma_idx_buf_len_, sizeof ( Size_t ), 1, fp ) != 1 )
{ return false; }
- if ( fwrite ( &top_lmas_num_, sizeof ( size_t ), 1, fp ) != 1 )
+ if ( fwrite ( &top_lmas_num_, sizeof ( Size_t ), 1, fp ) != 1 )
{ return false; }
if ( fwrite ( root_, sizeof ( LmaNodeLE0 ), lma_node_num_le0_, fp )
!= lma_node_num_le0_ )
@@ -144,13 +143,13 @@ namespace ime_pinyin {
bool DictTrie::load_dict ( FILE *fp ) {
if ( NULL == fp )
{ return false; }
- if ( fread ( &lma_node_num_le0_, sizeof ( size_t ), 1, fp ) != 1 )
+ if ( fread ( &lma_node_num_le0_, sizeof ( Size_t ), 1, fp ) != 1 )
{ return false; }
- if ( fread ( &lma_node_num_ge1_, sizeof ( size_t ), 1, fp ) != 1 )
+ if ( fread ( &lma_node_num_ge1_, sizeof ( Size_t ), 1, fp ) != 1 )
{ return false; }
- if ( fread ( &lma_idx_buf_len_, sizeof ( size_t ), 1, fp ) != 1 )
+ if ( fread ( &lma_idx_buf_len_, sizeof ( Size_t ), 1, fp ) != 1 )
{ return false; }
- if ( fread ( &top_lmas_num_, sizeof ( size_t ), 1, fp ) != 1 ||
+ if ( fread ( &top_lmas_num_, sizeof ( Size_t ), 1, fp ) != 1 ||
top_lmas_num_ >= lma_idx_buf_len_ )
{ return false; }
free_resource ( false );
@@ -160,7 +159,7 @@ namespace ime_pinyin {
( malloc ( lma_node_num_ge1_ * sizeof ( LmaNodeGE1 ) ) );
lma_idx_buf_ = ( unsigned char * ) malloc ( lma_idx_buf_len_ );
total_lma_num_ = lma_idx_buf_len_ / kLemmaIdSize;
- size_t buf_size = SpellingTrie::get_instance().get_spelling_num() + 1;
+ Size_t buf_size = SpellingTrie::get_instance().get_spelling_num() + 1;
assert ( lma_node_num_le0_ <= buf_size );
splid_le0_index_ = static_cast<uint16 *> ( malloc ( buf_size * sizeof ( uint16 ) ) );
// Init the space for parsing.
@@ -184,8 +183,8 @@ namespace ime_pinyin {
{ return false; }
// The quick index for the first level sons
uint16 last_splid = kFullSplIdStart;
- size_t last_pos = 0;
- for ( size_t i = 1; i < lma_node_num_le0_; i++ ) {
+ Size_t last_pos = 0;
+ for ( Size_t i = 1; i < lma_node_num_le0_; i++ ) {
for ( uint16 splid = last_splid; splid < root_[i].spl_idx; splid++ )
{ splid_le0_index_[splid - kFullSplIdStart] = last_pos; }
splid_le0_index_[root_[i].spl_idx - kFullSplIdStart] =
@@ -195,7 +194,7 @@ namespace ime_pinyin {
}
for ( uint16 splid = last_splid + 1;
splid < buf_size + kFullSplIdStart; splid++ ) {
- assert ( static_cast<size_t> ( splid - kFullSplIdStart ) < buf_size );
+ assert ( static_cast<Size_t> ( splid - kFullSplIdStart ) < buf_size );
splid_le0_index_[splid - kFullSplIdStart] = last_pos + 1;
}
return true;
@@ -234,7 +233,8 @@ namespace ime_pinyin {
{ return false; }
FILE *fp = fdopen ( sys_fd, "rb" );
if ( NULL == fp )
- { return false; }
+ {
+ return false; }
if ( -1 == fseek ( fp, start_offset, SEEK_SET ) ) {
fclose ( fp );
return false;
@@ -259,11 +259,11 @@ namespace ime_pinyin {
return true;
}
- size_t DictTrie::fill_lpi_buffer ( LmaPsbItem lpi_items[], size_t lpi_max,
+ Size_t DictTrie::fill_lpi_buffer ( LmaPsbItem lpi_items[], Size_t lpi_max,
LmaNodeLE0 *node ) {
- size_t lpi_num = 0;
+ Size_t lpi_num = 0;
NGram &ngram = NGram::get_instance();
- for ( size_t homo = 0; homo < ( size_t ) node->num_of_homo; homo++ ) {
+ for ( Size_t homo = 0; homo < ( Size_t ) node->num_of_homo; homo++ ) {
lpi_items[lpi_num].id = get_lemma_id ( node->homo_idx_buf_off +
homo );
lpi_items[lpi_num].lma_len = 1;
@@ -276,12 +276,12 @@ namespace ime_pinyin {
return lpi_num;
}
- size_t DictTrie::fill_lpi_buffer ( LmaPsbItem lpi_items[], size_t lpi_max,
- size_t homo_buf_off, LmaNodeGE1 *node,
+ Size_t DictTrie::fill_lpi_buffer ( LmaPsbItem lpi_items[], Size_t lpi_max,
+ Size_t homo_buf_off, LmaNodeGE1 *node,
uint16 lma_len ) {
- size_t lpi_num = 0;
+ Size_t lpi_num = 0;
NGram &ngram = NGram::get_instance();
- for ( size_t homo = 0; homo < ( size_t ) node->num_of_homo; homo++ ) {
+ for ( Size_t homo = 0; homo < ( Size_t ) node->num_of_homo; homo++ ) {
lpi_items[lpi_num].id = get_lemma_id ( homo_buf_off + homo );
lpi_items[lpi_num].lma_len = lma_len;
lpi_items[lpi_num].psb =
@@ -308,8 +308,8 @@ namespace ime_pinyin {
MileStoneHandle DictTrie::extend_dict ( MileStoneHandle from_handle,
const DictExtPara *dep,
- LmaPsbItem *lpi_items, size_t lpi_max,
- size_t *lpi_num ) {
+ LmaPsbItem *lpi_items, Size_t lpi_max,
+ Size_t *lpi_num ) {
if ( NULL == dep )
{ return 0; }
// from LmaNodeLE0 (root) to LmaNodeLE0
@@ -327,7 +327,7 @@ namespace ime_pinyin {
MileStoneHandle DictTrie::extend_dict0 ( MileStoneHandle from_handle,
const DictExtPara *dep,
LmaPsbItem *lpi_items,
- size_t lpi_max, size_t *lpi_num ) {
+ Size_t lpi_max, Size_t *lpi_num ) {
assert ( NULL != dep && 0 == from_handle );
*lpi_num = 0;
MileStoneHandle ret_handle = 0;
@@ -339,9 +339,9 @@ namespace ime_pinyin {
// 2. Begin exgtending
// 2.1 Get the LmaPsbItem list
LmaNodeLE0 *node = root_;
- size_t son_start = splid_le0_index_[id_start - kFullSplIdStart];
- size_t son_end = splid_le0_index_[id_start + id_num - kFullSplIdStart];
- for ( size_t son_pos = son_start; son_pos < son_end; son_pos++ ) {
+ Size_t son_start = splid_le0_index_[id_start - kFullSplIdStart];
+ Size_t son_end = splid_le0_index_[id_start + id_num - kFullSplIdStart];
+ for ( Size_t son_pos = son_start; son_pos < son_end; son_pos++ ) {
assert ( 1 == node->son_1st_off );
LmaNodeLE0 *son = root_ + son_pos;
assert ( son->spl_idx >= id_start && son->spl_idx < id_start + id_num );
@@ -377,12 +377,12 @@ namespace ime_pinyin {
MileStoneHandle DictTrie::extend_dict1 ( MileStoneHandle from_handle,
const DictExtPara *dep,
LmaPsbItem *lpi_items,
- size_t lpi_max, size_t *lpi_num ) {
+ Size_t lpi_max, Size_t *lpi_num ) {
assert ( NULL != dep && from_handle > 0 && from_handle < mile_stones_pos_ );
MileStoneHandle ret_handle = 0;
// 1. If this is a half Id, get its corresponding full starting Id and
// number of full Id.
- size_t ret_val = 0;
+ Size_t ret_val = 0;
uint16 id_start = dep->id_start;
uint16 id_num = dep->id_num;
// 2. Begin extending.
@@ -392,15 +392,15 @@ namespace ime_pinyin {
uint16 ext_num = p_mark.node_num;
for ( uint16 ext_pos = 0; ext_pos < ext_num; ext_pos++ ) {
LmaNodeLE0 *node = root_ + p_mark.node_offset + ext_pos;
- size_t found_start = 0;
- size_t found_num = 0;
- for ( size_t son_pos = 0; son_pos < ( size_t ) node->num_of_son; son_pos++ ) {
+ Size_t found_start = 0;
+ Size_t found_num = 0;
+ for ( Size_t son_pos = 0; son_pos < ( Size_t ) node->num_of_son; son_pos++ ) {
assert ( node->son_1st_off <= lma_node_num_ge1_ );
LmaNodeGE1 *son = nodes_ge1_ + node->son_1st_off + son_pos;
if ( son->spl_idx >= id_start
&& son->spl_idx < id_start + id_num ) {
if ( *lpi_num < lpi_max ) {
- size_t homo_buf_off = get_homo_idx_buf_offset ( son );
+ Size_t homo_buf_off = get_homo_idx_buf_offset ( son );
*lpi_num += fill_lpi_buffer ( lpi_items + ( *lpi_num ),
lpi_max - *lpi_num, homo_buf_off, son,
2 );
@@ -412,7 +412,7 @@ namespace ime_pinyin {
found_num++;
}
if ( son->spl_idx >= id_start + id_num - 1 || son_pos ==
- ( size_t ) node->num_of_son - 1 ) {
+ ( Size_t ) node->num_of_son - 1 ) {
if ( found_num > 0 ) {
if ( mile_stones_pos_ < kMaxMileStone &&
parsing_marks_pos_ < kMaxParsingMark ) {
@@ -445,12 +445,12 @@ namespace ime_pinyin {
MileStoneHandle DictTrie::extend_dict2 ( MileStoneHandle from_handle,
const DictExtPara *dep,
LmaPsbItem *lpi_items,
- size_t lpi_max, size_t *lpi_num ) {
+ Size_t lpi_max, Size_t *lpi_num ) {
assert ( NULL != dep && from_handle > 0 && from_handle < mile_stones_pos_ );
MileStoneHandle ret_handle = 0;
// 1. If this is a half Id, get its corresponding full starting Id and
// number of full Id.
- size_t ret_val = 0;
+ Size_t ret_val = 0;
uint16 id_start = dep->id_start;
uint16 id_num = dep->id_num;
// 2. Begin extending.
@@ -460,15 +460,15 @@ namespace ime_pinyin {
uint16 ext_num = p_mark.node_num;
for ( uint16 ext_pos = 0; ext_pos < ext_num; ext_pos++ ) {
LmaNodeGE1 *node = nodes_ge1_ + p_mark.node_offset + ext_pos;
- size_t found_start = 0;
- size_t found_num = 0;
- for ( size_t son_pos = 0; son_pos < ( size_t ) node->num_of_son; son_pos++ ) {
+ Size_t found_start = 0;
+ Size_t found_num = 0;
+ for ( Size_t son_pos = 0; son_pos < ( Size_t ) node->num_of_son; son_pos++ ) {
assert ( node->son_1st_off_l > 0 || node->son_1st_off_h > 0 );
LmaNodeGE1 *son = nodes_ge1_ + get_son_offset ( node ) + son_pos;
if ( son->spl_idx >= id_start
&& son->spl_idx < id_start + id_num ) {
if ( *lpi_num < lpi_max ) {
- size_t homo_buf_off = get_homo_idx_buf_offset ( son );
+ Size_t homo_buf_off = get_homo_idx_buf_offset ( son );
*lpi_num += fill_lpi_buffer ( lpi_items + ( *lpi_num ),
lpi_max - *lpi_num, homo_buf_off, son,
dep->splids_extended + 1 );
@@ -480,7 +480,7 @@ namespace ime_pinyin {
found_num++;
}
if ( son->spl_idx >= id_start + id_num - 1 || son_pos ==
- ( size_t ) node->num_of_son - 1 ) {
+ ( Size_t ) node->num_of_son - 1 ) {
if ( found_num > 0 ) {
if ( mile_stones_pos_ < kMaxMileStone &&
parsing_marks_pos_ < kMaxParsingMark ) {
@@ -550,8 +550,8 @@ namespace ime_pinyin {
}
if ( 1 == splid_num ) {
LmaNodeLE0 *node_le0 = reinterpret_cast<LmaNodeLE0 *> ( node );
- size_t num_of_homo = ( size_t ) node_le0->num_of_homo;
- for ( size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++ ) {
+ Size_t num_of_homo = ( Size_t ) node_le0->num_of_homo;
+ for ( Size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++ ) {
LemmaIdType id_this = get_lemma_id ( node_le0->homo_idx_buf_off + homo_pos );
char16 str[2];
get_lemma_str ( id_this, str, 2 );
@@ -560,9 +560,9 @@ namespace ime_pinyin {
}
} else {
LmaNodeGE1 *node_ge1 = reinterpret_cast<LmaNodeGE1 *> ( node );
- size_t num_of_homo = ( size_t ) node_ge1->num_of_homo;
- for ( size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++ ) {
- size_t node_homo_off = get_homo_idx_buf_offset ( node_ge1 );
+ Size_t num_of_homo = ( Size_t ) node_ge1->num_of_homo;
+ for ( Size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++ ) {
+ Size_t node_homo_off = get_homo_idx_buf_offset ( node_ge1 );
if ( get_lemma_id ( node_homo_off + homo_pos ) == id_lemma )
{ return true; }
}
@@ -570,25 +570,25 @@ namespace ime_pinyin {
return false;
}
- size_t DictTrie::get_lpis ( const uint16 *splid_str, uint16 splid_str_len,
- LmaPsbItem *lma_buf, size_t max_lma_buf ) {
+ Size_t DictTrie::get_lpis ( const uint16 *splid_str, uint16 splid_str_len,
+ LmaPsbItem *lma_buf, Size_t max_lma_buf ) {
if ( splid_str_len > kMaxLemmaSize )
{ return 0; }
#define MAX_EXTENDBUF_LEN 200
- size_t *node_buf1[MAX_EXTENDBUF_LEN]; // use size_t for data alignment
- size_t *node_buf2[MAX_EXTENDBUF_LEN];
+ Size_t *node_buf1[MAX_EXTENDBUF_LEN]; // use Size_t for data alignment
+ Size_t *node_buf2[MAX_EXTENDBUF_LEN];
LmaNodeLE0 **node_fr_le0 =
reinterpret_cast<LmaNodeLE0 **> ( node_buf1 ); // Nodes from.
LmaNodeLE0 **node_to_le0 =
reinterpret_cast<LmaNodeLE0 **> ( node_buf2 ); // Nodes to.
LmaNodeGE1 **node_fr_ge1 = NULL;
LmaNodeGE1 **node_to_ge1 = NULL;
- size_t node_fr_num = 1;
- size_t node_to_num = 0;
+ Size_t node_fr_num = 1;
+ Size_t node_to_num = 0;
node_fr_le0[0] = root_;
if ( NULL == node_fr_le0[0] )
{ return 0; }
- size_t spl_pos = 0;
+ Size_t spl_pos = 0;
while ( spl_pos < splid_str_len ) {
uint16 id_num = 1;
uint16 id_start = splid_str[spl_pos];
@@ -599,13 +599,13 @@ namespace ime_pinyin {
}
// Extend the nodes
if ( 0 == spl_pos ) { // From LmaNodeLE0 (root) to LmaNodeLE0 nodes
- for ( size_t node_fr_pos = 0; node_fr_pos < node_fr_num; node_fr_pos++ ) {
+ for ( Size_t node_fr_pos = 0; node_fr_pos < node_fr_num; node_fr_pos++ ) {
LmaNodeLE0 *node = node_fr_le0[node_fr_pos];
assert ( node == root_ && 1 == node_fr_num );
- size_t son_start = splid_le0_index_[id_start - kFullSplIdStart];
- size_t son_end =
+ Size_t son_start = splid_le0_index_[id_start - kFullSplIdStart];
+ Size_t son_end =
splid_le0_index_[id_start + id_num - kFullSplIdStart];
- for ( size_t son_pos = son_start; son_pos < son_end; son_pos++ ) {
+ for ( Size_t son_pos = son_start; son_pos < son_end; son_pos++ ) {
assert ( 1 == node->son_1st_off );
LmaNodeLE0 *node_son = root_ + son_pos;
assert ( node_son->spl_idx >= id_start
@@ -630,9 +630,9 @@ namespace ime_pinyin {
node_to_le0 = NULL;
node_to_ge1 = reinterpret_cast<LmaNodeGE1 **> ( node_tmp );
} else if ( 1 == spl_pos ) { // From LmaNodeLE0 to LmaNodeGE1 nodes
- for ( size_t node_fr_pos = 0; node_fr_pos < node_fr_num; node_fr_pos++ ) {
+ for ( Size_t node_fr_pos = 0; node_fr_pos < node_fr_num; node_fr_pos++ ) {
LmaNodeLE0 *node = node_fr_le0[node_fr_pos];
- for ( size_t son_pos = 0; son_pos < ( size_t ) node->num_of_son;
+ for ( Size_t son_pos = 0; son_pos < ( Size_t ) node->num_of_son;
son_pos++ ) {
assert ( node->son_1st_off <= lma_node_num_ge1_ );
LmaNodeGE1 *node_son = nodes_ge1_ + node->son_1st_off
@@ -660,9 +660,9 @@ namespace ime_pinyin {
node_fr_le0 = NULL;
node_to_le0 = NULL;
} else { // From LmaNodeGE1 to LmaNodeGE1 nodes
- for ( size_t node_fr_pos = 0; node_fr_pos < node_fr_num; node_fr_pos++ ) {
+ for ( Size_t node_fr_pos = 0; node_fr_pos < node_fr_num; node_fr_pos++ ) {
LmaNodeGE1 *node = node_fr_ge1[node_fr_pos];
- for ( size_t son_pos = 0; son_pos < ( size_t ) node->num_of_son;
+ for ( Size_t son_pos = 0; son_pos < ( Size_t ) node->num_of_son;
son_pos++ ) {
assert ( node->son_1st_off_l > 0 || node->son_1st_off_h > 0 );
LmaNodeGE1 *node_son = nodes_ge1_
@@ -696,18 +696,18 @@ namespace ime_pinyin {
if ( 0 == node_to_num )
{ return 0; }
NGram &ngram = NGram::get_instance();
- size_t lma_num = 0;
+ Size_t lma_num = 0;
// If the length is 1, and the splid is a one-char Yunmu like 'a', 'o', 'e',
// only those candidates for the full matched one-char id will be returned.
if ( 1 == splid_str_len && spl_trie_->is_half_id_yunmu ( splid_str[0] ) )
{ node_to_num = node_to_num > 0 ? 1 : 0; }
- for ( size_t node_pos = 0; node_pos < node_to_num; node_pos++ ) {
- size_t num_of_homo = 0;
+ for ( Size_t node_pos = 0; node_pos < node_to_num; node_pos++ ) {
+ Size_t num_of_homo = 0;
if ( spl_pos <= 1 ) { // Get from LmaNodeLE0 nodes
LmaNodeLE0 *node_le0 = node_to_le0[node_pos];
- num_of_homo = ( size_t ) node_le0->num_of_homo;
- for ( size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++ ) {
- size_t ch_pos = lma_num + homo_pos;
+ num_of_homo = ( Size_t ) node_le0->num_of_homo;
+ for ( Size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++ ) {
+ Size_t ch_pos = lma_num + homo_pos;
lma_buf[ch_pos].id =
get_lemma_id ( node_le0->homo_idx_buf_off + homo_pos );
lma_buf[ch_pos].lma_len = 1;
@@ -718,10 +718,10 @@ namespace ime_pinyin {
}
} else { // Get from LmaNodeGE1 nodes
LmaNodeGE1 *node_ge1 = node_to_ge1[node_pos];
- num_of_homo = ( size_t ) node_ge1->num_of_homo;
- for ( size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++ ) {
- size_t ch_pos = lma_num + homo_pos;
- size_t node_homo_off = get_homo_idx_buf_offset ( node_ge1 );
+ num_of_homo = ( Size_t ) node_ge1->num_of_homo;
+ for ( Size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++ ) {
+ Size_t ch_pos = lma_num + homo_pos;
+ Size_t node_homo_off = get_homo_idx_buf_offset ( node_ge1 );
lma_buf[ch_pos].id = get_lemma_id ( node_homo_off + homo_pos );
lma_buf[ch_pos].lma_len = splid_str_len;
lma_buf[ch_pos].psb =
@@ -780,7 +780,7 @@ namespace ime_pinyin {
return 0;
}
- void DictTrie::set_total_lemma_count_of_others ( size_t count ) {
+ void DictTrie::set_total_lemma_count_of_others ( Size_t count ) {
NGram &ngram = NGram::get_instance();
ngram.set_total_freq_none_sys ( count );
}
@@ -799,12 +799,12 @@ namespace ime_pinyin {
return dict_list_->get_lemma_id ( lemma_str, lemma_len );
}
- size_t DictTrie::predict_top_lmas ( size_t his_len, NPredictItem *npre_items,
- size_t npre_max, size_t b4_used ) {
+ Size_t DictTrie::predict_top_lmas ( Size_t his_len, NPredictItem *npre_items,
+ Size_t npre_max, Size_t b4_used ) {
NGram &ngram = NGram::get_instance();
- size_t item_num = 0;
- size_t top_lmas_id_offset = lma_idx_buf_len_ / kLemmaIdSize - top_lmas_num_;
- size_t top_lmas_pos = 0;
+ Size_t item_num = 0;
+ Size_t top_lmas_id_offset = lma_idx_buf_len_ / kLemmaIdSize - top_lmas_num_;
+ Size_t top_lmas_pos = 0;
while ( item_num < npre_max && top_lmas_pos < top_lmas_num_ ) {
memset ( npre_items + item_num, 0, sizeof ( NPredictItem ) );
LemmaIdType top_lma_id = get_lemma_id ( top_lmas_id_offset + top_lmas_pos );
@@ -821,9 +821,9 @@ namespace ime_pinyin {
return item_num;
}
- size_t DictTrie::predict ( const char16 *last_hzs, uint16 hzs_len,
- NPredictItem *npre_items, size_t npre_max,
- size_t b4_used ) {
+ Size_t DictTrie::predict ( const char16 *last_hzs, uint16 hzs_len,
+ NPredictItem *npre_items, Size_t npre_max,
+ Size_t b4_used ) {
return dict_list_->predict ( last_hzs, hzs_len, npre_items, npre_max, b4_used );
}
} // namespace ime_pinyin
diff --git a/jni/share/lpicache.cpp b/jni/share/lpicache.cpp
index d95879c..10c7bd9 100755..100644
--- a/jni/share/lpicache.cpp
+++ b/jni/share/lpicache.cpp
@@ -51,8 +51,8 @@ namespace ime_pinyin {
return lpi_cache_len_[splid] != 0;
}
- size_t LpiCache::put_cache ( uint16 splid, LmaPsbItem lpi_items[],
- size_t lpi_num ) {
+ Size_t LpiCache::put_cache ( uint16 splid, LmaPsbItem lpi_items[],
+ Size_t lpi_num ) {
uint16 num = kMaxLpiCachePerId;
if ( num > lpi_num )
{ num = static_cast<uint16> ( lpi_num ); }
@@ -63,8 +63,8 @@ namespace ime_pinyin {
return num;
}
- size_t LpiCache::get_cache ( uint16 splid, LmaPsbItem lpi_items[],
- size_t lpi_max ) {
+ Size_t LpiCache::get_cache ( uint16 splid, LmaPsbItem lpi_items[],
+ Size_t lpi_max ) {
if ( lpi_max > lpi_cache_len_[splid] )
{ lpi_max = lpi_cache_len_[splid]; }
LmaPsbItem *lpi_cache_this = lpi_cache_ + splid * kMaxLpiCachePerId;
diff --git a/jni/share/matrixsearch.cpp b/jni/share/matrixsearch.cpp
index 5df8527..3c44315 100755..100644
--- a/jni/share/matrixsearch.cpp
+++ b/jni/share/matrixsearch.cpp
@@ -13,7 +13,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
#include <assert.h>
#include <math.h>
#include <stdio.h>
@@ -66,16 +65,16 @@ namespace ime_pinyin {
dict_trie_ = new DictTrie();
user_dict_ = static_cast<AtomDictBase *> ( new UserDict() );
spl_parser_ = new SpellingParser();
- size_t mtrx_nd_size = sizeof ( MatrixNode ) * kMtrxNdPoolSize;
- mtrx_nd_size = align_to_size_t ( mtrx_nd_size ) / sizeof ( size_t );
- size_t dmi_size = sizeof ( DictMatchInfo ) * kDmiPoolSize;
- dmi_size = align_to_size_t ( dmi_size ) / sizeof ( size_t );
- size_t matrix_size = sizeof ( MatrixRow ) * kMaxRowNum;
- matrix_size = align_to_size_t ( matrix_size ) / sizeof ( size_t );
- size_t dep_size = sizeof ( DictExtPara );
- dep_size = align_to_size_t ( dep_size ) / sizeof ( size_t );
+ Size_t mtrx_nd_size = sizeof ( MatrixNode ) * kMtrxNdPoolSize;
+ mtrx_nd_size = align_to_Size_t ( mtrx_nd_size ) / sizeof ( Size_t );
+ Size_t dmi_size = sizeof ( DictMatchInfo ) * kDmiPoolSize;
+ dmi_size = align_to_Size_t ( dmi_size ) / sizeof ( Size_t );
+ Size_t matrix_size = sizeof ( MatrixRow ) * kMaxRowNum;
+ matrix_size = align_to_Size_t ( matrix_size ) / sizeof ( Size_t );
+ Size_t dep_size = sizeof ( DictExtPara );
+ dep_size = align_to_Size_t ( dep_size ) / sizeof ( Size_t );
// share_buf's size is determined by the buffers for search.
- share_buf_ = new size_t[mtrx_nd_size + dmi_size + matrix_size + dep_size];
+ share_buf_ = new Size_t[mtrx_nd_size + dmi_size + matrix_size + dep_size];
if ( NULL == dict_trie_ || NULL == user_dict_ || NULL == spl_parser_ ||
NULL == share_buf_ )
{ return false; }
@@ -88,7 +87,7 @@ namespace ime_pinyin {
// The prediction buffer is also based on the share buffer.
npre_items_ = reinterpret_cast<NPredictItem *> ( share_buf_ );
npre_items_len_ = ( mtrx_nd_size + dmi_size + matrix_size + dep_size ) *
- sizeof ( size_t ) / sizeof ( NPredictItem );
+ sizeof ( Size_t ) / sizeof ( NPredictItem );
return true;
}
@@ -106,11 +105,11 @@ namespace ime_pinyin {
bool MatrixSearch::init ( const char *fn_sys_dict, const char *fn_usr_dict ) {
if ( NULL == fn_sys_dict || NULL == fn_usr_dict )
- { return false; }
+ { return false; }
if ( !alloc_resource() )
- { return false; }
+ { return false; }
if ( !dict_trie_->load_dict ( fn_sys_dict, 1, kSysDictIdEnd ) )
- { return false; }
+ { return false; }
// If engine fails to load the user dictionary, reset the user dictionary
// to NULL.
if ( !user_dict_->load_dict ( fn_usr_dict, kUserDictIdStart, kUserDictIdEnd ) ) {
@@ -143,7 +142,7 @@ namespace ime_pinyin {
return true;
}
- void MatrixSearch::set_max_lens ( size_t max_sps_len, size_t max_hzs_len ) {
+ void MatrixSearch::set_max_lens ( Size_t max_sps_len, Size_t max_hzs_len ) {
if ( 0 != max_sps_len )
{ max_sps_len_ = max_sps_len; }
if ( 0 != max_hzs_len )
@@ -206,7 +205,7 @@ namespace ime_pinyin {
return true;
}
- bool MatrixSearch::reset_search ( size_t ch_pos, bool clear_fixed_this_step,
+ bool MatrixSearch::reset_search ( Size_t ch_pos, bool clear_fixed_this_step,
bool clear_dmi_this_step,
bool clear_mtrx_this_step ) {
if ( !inited_ || ch_pos > pys_decoded_len_ || ch_pos >= kMaxRowNum )
@@ -254,7 +253,7 @@ namespace ime_pinyin {
( ( kLemmaIdComposing != lma_id_[0] ) ||
( kLemmaIdComposing == lma_id_[0] &&
spl_start_[c_phrase_.length] <= ch_pos ) ) ) {
- size_t fixed_ch_pos = ch_pos;
+ Size_t fixed_ch_pos = ch_pos;
if ( clear_fixed_this_step )
{ fixed_ch_pos = fixed_ch_pos > 0 ? fixed_ch_pos - 1 : 0; }
while ( NULL == matrix_[fixed_ch_pos].mtrx_nd_fixed && fixed_ch_pos > 0 )
@@ -355,14 +354,14 @@ namespace ime_pinyin {
return true;
}
- void MatrixSearch::del_in_pys ( size_t start, size_t len ) {
+ void MatrixSearch::del_in_pys ( Size_t start, Size_t len ) {
while ( start < kMaxRowNum - len && '\0' != pys_[start] ) {
pys_[start] = pys_[start + len];
start++;
}
}
- size_t MatrixSearch::search ( const char *py, size_t py_len ) {
+ Size_t MatrixSearch::search ( const char *py, Size_t py_len ) {
if ( !inited_ || NULL == py )
{ return 0; }
// If the search Pinyin string is too long, it will be truncated.
@@ -370,7 +369,7 @@ namespace ime_pinyin {
{ py_len = kMaxRowNum - 1; }
// Compare the new string with the previous one. Find their prefix to
// increase search efficiency.
- size_t ch_pos = 0;
+ Size_t ch_pos = 0;
for ( ch_pos = 0; ch_pos < pys_decoded_len_; ch_pos++ ) {
if ( '\0' == py[ch_pos] || py[ch_pos] != pys_[ch_pos] )
{ break; }
@@ -411,11 +410,11 @@ namespace ime_pinyin {
return ch_pos;
}
- size_t MatrixSearch::delsearch ( size_t pos, bool is_pos_in_splid,
+ Size_t MatrixSearch::delsearch ( Size_t pos, bool is_pos_in_splid,
bool clear_fixed_this_step ) {
if ( !inited_ )
{ return 0; }
- size_t reset_pos = pos;
+ Size_t reset_pos = pos;
// Out of range for both Pinyin mode and Spelling id mode.
if ( pys_decoded_len_ <= pos ) {
del_in_pys ( pos, 1 );
@@ -437,8 +436,8 @@ namespace ime_pinyin {
{ return pys_decoded_len_; }
// Begin to handle two modes respectively.
// Pinyin mode by default
- size_t c_py_len = 0; // The length of composing phrase's Pinyin
- size_t del_py_len = 1;
+ Size_t c_py_len = 0; // The length of composing phrase's Pinyin
+ Size_t del_py_len = 1;
if ( !is_pos_in_splid ) {
// Pinyin mode is only allowed to delete beyond the fixed lemmas.
if ( fixed_lmas_ > 0 && pos < spl_start_[lma_start_[fixed_lmas_]] )
@@ -513,15 +512,15 @@ namespace ime_pinyin {
return pys_decoded_len_;
}
- size_t MatrixSearch::get_candidate_num() {
+ Size_t MatrixSearch::get_candidate_num() {
if ( !inited_ || 0 == pys_decoded_len_ ||
0 == matrix_[pys_decoded_len_].mtrx_nd_num )
{ return 0; }
return 1 + lpi_total_;
}
- char16 *MatrixSearch::get_candidate ( size_t cand_id, char16 *cand_str,
- size_t max_len ) {
+ char16 *MatrixSearch::get_candidate ( Size_t cand_id, char16 *cand_str,
+ Size_t max_len ) {
if ( !inited_ || 0 == pys_decoded_len_ || NULL == cand_str )
{ return NULL; }
if ( 0 == cand_id ) {
@@ -557,7 +556,7 @@ namespace ime_pinyin {
if ( NULL != user_dict_ ) {
// Update the total frequency of all lemmas, including system lemmas and
// user dictionary lemmas.
- size_t total_freq = user_dict_->get_total_lemma_count();
+ Size_t total_freq = user_dict_->get_total_lemma_count();
dict_trie_->set_total_lemma_count_of_others ( total_freq );
}
}
@@ -610,7 +609,7 @@ namespace ime_pinyin {
}
bool MatrixSearch::try_add_cand0_to_userdict() {
- size_t new_cand_num = get_candidate_num();
+ Size_t new_cand_num = get_candidate_num();
if ( fixed_hzs_ > 0 && 1 == new_cand_num ) {
float score_from = 0;
uint16 lma_id_from = 0;
@@ -671,14 +670,14 @@ namespace ime_pinyin {
// 1.2.1. The whole sentence will be added as a user lemma. If the
// sentence contains user lemmas, -> hit, and add occurring count
// by 1.
- size_t MatrixSearch::choose ( size_t cand_id ) {
+ Size_t MatrixSearch::choose ( Size_t cand_id ) {
if ( !inited_ || 0 == pys_decoded_len_ )
{ return 0; }
if ( 0 == cand_id ) {
fixed_hzs_ = spl_id_num_;
matrix_[spl_start_[fixed_hzs_]].mtrx_nd_fixed = mtrx_nd_pool_ +
matrix_[spl_start_[fixed_hzs_]].mtrx_nd_pos;
- for ( size_t pos = fixed_lmas_; pos < lma_id_num_; pos++ ) {
+ for ( Size_t pos = fixed_lmas_; pos < lma_id_num_; pos++ ) {
fixed_lmas_no1_[pos] = 1;
}
fixed_lmas_ = lma_id_num_;
@@ -711,7 +710,7 @@ namespace ime_pinyin {
// Find the length of the candidate.
LemmaIdType id_chosen = lpi_items_[cand_id].id;
LmaScoreType score_chosen = lpi_items_[cand_id].psb;
- size_t cand_len = lpi_items_[cand_id].lma_len;
+ Size_t cand_len = lpi_items_[cand_id].lma_len;
assert ( cand_len > 0 );
// Notify the atom dictionary that this item is hit.
if ( is_user_lemma ( id_chosen ) ) {
@@ -722,10 +721,10 @@ namespace ime_pinyin {
}
// 3. Fixed the chosen item.
// 3.1 Get the steps number.
- size_t step_fr = spl_start_[fixed_hzs_];
- size_t step_to = spl_start_[fixed_hzs_ + cand_len];
+ Size_t step_fr = spl_start_[fixed_hzs_];
+ Size_t step_to = spl_start_[fixed_hzs_ + cand_len];
// 3.2 Save the length of the original string.
- size_t pys_decoded_len = pys_decoded_len_;
+ Size_t pys_decoded_len = pys_decoded_len_;
// 3.2 Reset the space of the fixed part.
reset_search ( step_to, false, false, true );
// 3.3 For the last character of the fixed part, the previous DMI
@@ -767,12 +766,12 @@ namespace ime_pinyin {
return get_candidate_num();
}
- size_t MatrixSearch::cancel_last_choice() {
+ Size_t MatrixSearch::cancel_last_choice() {
if ( !inited_ || 0 == pys_decoded_len_ )
{ return 0; }
- size_t step_start = 0;
+ Size_t step_start = 0;
if ( fixed_hzs_ > 0 ) {
- size_t step_end = spl_start_[fixed_hzs_];
+ Size_t step_end = spl_start_[fixed_hzs_];
MatrixNode *end_node = matrix_[step_end].mtrx_nd_fixed;
assert ( NULL != end_node );
step_start = end_node->from->step;
@@ -793,7 +792,7 @@ namespace ime_pinyin {
return get_candidate_num();
}
- size_t MatrixSearch::get_fixedlen() {
+ Size_t MatrixSearch::get_fixedlen() {
if ( !inited_ || 0 == pys_decoded_len_ )
{ return 0; }
return fixed_hzs_;
@@ -997,12 +996,12 @@ namespace ime_pinyin {
if ( sent_len > kMaxLemmaSize )
{ pfullsent = NULL; }
lpi_total_ = 0;
- size_t lpi_num_full_match = 0; // Number of items which are fully-matched.
+ Size_t lpi_num_full_match = 0; // Number of items which are fully-matched.
while ( lma_size > 0 ) {
- size_t lma_num;
+ Size_t lma_num;
lma_num = get_lpis ( spl_id_ + fixed_hzs_, lma_size,
lpi_items_ + lpi_total_,
- size_t ( kMaxLmaPsbItems - lpi_total_ ),
+ Size_t ( kMaxLmaPsbItems - lpi_total_ ),
pfullsent, lma_size == lma_size_max );
if ( lma_num > 0 ) {
lpi_total_ += lma_num;
@@ -1020,7 +1019,7 @@ namespace ime_pinyin {
sizeof ( LmaPsbItem ), cmp_lpi_with_unified_psb );
if ( kPrintDebug0 ) {
printf ( "-----Prepare candidates, score:\n" );
- for ( size_t a = 0; a < lpi_total_; a++ ) {
+ for ( Size_t a = 0; a < lpi_total_; a++ ) {
printf ( "[%03d]%d ", a, lpi_items_[a].psb );
if ( ( a + 1 ) % 6 == 0 ) { printf ( "\n" ); }
}
@@ -1031,20 +1030,20 @@ namespace ime_pinyin {
}
}
- const char *MatrixSearch::get_pystr ( size_t *decoded_len ) {
+ const char *MatrixSearch::get_pystr ( Size_t *decoded_len ) {
if ( !inited_ || NULL == decoded_len )
- { return NULL; }
+ { return NULL; }
*decoded_len = pys_decoded_len_;
return pys_;
}
- void MatrixSearch::merge_fixed_lmas ( size_t del_spl_pos ) {
+ void MatrixSearch::merge_fixed_lmas ( Size_t del_spl_pos ) {
if ( fixed_lmas_ == 0 )
{ return; }
// Update spelling segmentation information first.
spl_id_num_ -= 1;
uint16 del_py_len = spl_start_[del_spl_pos + 1] - spl_start_[del_spl_pos];
- for ( size_t pos = del_spl_pos; pos <= spl_id_num_; pos++ ) {
+ for ( Size_t pos = del_spl_pos; pos <= spl_id_num_; pos++ ) {
spl_start_[pos] = spl_start_[pos + 1] - del_py_len;
if ( pos == spl_id_num_ )
{ break; }
@@ -1164,7 +1163,7 @@ namespace ime_pinyin {
mtrx_nd = mtrx_nd->from;
}
// Reverse the result of spelling info
- for ( size_t pos = fixed_hzs_;
+ for ( Size_t pos = fixed_hzs_;
pos < fixed_hzs_ + ( spl_id_num_ - fixed_hzs_ + 1 ) / 2; pos++ ) {
if ( spl_id_num_ + fixed_hzs_ - pos != pos + 1 ) {
spl_start_[pos + 1] ^= spl_start_[spl_id_num_ - pos + fixed_hzs_];
@@ -1176,7 +1175,7 @@ namespace ime_pinyin {
}
}
// Reverse the result of lemma info
- for ( size_t pos = fixed_lmas_;
+ for ( Size_t pos = fixed_lmas_;
pos < fixed_lmas_ + ( lma_id_num_ - fixed_lmas_ + 1 ) / 2; pos++ ) {
assert ( lma_id_num_ + fixed_lmas_ - pos - 1 >= pos );
if ( lma_id_num_ + fixed_lmas_ - pos > pos + 1 ) {
@@ -1188,7 +1187,7 @@ namespace ime_pinyin {
lma_id_[pos] ^= lma_id_[lma_id_num_ - 1 - pos + fixed_lmas_];
}
}
- for ( size_t pos = fixed_lmas_ + 1; pos <= lma_id_num_; pos++ ) {
+ for ( Size_t pos = fixed_lmas_ + 1; pos <= lma_id_num_; pos++ ) {
if ( pos < lma_id_num_ )
lma_start_[pos] = lma_start_[pos - 1] +
( lma_start_[pos] - lma_start_[pos + 1] );
@@ -1198,7 +1197,7 @@ namespace ime_pinyin {
}
// Find the last fixed position
fixed_hzs_ = 0;
- for ( size_t pos = spl_id_num_; pos > 0; pos-- ) {
+ for ( Size_t pos = spl_id_num_; pos > 0; pos-- ) {
if ( NULL != matrix_[spl_start_[pos]].mtrx_nd_fixed ) {
fixed_hzs_ = pos;
break;
@@ -1207,13 +1206,13 @@ namespace ime_pinyin {
return;
}
- size_t MatrixSearch::get_spl_start ( const uint16 *&spl_start ) {
+ Size_t MatrixSearch::get_spl_start ( const uint16 *&spl_start ) {
get_spl_start_id();
spl_start = spl_start_;
return spl_id_num_;
}
- size_t MatrixSearch::extend_dmi ( DictExtPara *dep, DictMatchInfo *dmi_s ) {
+ Size_t MatrixSearch::extend_dmi ( DictExtPara *dep, DictMatchInfo *dmi_s ) {
if ( dmi_pool_used_ >= kDmiPoolSize ) { return 0; }
if ( dmi_c_phrase_ )
{ return extend_dmi_c ( dep, dmi_s ); }
@@ -1224,7 +1223,7 @@ namespace ime_pinyin {
{ cached = lpi_cache.is_cached ( splid ); }
// 1. If this is a half Id, get its corresponding full starting Id and
// number of full Id.
- size_t ret_val = 0;
+ Size_t ret_val = 0;
PoolPosType mtrx_dmi_fr = ( PoolPosType ) - 1; // From which dmi node
lpi_total_ = 0;
MileStoneHandle from_h[3];
@@ -1235,7 +1234,7 @@ namespace ime_pinyin {
from_h[1] = dmi_s->dict_handles[1];
}
// 2. Begin extending in the system dictionary
- size_t lpi_num = 0;
+ Size_t lpi_num = 0;
MileStoneHandle handles[2];
handles[0] = handles[1] = 0;
if ( from_h[0] > 0 || NULL == dmi_s ) {
@@ -1256,7 +1255,7 @@ namespace ime_pinyin {
&lpi_num );
if ( handles[1] > 0 ) {
if ( kPrintDebug0 ) {
- for ( size_t t = 0; t < lpi_num; t++ ) {
+ for ( Size_t t = 0; t < lpi_num; t++ ) {
printf ( "--Extend in user dict: uid:%d uscore:%d\n", lpi_items_[lpi_total_ + t].id,
lpi_items_[lpi_total_ + t].psb );
}
@@ -1297,7 +1296,7 @@ namespace ime_pinyin {
return ret_val;
}
- size_t MatrixSearch::extend_dmi_c ( DictExtPara *dep, DictMatchInfo *dmi_s ) {
+ Size_t MatrixSearch::extend_dmi_c ( DictExtPara *dep, DictMatchInfo *dmi_s ) {
lpi_total_ = 0;
uint16 pos = dep->splids_extended;
assert ( dmi_c_phrase_ );
@@ -1328,9 +1327,9 @@ namespace ime_pinyin {
return 0;
}
- size_t MatrixSearch::extend_mtrx_nd ( MatrixNode *mtrx_nd, LmaPsbItem lpi_items[],
- size_t lpi_num, PoolPosType dmi_fr,
- size_t res_row ) {
+ Size_t MatrixSearch::extend_mtrx_nd ( MatrixNode *mtrx_nd, LmaPsbItem lpi_items[],
+ Size_t lpi_num, PoolPosType dmi_fr,
+ Size_t res_row ) {
assert ( NULL != mtrx_nd );
matrix_[res_row].mtrx_nd_fixed = NULL;
if ( mtrx_nd_pool_used_ >= kMtrxNdPoolSize - kMaxNodeARow )
@@ -1342,17 +1341,17 @@ namespace ime_pinyin {
{ lpi_num = kMaxNodeARow; }
}
MatrixNode *mtrx_nd_res_min = mtrx_nd_pool_ + matrix_[res_row].mtrx_nd_pos;
- for ( size_t pos = 0; pos < lpi_num; pos++ ) {
+ for ( Size_t pos = 0; pos < lpi_num; pos++ ) {
float score = mtrx_nd->score + lpi_items[pos].psb;
if ( pos > 0 && score - PRUMING_SCORE > mtrx_nd_res_min->score )
{ break; }
// Try to add a new node
- size_t mtrx_nd_num = matrix_[res_row].mtrx_nd_num;
+ Size_t mtrx_nd_num = matrix_[res_row].mtrx_nd_num;
MatrixNode *mtrx_nd_res = mtrx_nd_res_min + mtrx_nd_num;
bool replace = false;
// Find its position
while ( mtrx_nd_res > mtrx_nd_res_min && score < ( mtrx_nd_res - 1 )->score ) {
- if ( static_cast<size_t> ( mtrx_nd_res - mtrx_nd_res_min ) < kMaxNodeARow )
+ if ( static_cast<Size_t> ( mtrx_nd_res - mtrx_nd_res_min ) < kMaxNodeARow )
{ *mtrx_nd_res = * ( mtrx_nd_res - 1 ); }
mtrx_nd_res--;
replace = true;
@@ -1371,7 +1370,7 @@ namespace ime_pinyin {
return matrix_[res_row].mtrx_nd_num;
}
- PoolPosType MatrixSearch::match_dmi ( size_t step_to, uint16 spl_ids[],
+ PoolPosType MatrixSearch::match_dmi ( Size_t step_to, uint16 spl_ids[],
uint16 spl_id_num ) {
if ( pys_decoded_len_ < step_to || 0 == matrix_[step_to].dmi_num ) {
return static_cast<PoolPosType> ( -1 );
@@ -1395,14 +1394,14 @@ namespace ime_pinyin {
return static_cast<PoolPosType> ( -1 );
}
- char16 *MatrixSearch::get_candidate0 ( char16 *cand_str, size_t max_len,
+ char16 *MatrixSearch::get_candidate0 ( char16 *cand_str, Size_t max_len,
uint16 *retstr_len,
bool only_unfixed ) {
if ( pys_decoded_len_ == 0 ||
matrix_[pys_decoded_len_].mtrx_nd_num == 0 )
{ return NULL; }
LemmaIdType idxs[kMaxRowNum];
- size_t id_num = 0;
+ Size_t id_num = 0;
MatrixNode *mtrx_nd = mtrx_nd_pool_ + matrix_[pys_decoded_len_].mtrx_nd_pos;
if ( kPrintDebug0 ) {
printf ( "--- sentence score: %f\n", mtrx_nd->score );
@@ -1423,7 +1422,7 @@ namespace ime_pinyin {
if ( kPrintDebug1 ) {
printf ( "<<==============Sentence DMI (reverse order) end=============\n" );
}
- size_t ret_pos = 0;
+ Size_t ret_pos = 0;
do {
id_num--;
if ( 0 == idxs[id_num] )
@@ -1453,36 +1452,36 @@ namespace ime_pinyin {
return cand_str;
}
- size_t MatrixSearch::get_lpis ( const uint16 *splid_str, size_t splid_str_len,
- LmaPsbItem *lma_buf, size_t max_lma_buf,
+ Size_t MatrixSearch::get_lpis ( const uint16 *splid_str, Size_t splid_str_len,
+ LmaPsbItem *lma_buf, Size_t max_lma_buf,
const char16 *pfullsent, bool sort_by_psb ) {
if ( splid_str_len > kMaxLemmaSize )
{ return 0; }
- size_t num1 = dict_trie_->get_lpis ( splid_str, splid_str_len,
+ Size_t num1 = dict_trie_->get_lpis ( splid_str, splid_str_len,
lma_buf, max_lma_buf );
- size_t num2 = 0;
+ Size_t num2 = 0;
if ( NULL != user_dict_ ) {
num2 = user_dict_->get_lpis ( splid_str, splid_str_len,
lma_buf + num1, max_lma_buf - num1 );
}
- size_t num = num1 + num2;
+ Size_t num = num1 + num2;
if ( 0 == num )
{ return 0; }
// Remove repeated items.
if ( splid_str_len > 1 ) {
LmaPsbStrItem *lpsis = reinterpret_cast<LmaPsbStrItem *> ( lma_buf + num );
- size_t lpsi_num = ( max_lma_buf - num ) * sizeof ( LmaPsbItem ) /
+ Size_t lpsi_num = ( max_lma_buf - num ) * sizeof ( LmaPsbItem ) /
sizeof ( LmaPsbStrItem );
assert ( lpsi_num > num );
if ( num > lpsi_num ) { num = lpsi_num; }
lpsi_num = num;
- for ( size_t pos = 0; pos < lpsi_num; pos++ ) {
+ for ( Size_t pos = 0; pos < lpsi_num; pos++ ) {
lpsis[pos].lpi = lma_buf[pos];
get_lemma_str ( lma_buf[pos].id, lpsis[pos].str, kMaxLemmaSize + 1 );
}
myqsort ( lpsis, lpsi_num, sizeof ( LmaPsbStrItem ), cmp_lpsi_with_str );
- size_t remain_num = 0;
- for ( size_t pos = 0; pos < lpsi_num; pos++ ) {
+ Size_t remain_num = 0;
+ for ( Size_t pos = 0; pos < lpsi_num; pos++ ) {
if ( pos > 0 && utf16_strcmp ( lpsis[pos].str, lpsis[pos - 1].str ) == 0 ) {
if ( lpsis[pos].lpi.psb < lpsis[pos - 1].lpi.psb ) {
assert ( remain_num > 0 );
@@ -1502,14 +1501,14 @@ namespace ime_pinyin {
// example, "de" and "di" are all valid for a Chinese character, so when
// the user input "d", repeated items are generated.
// For single character lemmas, Hanzis will be gotten
- for ( size_t pos = 0; pos < num; pos++ ) {
+ for ( Size_t pos = 0; pos < num; pos++ ) {
char16 hanzis[2];
get_lemma_str ( lma_buf[pos].id, hanzis, 2 );
lma_buf[pos].hanzi = hanzis[0];
}
myqsort ( lma_buf, num, sizeof ( LmaPsbItem ), cmp_lpi_with_hanzi );
- size_t remain_num = 0;
- for ( size_t pos = 0; pos < num; pos++ ) {
+ Size_t remain_num = 0;
+ for ( Size_t pos = 0; pos < num; pos++ ) {
if ( pos > 0 && lma_buf[pos].hanzi == lma_buf[pos - 1].hanzi ) {
if ( NULL != pfullsent &&
static_cast<char16> ( 0 ) == pfullsent[1] &&
@@ -1597,20 +1596,20 @@ namespace ime_pinyin {
return splid_num;
}
- size_t MatrixSearch::inner_predict ( const char16 *fixed_buf, uint16 fixed_len,
+ Size_t MatrixSearch::inner_predict ( const char16 *fixed_buf, uint16 fixed_len,
char16 predict_buf[][kMaxPredictSize + 1],
- size_t buf_len ) {
- size_t res_total = 0;
+ Size_t buf_len ) {
+ Size_t res_total = 0;
memset ( npre_items_, 0, sizeof ( NPredictItem ) * npre_items_len_ );
// In order to shorten the comments, j-character candidates predicted by
// i-character prefix are called P(i,j). All candidates predicted by
// i-character prefix are called P(i,*)
// Step 1. Get P(kMaxPredictSize, *) and sort them, here
// P(kMaxPredictSize, *) == P(kMaxPredictSize, 1)
- for ( size_t len = fixed_len; len > 0; len-- ) {
+ for ( Size_t len = fixed_len; len > 0; len-- ) {
// How many blank items are available
- size_t this_max = npre_items_len_ - res_total;
- size_t res_this;
+ Size_t this_max = npre_items_len_ - res_total;
+ Size_t res_this;
// If the history is longer than 1, and we can not get prediction from
// lemmas longer than 2, in this case, we will add lemmas with
// highest scores as the prediction result.
@@ -1618,7 +1617,7 @@ namespace ime_pinyin {
// Try to find if recent n (n>1) characters can be a valid lemma in system
// dictionary.
bool nearest_n_word = false;
- for ( size_t nlen = 2; nlen <= fixed_len; nlen++ ) {
+ for ( Size_t nlen = 2; nlen <= fixed_len; nlen++ ) {
if ( dict_trie_->get_lemma_id ( fixed_buf + fixed_len - nlen, nlen ) > 0 ) {
nearest_n_word = true;
break;
@@ -1673,16 +1672,16 @@ namespace ime_pinyin {
}
if ( kPrintDebug2 ) {
printf ( "/////////////////Predicted Items Begin////////////////////>>\n" );
- for ( size_t i = 0; i < res_total; i++ ) {
+ for ( Size_t i = 0; i < res_total; i++ ) {
printf ( "---" );
- for ( size_t j = 0; j < kMaxPredictSize; j++ ) {
+ for ( Size_t j = 0; j < kMaxPredictSize; j++ ) {
printf ( "%d ", npre_items_[i].pre_hzs[j] );
}
printf ( "\n" );
}
printf ( "<<///////////////Predicted Items End////////////////////////\n" );
}
- for ( size_t i = 0; i < res_total; i++ ) {
+ for ( Size_t i = 0; i < res_total; i++ ) {
utf16_strncpy ( predict_buf[i], npre_items_[i].pre_hzs,
kMaxPredictSize );
predict_buf[i][kMaxPredictSize] = '\0';
@@ -1690,10 +1689,10 @@ namespace ime_pinyin {
return res_total;
}
- size_t MatrixSearch::get_predicts ( const char16 fixed_buf[],
+ Size_t MatrixSearch::get_predicts ( const char16 fixed_buf[],
char16 predict_buf[][kMaxPredictSize + 1],
- size_t buf_len ) {
- size_t fixed_len = utf16_strlen ( fixed_buf );
+ Size_t buf_len ) {
+ Size_t fixed_len = utf16_strlen ( fixed_buf );
if ( 0 == fixed_len || fixed_len > kMaxPredictSize || 0 == buf_len )
{ return 0; }
return inner_predict ( fixed_buf, fixed_len, predict_buf, buf_len );
diff --git a/jni/share/mystdlib.cpp b/jni/share/mystdlib.cpp
index 86e069c..6b47c17 100755..100644
--- a/jni/share/mystdlib.cpp
+++ b/jni/share/mystdlib.cpp
@@ -15,19 +15,19 @@
*/
#include <stdlib.h>
-
+#include "../include/utf16char.h"
namespace ime_pinyin {
// For debug purpose. You can add a fixed version of qsort and bsearch functions
// here so that the output will be totally the same under different platforms.
- void myqsort ( void *p, size_t n, size_t es,
+ void myqsort ( void *p, Size_t n, Size_t es,
int ( *cmp ) ( const void *, const void * ) ) {
qsort ( p, n, es, cmp );
}
void *mybsearch ( const void *k, const void *b,
- size_t n, size_t es,
+ Size_t n, Size_t es,
int ( *cmp ) ( const void *, const void * ) ) {
return bsearch ( k, b, n, es, cmp );
}
diff --git a/jni/share/ngram.cpp b/jni/share/ngram.cpp
index 1ccbf34..8771cfc 100755..100644
--- a/jni/share/ngram.cpp
+++ b/jni/share/ngram.cpp
@@ -55,10 +55,10 @@ namespace ime_pinyin {
{ return qsearch_nearest ( code_book, freq, mid, end ); }
}
- size_t update_code_idx ( double freqs[], size_t num, double code_book[],
+ Size_t update_code_idx ( double freqs[], Size_t num, double code_book[],
CODEBOOK_TYPE *code_idx ) {
- size_t changed = 0;
- for ( size_t pos = 0; pos < num; pos++ ) {
+ Size_t changed = 0;
+ for ( Size_t pos = 0; pos < num; pos++ ) {
CODEBOOK_TYPE idx;
idx = qsearch_nearest ( code_book, freqs[pos], 0, kCodeBookSize - 1 );
if ( idx != code_idx[pos] )
@@ -68,21 +68,21 @@ namespace ime_pinyin {
return changed;
}
- double recalculate_kernel ( double freqs[], size_t num, double code_book[],
+ double recalculate_kernel ( double freqs[], Size_t num, double code_book[],
CODEBOOK_TYPE *code_idx ) {
double ret = 0;
- size_t *item_num = new size_t[kCodeBookSize];
+ Size_t *item_num = new Size_t[kCodeBookSize];
assert ( item_num );
- memset ( item_num, 0, sizeof ( size_t ) * kCodeBookSize );
+ memset ( item_num, 0, sizeof ( Size_t ) * kCodeBookSize );
double *cb_new = new double[kCodeBookSize];
assert ( cb_new );
memset ( cb_new, 0, sizeof ( double ) * kCodeBookSize );
- for ( size_t pos = 0; pos < num; pos++ ) {
+ for ( Size_t pos = 0; pos < num; pos++ ) {
ret += distance ( freqs[pos], code_book[code_idx[pos]] );
cb_new[code_idx[pos]] += freqs[pos];
item_num[code_idx[pos]] += 1;
}
- for ( size_t code = 0; code < kCodeBookSize; code++ ) {
+ for ( Size_t code = 0; code < kCodeBookSize; code++ ) {
assert ( item_num[code] > 0 );
code_book[code] = cb_new[code] / item_num[code];
}
@@ -91,12 +91,12 @@ namespace ime_pinyin {
return ret;
}
- void iterate_codes ( double freqs[], size_t num, double code_book[],
+ void iterate_codes ( double freqs[], Size_t num, double code_book[],
CODEBOOK_TYPE *code_idx ) {
- size_t iter_num = 0;
+ Size_t iter_num = 0;
double delta_last = 0;
do {
- size_t changed = update_code_idx ( freqs, num, code_book, code_idx );
+ Size_t changed = update_code_idx ( freqs, num, code_book, code_idx );
double delta = recalculate_kernel ( freqs, num, code_book, code_idx );
if ( kPrintDebug0 ) {
printf ( "---Unigram codebook iteration: %d : %d, %.9f\n",
@@ -146,7 +146,7 @@ namespace ime_pinyin {
{ return false; }
if ( 0 == idx_num_ || NULL == freq_codes_ || NULL == lma_freq_idx_ )
{ return false; }
- if ( fwrite ( &idx_num_, sizeof ( size_t ), 1, fp ) != 1 )
+ if ( fwrite ( &idx_num_, sizeof ( Size_t ), 1, fp ) != 1 )
{ return false; }
if ( fwrite ( freq_codes_, sizeof ( LmaScoreType ), kCodeBookSize, fp ) !=
kCodeBookSize )
@@ -160,7 +160,7 @@ namespace ime_pinyin {
if ( NULL == fp )
{ return false; }
initialized_ = false;
- if ( fread ( &idx_num_, sizeof ( size_t ), 1, fp ) != 1 )
+ if ( fread ( &idx_num_, sizeof ( Size_t ), 1, fp ) != 1 )
{ return false; }
if ( NULL != lma_freq_idx_ )
{ free ( lma_freq_idx_ ); }
@@ -182,7 +182,7 @@ namespace ime_pinyin {
return true;
}
- void NGram::set_total_freq_none_sys ( size_t freq_none_sys ) {
+ void NGram::set_total_freq_none_sys ( Size_t freq_none_sys ) {
total_freq_none_sys_ = freq_none_sys;
if ( 0 == total_freq_none_sys_ ) {
sys_score_compensation_ = 0;
@@ -210,7 +210,7 @@ namespace ime_pinyin {
}
#ifdef ___BUILD_MODEL___
- bool NGram::build_unigram ( LemmaEntry *lemma_arr, size_t lemma_num,
+ bool NGram::build_unigram ( LemmaEntry *lemma_arr, Size_t lemma_num,
LemmaIdType next_idx_unused ) {
if ( NULL == lemma_arr || 0 == lemma_num || next_idx_unused <= 1 )
{ return false; }
@@ -221,7 +221,7 @@ namespace ime_pinyin {
freqs[0] = ADD_COUNT;
total_freq += freqs[0];
LemmaIdType idx_now = 0;
- for ( size_t pos = 0; pos < lemma_num; pos++ ) {
+ for ( Size_t pos = 0; pos < lemma_num; pos++ ) {
if ( lemma_arr[pos].idx_by_hz == idx_now )
{ continue; }
idx_now++;
@@ -234,7 +234,7 @@ namespace ime_pinyin {
double max_freq = 0;
idx_num_ = idx_now + 1;
assert ( idx_now + 1 == next_idx_unused );
- for ( size_t pos = 0; pos < idx_num_; pos++ ) {
+ for ( Size_t pos = 0; pos < idx_num_; pos++ ) {
freqs[pos] = freqs[pos] / total_freq;
assert ( freqs[pos] > 0 );
if ( freqs[pos] > max_freq )
@@ -249,13 +249,13 @@ namespace ime_pinyin {
{ freq_codes_ = new LmaScoreType[kCodeBookSize]; }
assert ( freq_codes_ );
memset ( freq_codes_, 0, sizeof ( LmaScoreType ) * kCodeBookSize );
- size_t freq_pos = 0;
- for ( size_t code_pos = 0; code_pos < kCodeBookSize; code_pos++ ) {
+ Size_t freq_pos = 0;
+ for ( Size_t code_pos = 0; code_pos < kCodeBookSize; code_pos++ ) {
bool found = true;
while ( found ) {
found = false;
double cand = freqs[freq_pos];
- for ( size_t i = 0; i < code_pos; i++ )
+ for ( Size_t i = 0; i < code_pos; i++ )
if ( freq_codes_df_[i] == cand ) {
found = true;
break;
@@ -275,7 +275,7 @@ namespace ime_pinyin {
if ( kPrintDebug0 ) {
printf ( "\n------Language Model Unigram Codebook------\n" );
}
- for ( size_t code_pos = 0; code_pos < kCodeBookSize; code_pos++ ) {
+ for ( Size_t code_pos = 0; code_pos < kCodeBookSize; code_pos++ ) {
double log_score = log ( freq_codes_df_[code_pos] );
float final_score = convert_psb_to_score ( freq_codes_df_[code_pos] );
if ( kPrintDebug0 ) {
diff --git a/jni/share/pinyinime.cpp b/jni/share/pinyinime.cpp
index a72f3b4..5589d6a 100755..100644
--- a/jni/share/pinyinime.cpp
+++ b/jni/share/pinyinime.cpp
@@ -13,7 +13,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
#include <stdlib.h>
#include "../include/pinyinime.h"
#include "../include/dicttrie.h"
@@ -27,7 +26,7 @@ extern "C" {
using namespace ime_pinyin;
// The maximum number of the prediction items.
- static const size_t kMaxPredictNum = 500;
+ static const Size_t kMaxPredictNum = 500;
// Used to search Pinyin string and give the best candidate.
MatrixSearch *matrix_search = NULL;
@@ -62,7 +61,7 @@ extern "C" {
matrix_search = NULL;
}
- void im_set_max_lens ( size_t max_sps_len, size_t max_hzs_len ) {
+ void im_set_max_lens ( Size_t max_sps_len, Size_t max_hzs_len ) {
if ( NULL != matrix_search ) {
matrix_search->set_max_lens ( max_sps_len, max_hzs_len );
}
@@ -74,14 +73,14 @@ extern "C" {
}
// To be updated.
- size_t im_search ( const char *pybuf, size_t pylen ) {
+ Size_t im_search ( const char *pybuf, Size_t pylen ) {
if ( NULL == matrix_search )
{ return 0; }
matrix_search->search ( pybuf, pylen );
return matrix_search->get_candidate_num();
}
- size_t im_delsearch ( size_t pos, bool is_pos_in_splid,
+ Size_t im_delsearch ( Size_t pos, bool is_pos_in_splid,
bool clear_fixed_this_step ) {
if ( NULL == matrix_search )
{ return 0; }
@@ -96,42 +95,45 @@ extern "C" {
}
// To be removed
- size_t im_add_letter ( char ch ) {
+ Size_t im_add_letter ( char ch ) {
return 0;
}
- const char *im_get_sps_str ( size_t *decoded_len ) {
+ const char *im_get_sps_str ( Size_t *decoded_len )
+ {
if ( NULL == matrix_search )
- { return NULL; }
+ {
+ return NULL;
+ }
return matrix_search->get_pystr ( decoded_len );
}
- char16 *im_get_candidate ( size_t cand_id, char16 *cand_str,
- size_t max_len ) {
+ char16 *im_get_candidate ( Size_t cand_id, char16 *cand_str,
+ Size_t max_len ) {
if ( NULL == matrix_search )
{ return NULL; }
return matrix_search->get_candidate ( cand_id, cand_str, max_len );
}
- size_t im_get_spl_start_pos ( const uint16 *&spl_start ) {
+ Size_t im_get_spl_start_pos ( const uint16 *&spl_start ) {
if ( NULL == matrix_search )
{ return 0; }
return matrix_search->get_spl_start ( spl_start );
}
- size_t im_choose ( size_t choice_id ) {
+ Size_t im_choose ( Size_t choice_id ) {
if ( NULL == matrix_search )
{ return 0; }
return matrix_search->choose ( choice_id );
}
- size_t im_cancel_last_choice() {
+ Size_t im_cancel_last_choice() {
if ( NULL == matrix_search )
{ return 0; }
return matrix_search->cancel_last_choice();
}
- size_t im_get_fixed_len() {
+ Size_t im_get_fixed_len() {
if ( NULL == matrix_search )
{ return 0; }
return matrix_search->get_fixedlen();
@@ -143,11 +145,11 @@ extern "C" {
}
- size_t im_get_predicts ( const char16 *his_buf,
+ Size_t im_get_predicts ( const char16 *his_buf,
char16 ( *&pre_buf ) [kMaxPredictSize + 1] ) {
if ( NULL == his_buf )
{ return 0; }
- size_t fixed_len = utf16_strlen ( his_buf );
+ Size_t fixed_len = utf16_strlen ( his_buf );
const char16 *fixed_ptr = his_buf;
if ( fixed_len > kMaxPredictSize ) {
fixed_ptr += fixed_len - kMaxPredictSize;
diff --git a/jni/share/searchutility.cpp b/jni/share/searchutility.cpp
index e1d46d5..2708992 100755..100644
--- a/jni/share/searchutility.cpp
+++ b/jni/share/searchutility.cpp
@@ -48,8 +48,8 @@ namespace ime_pinyin {
// The real unified psb is psb1 / lma_len1 and psb2 / lma_len2
// But we use psb1 * lma_len2 and psb2 * lma_len1 to get better
// precision.
- size_t up1 = item1->psb * ( item2->lma_len );
- size_t up2 = item2->psb * ( item1->lma_len );
+ Size_t up1 = item1->psb * ( item2->lma_len );
+ Size_t up2 = item2->psb * ( item1->lma_len );
if ( up1 < up2 ) {
return -1;
}
@@ -170,12 +170,12 @@ namespace ime_pinyin {
return 0;
}
- size_t remove_duplicate_npre ( NPredictItem *npre_items, size_t npre_num ) {
+ Size_t remove_duplicate_npre ( NPredictItem *npre_items, Size_t npre_num ) {
if ( NULL == npre_items || 0 == npre_num )
{ return 0; }
myqsort ( npre_items, npre_num, sizeof ( NPredictItem ), cmp_npre_by_hanzi_score );
- size_t remain_num = 1; // The first one is reserved.
- for ( size_t pos = 1; pos < npre_num; pos++ ) {
+ Size_t remain_num = 1; // The first one is reserved.
+ for ( Size_t pos = 1; pos < npre_num; pos++ ) {
if ( utf16_strncmp ( npre_items[pos].pre_hzs,
npre_items[remain_num - 1].pre_hzs,
kMaxPredictSize ) != 0 ) {
@@ -188,8 +188,8 @@ namespace ime_pinyin {
return remain_num;
}
- size_t align_to_size_t ( size_t size ) {
- size_t s = sizeof ( size_t );
+ Size_t align_to_Size_t ( Size_t size ) {
+ Size_t s = sizeof ( Size_t );
return ( size + s - 1 ) / s * s;
}
diff --git a/jni/share/spellingtable.cpp b/jni/share/spellingtable.cpp
index 29b8d3d..5d497fb 100755..100644
--- a/jni/share/spellingtable.cpp
+++ b/jni/share/spellingtable.cpp
@@ -39,12 +39,12 @@ namespace ime_pinyin {
( static_cast<const RawSpelling *> ( p2 ) )->str );
}
- size_t get_odd_next ( size_t value ) {
- size_t v_next = value;
+ Size_t get_odd_next ( Size_t value ) {
+ Size_t v_next = value;
while ( true ) {
- size_t v_next_sqrt = ( size_t ) sqrt ( v_next );
+ Size_t v_next_sqrt = ( Size_t ) sqrt ( v_next );
bool is_odd = true;
- for ( size_t v_dv = 2; v_dv < v_next_sqrt + 1; v_dv++ ) {
+ for ( Size_t v_dv = 2; v_dv < v_next_sqrt + 1; v_dv++ ) {
if ( v_next % v_dv == 0 ) {
is_odd = false;
break;
@@ -71,18 +71,18 @@ namespace ime_pinyin {
free_resource();
}
- size_t SpellingTable::get_hash_pos ( const char *spelling_str ) {
- size_t hash_pos = 0;
- for ( size_t pos = 0; pos < spelling_size_; pos++ ) {
+ Size_t SpellingTable::get_hash_pos ( const char *spelling_str ) {
+ Size_t hash_pos = 0;
+ for ( Size_t pos = 0; pos < spelling_size_; pos++ ) {
if ( '\0' == spelling_str[pos] )
{ break; }
- hash_pos += ( size_t ) spelling_str[pos];
+ hash_pos += ( Size_t ) spelling_str[pos];
}
hash_pos = hash_pos % spelling_max_num_;
return hash_pos;
}
- size_t SpellingTable::hash_pos_next ( size_t hash_pos ) {
+ Size_t SpellingTable::hash_pos_next ( Size_t hash_pos ) {
hash_pos += 123;
hash_pos = hash_pos % spelling_max_num_;
return hash_pos;
@@ -97,7 +97,7 @@ namespace ime_pinyin {
spelling_buf_ = NULL;
}
- bool SpellingTable::init_table ( size_t pure_spl_size, size_t spl_max_num,
+ bool SpellingTable::init_table ( Size_t pure_spl_size, Size_t spl_max_num,
bool need_score ) {
if ( pure_spl_size == 0 || spl_max_num == 0 )
{ return false; }
@@ -124,20 +124,20 @@ namespace ime_pinyin {
bool SpellingTable::put_spelling ( const char *spelling_str, double freq ) {
if ( frozen_ || NULL == spelling_str )
{ return false; }
- for ( size_t pos = 0; pos < kNotSupportNum; pos++ ) {
+ for ( Size_t pos = 0; pos < kNotSupportNum; pos++ ) {
if ( strcmp ( spelling_str, kNotSupportList[pos] ) == 0 ) {
return false;
}
}
total_freq_ += freq;
- size_t hash_pos = get_hash_pos ( spelling_str );
+ Size_t hash_pos = get_hash_pos ( spelling_str );
raw_spellings_[hash_pos].str[spelling_size_ - 1] = '\0';
if ( strncmp ( raw_spellings_[hash_pos].str, spelling_str,
spelling_size_ - 1 ) == 0 ) {
raw_spellings_[hash_pos].freq += freq;
return true;
}
- size_t hash_pos_ori = hash_pos;
+ Size_t hash_pos_ori = hash_pos;
while ( true ) {
if ( strncmp ( raw_spellings_[hash_pos].str,
spelling_str, spelling_size_ - 1 ) == 0 ) {
@@ -162,13 +162,13 @@ namespace ime_pinyin {
bool SpellingTable::contain ( const char *spelling_str ) {
if ( NULL == spelling_str || NULL == spelling_buf_ || frozen_ )
{ return false; }
- size_t hash_pos = get_hash_pos ( spelling_str );
+ Size_t hash_pos = get_hash_pos ( spelling_str );
if ( '\0' == raw_spellings_[hash_pos].str[0] )
{ return false; }
if ( strncmp ( raw_spellings_[hash_pos].str, spelling_str, spelling_size_ - 1 )
== 0 )
{ return true; }
- size_t hash_pos_ori = hash_pos;
+ Size_t hash_pos_ori = hash_pos;
while ( true ) {
hash_pos = hash_pos_next ( hash_pos );
if ( hash_pos_ori == hash_pos )
@@ -183,7 +183,7 @@ namespace ime_pinyin {
return false;
}
- const char *SpellingTable::arrange ( size_t *item_size, size_t *spl_num ) {
+ const char *SpellingTable::arrange ( Size_t *item_size, Size_t *spl_num ) {
if ( NULL == raw_spellings_ || NULL == spelling_buf_ ||
NULL == item_size || NULL == spl_num )
{ return NULL; }
@@ -191,7 +191,7 @@ namespace ime_pinyin {
compare_raw_spl_eb );
// After sorting, only the first spelling_num_ items are valid.
// Copy them to the destination buffer.
- for ( size_t pos = 0; pos < spelling_num_; pos++ ) {
+ for ( Size_t pos = 0; pos < spelling_num_; pos++ ) {
strncpy ( spelling_buf_ + pos * spelling_size_, raw_spellings_[pos].str,
spelling_size_ );
}
@@ -201,7 +201,7 @@ namespace ime_pinyin {
double max_score = 0;
double min_score = 0;
// After sorting, only the first spelling_num_ items are valid.
- for ( size_t pos = 0; pos < spelling_num_; pos++ ) {
+ for ( Size_t pos = 0; pos < spelling_num_; pos++ ) {
raw_spellings_[pos].freq /= total_freq_;
if ( need_score_ ) {
if ( 0 == pos ) {
@@ -226,7 +226,7 @@ namespace ime_pinyin {
// both of them are negative after log function.
score_amplifier_ = 1.0 * 255 / min_score;
double average_score = 0;
- for ( size_t pos = 0; pos < spelling_num_; pos++ ) {
+ for ( Size_t pos = 0; pos < spelling_num_; pos++ ) {
double score = log ( raw_spellings_[pos].freq ) * score_amplifier_;
assert ( score >= 0 );
average_score += score;
diff --git a/jni/share/spellingtrie.cpp b/jni/share/spellingtrie.cpp
index 0c0dbc1..92977e1 100644
--- a/jni/share/spellingtrie.cpp
+++ b/jni/share/spellingtrie.cpp
@@ -13,7 +13,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
#include <stdio.h>
#include <string.h>
#include <assert.h>
@@ -239,15 +238,15 @@ namespace ime_pinyin {
void SpellingTrie::free_son_trie ( SpellingNode *node ) {
if ( NULL == node )
{ return; }
- for ( size_t pos = 0; pos < node->num_of_son; pos++ ) {
+ for ( Size_t pos = 0; pos < node->num_of_son; pos++ ) {
free_son_trie ( node->first_son + pos );
}
if ( NULL != node->first_son )
{ delete [] node->first_son; }
}
- bool SpellingTrie::construct ( const char *spelling_arr, size_t item_size,
- size_t item_num, float score_amplifier,
+ bool SpellingTrie::construct ( const char *spelling_arr, Size_t item_size,
+ Size_t item_num, float score_amplifier,
unsigned char average_score ) {
if ( spelling_arr == NULL )
{ return false; }
@@ -336,8 +335,8 @@ namespace ime_pinyin {
assert ( sucess );
}
}
- size_t ym_item_size; // '\0' is included
- size_t ym_num;
+ Size_t ym_item_size; // '\0' is included
+ Size_t ym_num;
const char *ym_buf;
ym_buf = spl_table->arrange ( &ym_item_size, &ym_num );
if ( NULL != ym_buf_ )
@@ -374,7 +373,7 @@ namespace ime_pinyin {
#endif
SpellingNode *SpellingTrie::construct_spellings_subset (
- size_t item_start, size_t item_end, size_t level, SpellingNode *parent ) {
+ Size_t item_start, Size_t item_end, Size_t level, SpellingNode *parent ) {
if ( level >= spelling_size_ || item_end <= item_start || NULL == parent )
{ return NULL; }
SpellingNode *first_son = NULL;
@@ -385,7 +384,7 @@ namespace ime_pinyin {
assert ( char_for_node >= 'A' && char_for_node <= 'Z' ||
'h' == char_for_node );
// Scan the array to find how many sons
- for ( size_t i = item_start + 1; i < item_end; i++ ) {
+ for ( Size_t i = item_start + 1; i < item_end; i++ ) {
const char *spelling_current = spelling_buf_ + spelling_size_ * i;
char char_current = spelling_current[level];
if ( char_current != char_for_node ) {
@@ -401,14 +400,14 @@ namespace ime_pinyin {
first_son = new SpellingNode[num_of_son];
memset ( first_son, 0, sizeof ( SpellingNode ) *num_of_son );
// Now begin construct tree
- size_t son_pos = 0;
+ Size_t son_pos = 0;
spelling_last_start = spelling_buf_ + spelling_size_ * item_start;
char_for_node = spelling_last_start[level];
bool spelling_endable = true;
if ( spelling_last_start[level + 1] != '\0' )
{ spelling_endable = false; }
- size_t item_start_next = item_start;
- for ( size_t i = item_start + 1; i < item_end; i++ ) {
+ Size_t item_start_next = item_start;
+ for ( Size_t i = item_start + 1; i < item_end; i++ ) {
const char *spelling_current = spelling_buf_ + spelling_size_ * i;
char char_current = spelling_current[level];
assert ( is_valid_spl_char ( char_current ) );
@@ -423,7 +422,7 @@ namespace ime_pinyin {
node_current->spelling_idx = kFullSplIdStart + item_start_next;
}
if ( spelling_last_start[level + 1] != '\0' || i - item_start_next > 1 ) {
- size_t real_start = item_start_next;
+ Size_t real_start = item_start_next;
if ( spelling_last_start[level + 1] == '\0' )
{ real_start++; }
node_current->first_son =
@@ -495,7 +494,7 @@ namespace ime_pinyin {
}
if ( spelling_last_start[level + 1] != '\0' ||
item_end - item_start_next > 1 ) {
- size_t real_start = item_start_next;
+ Size_t real_start = item_start_next;
if ( spelling_last_start[level + 1] == '\0' )
{ real_start++; }
node_current->first_son =
@@ -569,12 +568,14 @@ namespace ime_pinyin {
}
bool SpellingTrie::load_spl_trie ( FILE *fp ) {
+ spelling_num_=0;
if ( NULL == fp )
{ return false; }
- if ( fread ( &spelling_size_, sizeof ( size_t ), 1, fp ) != 1 )
- { return false; }
- if ( fread ( &spelling_num_, sizeof ( size_t ), 1, fp ) != 1 )
+ if ( fread ( &spelling_size_, sizeof ( Size_t ), 1, fp ) != 1 )
{ return false; }
+ if ( fread ( &spelling_num_, sizeof ( Size_t ), 1, fp ) != 1 )
+ {return false; }
+
if ( fread ( &score_amplifier_, sizeof ( float ), 1, fp ) != 1 )
{ return false; }
if ( fread ( &average_score_, sizeof ( unsigned char ), 1, fp ) != 1 )
@@ -583,10 +584,11 @@ namespace ime_pinyin {
{ delete [] spelling_buf_; }
spelling_buf_ = new char[spelling_size_ * spelling_num_];
if ( NULL == spelling_buf_ )
- { return false; }
+ return false;
if ( fread ( spelling_buf_, sizeof ( char ) * spelling_size_,
- spelling_num_, fp ) != spelling_num_ )
- { return false; }
+ spelling_num_, fp ) != spelling_num_ ){
+ return false;
+ }
return construct ( spelling_buf_, spelling_size_, spelling_num_,
score_amplifier_, average_score_ );
}
@@ -605,7 +607,7 @@ namespace ime_pinyin {
return true;
}
- size_t SpellingTrie::get_spelling_num() {
+ Size_t SpellingTrie::get_spelling_num() {
return spelling_num_;
}
@@ -647,7 +649,7 @@ namespace ime_pinyin {
splstr16_queried_[0] = '\0';
if ( splid >= kFullSplIdStart ) {
splid -= kFullSplIdStart;
- for ( size_t pos = 0; pos < spelling_size_; pos++ ) {
+ for ( Size_t pos = 0; pos < spelling_size_; pos++ ) {
splstr16_queried_[pos] = static_cast<char16>
( spelling_buf_[splid * spelling_size_ + pos] );
}
@@ -676,12 +678,12 @@ namespace ime_pinyin {
return splstr16_queried_;
}
- size_t SpellingTrie::get_spelling_str16 ( uint16 splid, char16 *splstr16,
- size_t splstr16_len ) {
+ Size_t SpellingTrie::get_spelling_str16 ( uint16 splid, char16 *splstr16,
+ Size_t splstr16_len ) {
if ( NULL == splstr16 || splstr16_len < kMaxPinyinSize + 1 ) { return 0; }
if ( splid >= kFullSplIdStart ) {
splid -= kFullSplIdStart;
- for ( size_t pos = 0; pos <= kMaxPinyinSize; pos++ ) {
+ for ( Size_t pos = 0; pos <= kMaxPinyinSize; pos++ ) {
splstr16[pos] = static_cast<char16>
( spelling_buf_[splid * spelling_size_ + pos] );
if ( static_cast<char16> ( '\0' ) == splstr16[pos] ) {
diff --git a/jni/share/splparser.cpp b/jni/share/splparser.cpp
index b9ccf22..b9ccf22 100755..100644
--- a/jni/share/splparser.cpp
+++ b/jni/share/splparser.cpp
diff --git a/jni/share/sync.cpp b/jni/share/sync.cpp
index 00f61da..00f61da 100755..100644
--- a/jni/share/sync.cpp
+++ b/jni/share/sync.cpp
diff --git a/jni/share/userdict.cpp b/jni/share/userdict.cpp
index a762a3f..02f8491 100755..100644
--- a/jni/share/userdict.cpp
+++ b/jni/share/userdict.cpp
@@ -317,7 +317,7 @@ namespace ime_pinyin {
return true;
}
- size_t UserDict::number_of_lemmas() {
+ Size_t UserDict::number_of_lemmas() {
return dict_info_.lemma_count;
}
@@ -328,7 +328,7 @@ namespace ime_pinyin {
MileStoneHandle UserDict::extend_dict ( MileStoneHandle from_handle,
const DictExtPara *dep,
LmaPsbItem *lpi_items,
- size_t lpi_max, size_t *lpi_num ) {
+ Size_t lpi_max, Size_t *lpi_num ) {
if ( is_valid_state() == false )
{ return 0; }
bool need_extend = false;
@@ -464,14 +464,14 @@ namespace ime_pinyin {
}
}
- size_t UserDict::get_lpis ( const uint16 *splid_str, uint16 splid_str_len,
- LmaPsbItem *lpi_items, size_t lpi_max ) {
+ Size_t UserDict::get_lpis ( const uint16 *splid_str, uint16 splid_str_len,
+ LmaPsbItem *lpi_items, Size_t lpi_max ) {
return _get_lpis ( splid_str, splid_str_len, lpi_items, lpi_max, NULL );
}
- size_t UserDict::_get_lpis ( const uint16 *splid_str,
+ Size_t UserDict::_get_lpis ( const uint16 *splid_str,
uint16 splid_str_len, LmaPsbItem *lpi_items,
- size_t lpi_max, bool *need_extend ) {
+ Size_t lpi_max, bool *need_extend ) {
bool tmp_extend;
if ( !need_extend )
{ need_extend = &tmp_extend; }
@@ -516,10 +516,10 @@ namespace ime_pinyin {
#endif
return 0;
}
- size_t lpi_current = 0;
+ Size_t lpi_current = 0;
bool fuzzy_break = false;
bool prefix_break = false;
- while ( ( size_t ) middle < max_off && !fuzzy_break && !prefix_break ) {
+ while ( ( Size_t ) middle < max_off && !fuzzy_break && !prefix_break ) {
if ( lpi_current >= lpi_max )
{ break; }
uint32 offset = offsets_[middle];
@@ -595,9 +595,9 @@ namespace ime_pinyin {
return i;
}
- size_t UserDict::predict ( const char16 last_hzs[], uint16 hzs_len,
- NPredictItem *npre_items, size_t npre_max,
- size_t b4_used ) {
+ Size_t UserDict::predict ( const char16 last_hzs[], uint16 hzs_len,
+ NPredictItem *npre_items, Size_t npre_max,
+ Size_t b4_used ) {
uint32 new_added = 0;
#ifdef ___PREDICT_ENABLED___
int32 end = dict_info_.lemma_count - 1;
@@ -929,7 +929,7 @@ namespace ime_pinyin {
return false;
}
uint32 version = kUserDictVersion;
- size_t wred = fwrite ( &version, 1, 4, fp );
+ Size_t wred = fwrite ( &version, 1, 4, fp );
UserDictInfo info;
memset ( &info, 0, sizeof ( info ) );
// By default, no limitation for lemma count and size
@@ -951,8 +951,8 @@ namespace ime_pinyin {
if ( !fp ) {
return false;
}
- size_t size;
- size_t readed;
+ Size_t size;
+ Size_t readed;
uint32 version;
UserDictInfo dict_info;
// validate
@@ -1004,7 +1004,7 @@ namespace ime_pinyin {
pthread_mutex_unlock ( &g_mutex_ );
return false;
}
- size_t readed, toread;
+ Size_t readed, toread;
UserDictInfo dict_info;
uint8 *lemmas = NULL;
uint32 *offsets = NULL;
@@ -1017,7 +1017,7 @@ namespace ime_pinyin {
#ifdef ___PREDICT_ENABLED___
uint32 *predicts = NULL;
#endif
- size_t i;
+ Size_t i;
int err;
err = fseek ( fp, -1 * sizeof ( dict_info ), SEEK_END );
if ( err ) { goto error; }
@@ -1210,7 +1210,7 @@ namespace ime_pinyin {
if ( err == -1 )
{ return; }
// New lemmas are always appended, no need to write whole lemma block
- size_t need_write = kUserDictPreAlloc *
+ Size_t need_write = kUserDictPreAlloc *
( 2 + ( kUserDictAverageNchar << 2 ) ) - lemma_size_left_;
err = lseek ( fd, dict_info_.lemma_size - need_write, SEEK_CUR );
if ( err == -1 )
@@ -1395,8 +1395,8 @@ namespace ime_pinyin {
if ( is_valid_state() == false )
{ return; }
// Fixup offsets_, set REMOVE flag to lemma's flag if needed
- size_t first_freed = 0;
- size_t first_inuse = 0;
+ Size_t first_freed = 0;
+ Size_t first_inuse = 0;
while ( first_freed < dict_info_.lemma_count ) {
// Find first freed offset
while ( ( offsets_[first_freed] & kUserDictOffsetFlagRemove ) == 0 &&
@@ -1468,12 +1468,12 @@ namespace ime_pinyin {
#endif
dict_info_.lemma_count = first_freed;
// Fixup lemmas_
- size_t begin = 0;
- size_t end = 0;
- size_t dst = 0;
+ Size_t begin = 0;
+ Size_t end = 0;
+ Size_t dst = 0;
int total_size = dict_info_.lemma_size + lemma_size_left_;
int total_count = dict_info_.lemma_count + lemma_count_left_;
- size_t real_size = total_size - lemma_size_left_;
+ Size_t real_size = total_size - lemma_size_left_;
while ( dst < real_size ) {
unsigned char flag = get_lemma_flag ( dst );
unsigned char nchr = get_lemma_nchar ( dst );
@@ -1509,7 +1509,7 @@ namespace ime_pinyin {
break;
}
memmove ( lemmas_ + dst, lemmas_ + begin, end - begin );
- for ( size_t j = 0; j < dict_info_.lemma_count; j++ ) {
+ for ( Size_t j = 0; j < dict_info_.lemma_count; j++ ) {
if ( offsets_[j] >= begin && offsets_[j] < end ) {
offsets_[j] -= ( begin - dst );
offsets_by_id_[ids_[j] - start_id_] = offsets_[j];
@@ -1521,7 +1521,7 @@ namespace ime_pinyin {
#endif
}
#ifdef ___SYNC_ENABLED___
- for ( size_t j = 0; j < dict_info_.sync_count; j++ ) {
+ for ( Size_t j = 0; j < dict_info_.sync_count; j++ ) {
if ( syncs_[j] >= begin && syncs_[j] < end ) {
syncs_[j] -= ( begin - dst );
}
@@ -1933,7 +1933,7 @@ namespace ime_pinyin {
}
int flushed = 0;
if ( lemma_count_left_ == 0 ||
- lemma_size_left_ < ( size_t ) ( 2 + ( lemma_len << 2 ) ) ) {
+ lemma_size_left_ < ( Size_t ) ( 2 + ( lemma_len << 2 ) ) ) {
// XXX When there is no space for new lemma, we flush to disk
// flush_cache() may be called by upper user
// and better place shoule be found instead of here
@@ -2015,23 +2015,23 @@ namespace ime_pinyin {
return 0;
}
- size_t UserDict::get_total_lemma_count() {
+ Size_t UserDict::get_total_lemma_count() {
return dict_info_.total_nfreq;
}
- void UserDict::set_total_lemma_count_of_others ( size_t count ) {
+ void UserDict::set_total_lemma_count_of_others ( Size_t count ) {
total_other_nfreq_ = count;
}
LemmaIdType UserDict::append_a_lemma ( char16 lemma_str[], uint16 splids[],
uint16 lemma_len, uint16 count, uint64 lmt ) {
LemmaIdType id = get_max_lemma_id() + 1;
- size_t offset = dict_info_.lemma_size;
+ Size_t offset = dict_info_.lemma_size;
if ( offset > kUserDictOffsetMask )
{ return 0; }
lemmas_[offset] = 0;
lemmas_[offset + 1] = ( uint8 ) lemma_len;
- for ( size_t i = 0; i < lemma_len; i++ ) {
+ for ( Size_t i = 0; i < lemma_len; i++ ) {
* ( ( uint16 * ) &lemmas_[offset + 2 + ( i << 1 )] ) = splids[i];
* ( ( char16 * ) &lemmas_[offset + 2 + ( lemma_len << 1 ) + ( i << 1 )] )
= lemma_str[i];
@@ -2051,7 +2051,7 @@ namespace ime_pinyin {
// Sort
UserDictSearchable searchable;
prepare_locate ( &searchable, splids, lemma_len );
- size_t i = 0;
+ Size_t i = 0;
while ( i < off ) {
offset = offsets_[i];
uint32 nchar = get_lemma_nchar ( offset );
diff --git a/jni/share/utf16char.cpp b/jni/share/utf16char.cpp
index a295a18..9a6bd74 100755..100644
--- a/jni/share/utf16char.cpp
+++ b/jni/share/utf16char.cpp
@@ -23,13 +23,13 @@ namespace ime_pinyin {
extern "C" {
#endif
- char16 *utf16_strtok ( char16 *utf16_str, size_t *token_size,
+ char16 *utf16_strtok ( char16 *utf16_str, Size_t *token_size,
char16 **utf16_str_next ) {
if ( NULL == utf16_str || NULL == token_size || NULL == utf16_str_next ) {
return NULL;
}
// Skip the splitters
- size_t pos = 0;
+ Size_t pos = 0;
while ( ( char16 ) ' ' == utf16_str[pos] || ( char16 ) '\n' == utf16_str[pos]
|| ( char16 ) '\t' == utf16_str[pos] )
{ pos++; }
@@ -58,7 +58,7 @@ namespace ime_pinyin {
{ return 0; }
int value = 0;
int sign = 1;
- size_t pos = 0;
+ Size_t pos = 0;
if ( ( char16 ) '-' == utf16_str[pos] ) {
sign = -1;
pos++;
@@ -79,24 +79,24 @@ namespace ime_pinyin {
return atof ( char8 );
}
- size_t utf16_strlen ( const char16 *utf16_str ) {
+ Size_t utf16_strlen ( const char16 *utf16_str ) {
if ( NULL == utf16_str )
{ return 0; }
- size_t size = 0;
+ Size_t size = 0;
while ( ( char16 ) '\0' != utf16_str[size] )
{ size++; }
return size;
}
int utf16_strcmp ( const char16 *str1, const char16 *str2 ) {
- size_t pos = 0;
+ Size_t pos = 0;
while ( str1[pos] == str2[pos] && ( char16 ) '\0' != str1[pos] )
{ pos++; }
return static_cast<int> ( str1[pos] ) - static_cast<int> ( str2[pos] );
}
- int utf16_strncmp ( const char16 *str1, const char16 *str2, size_t size ) {
- size_t pos = 0;
+ int utf16_strncmp ( const char16 *str1, const char16 *str2, Size_t size ) {
+ Size_t pos = 0;
while ( pos < size && str1[pos] == str2[pos] && ( char16 ) '\0' != str1[pos] )
{ pos++; }
if ( pos == size )
@@ -118,7 +118,7 @@ namespace ime_pinyin {
return dst;
}
- char16 *utf16_strncpy ( char16 *dst, const char16 *src, size_t size ) {
+ char16 *utf16_strncpy ( char16 *dst, const char16 *src, Size_t size ) {
if ( NULL == src || NULL == dst || 0 == size )
{ return NULL; }
if ( src == dst )
diff --git a/jni/share/utf16reader.cpp b/jni/share/utf16reader.cpp
index 3f4e0ca..0b460b6 100755..100644
--- a/jni/share/utf16reader.cpp
+++ b/jni/share/utf16reader.cpp
@@ -37,7 +37,7 @@ namespace ime_pinyin {
}
- bool Utf16Reader::open ( const char *filename, size_t buffer_len ) {
+ bool Utf16Reader::open ( const char *filename, Size_t buffer_len ) {
if ( filename == NULL )
{ return false; }
if ( buffer_len < MIN_BUF_LEN )
@@ -62,10 +62,10 @@ namespace ime_pinyin {
return true;
}
- char16 *Utf16Reader::readline ( char16 *read_buf, size_t max_len ) {
+ char16 *Utf16Reader::readline ( char16 *read_buf, Size_t max_len ) {
if ( NULL == fp_ || NULL == read_buf || 0 == max_len )
{ return NULL; }
- size_t ret_len = 0;
+ Size_t ret_len = 0;
do {
if ( buffer_valid_len_ == 0 ) {
buffer_next_pos_ = 0;
@@ -78,7 +78,7 @@ namespace ime_pinyin {
return read_buf;
}
}
- for ( size_t i = 0; i < buffer_valid_len_; i++ ) {
+ for ( Size_t i = 0; i < buffer_valid_len_; i++ ) {
if ( i == max_len - 1 ||
buffer_[buffer_next_pos_ + i] == ( char16 ) '\n' ) {
if ( ret_len + i > 0 && read_buf[ret_len + i - 1] == ( char16 ) '\r' ) {