blob: cc58ef03e49b01ab3621bc9924d326fb3f1b68bd
1 | /*********************************************************************** |
2 | * |
3 | * hash.c |
4 | * |
5 | * Implementation of hash tables. Each item inserted must include |
6 | * a hash_bucket member. |
7 | * |
8 | * Copyright (C) 2002 Roaring Penguin Software Inc. |
9 | * |
10 | * This software may be distributed under the terms of the GNU General |
11 | * Public License, Version 2 or (at your option) any later version. |
12 | * |
13 | * LIC: GPL |
14 | * |
15 | ***********************************************************************/ |
16 | |
17 | static char const RCSID[] = |
18 | "$Id$"; |
19 | |
20 | #include "hash.h" |
21 | |
22 | #include <limits.h> |
23 | #define BITS_IN_int ( sizeof(int) * CHAR_BIT ) |
24 | #define THREE_QUARTERS ((int) ((BITS_IN_int * 3) / 4)) |
25 | #define ONE_EIGHTH ((int) (BITS_IN_int / 8)) |
26 | #define HIGH_BITS ( ~((unsigned int)(~0) >> ONE_EIGHTH )) |
27 | |
28 | #define GET_BUCKET(tab, data) ((hash_bucket *) ((char *) (data) + (tab)->hash_offset)) |
29 | |
30 | #define GET_ITEM(tab, bucket) ((void *) (((char *) (void *) bucket) - (tab)->hash_offset)) |
31 | |
32 | static void *hash_next_cursor(hash_table *tab, hash_bucket *b); |
33 | |
34 | /********************************************************************** |
35 | * %FUNCTION: hash_init |
36 | * %ARGUMENTS: |
37 | * tab -- hash table |
38 | * hash_offset -- offset of hash_bucket data member in inserted items |
39 | * compute -- pointer to function to compute hash value |
40 | * compare -- pointer to comparison function. Returns 0 if items are equal, |
41 | * non-zero otherwise. |
42 | * %RETURNS: |
43 | * Nothing |
44 | * %DESCRIPTION: |
45 | * Initializes a hash table. |
46 | ***********************************************************************/ |
47 | void |
48 | hash_init(hash_table *tab, |
49 | size_t hash_offset, |
50 | unsigned int (*compute)(void *data), |
51 | int (*compare)(void *item1, void *item2)) |
52 | { |
53 | size_t i; |
54 | |
55 | tab->hash_offset = hash_offset; |
56 | tab->compute_hash = compute; |
57 | tab->compare = compare; |
58 | for (i=0; i<HASHTAB_SIZE; i++) { |
59 | tab->buckets[i] = NULL; |
60 | } |
61 | tab->num_entries = 0; |
62 | } |
63 | |
64 | /********************************************************************** |
65 | * %FUNCTION: hash_insert |
66 | * %ARGUMENTS: |
67 | * tab -- hash table to insert into |
68 | * item -- the item we're inserting |
69 | * %RETURNS: |
70 | * Nothing |
71 | * %DESCRIPTION: |
72 | * Inserts an item into the hash table. It must not currently be in any |
73 | * hash table. |
74 | ***********************************************************************/ |
75 | void |
76 | hash_insert(hash_table *tab, |
77 | void *item) |
78 | { |
79 | hash_bucket *b = GET_BUCKET(tab, item); |
80 | unsigned int val = tab->compute_hash(item); |
81 | b->hashval = val; |
82 | val %= HASHTAB_SIZE; |
83 | b->prev = NULL; |
84 | b->next = tab->buckets[val]; |
85 | if (b->next) { |
86 | b->next->prev = b; |
87 | } |
88 | tab->buckets[val] = b; |
89 | tab->num_entries++; |
90 | } |
91 | |
92 | /********************************************************************** |
93 | * %FUNCTION: hash_remove |
94 | * %ARGUMENTS: |
95 | * tab -- hash table |
96 | * item -- item in hash table |
97 | * %RETURNS: |
98 | * Nothing |
99 | * %DESCRIPTION: |
100 | * Removes item from hash table |
101 | ***********************************************************************/ |
102 | void |
103 | hash_remove(hash_table *tab, |
104 | void *item) |
105 | { |
106 | hash_bucket *b = GET_BUCKET(tab, item); |
107 | unsigned int val = b->hashval % HASHTAB_SIZE; |
108 | |
109 | if (b->prev) { |
110 | b->prev->next = b->next; |
111 | } else { |
112 | tab->buckets[val] = b->next; |
113 | } |
114 | if (b->next) { |
115 | b->next->prev = b->prev; |
116 | } |
117 | tab->num_entries--; |
118 | } |
119 | |
120 | /********************************************************************** |
121 | * %FUNCTION: hash_find |
122 | * %ARGUMENTS: |
123 | * tab -- hash table |
124 | * item -- item equal to one we're seeking (in the compare-function sense) |
125 | * %RETURNS: |
126 | * A pointer to the item in the hash table, or NULL if no such item |
127 | * %DESCRIPTION: |
128 | * Searches hash table for item. |
129 | ***********************************************************************/ |
130 | void * |
131 | hash_find(hash_table *tab, |
132 | void *item) |
133 | { |
134 | unsigned int val = tab->compute_hash(item) % HASHTAB_SIZE; |
135 | hash_bucket *b; |
136 | for (b = tab->buckets[val]; b; b = b->next) { |
137 | void *item2 = GET_ITEM(tab, b); |
138 | if (!tab->compare(item, item2)) return item2; |
139 | } |
140 | return NULL; |
141 | } |
142 | |
143 | /********************************************************************** |
144 | * %FUNCTION: hash_find_next |
145 | * %ARGUMENTS: |
146 | * tab -- hash table |
147 | * item -- an item returned by hash_find or hash_find_next |
148 | * %RETURNS: |
149 | * A pointer to the next equal item in the hash table, or NULL if no such item |
150 | * %DESCRIPTION: |
151 | * Searches hash table for anoter item equivalent to this one. Search |
152 | * starts from item. |
153 | ***********************************************************************/ |
154 | void * |
155 | hash_find_next(hash_table *tab, |
156 | void *item) |
157 | { |
158 | hash_bucket *b = GET_BUCKET(tab, item); |
159 | for (b = b->next; b; b = b->next) { |
160 | void *item2 = GET_ITEM(tab, b); |
161 | if (!tab->compare(item, item2)) return item2; |
162 | } |
163 | return NULL; |
164 | } |
165 | /********************************************************************** |
166 | * %FUNCTION: hash_start |
167 | * %ARGUMENTS: |
168 | * tab -- hash table |
169 | * cursor -- a void pointer to keep track of location |
170 | * %RETURNS: |
171 | * "first" entry in hash table, or NULL if table is empty |
172 | * %DESCRIPTION: |
173 | * Starts an iterator -- sets cursor so hash_next will return next entry. |
174 | ***********************************************************************/ |
175 | void * |
176 | hash_start(hash_table *tab, void **cursor) |
177 | { |
178 | int i; |
179 | for (i=0; i<HASHTAB_SIZE; i++) { |
180 | if (tab->buckets[i]) { |
181 | /* Point cursor to NEXT item so it is valid |
182 | even if current item is free'd */ |
183 | *cursor = hash_next_cursor(tab, tab->buckets[i]); |
184 | return GET_ITEM(tab, tab->buckets[i]); |
185 | } |
186 | } |
187 | *cursor = NULL; |
188 | return NULL; |
189 | } |
190 | |
191 | /********************************************************************** |
192 | * %FUNCTION: hash_next |
193 | * %ARGUMENTS: |
194 | * tab -- hash table |
195 | * cursor -- cursor into hash table |
196 | * %RETURNS: |
197 | * Next item in table, or NULL. |
198 | * %DESCRIPTION: |
199 | * Steps cursor to next item in table. |
200 | ***********************************************************************/ |
201 | void * |
202 | hash_next(hash_table *tab, void **cursor) |
203 | { |
204 | hash_bucket *b; |
205 | |
206 | if (!*cursor) return NULL; |
207 | |
208 | b = (hash_bucket *) *cursor; |
209 | *cursor = hash_next_cursor(tab, b); |
210 | return GET_ITEM(tab, b); |
211 | } |
212 | |
213 | /********************************************************************** |
214 | * %FUNCTION: hash_next_cursor |
215 | * %ARGUMENTS: |
216 | * tab -- a hash table |
217 | * b -- a hash bucket |
218 | * %RETURNS: |
219 | * Cursor value for bucket following b in hash table. |
220 | ***********************************************************************/ |
221 | static void * |
222 | hash_next_cursor(hash_table *tab, hash_bucket *b) |
223 | { |
224 | unsigned int i; |
225 | if (!b) return NULL; |
226 | if (b->next) return b->next; |
227 | |
228 | i = b->hashval % HASHTAB_SIZE; |
229 | for (++i; i<HASHTAB_SIZE; ++i) { |
230 | if (tab->buckets[i]) return tab->buckets[i]; |
231 | } |
232 | return NULL; |
233 | } |
234 | |
235 | size_t |
236 | hash_num_entries(hash_table *tab) |
237 | { |
238 | return tab->num_entries; |
239 | } |
240 | |
241 | /********************************************************************** |
242 | * %FUNCTION: hash_pjw |
243 | * %ARGUMENTS: |
244 | * str -- a zero-terminated string |
245 | * %RETURNS: |
246 | * A hash value using the hashpjw algorithm |
247 | * %DESCRIPTION: |
248 | * An adaptation of Peter Weinberger's (PJW) generic hashing |
249 | * algorithm based on Allen Holub's version. Accepts a pointer |
250 | * to a datum to be hashed and returns an unsigned integer. |
251 | ***********************************************************************/ |
252 | unsigned int |
253 | hash_pjw(char const * str) |
254 | { |
255 | unsigned int hash_value, i; |
256 | |
257 | for (hash_value = 0; *str; ++str) { |
258 | hash_value = ( hash_value << ONE_EIGHTH ) + *str; |
259 | if (( i = hash_value & HIGH_BITS ) != 0 ) { |
260 | hash_value = |
261 | ( hash_value ^ ( i >> THREE_QUARTERS )) & |
262 | ~HIGH_BITS; |
263 | } |
264 | } |
265 | return hash_value; |
266 | } |
267 |