散列是數(shù)組存儲方式的一種發(fā)展,相比數(shù)組,散列的數(shù)據(jù)訪問速度要高于數(shù)組,因為可以依據(jù)存儲數(shù)據(jù)的部分內(nèi)容找到數(shù)據(jù)在數(shù)組中的存儲位置,進而能夠快速實現(xiàn)數(shù)據(jù)的訪問,理想的散列訪問速度是非常迅速的,而不像在數(shù)組中的遍歷過程,采用存儲數(shù)組中內(nèi)容的部分元素作為映射函數(shù)的輸入,映射函數(shù)的輸出就是存儲數(shù)據(jù)的位置,這樣的訪問速度就省去了遍歷數(shù)組的實現(xiàn),因此時間復(fù)雜度可以認為為O(1),而數(shù)組遍歷的時間復(fù)雜度為O(n)。
散列是能一種快速實現(xiàn)訪問的存儲方式。通常作為檢索部分的數(shù)據(jù)項是整形或者字符串,當是字符串時,字符串的數(shù)量要遠遠大于數(shù)組的長度,這時候就會有多個字符串映射到一個存儲位置的情況,這就是所謂的沖突問題,而且沖突時肯定存在的,這時候如何實現(xiàn)數(shù)據(jù)的存儲又是需要解決的。
目前主要的解決方式有兩大類,第一種采用鏈表的形式,將所有沖突的數(shù)據(jù)項采用鏈表的形式鏈接起來,這樣搜索數(shù)據(jù)的復(fù)雜度就包含了鏈表的遍歷問題,特別是當所有的項都鏈接到一個鏈表下時,這時候?qū)嶋H上就是遍歷鏈表,復(fù)雜度并不一定有很大的進步,但是這種鏈表鏈接的方式有很高的填充率。第二種就是充分利用沒有實現(xiàn)的存儲空間,利用探測法探測空閑的空間,進而實現(xiàn)數(shù)據(jù)的存儲,目前有三種探測方式:線性探測法、平方探測法,以及雙散列法,三種方式中平方探測法運用比較多,但是都存在各種各樣的優(yōu)缺點,這時候的散列搜索優(yōu)勢就沒有理想情況下那么明顯。有時候甚至比遍歷數(shù)組更加的慢。但是確實不失為一種處理方式。
映射函數(shù)可選擇的比較多,其實完全可以定義自己的映射函數(shù),但是有時候為了降低沖突的概率設(shè)置了一些比較好的映射函數(shù),比如求和取余,或者乘以一定的系數(shù)再求和取余等。
本文采用平方探測法解決了沖突問題,具體的實現(xiàn)如下所示:
1、結(jié)構(gòu)體定義
#ifndef __HASHMAP_H_H_
#define __HASHMAP_H_H_
#include "list.h"
#define TABSIZE 101
/*狀態(tài)變量*/
typedef enum STATE{EMPTY = 0, ACTIVE = 1, DELETED = 2} State;
/*鍵值結(jié)構(gòu)體*/
typedef struct _pair
{
char *key;
char *value;
}Pair_t, *Pair_handle_t;
/*每一個實際的存儲對象*/
typedef struct _hashEntry
{
Pair_handle_t pair;
State state;
}HashEntry_t, *HashEntry_handle_t;
/*哈希表結(jié)構(gòu)體,便于創(chuàng)建*/
typedef struct _hashmap
{
HashEntry_t *map;
/*存儲實際的存儲量*/
int size;
/*容量*/
int capacity;
}Hashmap_t, *Hashmap_handle_t;
/*隱射函數(shù)類型定義*/
typedef int(*hashfunc)(const char *, int);
#ifdef __cplusplus
extern "C"
{
#endif
bool alloc_hashmap(Hashmap_handle_t *hashmap, int capacity);
bool init_hashmap(Hashmap_handle_t hashmap, int capacity);
bool insert_hashnode(Hashmap_handle_t hashmap, const char *key, const char *value);
Pair_handle_t search_hashnode(Hashmap_handle_t hashmap, const char *key);
char *GetValue(Hashmap_handle_t hashmap, const char *key);
bool delete_hashnode(Hashmap_handle_t hashmap, const char *key);
int Length(Hashmap_handle_t hashmap);
int Capacity(Hashmap_handle_t hashmap);
void delete_hashmap(Hashmap_handle_t hashmap);
void free_hashmap(Hashmap_handle_t *hashmap);
char *key_pair(Pair_handle_t pair);
char *value_pair(Pair_handle_t pair);
Hashmap_handle_t copy_hashmap(Hashmap_handle_t hashmap);
bool resize(Hashmap_handle_t hashmap);
#ifdef __cplusplus
}
#endif
#endif
實現(xiàn)表的分配和創(chuàng)建,采用了動態(tài)分配的方式實現(xiàn),這樣可能在性能上比不上靜態(tài)數(shù)據(jù),但是為了實現(xiàn)數(shù)組大小的調(diào)整,我選擇了動態(tài)分配的實現(xiàn)方式。
/*分配一個新的對象,可以實現(xiàn)自動分配*/
bool alloc_hashmap(Hashmap_handle_t *hashmap, int capacity)
{
HashEntry_handle_t temp = NULL;
Hashmap_t * map = NULL;
if(*hashmap == NULL)
{
/*分配一個散列對象*/
map = (Hashmap_handle_t)malloc(sizeof(Hashmap_t));
if(map == NULL)
return false;
/*指針指向當前對象*/
*hashmap = map;
map = NULL;
/*分配一個數(shù)組空間,大小可以控制*/
temp = (HashEntry_handle_t)malloc(
sizeof(HashEntry_t)*capacity);
if(temp != NULL)
{
/*散列對象的指針指向數(shù)組*/
(*hashmap)->map = temp;
temp = NULL;
/*設(shè)置參數(shù)*/
(*hashmap)->capacity = capacity;
(*hashmap)->size = 0;
/*初始化分配的數(shù)組空間*/
Tabinital((*hashmap)->map,capacity);
return true;
}
}
return false;
}
/*初始化一個新的對象,這個對象已經(jīng)創(chuàng)建,只是沒有初始化而已*/
bool init_hashmap(Hashmap_handle_t hashmap, int capacity)
{
HashEntry_handle_t temp = NULL;
if(hashmap != NULL)
{
/*分配數(shù)組空間*/
temp = (HashEntry_handle_t)malloc(
sizeof(HashEntry_t)*capacity);
if(temp != NULL)
{
/*完成對象的填充操作*/
hashmap->map = temp;
temp = NULL;
hashmap->capacity = capacity;
hashmap->size = 0;
/*初始化數(shù)組對象*/
Tabinital(hashmap->map,capacity);
return true;
}
}
return false;
}
關(guān)于數(shù)組中對象的創(chuàng)建,和釋放操作,如下所示:
/*分配一個pair對象*/
static bool make_pair(Pair_handle_t *pair, const char *key, const char *value)
{
Pair_handle_t newpair =(Pair_handle_t)malloc(sizeof(Pair_t));
char *newstr = NULL;
if(newpair == NULL)
return false;
newstr = (char *)malloc(strlen(key) + 1);
if(newstr == NULL)
return false;
strcpy(newstr, key);
newstr[strlen(key)] = '\0';
newpair->key = newstr;
newstr = NULL;
newstr = (char *)malloc(strlen(value) + 1);
if(newstr == NULL)
return false;
strcpy(newstr, value);
newstr[strlen(value)] = '\0';
newpair->value = newstr;
newstr = NULL;
(*pair) = newpair;
return true;
}
/*釋放一個對象pair*/
static void delete_pair(Pair_handle_t *pair)
{
Pair_handle_t temp = NULL;
if(*pair == NULL)
return ;
temp = *pair;
free(temp->key);
temp->key = NULL;
free(temp->value);
temp->value = NULL;
free(temp);
temp = NULL;
*pair = NULL;
}
數(shù)組元素的基本操作:
/*完成數(shù)組對象的初始化操作*/
static void Tabinital(HashEntry_t *tab, int size)
{
int i = 0;
for(; i < size; ++ i)
{
tab[i].pair = NULL;
tab[i].state = EMPTY;
}
}
static void delete_array(HashEntry_handle_t *array, int size)
{
int i = 0;
if(*array != NULL)
{
for(i = 0; i < size; ++ i)
{
if((*array)[i].state == ACTIVE)
{
delete_pair(&((*array)[i].pair));
(*array)[i].state = DELETED;
}
}
free(*array);
*array = NULL;
}
}
插入元素的操作、有兩個函數(shù)的創(chuàng)建,其中一個為了便于后期大小的調(diào)整操作。
/*插入數(shù)據(jù)到散列中,采用了二次探測的實現(xiàn)方式,并設(shè)置了退出條件*/
static bool insert_data(Hashmap_handle_t hashmap,
const char *key, const char *value, hashfunc func)
{
int hashval = func(key,hashmap->capacity);
int index = 0;
char * newstr = NULL;
Pair_handle_t newpair = NULL;
while(hashmap->map[hashval].state != EMPTY)
{
if((hashmap->map[hashval].state == ACTIVE)
&& (strcmp(hashmap->map[hashval].pair->key,key) == 0))
break;
index ++;
hashval += index * index;
hashval %= hashmap->capacity;
if(index == 200)
break;
}
if(hashmap->map[hashval].state == EMPTY)
{
if(make_pair(&newpair,key,value))
{
hashmap->map[hashval].state = ACTIVE;
hashmap->map[hashval].pair = newpair;
newpair = NULL;
hashmap->size ++;
return true;
}
}
else if((hashmap->map[hashval].state == ACTIVE)
&& (strcmp(hashmap->map[hashval].pair->key, key) == 0))
{
newstr = (char *)malloc(strlen(value) + 1);
if(newstr != NULL)
{
strcpy(newstr,value);
newstr[strlen(value)] = '\0';
free(hashmap->map[hashval].pair->value);
hashmap->map[hashval].pair->value = newstr;
newstr = NULL;
return true;
}
}
return false;
}
static bool insert2map(HashEntry_handle_t map,
const char *key, const char *value,
hashfunc func, int size)
{
int hashval = func(key,size);
int index = 0;
char *newstr = NULL;
Pair_handle_t newpair = NULL;
if(map != NULL)
{
while(map[hashval].state != EMPTY)
{
if((map[hashval].state == ACTIVE)
&& (strcmp(map[hashval].pair->key, key) == 0))
break;
index ++;
hashval += index * index;
hashval %= size;
/*防止死循環(huán)*/
if(index == 200)
break;
}
if(map[hashval].state == EMPTY)
{
if(!make_pair(&newpair,key,value))
return false;
map[hashval].pair = newpair;
map[hashval].state = ACTIVE;
newpair = NULL;
return true;
}
else if((map[hashval].state == ACTIVE) &&
(strcmp(map[hashval].pair->key, key) == 0))
{
newstr = (char *)malloc(strlen(value) +1);
if(newstr != NULL)
{
free(map[hashval].pair->value);
map[hashval].pair->value = NULL;
strcpy(newstr, value);
newstr[strlen(value)] = '\0';
map[hashval].pair->value = newstr;
return true;
}
}
}
return false;
}
調(diào)整大小和插入的實際操作。
bool resize(Hashmap_handle_t hashmap)
{
int i = 0;
HashEntry_handle_t newarray = NULL;
int size = next_prime(2*hashmap->capacity);
if(hashmap != NULL)
{
newarray = (HashEntry_handle_t)malloc(sizeof(HashEntry_t)
*size);
if(newarray == NULL)
return false;
/*這一步至關(guān)重要*/
Tabinital(newarray, size);
for(; i < hashmap->capacity ; ++ i)
{
if(hashmap->map[i].state == ACTIVE)
{
if(!insert2map(newarray,
hashmap->map[i].pair->key,
hashmap->map[i].pair->value,
HashFunc, size))
return false;
}
}
delete_array(&hashmap->map,hashmap->capacity);
hashmap->map = newarray;
hashmap->capacity = size;
return true;
}
return false;
}
bool insert_hashnode(Hashmap_handle_t hashmap,
const char *key, const char *value)
{
if(hashmap->size < hashmap->capacity)
{
if(insert_data(hashmap,key,value,HashFunc))
{
//hashmap->size ++;
}
return true;
}
else /*增加容量*/
{
if(!resize(hashmap))
return false;
if(insert_data(hashmap,key,value,HashFunc))
{
//hashmap->size ++;
}
return true;
}
return false;
}
搜索操作
static Pair_handle_t search_data(Hashmap_handle_t hashmap, const char *key, hashfunc func)
{
int hashval = func(key, hashmap->capacity);
int index = 0;
while(hashmap->map[hashval].state != EMPTY)
{
if((hashmap->map[hashval].state == ACTIVE)
&& (strcmp(hashmap->map[hashval].pair->key,key) == 0))
break;
index ++;
hashval += index * index;
hashval %= hashmap->capacity;
if(index == 200)
break;
}
if((hashmap->map[hashval].state == ACTIVE)
&& (strcmp(hashmap->map[hashval].pair->key, key) == 0))
{
return hashmap->map[hashval].pair;
}
return NULL;
}
Pair_handle_t search_hashnode(Hashmap_handle_t hashmap, const char * key)
{
return search_data(hashmap,key,HashFunc);
}
刪除操作
static bool delete_node(Hashmap_handle_t hashmap,const char *key,hashfunc func)
{
int hashval = func(key, hashmap->capacity);
int index = 0;
/**********************************************
*退出循環(huán)的條件是找到空閑的單元,或者關(guān)鍵字匹配
*********************************************/
while(hashmap->map[hashval].state != EMPTY)
{
if((hashmap->map[hashval].state == ACTIVE)
&& strcmp(hashmap->map[hashval].pair->key,key) == 0)
break;
index ++;
hashval += index * index;
hashval %= hashmap->capacity;
if(index == 200)
break;
}
/*判斷刪除條件*/
if((hashmap->map[hashval].state == ACTIVE) &&
(strcmp(hashmap->map[hashval].pair->key, key) == 0))
{
hashmap->map[hashval].state = DELETED;
delete_pair(&(hashmap->map[hashval].pair));
hashmap->map[hashval].pair = NULL;
return true;
}
return false;
}
bool delete_hashnode(Hashmap_handle_t hashmap, const char *key)
{
if(delete_node(hashmap, key, HashFunc))
{
hashmap->size --;
return true;
}
return false;
}
參數(shù)獲取函數(shù);
int Length(Hashmap_t *map)
{
return map->size;
}
int Capacity(Hashmap_t *map)
{
return map->capacity;
}
void delete_hashmap(Hashmap_handle_t hashmap)
{
int i = 0;
int size = hashmap->capacity;
if(hashmap != NULL)
{
for(; i < size; ++ i)
{
if(hashmap->map[i].state == ACTIVE)
{
delete_pair(&(hashmap->map[i].pair));
hashmap->map[i].state = DELETED;
hashmap->map[i].pair = NULL;
hashmap->size --;
}
}
free(hashmap->map);
hashmap->map = NULL;
}
}
void free_hashmap(Hashmap_handle_t *hashmap)
{
delete_hashmap(*hashmap);
free(*hashmap);
*hashmap = NULL;
}
char *key_pair(Pair_handle_t pair)
{
if(pair != NULL)
{
return pair->key;
}
return NULL;
}
char *value_pair(Pair_handle_t pair)
{
if(pair != NULL)
{
return pair->value;
}
return NULL;
}
/*復(fù)制散列操作,相當于創(chuàng)建了一個新的散列對象,而不是指針復(fù)制*/
Hashmap_handle_t copy_hashmap(Hashmap_handle_t hashmap)
{
Hashmap_handle_t newhashmap = NULL;
int i = 0;
if(hashmap == NULL)
return NULL;
/*采用動態(tài)分配的方式實現(xiàn)*/
if(!alloc_hashmap(&newhashmap,hashmap->capacity))
return NULL;
for(; i < hashmap->capacity ; ++ i)
{
if(hashmap->map[i].state == ACTIVE)
{
/*得到當前中的值實現(xiàn)插入操作*/
insert_hashnode(newhashmap,
hashmap->map[i].pair->key,
hashmap->map[i].pair->value);
}
else if(hashmap->map[i].state == DELETED)
{
newhashmap->map[i].state = DELETED;
}
}
return newhashmap;
}
測試函數(shù):
#include "hashmap.h"
#include <time.h>
#define CAPACITY 13
char *keys[] = {
"abcd",
"defh",
"abcd",
"12345",
"a1b2c3",
"12345",
"a1b2c3",
"23456",
"hijhk",
"test1",
"test1",
"789123",
"Input",
};
char *values[] = {
"abcd",
"defh",
"abcd",
"12345",
"a1b2c3",
"12345",
"a1b2c3",
"23456",
"hijhk",
"test1",
"test1",
"789123",
"Input",
};
int myhashFunc(const char *key, int Tabsize)
{
int hashVal = 0;
int i = 0;
int len = strlen(key);
for(; i < len ; ++ i )
hashVal += 37 *hashVal + key[i];
hashVal %= Tabsize;
if(hashVal < 0)
hashVal += Tabsize;
return hashVal;
}
int main()
{
int i = 0;
char str1[13];
char str2[13];
Hashmap_t mymap ;
Hashmap_handle_t map = NULL;
Hashmap_handle_t doubmap = NULL;
Pair_handle_t pair;
/*靜態(tài)分配*/
srand((int)time(0));
printf("init and alloc test:\n");
if(!init_hashmap(&mymap,13))
{
return false;
}
/*動態(tài)分配*/
if(!alloc_hashmap(&map,13))
{
return false;
}
printf("Sucessed!\n");
/*插入測試*/
printf("insert test:\n");
for(i = 0; i < CAPACITY + 10; ++ i)
{
sprintf(str1,"%d%d",rand()%10+1,rand()%10+1);
sprintf(str2,"%d%d",rand()%10+1,rand()%10+1);
printf("%s->-f(%s)->%d->%s\n",str1,str1,
myhashFunc(str1,CAPACITY),str2);
// sprintf(str1,"%s",keys[i]);
// sprintf(str2,"%s",values[i]);
if(!insert_hashnode(&mymap,str1,str2))
{
printf("i = %d, insert to mymap failed\n", i);
break;
}
if(!insert_hashnode(map,str1,str2))
{
printf("i = %d, insert to map failed\n", i);
break;
}
}
printf("Sucessed!\n");
/*查找測試*/
printf("search test:\n");
if((pair = search_hashnode(&mymap,str1)) != NULL)
{
printf("%s->%s\n",key_pair(pair),value_pair(pair));
}
if((pair = search_hashnode(map,str1)) != NULL)
{
printf("%s->%s\n",key_pair(pair),value_pair(pair));
}
printf("Sucessed!\n");
/*delete*/
printf("delete test:\n");
if(delete_hashnode(&mymap,str1))
{
printf("Deleted success!!\n");
}
else
{
printf("Sorry, Failed!!\n");
}
if(delete_hashnode(map,str1))
{
printf("Deleted success!!\n");
}
else
{
printf("Sorry, Failed!!\n");
}
printf("Valid length : %d, Capacity : %d\n",
Length(&mymap),Capacity(&mymap));
printf("Valid length : %d, Capacity : %d\n",
Length(map),Capacity(map));
/*改變長度*/
printf("resize test:\n");
if(resize(&mymap))
printf("Sucessed!\n");
if(resize(map))
printf("Sucessed!\n");
/*長度*/
printf("Valid length : %d, Capacity : %d\n",
Length(&mymap),Capacity(&mymap));
printf("Valid length : %d, Capacity : %d\n",
Length(map),Capacity(map));
printf("Valid length : %d, Capacity : %d\n",
Length(&mymap),Capacity(&mymap));
printf("Valid length : %d, Capacity : %d\n",
Length(map),Capacity(map));
printf("copy test:\n");
doubmap = copy_hashmap(&mymap);
printf("Valid length : %d, Capacity : %d\n",
Length(doubmap),Capacity(doubmap));
printf("Sucessed!\n");
/*釋放內(nèi)存*/
printf("free test:\n");
delete_hashmap(&mymap);
free_hashmap(&map);
free_hashmap(&doubmap);
printf("Valid length : %d, Capacity : %d\n",
Length(&mymap),Capacity(&mymap));
printf("Sucessed!\n");
return 0;
}
測試結(jié)果:
[gong@Gong-Computer newversion]$ ./main
init and alloc test:
insert test:
48->-f(48)->4->49
108->-f(108)->5->910
78->-f(78)->1->98
87->-f(87)->12->73
36->-f(36)->3->109
59->-f(59)->4->98
32->-f(32)->12->48
210->-f(210)->10->91
105->-f(105)->2->22
41->-f(41)->10->82
1010->-f(1010)->11->69
19->-f(19)->8->64
25->-f(25)->3->45
28->-f(28)->6->104
16->-f(16)->5->83
44->-f(44)->0->86
85->-f(85)->10->72
51->-f(51)->9->27
54->-f(54)->12->57
107->-f(107)->4->210
73->-f(73)->9->27
1010->-f(1010)->11->61
63->-f(63)->10->63
search test:
63->63
63->63
delete test:
Deleted
Deleted
Valid length : 21, Capacity : 29
Valid length : 21, Capacity : 29
resize test:
Valid length : 21, Capacity : 59
Valid length : 21, Capacity : 59
Valid length : 21, Capacity : 59
Valid length : 21, Capacity : 59
copy test:
Valid length : 21, Capacity : 59
free test:
Valid length : 0, Capacity : 59
從實驗效果可知,基本上實現(xiàn)了散列的基本操作。