1 Star 0 Fork 0

晚风不及你的笑 / 作业库

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
BloomFilter.h 2.81 KB
一键复制 编辑 原始数据 按行查看 历史
晚风不及你的笑 提交于 2023-03-11 20:22 . 增加了布隆过滤器
#pragma once
struct BKDRHash
{
size_t operator()(const string& s)
{
// BKDR
size_t value = 0;
for (auto ch : s)
{
value *= 131;
value += ch;
}
return value;
}
};
struct DJBHash
{
size_t operator()(const string& s)
{
unsigned int hash = 5381;
for(auto& ch: s)
{
hash += (hash << 5) + ch;
}
return hash;
}
}; struct APHash
{
size_t operator()(const string& s)
{
unsigned int hash = 0;
int i = 0;
for (auto& ch :s)
{
if ((i & 1) == 0)
{
hash ^= ((hash << 7) ^ ch ^ (hash >> 3));
}
else
{
hash ^= (~((hash << 11) ^ ch ^ (hash >> 5)));
}
++i;
}
return hash;
}
};
//假设N是最多存储元素的个数,k就是平均存储一个元素需要开辟的比特位
template<size_t N, size_t k = 6,
class Key = string,
class HashFunc1 = BKDRHash,
class HashFunc2 = APHash,
class HashFunc3 = DJBHash>
class BloomFilter
{
public:
void set(const Key& key)
{
size_t hash1 = HashFunc1()(key) % (N * k);
size_t hash2 = HashFunc2()(key) % (N * k);
size_t hash3 = HashFunc3()(key) % (N * k);
_bf.set(hash1);
_bf.set(hash2);
_bf.set(hash3);
}
bool test(const Key& key)
{
size_t hash1 = HashFunc1()(key) % (N * k);
if (!_bf.test(hash1))
return false;
size_t hash2 = HashFunc2()(key) % (N * k);
if (!_bf.test(hash2))
return false;
size_t hash3 = HashFunc3()(key) % (N * k);
if (!_bf.test(hash3))
return false;
return true; //存在但是会有误判
}
private:
std::bitset<N * k> _bf;
};
void testBF()
{
BloomFilter<10> bf;
string str[] = { "孙悟空","猪八戒","沙悟净","唐三藏","白龙马",
"孙悟空1","1孙悟空","孙1悟空","1孙悟空1","11孙悟空"};
for (auto& s : str)
{
bf.set(s);
}
for (auto& s : str)
{
cout << bf.test(s) << endl;
}
cout << endl;
srand(time(0));
for (const auto& s : str)
{
cout << bf.test(s+ to_string(rand())) << endl;
}
}
void test_bloomfilter2()
{
srand(time(0));
const size_t N = 10000;
BloomFilter<N> bf;
std::vector<std::string> v1;
std::string url = "https://www.cnblogs.com/-clq/archive/2012/05/31/2528153.html";
for (size_t i = 0; i < N; ++i)
{
v1.push_back(url + std::to_string(i));
}
for (auto& str : v1)
{
bf.set(str);
}
// v2跟v1是相似字符串集,但是不一样
std::vector<std::string> v2;
for (size_t i = 0; i < N; ++i)
{
std::string url = "https://www.cnblogs.com/-clq/archive/2012/05/31/2528153.html";
url += std::to_string(999999 + i);
v2.push_back(url);
}
size_t n2 = 0;
for (auto& str : v2)
{
if (bf.test(str))
{
++n2;
}
}
cout << "相似字符串误判率:" << (double)n2 / (double)N << endl;
// 不相似字符串集
std::vector<std::string> v3;
for (size_t i = 0; i < N; ++i)
{
string url = "zhihu.com";
url += std::to_string(i + rand());
v3.push_back(url);
}
size_t n3 = 0;
for (auto& str : v3)
{
if (bf.test(str))
{
++n3;
}
}
cout << "不相似字符串误判率:" << (double)n3 / (double)N << endl;
}
1
https://gitee.com/sendy-jing/job-library.git
git@gitee.com:sendy-jing/job-library.git
sendy-jing
job-library
作业库
master

搜索帮助