A Discrete-Event Network Simulator
API
Loading...
Searching...
No Matches
hash-example.cc
Go to the documentation of this file.
1/*
2 * SPDX-License-Identifier: GPL-2.0-only
3 */
4
5#include "ns3/core-module.h"
6#include "ns3/hash.h"
7
8#include <algorithm> // find
9#include <climits> // CHAR_BIT
10#include <fstream>
11#include <iomanip>
12#include <iostream>
13#include <map>
14#include <vector>
15
16/**
17 * \file
18 * \ingroup core-examples
19 * \ingroup hash
20 * Example usage of ns3::Hash.
21 *
22 * This example reads words from a list of files, creates a dictionary
23 * mapping words to hash values, reports collisions, and measures the
24 * execution time of the hash function implementations.
25 *
26 * See \ref hash
27 *
28 * Example Output:
29 * \verbatim
30
31./ns3 run "hasher-example --time \
32 --dict=/usr/share/dict/web2 \
33 --dict=/usr/share/dict/web2a \
34 --dict=/usr/share/dict/propernames \
35 --dict=/usr/share/dict/connectives"
36
37'build' finished successfully (3.028s)
38
39Hasher
40Hashing the dictionaries
41Dictionary file: /usr/share/dict/web2
42Dictionary file: /usr/share/dict/web2a
43Dictionary file: /usr/share/dict/propernames
44Dictionary file: /usr/share/dict/connectives
45
46Number of words or phrases: 312094
47Expected number of collisions: (32-bit table) 11.3389
48Expected number of collisions: (64-bit table) 2.6401e-09
49
50FNV1a (32-bit version): 13 collisions:
51a75b0ae7 elephantlike interventralia
52091c4808 diversionary propenseness
53172be6ba bairnishness sora
54e6cb5099 purifier spongoblastic
554a841078 ameliorable unsmotherable
566ed21de2 brand-newness peripherial
5722acb19b Petrarchism dewy-pinioned
585723634a grain gold hyphenation
59f58026c1 seven-channeled turritella
60946fc6ec multiradiate sister block
6188625851 brachtmema ule tree
62dc28b5ea Un-lutheran gutturotetany
639255bf44 re-sorter working stress
64
65FNV1a (64-bit version): 0 collisions:
66
67Murmur3 (32-bit version): 11 collisions:
685ea83eee impalace metahewettite
69e06fbdde constancy oligosynthetic
702a713795 hypermonosyllable presatisfaction
71c8bf0ef9 Hadromerina starky
72d9c04b3d Accipiter syllable
73c0da8f81 seriation trigonon
7417612b26 daemon unerring
75c2349ad7 air spring iron
761d91386f nine-pounder semicrescentic
77fe17b1a5 cone speaker oblong-wedgeshaped
78faa12798 saw bearing wilting point
79
80Murmur3 (64-bit version): 0 collisions:
81
82Hash timing Phrases Reps Ticks ns/hash
83FNV1a (32-bit version) 312094 100 3140531 100.628
84FNV1a (64-bit version) 312094 100 3145240 100.779
85Murmur3 (32-bit version) 312094 100 4152139 133.041
86Murmur3 (64-bit version) 312094 100 4191464 134.301
87
88 \endverbatim
89 */
90
91namespace ns3
92{
93
95
96namespace Hash
97{
98
99/**
100 * \ingroup hash
101 * Namespace for hasher-example.
102 */
103namespace Example
104{
105
106/**
107 * Keep track of collisions
108 */
110{
111 public:
112 std::string m_name; /**< Name of this hash. */
113 Hasher m_hash; /**< The hash. */
114
115 /** The size of hash function being tested. */
116 enum Bits
117 {
118 Bits32, /**< Use 32-bit hash function. */
119 Bits64 /**< Use 64-bit hash function. */
120 };
121
122 /**
123 * Constructor.
124 *
125 * \param [in] name Hash function name.
126 * \param [in] hash Hash function.
127 * \param [in] bits Which hash length to use.
128 */
129 Collider(const std::string name, Hasher hash, const Bits bits)
130 : m_name(name),
131 m_hash(hash),
132 m_bits(bits)
133 {
134 }
135
136 /**
137 * Add a string to the Collider.
138 *
139 * \param [in] phrase The string to add.
140 * \return \c true If this was a new string.
141 */
142 bool Add(const std::string phrase)
143 {
144 uint64_t h = GetHash(phrase);
145
146 // Check for collisions
147 if (m_dict.count(h) > 0)
148 {
149 // we've seen this hash before, why?
150 if (phrase == m_dict[h])
151 {
152 // duplicate phrase
153 return false;
154 }
155
156 // new phrase generating a real collision
157 // alphabetize
158 if (m_dict[h] < phrase)
159 {
160 m_coll.emplace_back(h, phrase);
161 }
162 else
163 {
164 m_coll.emplace_back(h, m_dict[h]);
165 m_dict[h] = phrase;
166 }
167 }
168 else
169 {
170 // Insert new hash
171 m_dict.insert(std::make_pair(h, phrase));
172 }
173 return true;
174 } // Add ()
175
176 /**
177 * \return The hash name, including the length.
178 */
179 std::string GetName() const
180 {
181 std::string name = m_name;
182
183 switch (m_bits)
184 {
185 case Bits32:
186 name += " (32-bit version)";
187 break;
188 case Bits64:
189 name += " (64-bit version)";
190 break;
191 default:
192 name += " (unknown!?!)";
193 }
194 return name;
195 }
196
197 /** Print the collisions found. */
198 void Report() const
199 {
200 std::cout << std::endl;
201
202 std::cout << GetName() << ": " << m_coll.size() << " collisions:" << std::endl;
203 for (const auto& collision : m_coll)
204 {
205 uint64_t h = collision.first;
206
207 std::cout << std::setfill('0') << std::hex << std::setw(8) << h << std::dec
208 << std::setfill(' ') << " " << std::setw(20) << std::left
209 << m_dict.find(h)->second << collision.second << std::right << std::endl;
210 }
211 } // Report ()
212
213 private:
214 /**
215 * Get the appropriate hash value.
216 *
217 * \param [in] phrase The string to hash.
218 * \return The hash value, using the number of bits set in the constructor.
219 */
220 uint64_t GetHash(const std::string phrase)
221 {
222 m_hash.clear();
223 uint64_t h = 0;
224
225 if (m_bits == Bits32)
226 {
227 h = m_hash.GetHash32(phrase);
228 }
229 else
230 {
231 h = m_hash.GetHash64(phrase);
232 }
233 return h;
234 }
235
236 /** Hash function. */
238
239 /** Hashed dictionary of first instance of each hash. */
240 typedef std::map<uint64_t, std::string> hashdict_t;
241
242 /** The dictionary map, indexed by hash. */
244
245 /** Collision map of subsequent instances. */
246 typedef std::vector<std::pair<uint64_t, std::string>> collision_t;
247
248 /** The list of collisions. */
250
251}; // class Collider
252
253/**
254 * Word list and hashers to test.
255 */
257{
258 public:
259 /** Constructor. */
261 : m_nphrases(0)
262 {
263 m_words.reserve(320000);
264 }
265
266 /**
267 * Add a Collider containing a hash function.
268 *
269 * \param [in] c The Collider to add.
270 */
271 void Add(Collider c)
272 {
273 m_hashes.push_back(c);
274 }
275
276 /**
277 * Add a string to the dictionary.
278 *
279 * \param [in] phrase The string to add.
280 */
281 void Add(const std::string phrase)
282 {
283 if (phrase.empty())
284 {
285 return;
286 }
287
288 bool newPhrases = false;
289 for (auto& collider : m_hashes)
290 {
291 newPhrases |= collider.Add(phrase);
292 }
293
294 if (newPhrases)
295 {
296 ++m_nphrases;
297 m_words.push_back(phrase);
298 }
299 } // Add ()
300
301 /**
302 * Report the expected number of collisions.
303 *
304 * See, e.g.,
305 * http://www.math.dartmouth.edu/archive/m19w03/public_html/Section6-5.pdf
306 *
307 * \f[
308 * E(collisions) = n - k + k (1 - 1/k)^n
309 * \f]
310 *
311 * where <i>n</i> is the number of entries in the table, and
312 * <i>k</i> is the number of buckets.
313 *
314 * This form is numerically unstable for low collision rates.
315 * Expanding for large \f$ k \f$ we get
316 *
317 * \f{eqnarray*}{
318 * E(c) &=& \frac{1}{k} \binom{n}{2}
319 * - \frac{1}{{{k^2}}} \binom{n}{3}
320 * + \frac{1}{{{k^3}}} \binom{n}{4}
321 * - \ldots \\
322 * &=& \frac{1}{k} \binom{n}{2}
323 * \left[ {1 - \frac{{n - 2}}{{3k}}
324 * + \frac{{\left( {n - 2} \right)
325 * \left( {n - 3} \right)}}{{12{k^2}}}
326 * - \ldots } \right] \\
327 * &=& \frac{1}{k} \binom{n}{2}
328 * \left\{ {1 - \frac{{n - 2}}{{3k}}
329 * \left[ {1 + \frac{{n - 3}}{{4k}}
330 * - \ldots }
331 * \right]}
332 * \right\}
333 * \f}
334 *
335 * For simplicity, we'll use the first two terms
336 * of the second form.
337 */
339 {
340 // Expected number of collisions
341 //
342 // Number of buckets = k = 2^bits
343 long double k32 = 0xFFFFFFFF;
344 auto k64 = static_cast<long double>(0xFFFFFFFFFFFFFFFFULL);
345
346 long double n = m_nphrases;
347 long double Ec32 = n * (n - 1) / (2 * k32) * (1 - (n - 2) / (3 * k32));
348 long double Ec64 = n * (n - 1) / (2 * k64) * (1 - (n - 2) / (3 * k64));
349
350 // Output collisions
351 std::cout << "" << std::endl;
352 std::cout << "Number of words or phrases: " << n << std::endl;
353 std::cout << "Expected number of collisions: (32-bit table) " << Ec32 << std::endl;
354 std::cout << "Expected number of collisions: (64-bit table) " << Ec64 << std::endl;
355 } // ReportExpectedCollisions
356
357 /** Print the collisions for each Collider. */
358 void Report() const
359 {
361
362 for (const auto& collider : m_hashes)
363 {
364 collider.Report();
365 }
366 } // Report ()
367
368 /**
369 * Time and report the execution of one hash across the entire Dictionary.
370 *
371 * \param [in] collider The hash Collider to use.
372 */
373 void TimeOne(const Collider& collider)
374 {
375 // Hashing speed
376 uint32_t reps = 100;
377 Hasher h = collider.m_hash;
378 int start = clock();
379 for (const auto& word : m_words)
380 {
381 for (uint32_t i = 0; i < reps; ++i)
382 {
383 h.clear().GetHash32(word);
384 }
385 }
386 int stop = clock();
387 double delta = stop - start;
388 double per = 1e9 * delta / (m_nphrases * reps * CLOCKS_PER_SEC);
389
390 std::cout << std::left << std::setw(32) << collider.GetName() << std::right << std::setw(10)
391 << m_nphrases << std::setw(10) << reps << std::setw(10) << stop - start
392 << std::setw(12) << per << std::endl;
393
394 } // TimeOne ()
395
396 /** Report the execution time of each hash across the entire Dictionary. */
397 void Time()
398 {
399 std::cout << "" << std::endl;
400 std::cout << std::left << std::setw(32) << "Hash timing" << std::right << std::setw(10)
401 << "Phrases" << std::setw(10) << "Reps" << std::setw(10) << "Ticks"
402 << std::setw(12) << "ns/hash" << std::endl;
403
404 for (const auto& collider : m_hashes)
405 {
406 TimeOne(collider);
407 }
408 } // Time ()
409
410 private:
411 unsigned long m_nphrases; /**< Number of strings hashed. */
412 std::vector<Collider> m_hashes; /**< List of hash Colliders. */
413 std::vector<std::string> m_words; /**< List of unique words. */
414
415}; // class Dictionary
416
417/**
418 * Source word list files.
419 */
421{
422 public:
423 /**
424 * CommandLine callback function to add a file argument to the list.
425 *
426 * \param [in] file The word file to add.
427 * \return \c true If the file is new to the list.
428 */
429 bool Add(const std::string& file)
430 {
431 if (std::find(m_files.begin(), m_files.end(), file) == m_files.end())
432 {
433 m_files.push_back(file);
434 }
435
436 return true;
437 }
438
439 /** \return The default dictionary path. */
440 static std::string GetDefault()
441 {
442 return "/usr/share/dict/words";
443 }
444
445 /**
446 * Add phrases from the files into the dict.
447 *
448 * \param [in,out] dict The Dictionary to add words to.
449 */
451 {
452 if (m_files.empty())
453 {
454 Add(GetDefault());
455 }
456
457 std::cout << "Hashing the dictionar" << (m_files.size() == 1 ? "y" : "ies") << std::endl;
458
459 for (const auto& dictFile : m_files)
460 {
461 std::cout << "Dictionary file: " << dictFile << std::endl;
462
463 // Find collisions
464
465 // Open the file
466 std::ifstream dictStream;
467 dictStream.open(dictFile);
468 if (!dictStream.is_open())
469 {
470 std::cerr << "Failed to open dictionary file."
471 << "'" << dictFile << "'" << std::endl;
472 continue;
473 }
474
475 while (dictStream.good())
476 {
477 std::string phrase;
478 getline(dictStream, phrase);
479 dict.Add(phrase);
480 } // while
481
482 dictStream.close();
483
484 } // for m_files
485
486 } // ReadInto
487
488 private:
489 std::vector<std::string> m_files; /**< List of word files to use. */
490
491}; // class DictFiles
492
493} // namespace Example
494
495} // namespace Hash
496
497} // namespace ns3
498
499using namespace ns3;
500using namespace ns3::Hash::Example;
501
502int
503main(int argc, char* argv[])
504{
505 std::cout << std::endl;
506 std::cout << "Hasher" << std::endl;
507
508 bool timing = false;
509 DictFiles files;
510
511 CommandLine cmd(__FILE__);
512 cmd.Usage("Find hash collisions in the dictionary.");
513 cmd.AddValue("dict",
514 "Dictionary file to hash",
517
518 cmd.AddValue("time", "Run timing test", timing);
519 cmd.Parse(argc, argv);
520
521 Dictionary dict;
524
527
528 files.ReadInto(dict);
529
530 dict.Report();
531
532 if (timing)
533 {
534 dict.Time();
535 } // if (timing)
536
537 return 0;
538} // main
Parse command-line arguments.
Keep track of collisions.
Collider(const std::string name, Hasher hash, const Bits bits)
Constructor.
collision_t m_coll
The list of collisions.
std::map< uint64_t, std::string > hashdict_t
Hashed dictionary of first instance of each hash.
uint64_t GetHash(const std::string phrase)
Get the appropriate hash value.
std::string m_name
Name of this hash.
std::vector< std::pair< uint64_t, std::string > > collision_t
Collision map of subsequent instances.
void Report() const
Print the collisions found.
Bits
The size of hash function being tested.
@ Bits64
Use 64-bit hash function.
@ Bits32
Use 32-bit hash function.
bool Add(const std::string phrase)
Add a string to the Collider.
std::string GetName() const
hashdict_t m_dict
The dictionary map, indexed by hash.
Source word list files.
std::vector< std::string > m_files
List of word files to use.
bool Add(const std::string &file)
CommandLine callback function to add a file argument to the list.
static std::string GetDefault()
void ReadInto(Dictionary &dict)
Add phrases from the files into the dict.
Word list and hashers to test.
void ReportExpectedCollisions() const
Report the expected number of collisions.
void Add(Collider c)
Add a Collider containing a hash function.
std::vector< std::string > m_words
List of unique words.
std::vector< Collider > m_hashes
List of hash Colliders.
void Report() const
Print the collisions for each Collider.
void TimeOne(const Collider &collider)
Time and report the execution of one hash across the entire Dictionary.
unsigned long m_nphrases
Number of strings hashed.
void Add(const std::string phrase)
Add a string to the dictionary.
void Time()
Report the execution time of each hash across the entire Dictionary.
Generic Hash function interface.
Definition hash.h:76
uint32_t GetHash32(const char *buffer, const std::size_t size)
Compute 32-bit hash of a byte buffer.
Definition hash.h:225
uint64_t GetHash64(const char *buffer, const std::size_t size)
Compute 64-bit hash of a byte buffer.
Definition hash.h:232
Hasher & clear()
Restore initial state.
Definition hash.cc:45
#define NS_LOG_COMPONENT_DEFINE(name)
Define a Log component with a specific name.
Definition log.h:191
Ptr< T > Create(Ts &&... args)
Create class instances by constructors with varying numbers of arguments and return them by Ptr.
Definition ptr.h:436
Namespace for hasher-example.
Every class exported by the ns3 library is enclosed in the ns3 namespace.
Callback< R, Args... > MakeCallback(R(T::*memPtr)(Args...), OBJ objPtr)
Build Callbacks for class method members which take varying numbers of arguments and potentially retu...
Definition callback.h:684