heap.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 1996-2023 The Squid Software Foundation and contributors
3  *
4  * Squid software is distributed under GPLv2+ license and includes
5  * contributions from numerous individuals and organizations.
6  * Please see the COPYING and CONTRIBUTORS files for details.
7  */
8 
9 /*
10  * AUTHOR: John Dilley, Hewlett Packard
11  */
12 
13 /****************************************************************************
14  * Heap implementation
15  * Copyright (C) 1999 by Hewlett Packard
16  ****************************************************************************/
17 
18 #include "squid.h"
19 #include "heap.h"
20 
21 #if HAVE_STDLIB_H
22 #include <stdlib.h>
23 #endif
24 #if HAVE_ASSERT_H
25 #include <assert.h>
26 #endif
27 #if HAVE_STRING_H
28 #include <string.h>
29 #endif
30 
31 #include "util.h"
32 
33 /*
34  * Hacks for non-synchronized heap implementation.
35  */
36 #define mutex_lock(m) (void)0
37 #define mutex_unlock(m) (void)0
38 #define mutex_trylock(m) (void)0
39 #define mutex_init(m) ((m)=123456)
40 
41 /*
42  * Private function prototypes.
43  */
44 static void _heap_ify_up(heap * hp, heap_node * elm);
45 static void _heap_ify_down(heap * hp, heap_node * elm);
46 static int _heap_should_grow(heap * hp);
47 static void _heap_grow(heap * hp);
48 static void _heap_swap_element(heap * hp, heap_node * elm1, heap_node * elm2);
49 static int _heap_node_exist(heap * hp, int id);
50 
51 #ifdef HEAP_DEBUG
52 void _heap_print_tree(heap * hp, heap_node * node);
53 #endif /* HEAP_DEBUG */
54 
55 #define Left(x) (2 * (x) + 1)
56 #define Right(x) (2 * (x) + 2)
57 #define Parent(x) ((int)((x)-1)/2)
58 
59 #define Threshold 10000
60 #define NormalRate 2
61 #define SlowRate 1.5
62 #define MinSize 32
63 
64 /****************************************************************************
65  * Public functions
66  ****************************************************************************/
67 
68 /*
69  * Return a newly created heap. INITSIZE is the initial size of the heap.
70  */
71 heap *
72 new_heap(int initSize, heap_key_func gen_key)
73 {
74  heap *hp = xmalloc(sizeof(*hp));
75  assert(hp != NULL);
76 
77  if (initSize <= 0)
78  initSize = MinSize;
79  hp->nodes = xcalloc(initSize, sizeof(heap_node *));
80  assert(hp->nodes != NULL);
81 
82  hp->size = initSize;
83  hp->last = 0;
84  hp->gen_key = gen_key;
85  hp->age = 0;
86 
87  return hp;
88 }
89 
90 /*
91  * Free memory used by a heap. Does not free the metadata pointed to by the
92  * heap nodes, only the heap's internal memory.
93  */
94 void
96 {
97  int i;
98  assert(hp != NULL);
99  for (i = 0; i < hp->last; i++) {
100  xfree(hp->nodes[i]);
101  }
102  xfree(hp->nodes);
103  xfree(hp);
104 }
105 
106 /*
107  * Insert DAT based on KY into HP maintaining the heap property.
108  * Return the newly inserted heap node. The fields of ELM other
109  * than ID are never changed until ELM is deleted from HP, i.e.
110  * caller can assume that the heap node always exist at the same
111  * place in memory unless heap_delete or heap_extractmin is called
112  * on that node. This function exposes the heap's internal data
113  * structure to the caller. This is required in order to do O(lgN)
114  * deletion.
115  */
116 heap_node *
117 heap_insert(heap * hp, void *dat)
118 {
119  heap_node *elm = xmalloc(sizeof(*elm));
120 
121  elm->key = heap_gen_key(hp, dat);
122  elm->data = dat;
123 
124  if (_heap_should_grow(hp))
125  _heap_grow(hp);
126 
127  hp->nodes[hp->last] = elm;
128  elm->id = hp->last;
129  hp->last += 1;
130 
131  _heap_ify_up(hp, elm);
132 
133  return elm;
134 }
135 
136 /*
137  * Delete ELM while maintaining the heap property. ELM may be modified.
138  * Assumes that ELM is not NULL and frees it. Returns the data pointed to
139  * in, which the caller must free if necessary.
140  */
141 heap_t
143 {
144  heap_node *lastNode;
145  heap_t data = elm->data;
146 
147  assert(_heap_node_exist(hp, hp->last - 1));
148 
149  lastNode = hp->nodes[hp->last - 1];
150  _heap_swap_element(hp, lastNode, elm);
151  heap_extractlast(hp);
152 
153  if (elm == lastNode) {
154  /*
155  * lastNode just got freed, so don't access it in the next
156  * block.
157  */
158  (void) 0;
159  } else if (hp->last > 0) {
160  if (lastNode->key < hp->nodes[Parent(lastNode->id)]->key)
161  _heap_ify_up(hp, lastNode); /* COOL! */
162  _heap_ify_down(hp, lastNode);
163  }
164  return data;
165 }
166 
167 /*
168  * Delete the last element (leaf) out of the heap. Does not require a
169  * heapify operation.
170  */
171 
172 #ifndef heap_gen_key
173 /*
174  * Function to generate keys. See macro definition in heap.h.
175  */
176 heap_key
178 {
179  return hp->gen_key(dat, hp->age);
180 }
181 #endif /* heap_gen_key */
182 
183 /*
184  * Returns the data of the node with the largest KEY value and removes that
185  * node from the heap. Returns NULL if the heap was empty.
186  */
187 heap_t
189 {
190  heap_t data;
191 
192  if (hp->last <= 0)
193  return NULL;
194 
195  mutex_lock(hp->lock);
196 
197  data = hp->nodes[0]->data;
198  heap_delete(hp, hp->nodes[0]); /* Delete the root */
199 
200  mutex_unlock(hp->lock);
201 
202  return data;
203 }
204 
205 /*
206  * Remove the last node in HP. Frees the heap internal structure and
207  * returns the data pointes to by the last node.
208  */
209 heap_t
211 {
212  heap_t data;
213  assert(_heap_node_exist(hp, hp->last - 1));
214  hp->last -= 1;
215  data = hp->nodes[hp->last]->data;
216  xfree(hp->nodes[hp->last]);
217  return data;
218 }
219 
220 /*
221  * The semantics of this routine is the same as the followings:
222  * heap_delete(hp, elm);
223  * heap_insert(hp, dat);
224  * Returns the old data object from elm (the one being replaced). The
225  * caller must free this as necessary.
226  */
227 heap_t
228 heap_update(heap * hp, heap_node * elm, void *dat)
229 {
230  heap_t old = elm->data;
231  heap_key ky = heap_gen_key(hp, dat);
232 
233  elm->key = ky;
234  elm->data = dat;
235 
236  if (elm->key < hp->nodes[Parent(elm->id)]->key)
237  _heap_ify_up(hp, elm);
238  _heap_ify_down(hp, elm);
239 
240  return old;
241 }
242 
243 /*
244  * A pointer to the root node's DATA.
245  */
246 void *
248 {
249  assert(_heap_node_exist(hp, 0));
250  return hp->nodes[0]->data;
251 }
252 
253 /*
254  * The KEY of the root node.
255  */
256 heap_key
258 {
259  assert(_heap_node_exist(hp, 0));
260  return hp->nodes[0]->key;
261 }
262 
263 /*
264  * Same as heap_peep except that this return the KEY of the node.
265  * Only meant for iteration.
266  */
267 heap_key
268 heap_peepkey(heap * hp, int n)
269 {
270  assert(_heap_node_exist(hp, n));
271  return hp->nodes[n]->key;
272 }
273 
274 /*
275  * A pointer to Nth node's DATA. The caller can iterate through HP by
276  * calling this routine. eg. Caller can execute the following code:
277  * for(i = 0; i < heap_nodes(hp); i++)
278  * data = heap_peep(hp, i);
279  */
280 void *
281 heap_peep(heap * hp, int n)
282 {
283  void *data;
284  assert(_heap_node_exist(hp, n));
285  data = hp->nodes[n]->data;
286  return data;
287 }
288 
289 #ifndef heap_nodes
290 /*
291  * Current number of nodes in HP.
292  */
293 int
295 {
296  return hp->last;
297 }
298 #endif /* heap_nodes */
299 
300 #ifndef heap_empty
301 /*
302  * Determine if the heap is empty. Returns 1 if HP has no elements and 0
303  * otherwise.
304  */
305 int
307 {
308  return (hp->last <= 0) ? 1 : 0;
309 }
310 #endif /* heap_empty */
311 
312 /****************** Private Functions *******************/
313 
314 /*
315  * Maintain the heap order property (parent is smaller than children) which
316  * may only be violated at ELM downwards. Assumes caller has locked the heap.
317  */
318 static void
320 {
321  heap_node *kid;
322  int left = 0, right = 0;
323  int isTrue = 1;
324  while (isTrue) {
325  left = Left(elm->id);
326  right = Right(elm->id);
327  if (!_heap_node_exist(hp, left)) {
328  /* At the bottom of the heap (no child). */
329 
330  assert(!_heap_node_exist(hp, right));
331  break;
332  } else if (!_heap_node_exist(hp, right))
333  /* Only left child exists. */
334 
335  kid = hp->nodes[left];
336  else {
337  if (hp->nodes[right]->key < hp->nodes[left]->key)
338  kid = hp->nodes[right];
339  else
340  kid = hp->nodes[left];
341  }
342  if (elm->key <= kid->key)
343  break;
344  _heap_swap_element(hp, kid, elm);
345  }
346 }
347 
348 /*
349  * Maintain the heap property above ELM. Caller has locked the heap.
350  */
351 static void
353 {
354  heap_node *parentNode;
355  while (elm->id > 0) {
356  parentNode = hp->nodes[Parent(elm->id)];
357  if (parentNode->key <= elm->key)
358  break;
359  _heap_swap_element(hp, parentNode, elm); /* Demote the parent. */
360  }
361 }
362 
363 /*
364  * Swap the position of ELM1 and ELM2 in heap structure. Their IDs are also
365  * swapped.
366  */
367 static void
369 {
370  int elm1Id = elm1->id;
371  elm1->id = elm2->id;
372  elm2->id = elm1Id;
373  hp->nodes[elm1->id] = elm1;
374  hp->nodes[elm2->id] = elm2;
375 }
376 
377 /*
378  * True if HP needs to be grown in size.
379  */
380 static int
382 {
383  if (hp->size <= hp->last)
384  return 1;
385  return 0;
386 }
387 
388 /*
389  * Grow HP.
390  */
391 static void
393 {
394  int newSize;
395 
396  if (hp->size > Threshold)
397  newSize = hp->size * SlowRate;
398  else
399  newSize = hp->size * NormalRate;
400 
401  hp->nodes = xrealloc(hp->nodes, newSize * sizeof(heap_node *));
402  hp->size = newSize;
403 }
404 
405 /*
406  * True if a node with ID exists in HP.
407  */
408 static int
409 _heap_node_exist(heap * hp, int id)
410 {
411  if ((id >= hp->last) || (id < 0) || (hp->nodes[id] == NULL))
412  return 0;
413  return 1;
414 }
415 
416 /****************************************************************************
417  * Printing and debug functions
418  ****************************************************************************/
419 
420 /*
421  * Print the heap in element order, id..last.
422  */
423 static void
425 {
426  while (id < hp->last) {
427  printf("%d\tKey = %.04f\n", id, hp->nodes[id]->key);
428  id++;
429  }
430 }
431 
432 /*
433  * Returns 1 if HP maintains the heap property and 0 otherwise.
434  */
435 int
437 {
438  int i = 0;
439  int correct = 1;
440  for (i = 0; i < hp->last / 2; i++) {
441  correct = 1;
442  if (_heap_node_exist(hp, Left(i)))
443  if (hp->nodes[i]->key > hp->nodes[Left(i)]->key)
444  correct = 0;
445  if (_heap_node_exist(hp, Right(i)))
446  if (hp->nodes[i]->key > hp->nodes[Right(i)]->key)
447  correct = 0;
448  if (!correct) {
449  printf("verifyHeap: violated at %d", i);
450  heap_print_inorder(hp, 0);
451  break;
452  }
453  }
454  return correct;
455 }
456 
457 #ifdef MEASURE_HEAP_SKEW
458 
459 /****************************************************************************
460  * Heap skew computation
461  ****************************************************************************/
462 
463 int
464 compare_heap_keys(const void *a, const void *b)
465 {
466  heap_node **an = (heap_node **) a;
467  heap_node **bn = (heap_node **) b;
468  float cmp = (*an)->key - (*bn)->key;
469  if (cmp < 0)
470  return -1;
471  else
472  return 1;
473 }
474 
475 /*
476  * Compute the heap skew for HEAP, a measure of how out-of-order the
477  * elements in the heap are. The skew of a heap node is the difference
478  * between its current position in the heap and where it would be if the
479  * heap were in sorted order. To compute this we have to sort the heap. At
480  * the end if the flag REPLACE is non-zero the heap will be returned in
481  * sorted order (with skew == 0). Note: using REPLACE does not help the
482  * performance of the heap, so only do this if you really want to have a
483  * sorted heap. It is faster not to replace.
484  */
485 float
486 calc_heap_skew(heap * heap, int replace)
487 {
488  heap_node **nodes;
489  long id, diff, skew = 0;
490 #ifdef HEAP_DEBUG_SKEW
491  long skewsq = 0;
492 #endif /* HEAP_DEBUG_SKEW */
493  float norm = 0;
494  unsigned long max;
495 
496  /*
497  * Lock the heap to copy it. If replacing it need to keep the heap locked
498  * until we are all done.
499  */
500  mutex_lock(hp->lock);
501 
502  max = heap_nodes(heap);
503 
504  /*
505  * Copy the heap nodes to a new storage area for offline sorting.
506  */
507  nodes = xmalloc(max * sizeof(heap_node *));
508  memcpy(nodes, heap->nodes, max * sizeof(heap_node *));
509 
510  if (replace == 0) {
511  /*
512  * Unlock the heap to allow updates from other threads before the sort.
513  * This allows other heap operations to proceed concurrently with the
514  * heap skew computation on the heap at the time of the call ...
515  */
516  mutex_unlock(hp->lock);
517  }
518  qsort(nodes, max, sizeof(heap_node *), compare_heap_keys);
519 
520  for (id = 0; id < max; id++) {
521  diff = id - nodes[id]->id;
522  skew += abs(diff);
523 
524 #ifdef HEAP_DEBUG_SKEW
525  skewsq += diff * diff;
526 #ifdef HEAP_DEBUG_ALL
527  printf("%d\tKey = %f, diff = %d\n", id, nodes[id]->key, diff);
528 #endif /* HEAP_DEBUG */
529 #endif /* HEAP_DEBUG_SKEW */
530  }
531 
532  if (replace != 0) {
533  /*
534  * Replace the original heap with the newly sorted heap and let it
535  * continue. Then compute the skew using the copy of the previous heap
536  * which we maintain as private data.
537  */
538  memcpy(heap->nodes, nodes, max * sizeof(heap_node *));
539 
540  for (id = 0; id < max; id++) {
541  /*
542  * Fix up all the ID values in the copied nodes.
543  */
544  heap->nodes[id]->id = id;
545  }
546 
547  mutex_unlock(hp->lock);
548  }
549  /*
550  * The skew value is normalized to a range of [0..1]; the distribution
551  * appears to be a skewed Gaussian distribution. For random insertions
552  * into a heap the normalized skew will be slightly less than 0.5. The
553  * maximum value of skew/N^2 (for any value of N) is about 0.39 and is
554  * fairly stable.
555  */
556  norm = skew * 2.56 / (max * max);
557 
558  /*
559  * Free the nodes array; note this is just an array of pointers, not data!
560  */
561  xfree(nodes);
562  return norm;
563 }
564 
565 #endif /* MEASURE_HEAP_SKEW */
566 
Definition: parse.c:104
void * xcalloc(size_t n, size_t sz)
Definition: xalloc.cc:71
#define Parent(x)
Definition: heap.c:57
#define xmalloc
#define MinSize
Definition: heap.c:62
static void _heap_ify_up(heap *hp, heap_node *elm)
Definition: heap.c:352
static void _heap_ify_down(heap *hp, heap_node *elm)
Definition: heap.c:319
static void heap_print_inorder(heap *hp, int id)
Definition: heap.c:424
unsigned long last
Definition: heap.h:55
unsigned long size
Definition: heap.h:54
#define mutex_lock(m)
Definition: heap.c:36
int heap_nodes(heap *hp)
Definition: heap.c:294
double heap_key
Definition: heap.h:33
const A & max(A const &lhs, A const &rhs)
static char last
Definition: parse.c:451
#define Right(x)
Definition: heap.c:56
heap_node * heap_insert(heap *hp, void *dat)
Definition: heap.c:117
#define mutex_unlock(m)
Definition: heap.c:37
int verify_heap_property(heap *hp)
Definition: heap.c:436
heap_t heap_extractlast(heap *hp)
Definition: heap.c:210
heap_key heap_gen_key(heap *hp, heap_t dat)
Definition: heap.c:177
static void _heap_swap_element(heap *hp, heap_node *elm1, heap_node *elm2)
Definition: heap.c:368
void * heap_peep(heap *hp, int n)
Definition: heap.c:281
int heap_empty(heap *hp)
Definition: heap.c:306
heap_key_func * gen_key
Definition: heap.h:56
heap_t heap_update(heap *hp, heap_node *elm, void *dat)
Definition: heap.c:228
#define NULL
Definition: types.h:145
static int _heap_node_exist(heap *hp, int id)
Definition: heap.c:409
heap_key heap_peepminkey(heap *hp)
Definition: heap.c:257
heap * new_heap(int initSize, heap_key_func gen_key)
Definition: heap.c:72
heap_key heap_peepkey(heap *hp, int n)
Definition: heap.c:268
#define SlowRate
Definition: heap.c:61
heap_key age
Definition: heap.h:57
heap_node ** nodes
Definition: heap.h:58
#define assert(EX)
Definition: assert.h:17
unsigned long id
Definition: heap.h:43
static void _heap_grow(heap *hp)
Definition: heap.c:392
#define xfree
heap_key heap_key_func(heap_t, heap_key)
Definition: heap.h:34
static int _heap_should_grow(heap *hp)
Definition: heap.c:381
Definition: heap.h:52
void * heap_peepmin(heap *hp)
Definition: heap.c:247
heap_t heap_delete(heap *hp, heap_node *elm)
Definition: heap.c:142
heap_mutex_t lock
Definition: heap.h:53
void * heap_t
Definition: heap.h:32
heap_key key
Definition: heap.h:42
void * xrealloc(void *s, size_t sz)
Definition: xalloc.cc:126
heap_t heap_extractmin(heap *hp)
Definition: heap.c:188
#define Threshold
Definition: heap.c:59
#define Left(x)
Definition: heap.c:55
void delete_heap(heap *hp)
Definition: heap.c:95
#define NormalRate
Definition: heap.c:60
heap_t data
Definition: heap.h:44

 

Introduction

Documentation

Support

Miscellaneous