自定义实现STL容器（hashmap）

看风景D人 2014-04-25

展开全文

// 编写STL容器(hashmap)
// hashmap.h

#ifndef _HASHMAP_H
#define _HASHMAP_H

#include <vector>
#include <string>
#include <list>
#include <stdexcept>
#include <map>

using namespace std;

template <typename Key, typename T, typename Compare, typename Hash>
class HashIterator;

// Any Hash Class must provide two methods: hash() and numBuckets().
template <typename T>
class DefaultHash
{
public:
   // Throws invalid_argument if numBuckets in nonpositive
   DefaultHash(int numBuckets = 101) throw (invalid_argument);
   int hash(const T& key) const;
   int numBuckets() const { return mNumBuckets; }

protected:
   int mNumBuckets;
};

// Throws invalid_argument if numBuckets is nonpositive
template <typename T>
DefaultHash<T>::DefaultHash(int numBuckets) throw (invalid_argument)
{
   if (numBuckets <= 0) {
       throw (invalid_argument("numBuckets must be > 0"));
   }
   mNumBuckets = numBuckets;
}

// Uses the division method for hashing.
// Treats the key as a sequence of bytes, sums the ASCII
// values of the bytes, and mods the total b y the number
// of buckets.
template <typename T>
int DefaultHash<T>::hash(const T& key) const
{
   int bytes = sizeof(key);
   unsigned long res = 0;
   for (int i = 0; i < bytes; ++i) {
       res += *((char*)&key + i);
   }
   return (res % mNumBuckets);
}

// Specialization for the strings
template <>
class DefaultHash<string>
{
public:
   // Throws invalid_argument if numBuckets is nonpositive
   DefaultHash(int numBuckets = 101) throw (invalid_argument);
   int hash(const string& key) const;
   int numBuckets() const { return mNumBuckets; }

protected:
   int mNumBuckets;
};

// Throws invalid_argument if numBuckets is nonpositive
DefaultHash<string>::DefaultHash(int numBuckets) throw (invalid_argument)
{
   if (numBuckets <= 0) {
       throw (invalid_argument("numBuckets must be > 0"));
   }
   mNumBuckets = numBuckets;
}

// Uses the division method for hashing after suming the
// ASCII values of all the characters in key.
int DefaultHash<string>::hash(const string& key) const
{
   int sum = 0;

   for (size_t i = 0; i < key.size(); ++i) {
       sum += key[i];
   }
   return (sum % mNumBuckets);
}

//
// hashmap interface
//
template <typename Key, typename T, typename Compare = std::equal_to<Key>,
   typename Hash = DefaultHash<Key> >
class hashmap
{
public:
   typedef Key key_type;
   typedef T mapped_type;
   typedef pair<const Key, T> value_type;
   typedef pair<const Key, T>& reference;
   typedef const pair<const Key, T>& const_reference;
   typedef size_t size_type;
   typedef ptrdiff_t difference_type;

   typedef HashIterator<Key, T, Compare, Hash> iterator;
   typedef HashIterator<Key, T, Compare, Hash> const_iterator;

   // Iterator methods
   iterator begin();
   iterator end();
   const_iterator begin() const;
   const_iterator end() const;
   //Remainder of class definition omitted for brevity

   // Constructors
   // Throws invalid_argument if the hash object specifies a nonpositive
   // number of buckets
   explicit hashmap(const Compare& comp = Compare(),
       const Hash& hash = Hash()) throw (invalid_argument);

   // destructor, copy constructor, assignment operator
   ~hashmap();
   hashmap(const hashmap<Key, T, Compare, Hash>& src);
   hashmap<Key, T, Compare, Hash>& operator= (
       const hashmap<Key, T, Compare, Hash>& rhs);

   // Element insert
   // Inserts the key/value pair x
   void insert(const value_type& x);

   // Element delete
   // Remove the element with key x, if it exists
   void erase(const key_type& x);

   // Element lookup
   // find returns a pointer to the element with key x
   // Returns NULL if no element with that key exists.
   value_type* find(const key_type& x);

   typename list<pair<const Key, T> >::iterator
       findElement(const key_type& x, int& bucket) const;

   bool empty() const;
   size_type size() const;
   size_type max_size() const;
   void swap(hashmap<Key, T, Compare, Hash>& hashIn);


   // operator[] finds the element with key x or inserts an
   // element with that key if none exists yet. Returns a reference to the
   // value corresponding to that key.
   T& operator[] (const key_type& x);


   typedef list<value_type> ListType;

   // In this first impelmentation, it would be easier to use a vector
   // instead of a pointer to a vector, which requires dynamic allocation.
   // However, we use a ptr to a vector so that, in the final
   // implementation, swap() can be implementation in constant time.
   vector<ListType>* mElems;
   int mSize;
   Compare mComp;
   Hash mHash;
};

// Construct mElems with the number of buckets.
template <typename Key, typename T, typename Compare, typename Hash>
hashmap<Key, T, Compare, Hash>::hashmap(
   const Compare& comp, const Hash& hash) throw (invalid_argument) :
   mSize(0), mComp(comp), mHash(hash)
{
   if (mHash.numBuckets() <= 0) {
       throw (invalid_argument("Number of buckets must be positive"));
   }
   mElems = new vector<list<value_type> >(mHash.numBuckets());
}

template <typename Key, typename T, typename Compare, typename Hash>
hashmap<Key, T, Compare, Hash>::~hashmap()
{
   delete mElems;
}

template <typename Key, typename T, typename Compare, typename Hash>
hashmap<Key, T, Compare, Hash>::hashmap(
   const hashmap<Key, T, Compare, Hash>& src) :
   mSize(src.mSize), mComp(src.mComp), mHash(src.mHash)
{
   // Don't need to bother checking if numBuckets is positive, because
   // we know src checked

   // Use the vector copy constructor
   mElems = new vector<list<value_type> >(*(src.mElems));
}

template <typename Key, typename T, typename Compare, typename Hash>
hashmap<Key, T, Compare, Hash>& hashmap<Key, T, Compare, Hash>::operator=(
   const hashmap<Key, T, Compare, Hash>& rhs)
{
   // Check for self-assignment.
   if (this != &rhs) {
       delete mElems;
       mSize = rhs.mSize;
       mComp = rhs.mComp;
       mHash = rhs.mHash;

       // Don't need to bother checking if numBuckets is positive, because
       // we know rhs checked

       // Use the vector copy constructor.
       mElems = new vector<list<value_type> >(*(rhs.mElems));
   }
   return (*this);
}

template <typename Key, typename T, typename Compare, typename Hash>
typename list<pair<const Key, T> >::iterator
hashmap<Key, T, Compare, Hash>::findElement(const key_type& x, int& bucket) const
{
   // Hash the key to get the bucket.
   bucket = mHash.hash(x);

   // Look for the key in the bucket.
   for (typename ListType::iterator it = (*mElems)[bucket].begin();
       it != (*mElems)[bucket].end(); ++it) {
       if (mComp(it->first, x)) {
           return (it);
       }
   }
   return ((*mElems)[bucket].end());
}

template <typename Key, typename T, typename Compare, typename Hash>
typename hashmap<Key, T, Compare, Hash>::value_type*
hashmap<Key, T, Compare, Hash>::find(const key_type& x)
{
   int bucket;
   // Use the findElement() helper.
   typename ListType::iterator it = findElement(x, bucket);
   if (it == (*mElems)[bucket].end()) {
       // We didn't find the element--return NULL.
       return (NULL);
   }
   // We found the element. Return a pointer to it.
   return (&(*it));
}

template <typename Key, typename T, typename Compare, typename Hash>
T& hashmap<Key, T, Compare, Hash>::operator[] (const key_type& x)
{
   // Try to find the element.
   // If it doesn't exist, add a new element.
   value_type* found = find(x);
   if (found == NULL) {
       insert(make_pair(x, T()));
       found = find(x);
   }
   return (found->second);
}

template <typename Key, typename T, typename Compare, typename Hash>
void hashmap<Key, T, Compare, Hash>::insert(const value_type& x)
{
   int bucket;
   // Try to find the element.
   typename ListType::iterator it = findElement(x.first, bucket);

   if (it != (*mElems)[bucket].end()) {
       // The element already exists.
       return;
   } else {
       // We didn't find the element, so insert a new one.
       mSize++;
       (*mElems)[bucket].insert((*mElems)[bucket].end(), x);
   }
}

template <typename Key, typename T, typename Compare, typename Hash>
void hashmap<Key, T, Compare, Hash>::erase(const key_type& x)
{
   int bucket;

   // First, try to find the element.
   typename ListType::iterator it = findElement(x, bucket);

   if (it != (*mElems)[bucket].end()) {
       // The element already exists--erase it.
       (*mElems)[bucket].erase(it);
       mSize--;
   }
}

template <typename Key, typename T, typename Compare, typename Hash>
bool hashmap<Key, T, Compare, Hash>::empty() const
{
   return (mSize == 0);
}

template <typename Key, typename T, typename Compare, typename Hash>
typename hashmap<Key, T, Compare, Hash>::size_type
hashmap<Key, T, Compare, Hash>::size() const
{
   return (mSize);
}

template <typename Key, typename T, typename Compare, typename Hash>
typename hashmap<Key, T, Compare, Hash>::size_type
   hashmap<Key, T, Compare, Hash>::max_size() const
{
   // In the worst case, all the elements hash to the
   // same list, so the max_size is the max_size of a single
   // list. This code assumes that all the lists have the same
   // max_size.
   return ((*mElems)[0].max_size());
}

// Just swap the four data members.
// Use the generic swap template.
template <typename Key, typename T, typename Compare, typename Hash>
void hashmap<Key, T, Compare, Hash>::swap(
   hashmap<Key, T, Compare, Hash>& hashIn)
{
   // Explicitly quality with std:: so the compiler doesn't think
   // it's a recursive call.
   std::swap(*this, hashIn);
}

template <typename Key, typename T, typename Compare, typename Hash>
typename hashmap<Key, T, Compare, Hash>::iterator
   hashmap<Key, T, Compare, Hash>::begin()
{
   if (mSize == 0) {
       // Special case : there are no elements, so return the end iterator
       return (end());
   }

   // We know there is at least one element. Find the first element.
   for (size_t i = 0; i < mElems->size(); ++i) {
       if (!((*mElems)[i].empty())) {
           return (HashIterator<Key, T, Compare, Hash>(i,
               (*mElems)[i].begin(), this));
       }
   }
   // Should never reach here, but if we do, return the end iterator
   return (end());
}

template <typename Key, typename T, typename Compare, typename Hash>
typename hashmap<Key, T, Compare, Hash>::iterator
   hashmap<Key, T, Compare, Hash>::end()
{
   // The end iterator is just the end iterator of the list last bucket.
   return (HashIterator<Key, T, Compare, Hash>(mElems->size() - 1,
       (*mElems)[mElems->size() - 1].end(), this));
}

//
// HashIterator
//

// HashIterator
template <typename Key, typename T, typename Compare, typename Hash>
class HashIterator : public std::iterator<std::bidirectional_iterator_tag,
   pair<const Key, T> >
{
public:
   HashIterator();       // Bidirectional iterators must supply default ctors.
   HashIterator(int bucket,
       typename list<pair<const Key, T> >::iterator listIt,
       const hashmap<Key, T, Compare, Hash>* inHashmap);
   pair<const Key, T>& operator*() const;

   // Return type must be something to which -> can be applied.
   // Reurn a pointer to a pair <const Key, T>, to which the compiler will
   // apply -> again.
   pair<const Key, T>* operator->() const;
   HashIterator<Key, T, Compare, Hash>& operator++();
   const HashIterator<Key, T, Compare, Hash> operator++(int);

   HashIterator<Key, T, Compare, Hash>& operator--();
   const HashIterator<Key, T, Compare, Hash> operator--(int);

   // Don't need to define a copy constructor or operator= because the
   // default behavior is what we want

   // Don't need destructor because the default behavior
   // (not deleting mHashmap) is what we want.

   // These are ok as member functions because we don't support
   // comparisons of different types to this one.
   bool operator==(const HashIterator& rhs) const;
   bool operator!=(const HashIterator& rhs) const;

protected:
   int mBucket;
   typename list<pair<const Key, T> >::iterator mIt;
   const hashmap<Key, T, Compare, Hash>* mHashmap;

   // Helper methods for operator++ and operator--
   void increment();
   void decrement();
};

// Dereferencing or incrementing an iterator constructed with the
// default ctor is undefined, so it doesn't matter what values we give
// here.
template <typename Key, typename T, typename Compare, typename Hash>
HashIterator<Key, T, Compare, Hash>::HashIterator()
{
   mBucket = -1;
   mIt = list<pair<const Key, T> >::iterator();
   mHashmap = NULL;
}

template <typename Key, typename T, typename Compare, typename Hash>
HashIterator<Key, T, Compare, Hash>::HashIterator(
   int bucket, typename list<pair<const Key, T> >::iterator listIt,
   const hashmap<Key, T, Compare, Hash>* inHashmap) :
   mBucket(bucket), mIt(listIt), mHashmap(inHashmap)
{
}

// Return the actual element
template <typename Key, typename T, typename Compare, typename Hash>
pair<const Key, T>& HashIterator<Key, T, Compare, Hash>::operator*() const
{
   return (*mIt);
}

// Return the iterator, so the compiler can apply -> to it to access
// the actual desired field.
template <typename Key, typename T, typename Compare, typename Hash>
pair<const Key, T>*
HashIterator<Key, T, Compare, Hash>::operator->() const
{
   return (&(*mIt));
}

// Defer the details to the increment() helper.
template <typename Key, typename T, typename Compare, typename Hash>
HashIterator<Key, T, Compare, Hash>&
   HashIterator<Key, T, Compare, Hash>::operator++()
{
   increment();
   return (*this);
}

// Defer the details to the increment() helper.
template <typename Key, typename T, typename Compare, typename Hash>
const HashIterator<Key, T, Compare, Hash>
   HashIterator<Key, T, Compare, Hash>::operator++(int)
{
   HashIterator<Key, T, Compare, Hash> oldIt = *this;
   increment();
   return (oldIt);
}

// Defer the details to the decrement() helper.
template <typename Key, typename T, typename Compare, typename Hash>
HashIterator<Key, T, Compare, Hash>&
   HashIterator<Key, T, Compare, Hash>::operator--()
{
   decrement();
   return (*this);
}

// Defer the details to the decrement() helper.
template <typename Key, typename T, typename Compare, typename Hash>
const HashIterator<Key, T, Compare, Hash>
   HashIterator<Key, T, Compare, Hash>::operator--(int)
{
   HashIterator<Key, T, Compare, Hash> newIt = *this;
   decrement();
   return (newIt);
}

// Behavior is undefined if mIt already refers to the past-the-end
// element in the table, or is otherwise invalid.
template <typename Key, typename T, typename Compare, typename Hash>
void HashIterator<Key, T, Compare, Hash>::increment()
{
   // mIt is an iterator into a single bucket.
   // Increment it.
   ++mIt;
   // If we're at the end of the current bucket,
   // find the next bucket with elements.
   if (mIt == (*mHashmap->mElems)[mBucket].end()) {
       for (int i = mBucket + 1; i < (*mHashmap->mElems).size(); i++) {
           if (!((*mHashmap->mElems)[i].empty())) {
               // We found a nonempty bucket.
               // Make mIt refer to the first element in it.
               mIt = (*mHashmap->mElems)[i].begin();
               mBucket = i;
               return;
           }
       }
       // No more empty buckets. Assign mIt to refer to the end
       // iterator of the last list.
       mBucket = (*mHashmap->mElems).size() - 1;
       mIt = (*mHashmap->mElems)[mBucket].end();
   }
}

// Behavior is undefined if mIt already refers to the first element
// in the table, or is otherwise invalid.
template <typename Key, typename T, typename Compare, typename Hash>
void HashIterator<Key, T, Compare, Hash>::decrement()
{
   // mIt is an iterator into a single bucket.
   // If it's at the beginning of the current bucket, don't decrement it.
   // Instread, try to find a nonempty bucket ahead of the current one.
   if (mIt == (*mHashmap->mElems)[mBucket].begin()) {
       for (int i = mBucket - 1; i >= 0; --i) {
           if (!((*mHashmap->mElems)[i].empty())) {
               mIt = (*mHashmap->mElems)[i].end();
               --mIt;
               mBucket = i;
               return;
           }
       }
       // No more nonempty buckets. This is an invalid decrement.
       // Assign mIt to refer to one before the start element of the first
       // list (an invalid position).
       mIt = (*mHashmap->mElems)[0].begin();
       --mIt;
       mBucket = 0;
   } else {
       // We've not at the beginning of the bucket, so
       // just move down.
       --mIt;
   }
}

template <typename Key, typename T, typename Compare, typename Hash>
bool HashIterator<Key, T, Compare, Hash>::operator==(
   const HashIterator& rhs) const
{
   // All fields, including the hashmap to which the iterators refer,
   // must be equal.
   return (mHashmap == rhs.mHashmap && mBucket == rhs.mBucket &&
       mIt == rhs.mIt);
}

template <typename Key, typename T, typename Compare, typename Hash>
bool HashIterator<Key, T, Compare, Hash>::operator!=(
   const HashIterator& rhs) const
{
   return (!operator==(rhs));
}
#endif

// TestHashmap.cpp

#include "hashmap.h"
#include <iostream>
#include <map>

using namespace std;

int main(int argc, char** argv)
{
    hashmap<string, int> myHash;
    myHash.insert(make_pair("KeyOne", 100));
    myHash.insert(make_pair("KeyTwo", 200));
    myHash.insert(make_pair("KeyThree", 300));

    for (hashmap<string, int>::iterator it = myHash.begin();
        it != myHash.end(); ++it) {
        // Use both -> and * to test the operations.
        cout << it->first << " maps to " << (*it).second << endl;
    }

    // Create a map with all the elements in the hashmap.
    map<string, int> myMap(myHash.begin(), myHash.end());
    for (map<string, int>::iterator it = myMap.begin();
        it != myMap.end(); ++it) {
        // Use both -> and * to test the operations.
        cout << it->first << " maps to " << (*it).second << endl;
    }

}