/* spellhsh - Matt Mahoney, mmahoney@cs.fit.edu This program takes 2 file name arguments. It prints any word that occurs in the first file but not in the second, along with the line number. If the second file is a dictionary, then the program acts as a spell checker. A word is defined as any sequence of letters (a-z), excluding punctuation. Comparison is not case sensitive. This program is idetical to spellchk.cpp except that it uses a hash table instead of a map if compiled using "g++ -DHASH" */ #include #include #include #include #include #include using namespace std; /* A Hashtable associates values of type T with string keys. If h is a Hashtable and s is a string, then h[s] is a reference to the unique object of type T associated with s. If s is used for the first time, then a new value is created with value T(). For example: Hashtable h; h["hello"] = 5; cout << h["bye"]; // 0 A Hashtable is implemented as a vector of key-value pairs which are looked up by hashing the key. A key of "" indicates an unused pair. As a special case, the value associated with the empty string is stored separately. Hash collisions are resolved by going to the next element and wrapping around from the end back to the beginning. When the hash table is 3/4 full, the size is doubled. The initial size is 4. */ template class Hashtable { private: vector > table; // Hash table int n; // Number of occupied elements in hash table unsigned int index(const string& s); // Find/insert s in table T emptystring; // Value associated with key "" public: Hashtable(): table(4), n(0) {} T& operator[](const string& s); // Return value associated with s }; // Return index of s in table. If s is not in the table, insert it first. template unsigned int Hashtable::index(const string& s) { // Compute hash of s unsigned int h=0; for (int i=s.size()-1; i>=0; --i) h=(h+s[i])*123456789; h &= table.size()-1; // Find first element containing s or empty while (!table[h].first.empty() && table[h].first!=s) if (++h==table.size()) h=0; if (table[h].first.empty()) { // If empty, insert s ++n; table[h].first=s; } return h; } // Return reference to object associated with s, creating if needed. // When hash table is 3/4 full, double the size and rehash all the // elements into the new table. template T& Hashtable::operator[](const string& s) { if (s.empty()) return emptystring; if (n*4 >= int(table.size())*3) { // Double the table size n=0; vector > oldtable=table; table.resize(0); table.resize(oldtable.size()*2); for (int i=0; i dict; // dict[word] is true if word is read from f2 #else map dict; #endif string s; // current word char c; while (f2.get(c)) { if (isalpha(c)) s+=c; else if (s!="") { dict[lowercase(s)]=true; s=""; } } // Test words in file f1. Print "line: word" if not in dict. s=""; int line=1; // Line number in f1 while (f1.get(c)) { if (isalpha(c)) s+=c; else if (s!="") { if (!dict[lowercase(s)]) cout << line << ": " << s << "\n"; s=""; } if (c=='\n') ++line; } return 0; }