/* tree1.cpp - Spring 2003 CSE2050 Homework 6 part 1
Matt Mahoney, mmahoney@cs.fit.edu

This program takes a file name and a word as arguments, then prints
the number of times the word occurs in the file.  If the word is
omitted, then it outputs a histogram of all words that occur n times,
in decreasing order of n.  In either case, it prints the number of
different words and number of word instances.  For example:

  more < test
  This is a test.
  This is another test.
  This and this and this too.

  tree1 test
  1 word occurs 5 times: this
  3 words occur 2 times: and is test
  3 words occur 1 time: a another too
  test has 14 instances of 7 words

Words that occur the same number of times are listed in the iteration
order of the underlying map (alphabetical for std::map).  If more than
5 words occur n times, then just the first 5 are printed, followed
by ...

  8 words occur 2 times: bird cat dog horse worm ...

With two arguments, the histogram is omitted, and only the frequency
of the given word is output.

  tree1 test THIS
  this occurs 5 times
  test has 14 instances of 7 words

  tree1 test foo
  foo occurs 0 times
  test has 14 instances of 7 words

Input is not case sensitive.  A word is considered to be any sequence
of letters (a-z).  Thus, "IT'S" and "it s" are equivalent (both are
2 words).  A word that runs up to the end of the file is counted even
if no non-letter follows it.

This code tests all of the operations required for class Tree in
part 3 (but using maps for now).  It tests begin(), end(), find(),
size(), and operator[].  The program also tests all forward iterator
operations: ++ (pre and post), ==, !=, *, and ->.  Iterators are
declared both with and without an initial value.  The mixture of
equivalent spellings in main() is therefore intentional; do not
"simplify" it.
*/

#include <cctype>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>

// Word -> number of occurrences.
typedef std::map<std::string, int> WordCount;

// -n -> words occurring n times.  The key is negated so that the map's
// ascending iteration order yields decreasing n.
typedef std::map<int, std::vector<std::string> > Histogram;

// Returns a copy of s with every character lowercased.
static std::string to_lower(std::string s) {
  for (std::string::size_type i = 0; i < s.size(); ++i) {
    // <cctype> functions require a value representable as unsigned char.
    s[i] = static_cast<char>(std::tolower(static_cast<unsigned char>(s[i])));
  }
  return s;
}

// Reads `in` to end of input, splitting it into maximal runs of letters,
// lowercasing each run and incrementing count[run].  Returns the total
// number of word instances read.
static int count_words(std::istream& in, WordCount& count) {
  int instances = 0;
  std::string word;  // Letters of the word currently being read
  char c;
  while (in.get(c)) {
    const unsigned char uc = static_cast<unsigned char>(c);
    if (std::isalpha(uc)) {
      word += static_cast<char>(std::tolower(uc));
    } else if (!word.empty()) {
      ++instances;
      ++count[word];  // operator[] creates the entry at 0 on first use
      word.clear();
    }
  }
  // Flush a final word that was terminated by end of input rather than
  // by a non-letter (e.g. a file with no trailing newline).
  if (!word.empty()) {
    ++instances;
    ++count[word];
  }
  return instances;
}

// Usage: tree1 file [word]
// Returns 0 on success, 1 if the file argument is missing or the file
// cannot be opened.
int main(int argc, char** argv) {
  // Open input file, fail if not specified or not found
  if (argc < 2) {
    std::cerr << "Usage: tree1 file [word]\n";
    return 1;
  }
  std::ifstream f(argv[1]);
  if (!f) {
    std::cerr << "File not found: " << argv[1] << std::endl;
    return 1;
  }

  // Count words
  WordCount count;  // count[s] is number of occurrences of word s
  const int wordcount = count_words(f, count);  // Number of word instances

  WordCount::iterator p;  // Deliberately declared without an initial value
  if (argc > 2) {
    // If there is a word, report its count.  find() is used rather than
    // operator[] so that a missing word is not inserted, which would
    // inflate count.size() in the summary line below.
    // NOTE(review): the output expression of this branch was partly lost
    // in the copy under review; the wording follows the surviving
    // " times\n" fragment and the examples in the header - confirm.
    const std::string word = to_lower(argv[2]);
    p = count.find(word);
    const int occurrences = (p == count.end()) ? 0 : p->second;
    std::cout << word << " occurs " << occurrences << " times\n";
  } else {
    // Otherwise, print a histogram: m words occur n times... by
    // decreasing n
    Histogram hist;  // hist[-n] is words occurring n times
    for (p = count.begin(); !(p == count.end()); ++p)  // tests ==, pre ++
      hist[-(*p).second].push_back(p->first);          // tests * and ->

    for (Histogram::iterator q = hist.begin(); q != hist.end(); q++) {
      const int m = static_cast<int>((*q).second.size());
      const int n = -q->first;
      if (m == 1)
        std::cout << "1 word occurs ";
      else
        std::cout << m << " words occur ";
      if (n == 1)
        std::cout << "1 time:";
      else
        std::cout << n << " times:";
      // Show at most the first 5 words of each group
      for (int i = 0; i < 5 && i < m; ++i)
        std::cout << " " << q->second[i];
      if (m > 5)
        std::cout << " ...";
      std::cout << std::endl;
    }
  }

  // Print overall statistics
  std::cout << argv[1] << " has " << wordcount << " instances of "
            << int(count.size()) << " words\n";
  return 0;
}