/* index.cpp - Matt Mahoney, mmahoney@cs.fit.edu

This program reads a file (specified on the command line, or from
standard input if no file name is given) and for each word, prints the
first 10 line numbers on which it is found at least once.  A word is any
sequence of letters (a-z).  Upper and lower case are equivalent.  The
index is printed to standard output with the words in lower case, listed
alphabetically, one word per line, followed by a list of up to 10 line
numbers (separated by spaces), and ... if the word appears on more than
10 lines.
*/

#include <cctype>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>
using namespace std;

typedef map<string, vector<int> > Index;  // Maps words to line numbers

// Number of line numbers printed per word.  One extra entry is stored so
// that main() can tell "exactly 10 lines" apart from "more than 10 lines".
const vector<int>::size_type MAX_LINES_SHOWN = 10;

// record_word(m, word, line) notes that word occurs on the given line.
// The line is appended only if the list for this word holds at most
// MAX_LINES_SHOWN entries and does not already end with this line
// (lines are visited in ascending order, so checking the last entry is
// enough to keep the list free of duplicates).
static void record_word(Index& m, const string& word, int line) {
  vector<int>& lines = m[word];
  if (lines.size() <= MAX_LINES_SHOWN &&
      (lines.empty() || lines.back() != line))
    lines.push_back(line);
}

// make_index(in, m) reads from the open istream in until EOF and
// constructs an index into the initially empty Index m.
// Each entry is a lowercase word mapped to a list of up to 11 unique line
// numbers in ascending order.  Line numbers start at 1 and advance on
// every '\n' read.
void make_index(istream& in, Index& m) {
  string word;   // Letters of the word currently being read (lowercased)
  char c;        // Current char
  int line = 1;  // Line number
  while (in.get(c)) {
    // The <cctype> functions require a value representable as unsigned
    // char (or EOF); passing a negative plain char is undefined behaviour.
    const unsigned char uc = static_cast<unsigned char>(c);
    if (isalpha(uc)) {
      word += static_cast<char>(tolower(uc));
    } else if (!word.empty()) {  // End of word?  Store line number.
      record_word(m, word, line);
      word.clear();
    }
    // Counted after the word is stored so that a word ending right at the
    // newline is attributed to the line it was on.
    if (c == '\n') ++line;
  }
  // Input may end in the middle of a word (no trailing newline or other
  // non-letter); without this flush the final word would be lost.
  if (!word.empty()) record_word(m, word, line);
}

// Builds the index from argv[1] (or standard input when no argument is
// given) and prints it to standard output.  Returns 1 if the named file
// cannot be opened, 0 otherwise.
int main(int argc, char** argv) {
  // Open argv[1] or use cin if absent, and pass to make_index()
  Index m;  // Index
  if (argc > 1) {
    ifstream in(argv[1]);
    if (!in) {
      cerr << "File not found: " << argv[1] << endl;
      return 1;
    }
    make_index(in, m);
  } else {
    make_index(cin, m);
  }

  // Print the index, replacing 11'th line number with ...
  for (Index::const_iterator p = m.begin(); p != m.end(); ++p) {
    cout << p->first;
    const vector<int>& lines = p->second;
    for (vector<int>::size_type i = 0; i < lines.size(); ++i) {
      if (i == MAX_LINES_SHOWN)
        cout << " ...";
      else
        cout << " " << lines[i];
    }
    cout << "\n";
  }
  return 0;
}