// Author: mikedebo@gmail.com (Michael DiBernardo)
// Copyright Michael DiBernardo 2006
//
// Exercise 8 from Column 1 of Bentley's Programming Pearls. 
//
// I didn't bother with the lookups. Assuming that lookups will be online (e.g.
// user will occasionally come and look up numbers, instead of a single-shot
// batch lookup), simply making a map of area codes to bit vectors can provide
// you with an easy lookup that doesn't consume a gigantic amount of space.
//
// To sort 10-digit numbers by area code using less than a megabyte of
// space, I did two passes of the file for each area code. I deal with only
// half of the range of 7-digit numbers for each area code at each step, which
// means we only need 5 million bits (roughly 625K) in each iteration.
//
// I used the C I/O libraries to read in the phone numbers, because 10 digit
// numbers need at least a long long int on some platforms (like mine!) and the
// C++ input streams aren't guaranteed to know how to deal with those. 

#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <iostream>

#include "BitVector.h"
using namespace std; // No one is going to include this file, so this is OK.

//------------------------------------------------------------------------------ 
// Constants
//------------------------------------------------------------------------------ 
const int kSortSetSize = 1e7 / 2; // Size of set that we will sort in each pass.
const int kNumCodes = 3; // Number of available area codes.
const int kAreaCodes[] = { 800, 877, 888 };

//------------------------------------------------------------------------------ 
// Helper classes
//------------------------------------------------------------------------------ 

// Abstract a file as an iterator over long long ints (phone numbers).
class NumberStream {
 public:
  // Factory function for number streams. Returns NULL if the given file cannot
  // be read.
  static NumberStream* Create(const char* filename) {
    FILE* number_file = fopen(filename, "r");
    if (number_file == NULL) return NULL;

    NumberStream* stream = new NumberStream(number_file);
    return stream;
  }

  // Free resources used by this stream.
  ~NumberStream() {
    int close_status = fclose(number_file_);
    if (close_status != 0) {
      cerr << "WARNING: File close status was " << close_status << endl;
    }
  }

  // Does the stream have another number available?
  bool HasNext() const {
    return has_next_;
  }

  // Get the next number in the stream.
  long long Next() {
    assert(has_next_);

    long long to_return = next_;
    ReadNext();
    return to_return;
  }

  // Reset the stream.
  void Reset() {
    rewind(number_file_);
    ReadNext();
  }

 private:
  // Assumes the given file is valid, open, and ready to be read from.
  explicit NumberStream(FILE* number_file) 
      : number_file_(number_file),
        next_(0),
        has_next_(true) {
    Reset();
  }

  // Read the next number from the underlying file descriptor.
  void ReadNext() {
    has_next_ = true;
    // Read a long long from the stream and count how many we found.
    int num_scanned = fscanf(number_file_, "%lld", &next_);
    // If it's not 1, we're going to assume we're at the end of file and stop.
    if (num_scanned != 1) {
      has_next_ = false;
    }
  }

  // Disable copy constructor.
  NumberStream(const NumberStream&) {}

  // Source of the numbers.
  FILE* number_file_;
  // Number to return at next call to 'Next()'.
  long long next_;
  // Is there a valid number to return?
  bool has_next_;
};

//------------------------------------------------------------------------------
// Helper functions 
//------------------------------------------------------------------------------

// Get the area code of a 10-digit number.  
int GetAreaCodeFromFullNumber(long long full_number) {
  // Precondition: The number should be 10 digits long.
  assert(1e9 <= full_number && full_number < 1e10);
  return (full_number / 1e7);
}

// Get the last 7 digits of a 10-digit phone number.
int GetNumberFromFullNumber(long long full_number) {
  // Precondition: The number should be 10 digits long.
  assert(1e9 <= full_number && full_number < 1e10);
  int code = GetAreaCodeFromFullNumber(full_number);
  return (full_number - code * 1e7);
}

// Does the given 10-digit phone number start with the given area code.
bool HasAreaCode(int code, long long full_number) {
  // Precondition: The number should be 10 digits long.
  assert(1e9 <= full_number && full_number < 1e10);
  return (code == GetAreaCodeFromFullNumber(full_number));
}

// Given a stream of 10-digit phone numbers, sort them numerically within each
// area code without using much more than ~625K (five million bits time 1 byte
// per 8 bits) at any given time. This will make 2*(# of area codes) passes
// over the stream of numbers.
void PrintSortedNumbers(NumberStream* numbers) {
  // BitVector used to track numbers seen in each pass.
  BitVector sorter(kSortSetSize);
  
  // For each code:
  for (int code_index = 0; code_index < kNumCodes; ++code_index) {
    // Iterate through all the numbers, finding the ones that have the current
    // area code as a prefix and that have a number less than the sorted set
    // size. Add these to the vector.
    int current_code = kAreaCodes[code_index];

    while (numbers->HasNext()) {
      long long current_full_number = numbers->Next();
      if (HasAreaCode(current_code, current_full_number)) {
        int current_codeless_number = GetNumberFromFullNumber(current_full_number);
        if (current_codeless_number < kSortSetSize) {
          sorter.Set(current_codeless_number);
        }
      }
    }

    // Now print all the elements in the bit vector.
    for (int number = 0; number < kSortSetSize; ++number) {
      if (sorter.Get(number))
        cout << "(" << current_code << ") " << number << endl;
    }

    numbers->Reset();
    sorter.ClearAll();

    // Repeat the process, using the same area code but taking numbers that are
    // larger than the sorted set size (i.e. the numbers in the last half of
    // the range). This code is almost duplicated from the previous: If I was
    // to have to duplicate it one more time, I would refactor :)
    while (numbers->HasNext()) {
      long long current_full_number = numbers->Next();
      if (HasAreaCode(current_code, current_full_number)) {
        int current_codeless_number = GetNumberFromFullNumber(current_full_number);
        // Don't consider the case where it's bigger than the vector size, we
        // assume this is impossible.
        if (current_codeless_number >= kSortSetSize) {
          sorter.Set(current_codeless_number - kSortSetSize);
        }
      }
    }

    // Now print all the elements in the bit vector.
    for (int number = 0; number < kSortSetSize; ++number) {
      if (sorter.Get(number))
        cout << "(" << current_code << ") " << (number + kSortSetSize) << endl;
    }

    numbers->Reset();
    sorter.ClearAll();
  }
}

int main(int argc, char** argv) {
  // Validate arguments.
  if (argc != 2) {
    cout << "Usage: morecodes <filename>"
         << endl;
    exit(-1);
  }

  // Feed numbers to sorter.
  const char* filename = argv[1];
  NumberStream* number_stream = NumberStream::Create(filename);
  PrintSortedNumbers(number_stream);
}

