/* Copyright (C) 2003 Reliable Software Group 
 *                    - University of California, Santa Barbara
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */

/* CVS $Id: histogram.cpp,v 1.15 2003/10/06 23:17:17 dhm Exp $ */

#include <stdio.h>
#include <math.h>
#include <anomaly.h>

/*
 * Confidence scaling constants.  Histogram::switch_mode() divides the
 * number of insert operations by STD_BUCKETS * ELEMENTS_PER_BUCKET and
 * scales the confidence down whenever that ratio is below 1.
 */
#define STD_BUCKETS 6
#define ELEMENTS_PER_BUCKET 5

/*
 * Default constructor: starts with no recorded elements, no insert
 * operations, and a zeroed mean and standard deviation.
 */
Histogram::Histogram()
{
  _mu = 0.0;
  _sigma = 0.0;

  _inserts = 0;
  _element_count = 0;
}

/* destructor - must be virtual; the body is intentionally empty */
Histogram::~Histogram() {}

/*
 * Insert an item into the histogram.
 *
 * item - either a NumericalItem (recorded as one occurrence of its value)
 *        or a ListCollection (forwarded to _insert_item_list)
 *
 * Throws ModelInputException when item is neither of the two types; a
 * null pointer fails both dynamic_casts and therefore also ends up here.
 */
void Histogram::insert_item(Item *item) throw (ModelInputException)
{
  /* single numerical value */
  if (NumericalItem *numeric = dynamic_cast<NumericalItem *>(item)) {
    _insert_item(numeric);
    return;
  }

  /* list of counted values */
  if (ListCollection *collection = dynamic_cast<ListCollection *>(item)) {
    _insert_item_list(collection);
    return;
  }

  throw ModelInputException("NumericalItem or ListCollection required for Histogram::insert_item");
}

/*
 * Record a single occurrence of a numerical value.
 *
 * item - the value to record; its value() is used as the histogram key
 *
 * Increments the counter for that value, the total element count and the
 * number of insert operations.
 */
void Histogram::_insert_item(NumericalItem *item)
{
  /*
   * operator[] value-initializes the counter of a previously unseen key
   * to zero, so a single lookup handles both the "new value" and the
   * "known value" case (the former find() + operator[] pair searched the
   * container twice and called value() twice).
   */
  ++_elements[item->value()];

  ++_element_count;
  ++_inserts;
}

/*
 * Insert every entry of a list of counted values.
 *
 * list - collection whose entries must be ItemCount objects that refer to
 *        a NumericalItem
 *
 * For each entry the counter of the referenced value and the total element
 * count grow by the entry's count; the insert counter grows by one per
 * entry.
 *
 * Throws ModelInputException at the first entry of the wrong type.  Entries
 * processed before the offending one remain recorded.
 */
void Histogram::_insert_item_list(ListCollection *list) throw (ModelInputException)
{
  ListCollectionIterator list_iter;

  for (list_iter = list->begin(); list_iter != list->end(); ++list_iter) {

    ItemCount *ic = dynamic_cast<ItemCount *>(*list_iter);

    /* only look at the wrapped value when the entry really is an ItemCount */
    NumericalItem *nitem =
      (ic != 0) ? dynamic_cast<NumericalItem *>(ic->getValue()) : 0;

    if (nitem == 0)
      throw ModelInputException("ListCollection of ItemCount that point to NumericalItem required for Histogram::insert_item_count");

    /*
     * operator[] creates a zero counter for an unseen value, so "+=" covers
     * both the first and any later occurrence with a single lookup.
     */
    _elements[nitem->value()] += ic->getCount();

    _element_count += ic->getCount();
    ++_inserts;
  }
}


unsigned long Histogram::get_element_count()
{
  return _element_count;
}


/*
 * Switch the model from Training to Detection mode.
 *
 * mode - the requested mode; only Training -> Detection is accepted
 *
 * On a successful transition the function derives, from the recorded
 * elements, the mean, the sample standard deviation, the cumulative
 * distribution table and the confidence value, then marks the model valid.
 *
 * Throws ModelException on any other transition, or when no element has
 * been recorded yet.
 */
void Histogram::switch_mode(ModelMode mode) throw (ModelException)
{
  if ((_mode != Training) || (mode != Detection))
    throw ModelException("Histogram::switch_mode performs illegal mode transition");

  const unsigned long N = _element_count;
  if (N < 1)
    throw ModelException("Histogram::switch_mode needs at least a single data point");

  HistogramIter iter;

  /*
   * calculate the mean; both factors are converted to double BEFORE the
   * multiplication so that the product can neither overflow an integer
   * type nor be distorted by a signed/unsigned conversion
   */
  double sum = 0.0;
  for (iter = _elements.begin(); iter != _elements.end(); ++iter)
    sum += (double) iter->first * (double) iter->second;
  _mu = sum / (double) N;

  /* calculate the standard deviation (N - 1 in the denominator) */
  if (N > 1) {
    sum = 0.0;
    for (iter = _elements.begin(); iter != _elements.end(); ++iter) {
      const double delta = (double) iter->first - _mu;
      sum += (double) iter->second * delta * delta;
    }
    _sigma = sqrt(sum / (double) (N - 1));
  }
  else
    _sigma = 0;

  /* calculate the optimized cdf function: running sum of relative frequencies */
  double cdf = 0.0;
  for (iter = _elements.begin(); iter != _elements.end(); ++iter) {
    cdf += ((double) iter->second / (double) N);
    _element_cdf[iter->first] = cdf;
  }

  /*
   * 2 steps to calculate the confidence
   */
  try {
    /* first, validate the derived function against the data (stored in histogram) */
    _confidence = validate_data(this);

    /* then, adapt this value by the number of calls to insert_item in the histogram */
    const double scaling = ((double) _inserts / (double) (STD_BUCKETS * ELEMENTS_PER_BUCKET));
    if (scaling < 1.0)
      _confidence *= scaling;
  }
  catch (const ModelException &) {
    /* a failed validation yields no confidence; caught by reference to avoid a copy */
    _confidence = 0;
  }

  _valid = true;
  _mode = mode;
}

double Histogram::f(double x)
{
  HistogramIter iter;
  
  if ((iter = _elements.find((int) x)) == _elements.end())
    return 0.0;
  else
    return ((double) iter->second / (double) get_element_count());
}

double Histogram::F(double x)
{
  CdfIter first, iter;
  int _rx = (int) x;

  iter = _element_cdf.upper_bound(_rx);

  if (iter == _element_cdf.begin())
    return 0.0;
  else if (iter == _element_cdf.end())
    return 1.0;
  else 
    return (--iter)->second;
}

/*
 * Returns the mean stored in _mu.
 *
 * Throws ModelException unless the model is in Detection mode.
 */
double Histogram::get_mean() throw (ModelException)
{
  if (_mode == Detection)
    return _mu;

  throw ModelException("Histogram::get_mean must be in detection mode to be called");
}

/*
 * Returns the variance, i.e. the square of the stored standard deviation.
 *
 * Throws ModelException unless the model is in Detection mode.
 */
double Histogram::get_variance() throw (ModelException)
{
  if (_mode == Detection)
    return _sigma * _sigma;

  throw ModelException("Histogram::get_variance must be in detection mode to be called");
}

/*
 * Returns the stored confidence value while in Detection mode and 0.0 in
 * every other mode.  Unlike get_mean / get_variance this never throws on a
 * wrong mode.
 */
double Histogram::get_confidence()
{
  if (_mode == Detection)
    return _confidence;

  return 0.0;
}


/*
 * Regression test for Histogram.
 *
 * Inserts the integers 1..5, compares the histogram's mean and variance
 * against numericalUtilities::mean / variance, checks f() and F() on a grid
 * with step 0.5, calls check_item() on every inserted value and finally
 * runs three validate_data() / get_confidence() scenarios (identical,
 * diverging and overlapping histograms).
 *
 * verbose - when true, progress and diagnostic output is written to
 *           cout / cerr (the "TestN:" banners are printed unconditionally)
 *
 * Returns true when every check passes and false at the first failed check.
 *
 * NOTE(review): the early "return false" paths skip the release / delete
 * calls further down in the function.
 */
bool Histogram::test(bool verbose) {

  if (verbose) {
    cerr << "Regression Test for Class libAnomaly::Histogram\n";
    cerr << "Allocated Objects -- " << Item::get_allocated()  << "\n";
  }

  /* part 1: histogram that holds each of the values 1..limit exactly once */
  unsigned int limit = 5;
  unsigned int i;
  ListCollection *list = new ListCollection();
  Histogram *h = new Histogram();
  double computedMean, computedVariance;

  /* every item goes into both the reference list and the histogram */
  for (i = 1; i <= limit; i++) {
    if (verbose) 
      cout << "  inserting IntegerItem(" << i << ")" << endl;

    IntegerItem *ii = new IntegerItem(i);

    list->push_back(ii);
    h->insert_item(ii);
  }

  if (verbose) {
    cout << "  generating mean and variance (true and computed)" << endl;
  }

  /* reference values computed directly from the list */
  double trueMean = numericalUtilities::mean(list);
  double trueVariance = numericalUtilities::variance(list);

  list->release();

  if (verbose) {
    cout << "  true mean: " << trueMean << ", true variance: " << trueVariance
	 << endl;
  }

  if (verbose)
    cout << "  switching mode to Detection" << endl;

  /* note: this call is outside the try block below */
  h->switch_mode(Detection);

  try {
    computedMean = h->get_mean();
    computedVariance = h->get_variance();
  }
  catch (ModelException &e) {
    cout << "Exception caught: " << e.get_message() << "\n";
    return false;
   }

  if (!numericalUtilities::fpEqual(trueMean, computedMean)) {
    if (verbose) {
      cout << "  mean computation failed: " << trueMean << " != " 
	   << computedMean << endl;
    }

    return false;
  }

  if (!numericalUtilities::fpEqual(trueVariance, computedVariance)) {
    if (verbose) {
      cout << "  variance computation failed: " << trueVariance << " != " 
	   << computedVariance << endl;
    }

    return false;
  }

  if (verbose) {
    cout << "  computed mean     = " << computedMean << endl;
    cout << "  computed variance = " << computedVariance << endl;
  }

  /*
   * part 2: check f() and F() for x = 0, 0.5, ..., limit + 1.
   * Expected pdf: 1/limit when floor(x) is in 1..limit, otherwise 0.
   * Expected cdf: 0 when floor(x) is 0, floor(x)/limit up to limit, and 1
   * beyond limit.
   */
  double x;
  for (x = 0; x <= limit + 1; x += 0.5) {
    double fx = h->f(x);
    double Fx = h->F(x);
    double truefx;
    double trueFx;

    if (floor(x) > limit) {
      truefx = 0.0;
      trueFx = 1.0;
    }
    else if (numericalUtilities::fpEqual(floor(x), 0)) {
      truefx = 0.0;
      trueFx = 0.0;
    }
    else {
      truefx = 1 / (double) limit;
      trueFx = floor(x) / (double) limit;
    }

    if (verbose) {
      cout << "  pdf(" << x << ") = " << fx << ", ";
      cout << "  cdf(" << x << ") = " << Fx << endl;
    }

    if (!numericalUtilities::fpEqual(truefx, fx)) {
      if (verbose) {
	cout << "  pdf computation failed: " << truefx << " != " 
	     << fx << endl;
      }
      return false;
    }

    if (!numericalUtilities::fpEqual(trueFx, Fx)) {
      if (verbose) {
	cout << "  cdf computation failed: " << trueFx << " != " 
	     << Fx << endl;
      }
      return false;
    }
  }

  /* part 3: check_item() is exercised; its result is only printed, not verified */
  for (i = 1; i <= limit; i++) {
    Item *item = new IntegerItem(i);
    double evaluationResult = h->check_item(item);
    item->release();

    if (verbose)
      cout << "  check_item(" << i << ") = " << evaluationResult << endl;
  }

  /* test the chi2 implementation */
  if (verbose) cout << "Chi-2 test implementation\n";

  double result, confidence, tmp;

  /* lc feeds the reference histogram, lcc the histogram that is validated against it */
  ListCollection lc, lcc; 
  ListCollectionIterator lciter, lcciter;

  Histogram *reference = new Histogram();
  Histogram *to_check = new Histogram();

  cout << "Test1: check identical histograms ";
  lc.push_back(new ItemCount(new IntegerItem(0), 10));
  lc.push_back(new ItemCount(new IntegerItem(4), 2));
  lc.push_back(new ItemCount(new IntegerItem(5), 2));
  lc.push_back(new ItemCount(new IntegerItem(6), 2));
  reference->insert_item(&lc);
  reference->switch_mode(Detection);

  lcc.push_back(new ItemCount(new IntegerItem(0), 10));
  lcc.push_back(new ItemCount(new IntegerItem(4), 2));
  lcc.push_back(new ItemCount(new IntegerItem(5), 2));
  lcc.push_back(new ItemCount(new IntegerItem(6), 2));
  to_check->insert_item(&lcc);
  result = reference->validate_data(to_check);
  /* identical data must validate with a result of at least 0.995 */
  if (result < 0.995) {
    if (verbose) cout << result << " failed\n";
    return false;
  }
  else {
    if (verbose) cout << result << " ok\n";
  }
  confidence = reference->get_confidence();
  if (verbose) cerr << "       confidence = " << confidence;
  /*
   * expected confidence: 0.995 * 4 / (STD_BUCKETS * ELEMENTS_PER_BUCKET),
   * where 4 matches the number of ItemCount entries inserted above;
   * tmp becomes the absolute deviation and must stay below 0.001
   */
  tmp = (confidence - ((0.995 * 4.0) / ((double) (STD_BUCKETS * ELEMENTS_PER_BUCKET))));
  if (tmp < 0.0)
    tmp *= -1.0;
  if (tmp < 0.001) {
    if (verbose) cerr << " ... ok\n";
  }
  else {
    if (verbose) cerr << " ... failed\n";
    return false;
  }
  
  /* release the ItemCount entries and empty both lists for the next test */
  for (lciter = lc.begin(); lciter != lc.end(); lciter++)
    (*lciter)->release();
  lc.clear();

  for (lcciter = lcc.begin(); lcciter != lcc.end(); lcciter++)
    (*lcciter)->release();
  lcc.clear();

  delete reference;
  delete to_check;

  Histogram *test2ref = new Histogram();
  Histogram *test2 = new Histogram();
  cout << "Test2: check diverging histograms ";
  /* reference counts peak in the middle of 0..9 ... */
  lc.push_back(new ItemCount(new IntegerItem(0), 1));
  lc.push_back(new ItemCount(new IntegerItem(1), 2));
  lc.push_back(new ItemCount(new IntegerItem(2), 4));
  lc.push_back(new ItemCount(new IntegerItem(3), 8));
  lc.push_back(new ItemCount(new IntegerItem(4), 16));
  lc.push_back(new ItemCount(new IntegerItem(5), 16));
  lc.push_back(new ItemCount(new IntegerItem(6), 8));
  lc.push_back(new ItemCount(new IntegerItem(7), 4));
  lc.push_back(new ItemCount(new IntegerItem(8), 2));
  lc.push_back(new ItemCount(new IntegerItem(9), 1));
  test2ref->insert_item(&lc);
  test2ref->switch_mode(Detection);

  /* ... while the checked counts peak at both ends */
  lcc.push_back(new ItemCount(new IntegerItem(0), 16));
  lcc.push_back(new ItemCount(new IntegerItem(1), 8));
  lcc.push_back(new ItemCount(new IntegerItem(2), 4));
  lcc.push_back(new ItemCount(new IntegerItem(3), 2));
  lcc.push_back(new ItemCount(new IntegerItem(4), 1));
  lcc.push_back(new ItemCount(new IntegerItem(5), 1));
  lcc.push_back(new ItemCount(new IntegerItem(6), 2));
  lcc.push_back(new ItemCount(new IntegerItem(7), 4));
  lcc.push_back(new ItemCount(new IntegerItem(8), 8));
  lcc.push_back(new ItemCount(new IntegerItem(9), 16));
  test2->insert_item(&lcc);
  result = test2ref->validate_data(test2);
  /* diverging data must validate with a result of at most 0.005 */
  if (result > 0.005) {
    if (verbose) cout << result << " failed\n";
    return false;
  }
  else {
    if (verbose) cout << result << " ok\n";
  }
  confidence = test2ref->get_confidence();
  if (verbose) cerr << "       confidence = " << confidence;
  /* same confidence check as in Test1, with the 10 entries of the reference list */
  tmp = (confidence - ((0.995 * 10.0) / ((double) (STD_BUCKETS * ELEMENTS_PER_BUCKET))));
  if (tmp < 0.0)
    tmp *= -1.0;
  if (tmp < 0.001) {
    if (verbose) cerr << " ... ok\n";
  }
  else {
    if (verbose) cerr << " ... failed\n";
    return false;
  }

  /* release the ItemCount entries and empty both lists for the next test */
  for (lciter = lc.begin(); lciter != lc.end(); lciter++)
    (*lciter)->release();
  lc.clear();

  for (lcciter = lcc.begin(); lcciter != lcc.end(); lcciter++)
    (*lcciter)->release();
  lcc.clear();

  delete test2ref;
  delete test2;
  Histogram *test3ref = new Histogram();
  Histogram *test3 = new Histogram();
  cout << "Test3: check overlapping histograms ";
  lc.push_back(new ItemCount(new IntegerItem(0), 16));
  lc.push_back(new ItemCount(new IntegerItem(1), 14));
  lc.push_back(new ItemCount(new IntegerItem(2), 12));
  lc.push_back(new ItemCount(new IntegerItem(3), 8));
  lc.push_back(new ItemCount(new IntegerItem(4), 6));
  lc.push_back(new ItemCount(new IntegerItem(5), 4));
  lc.push_back(new ItemCount(new IntegerItem(6), 2));
  lc.push_back(new ItemCount(new IntegerItem(7), 1));
  lc.push_back(new ItemCount(new IntegerItem(8), 1));
  test3ref->insert_item(&lc);
  test3ref->switch_mode(Detection);

  lcc.push_back(new ItemCount(new IntegerItem(0), 21));
  lcc.push_back(new ItemCount(new IntegerItem(1), 12));
  lcc.push_back(new ItemCount(new IntegerItem(2), 10));
  lcc.push_back(new ItemCount(new IntegerItem(3), 9));
  lcc.push_back(new ItemCount(new IntegerItem(4), 5));
  lcc.push_back(new ItemCount(new IntegerItem(5), 7));
  lcc.push_back(new ItemCount(new IntegerItem(6), 2));
  lcc.push_back(new ItemCount(new IntegerItem(7), 2));
  lcc.push_back(new ItemCount(new IntegerItem(8), 3));
  lcc.push_back(new ItemCount(new IntegerItem(9), 1));
  test3->insert_item(&lcc);
  result = test3ref->validate_data(test3);
  /* similar but not identical data must land inside [0.05, 0.75] */
  if ((result < 0.05) || (result > 0.75)) {
    if (verbose) cout << result << " failed\n";
    return false;
  }
  else {
    if (verbose) cout << result << " ok\n";
  }

  confidence = test3ref->get_confidence();
  if (verbose) cerr << "       confidence = " << confidence;
  /* same confidence check as in Test1, with the 9 entries of the reference list */
  tmp = (confidence - ((0.995 * 9.0) / ((double) (STD_BUCKETS * ELEMENTS_PER_BUCKET))));
  if (tmp < 0.0)
    tmp *= -1.0;
  if (tmp < 0.001) {
    if (verbose) cerr << " ... ok\n";
  }
  else {
    if (verbose) cerr << " ... failed\n";
    return false;
  }

 /* final cleanup of the list entries and of the remaining histograms */
 for (lciter = lc.begin(); lciter != lc.end(); lciter++)
    (*lciter)->release();
  lc.clear();

  for (lcciter = lcc.begin(); lcciter != lcc.end(); lcciter++)
    (*lcciter)->release();
  lcc.clear();

  delete test3ref;
  delete test3;

  delete h;

  if (verbose) {
    cerr << "Allocated Objects (should be equal to number above plus 2 (local vars)) -- " << Item::get_allocated()  << "\n";
    cout << "\n";
  }

  return true;
}


/*
 * Creates a new Histogram and registers the interval bounds
 * 0, 1, 4, 7, 12, 16 and 256 (in this order) that are used for the
 * character distribution.
 *
 * Returns the newly allocated histogram as a Model pointer.
 */
Model* CharacterDistributionFactory::instance(void)
{
  /* interval list of the character distribution */
  static const int bounds[] = { 0, 1, 4, 7, 12, 16, 256 };
  const unsigned int bound_count = sizeof(bounds) / sizeof(bounds[0]);

  Histogram *hist = new Histogram();

  for (unsigned int idx = 0; idx < bound_count; ++idx)
    hist->add_interval_bound(bounds[idx]);

  return hist;
}


void Histogram::display_elements(void) {
  HistogramIter i;
  unsigned int counter;

  cout << "Histogram::display_elements for instance #" << _instanceNumber 
       << " (" << _elements.size() 
       << " elements):" << endl;

  for (i = _elements.begin(), counter = 0; i != _elements.end(); 
       i++, counter++) {
    int bucket = (*i).first;
    int value = (*i).second;
    cout << "  x[" << bucket << "] = " << value << endl;
  }
}
