/* Copyright (C) 2003 Reliable Software Group
 *                    - University of California, Santa Barbara
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */

/* CVS $Id: token_finder.cpp,v 1.9 2003/10/02 02:56:37 chris Exp $ */

#include <stdio.h>
#include <math.h>
#include <anomaly.h>


/* Critical z value used by TokenFinder::is_correlated() to widen the
 * transformed correlation estimate into a confidence interval.
 * NOTE(review): 2.575 looks like the two-sided 99% quantile of the standard
 * normal distribution -- confirm. */
#define TYPE_Z_VALUE 2.575

/* iterator over the token table (distinct Item -> number of insertions) */
typedef __gnu_cxx::hash_map<Item *, unsigned long>::iterator TokenIter;
/* iterator over the integer series consumed by the statistics helpers */
typedef list<int>::iterator ListIntIter;


/* Default constructor: a new finder starts out with zero recorded insertions. */
TokenFinder::TokenFinder()
{
  this->_element_count = 0;
}

/* Destructor: calls release() once on every item stored as a key of the
 * token table (the counterpart of the lock() taken in insert_item).
 */
TokenFinder::~TokenFinder()
{
  /* Gather the keys first and release afterwards, so the table is never
   * traversed while items are being released.
   * NOTE(review): presumably release() may destroy the item -- confirm. */
  list<Item *> doomed;
  for (TokenIter entry = _elements.begin(); entry != _elements.end(); ++entry)
    doomed.push_back(entry->first);

  while (!doomed.empty()) {
    doomed.front()->release();
    doomed.pop_front();
  }
}

/* Record one more observation of item.
 *
 * An item seen for the first time is entered into the token table with a
 * count of one and locked; an item that is already present only has its
 * count incremented.  The total number of insertions grows in either case.
 */
void TokenFinder::insert_item(Item *item)
{
  TokenIter known = _elements.find(item);

  if (known != _elements.end()) {
    /* seen before -> one more occurrence */
    ++known->second;
  }
  else {
    _elements[item] = 1;

    /* the table now refers to the item -> lock it to prevent deletion */
    item->lock();
  }

  ++_element_count;
}

/* Arithmetic mean of the integers in list.
 * Throws ModelException when the list is empty.
 */
double TokenFinder::get_int_mean(list<int> &list) throw (ModelException)
{
  const unsigned int count = list.size();
  if (count == 0)
    throw ModelException("TokenFinder::get_int_mean list needs at least one element");

  /* accumulate in integer arithmetic, convert only for the final division */
  long total = 0;
  ListIntIter pos = list.begin();
  while (pos != list.end()) {
    total += *pos;
    ++pos;
  }

  return static_cast<double>(total) / static_cast<double>(count);
}

/* Sample variance (divisor N-1) of the integers in list.
 * Throws ModelException when the list holds fewer than two elements.
 */
double TokenFinder::get_int_variance(list<int> &list) throw (ModelException)
{
  const unsigned int count = list.size();
  if (count < 2)
    throw ModelException("TokenFinder::get_int_variance list needs at least two elements");

  const double mean = get_int_mean(list);

  /* sum of squared deviations from the mean */
  double squares = 0.0;
  for (ListIntIter pos = list.begin(); pos != list.end(); ++pos) {
    const double deviation = static_cast<double>(*pos) - mean;
    squares += deviation * deviation;
  }

  return squares / static_cast<double>(count - 1);
}

/* Sample covariance (divisor N-1) of two integer series, paired by position.
 * Throws ModelException unless both lists have the same length and that
 * length is at least two.
 */
double TokenFinder::get_int_covariance(list<int> &l1, list<int> &l2) throw (ModelException)
{
  const unsigned int count = l1.size();
  const bool too_short = (count <= 1);
  const bool length_mismatch = (count != l2.size());

  if (too_short || length_mismatch)
    throw ModelException("TokenFinder::get_int_covariance list needs two lists with same length and at least two elements");

  const double mean_first = get_int_mean(l1);
  const double mean_second = get_int_mean(l2);

  /* walk both series in lock step and sum the products of the deviations */
  double products = 0.0;
  list<int>::iterator first = l1.begin();
  list<int>::iterator second = l2.begin();
  while ((first != l1.end()) && (second != l2.end())) {
    const double dev_first = static_cast<double>(*first) - mean_first;
    const double dev_second = static_cast<double>(*second) - mean_second;
    products += dev_first * dev_second;
    ++first;
    ++second;
  }

  return products / static_cast<double>(count - 1);
}

/* Check whether the witnessed series rises together with the reference series.
 *
 * The estimate rho = cov(ref, wit) / var(ref) is clamped to [-0.998, 0.998],
 * mapped through z = 0.5 * ln((1 + rho) / (1 - rho)), shifted down by
 * TYPE_Z_VALUE / sqrt(N - 3) and mapped back, which gives the lower end of a
 * confidence interval for rho.
 *
 * ref     reference series
 * wit     witnessed series, must have the same length as ref
 * unique  out: true only when that lower bound exceeds 0.9, i.e. for a strong
 *         positive relation.  Despite its name, a true value is what makes
 *         switch_mode() keep the token table.  It is written on every path,
 *         including the early "return false" ones.
 *
 * Returns false when the series have fewer than four elements, differ in
 * length, or when ref has zero variance (the ratio is undefined in that
 * case); returns true otherwise.
 *
 * An earlier decision rule based on the signs of both interval bounds was
 * compiled out and sat unreachable behind the return; it has been dropped.
 */
bool TokenFinder::is_correlated(list<int> &ref, list<int> &wit, bool &unique)
{
  /* never hand an indeterminate flag back to the caller */
  unique = false;

  const list<int>::size_type N = ref.size();
  if ((N < 4) || (N != wit.size()))
    return false;

  const double varx = get_int_variance(ref);

  /* a constant reference series would turn the division below into 0/0 */
  if (varx == 0.0)
    return false;

  const double covar = get_int_covariance(ref, wit);

  /* !! the textbook coefficient is covar / sqrt(varx * var(wit)), but
   * dividing by varx alone yields far better results */
  double rho = covar / varx;

  /* keep rho away from +-1 so the logarithm below stays finite */
  if (rho > 0.998)
    rho = 0.998;
  else if (rho < -0.998)
    rho = -0.998;

  const double z = 0.5 * log((1.0 + rho) / (1.0 - rho));
  const double addend = TYPE_Z_VALUE / sqrt(static_cast<double>(N - 3));

  /* back-transform the lower end of the interval */
  const double scaled = exp(2 * (z - addend));
  const double lower = (scaled - 1) / (scaled + 1);

  unique = (lower > 0.9);
  return true;
}

/* Switch the model from Training to Detection; every other transition is
 * rejected with a ModelException.
 *
 * With at most one recorded insertion the mode is simply switched.
 * Otherwise two series of equal length are built:
 *   ref       0, 1, 2, ... up to the number of insertions
 *   witnessed a running counter that starts at 0 and walks the token table:
 *             +1 for every occurrence of a token except its last one,
 *             -1 for the last occurrence of each token
 * If is_correlated() reports no correlation, or does not set its flag, the
 * token table is emptied and the insertion count reset to zero.
 *
 * Throws ModelConsistencyException when the occurrence counts in the table
 * do not add up to the number of insertions.
 */
void TokenFinder::switch_mode(ModelMode mode) throw (ModelException)
{
  if (_mode != Training || mode != Detection)
    throw ModelException("TokenFinder::switch_mode performs illegal mode transition");

  if (_element_count <= 1) {
    // Assume only one valid token.
    _mode = Detection;
    return;
  }

  list<int> ref, witnessed;
  ref.push_back(0);
  witnessed.push_back(0);

  TokenIter cursor = _elements.begin();
  unsigned long remaining = cursor->second;

  for (int position = 1; position <= _element_count; ++position) {
    ref.push_back(position);

    /* consume one occurrence of the current token */
    const bool token_done = (--remaining == 0);
    witnessed.push_back(witnessed.back() + (token_done ? -1 : 1));
    if (!token_done)
      continue;

    /* current token exhausted -> advance; reload only if more insertions follow */
    ++cursor;
    if (position < _element_count) {
      if (cursor == _elements.end())
        throw ModelConsistencyException("TokenFinder::switch_mode internal error - list too short");
      remaining = cursor->second;
    }
  }

  if (cursor != _elements.end())
    throw ModelConsistencyException("TokenFinder::switch_mode internal error - list too long");

  /* the flag is only read when is_correlated() returned true */
  bool unique;
  const bool keep_tokens = is_correlated(ref, witnessed, unique) && unique;

  if (!keep_tokens) {
    /* empty the table first, then release the items it referred to */
    list<Item *> doomed;
    for (TokenIter entry = _elements.begin(); entry != _elements.end(); ++entry)
      doomed.push_back(entry->first);

    _elements.clear();
    _element_count = 0;

    for (list<Item *>::iterator victim = doomed.begin(); victim != doomed.end(); ++victim)
      (*victim)->release();
  }

  _mode = Detection;
}

/* Score an item against the trained model.
 *
 *   1.0  the model holds no tokens (every item is treated as unique)
 *   1.0  the model holds tokens and item is one of them
 *   0.0  the model holds tokens and item is NOT one of them
 *
 * Throws ModelException when the model is not in Detection mode.
 */
double TokenFinder::check_item(Item * item) throw (ModelException)
{
  if (_mode != Detection)
    throw ModelException("TokenFinder::check_item called without being in detection mode");

  if (_element_count > 0) {
    const bool known = (_elements.find(item) != _elements.end());
    return known ? 1.0 : 0.0;
  }

  /* empty token table -> nothing to compare against */
  return 1.0;
}


/* Confidence in the verdicts of check_item().
 * Returns 1.0 only in Detection mode with a non-empty token table; an empty
 * table means all items are considered unique, hence no confidence (0.0).
 */
double TokenFinder::get_confidence()
{
  const bool has_tokens = (_mode == Detection) && (_element_count > 0);
  return has_tokens ? 1.0 : 0.0;
}

/* Test helper: run finder->check_item(item), release the item and compare
 * the verdict with the expected one.
 *
 * finder  model under test
 * item    probe item; release() is called on it exactly once, even when
 *         check_item() throws (the exception is passed on to the caller)
 * target  expected verdict: true for a non-zero score, false for 0.0
 * output  when true, print "+" / "-" for the verdict and a failure note
 *
 * Returns true when the verdict matches target.
 */
static bool test_check_item(TokenFinder *finder, Item *item, bool target, bool output)
{
  bool res;

  try {
    res = (finder->check_item(item) != 0.0);
  }
  catch (...) {
    /* do not lose the probe item on the error path */
    item->release();
    throw;
  }
  item->release();

  if (output) {
    if (res)
      cerr << "+";
    else
      cerr << "-";
  }

  if (res != target) {
    if (output) cerr << " ... failed\n";
    return false;
  }

  return true;
}


/* Built-in regression test.
 *
 * Trains four finders on different inputs, switches each to Detection mode
 * and checks (a) whether the token table was kept or emptied, (b) the
 * verdicts for four probe items and (c) the reported confidence.
 *
 * output  when true, progress and results are written to cerr
 *
 * Returns true when all four scenarios behave as expected, false at the
 * first deviation.  The finders live on the stack inside an inner scope, so
 * they are destroyed on every early return as well as before the final
 * allocation count is printed.
 */
bool TokenFinder::test(bool output)
{
  int cnt;
  bool unique;
  Item *item;
  double confidence;

  if (output) cerr << "Regression Test for Class libAnomaly::TokenFinder\n";
  if (output) cerr << "Allocated Objects -- " << Item::get_allocated()  << "\n";

  {
    /* Test1: insert 100 unique elements */
    TokenFinder test1;
    for (cnt = 0; cnt < 100; ++cnt) {
      item = new IntegerItem(cnt);
      test1.insert_item(item);
      item->release();
    }
    try {
      test1.switch_mode(Detection);
    }
    catch (ModelException &ex) {
      if (output)
        cerr << "Test1: " << ex.get_message() << " ... failed\n";
      return false;
    }
    unique = (test1._element_count == 0);
    if (output) {
      cerr << "Test1: ";
      if (unique) cerr << "unique"; else cerr << "enumeration";
      cerr << " ... ";
      if (unique) cerr << "ok"; else cerr << "failed";
      cerr << "\n";
    }
    if (!unique) return false;
    if (output) cerr << "       ";
    try {
      if (!test_check_item(&test1, new IntegerItem(1), true, output)) return false;
      if (!test_check_item(&test1, new IntegerItem(2), true, output)) return false;
      if (!test_check_item(&test1, new IntegerItem(7), true, output)) return false;
      if (!test_check_item(&test1, new IntegerItem(42), true, output)) return false;
    }
    catch (ModelException &ex) {
      if (output)
        cerr << ex.get_message() << " ... failed\n";
      return false;
    }
    if (output) cerr << " ... ok\n";
    confidence = test1.get_confidence();
    if (output) cerr << "       confidence = " << confidence;
    if (confidence == 0.0) {
      if (output) cerr << " ... ok\n";
    }
    else {
      if (output) cerr << " ... failed\n";
      return false;
    }

    /* Test2: insert 100 identical elements */
    TokenFinder test2;
    for (cnt = 0; cnt < 100; ++cnt) {
      item = new IntegerItem(42);
      test2.insert_item(item);
      item->release();
    }
    try {
      test2.switch_mode(Detection);
    }
    catch (ModelException &ex) {
      if (output)
        cerr << "Test2: " << ex.get_message() << " ... failed\n";
      return false;
    }
    unique = (test2._element_count == 0);
    if (output) {
      cerr << "Test2: ";
      if (unique) cerr << "unique"; else cerr << "enumeration";
      cerr << " ... ";
      if (!unique) cerr << "ok"; else cerr << "failed";
      cerr << "\n";
    }
    if (unique) return false;
    if (output) cerr << "       ";
    try {
      if (!test_check_item(&test2, new IntegerItem(1), false, output)) return false;
      if (!test_check_item(&test2, new IntegerItem(2), false, output)) return false;
      if (!test_check_item(&test2, new IntegerItem(7), false, output)) return false;
      if (!test_check_item(&test2, new IntegerItem(42), true, output)) return false;
    }
    catch (ModelException &ex) {
      if (output)
        cerr << ex.get_message() << " ... failed\n";
      return false;
    }
    if (output) cerr << " ... ok\n";
    confidence = test2.get_confidence();
    if (output) cerr << "       confidence = " << confidence;
    if (confidence == 1.0) {
      if (output) cerr << " ... ok\n";
    }
    else {
      if (output) cerr << " ... failed\n";
      return false;
    }

    /* Test3: insert 20 elements with minor repetitions
     * (16 distinct values, four of which occur twice) */
    TokenFinder test3;
    item = new IntegerItem(2);
    test3.insert_item(item);
    item->release();
    item = new IntegerItem(7);
    test3.insert_item(item);
    item->release();
    item = new IntegerItem(11);
    test3.insert_item(item);
    item->release();
    item = new IntegerItem(12);
    test3.insert_item(item);
    item->release();
    for (cnt = 0; cnt < 16; ++cnt) {
      item = new IntegerItem(cnt);
      test3.insert_item(item);
      item->release();
    }
    try {
      test3.switch_mode(Detection);
    }
    catch (ModelException &ex) {
      if (output)
        cerr << "Test3: " << ex.get_message() << " ... failed\n";
      return false;
    }
    unique = (test3._element_count == 0);
    if (output) {
      cerr << "Test3: ";
      if (unique) cerr << "unique"; else cerr << "enumeration";
      cerr << " ... ";
      if (unique) cerr << "ok"; else cerr << "failed";
      cerr << "\n";
    }
    if (!unique) return false;
    if (output) cerr << "       ";
    try {
      if (!test_check_item(&test3, new IntegerItem(1), true, output)) return false;
      if (!test_check_item(&test3, new IntegerItem(2), true, output)) return false;
      if (!test_check_item(&test3, new IntegerItem(7), true, output)) return false;
      if (!test_check_item(&test3, new IntegerItem(42), true, output)) return false;
    }
    catch (ModelException &ex) {
      if (output)
        cerr << ex.get_message() << " ... failed\n";
      return false;
    }
    if (output) cerr << " ... ok\n";
    confidence = test3.get_confidence();
    if (output) cerr << "       confidence = " << confidence;
    if (confidence == 0.0) {
      if (output) cerr << " ... ok\n";
    }
    else {
      if (output) cerr << " ... failed\n";
      return false;
    }

    /* Test4: insert 20 elements consisting of 4 tokens */
    TokenFinder test4;
    for (cnt = 0; cnt < 5; ++cnt) {
      item = new IntegerItem(1);
      test4.insert_item(item);
      item->release();
    }
    for (cnt = 0; cnt < 5; ++cnt) {
      item = new IntegerItem(2);
      test4.insert_item(item);
      item->release();
    }
    for (cnt = 0; cnt < 5; ++cnt) {
      item = new IntegerItem(3);
      test4.insert_item(item);
      item->release();
    }
    for (cnt = 0; cnt < 5; ++cnt) {
      item = new IntegerItem(4);
      test4.insert_item(item);
      item->release();
    }
    try {
      test4.switch_mode(Detection);
    }
    catch (ModelException &ex) {
      if (output)
        cerr << "Test4: " << ex.get_message() << " ... failed\n";
      return false;
    }
    unique = (test4._element_count == 0);
    if (output) {
      cerr << "Test4: ";
      if (unique) cerr << "unique"; else cerr << "enumeration";
      cerr << " ... ";
      if (!unique) cerr << "ok"; else cerr << "failed";
      cerr << "\n";
    }
    if (unique) return false;
    if (output) cerr << "       ";
    try {
      if (!test_check_item(&test4, new IntegerItem(1), true, output)) return false;
      if (!test_check_item(&test4, new IntegerItem(2), true, output)) return false;
      if (!test_check_item(&test4, new IntegerItem(7), false, output)) return false;
      if (!test_check_item(&test4, new IntegerItem(42), false, output)) return false;
    }
    catch (ModelException &ex) {
      if (output)
        cerr << ex.get_message() << " ... failed\n";
      return false;
    }
    if (output) cerr << " ... ok\n";
    confidence = test4.get_confidence();
    if (output) cerr << "       confidence = " << confidence;
    if (confidence == 1.0) {
      if (output) cerr << " ... ok\n";
    }
    else {
      if (output) cerr << " ... failed\n";
      return false;
    }

    /* leaving this scope destroys all four finders */
  }

  /* all tests have been successful */
  if (output) cerr << "Allocated Objects (should be equal to number above) -- " << Item::get_allocated()  << "\n";
  if (output) cerr << "\n";
  return true;
}

/* Factory hook: returns a newly heap-allocated TokenFinder through the
 * Model interface.
 * NOTE(review): the caller presumably owns the returned object -- confirm.
 */
Model * TokenFinderFactory::instance()
{
  Model *fresh = new TokenFinder();
  return fresh;
}
