/*
 *  docitem.h
 *  classifier
 *
 *  daniel wojcik
 *
 */

#ifndef doc_h_
#define doc_h_

#include 
#include 
#include "classifier.h"

/*class termp
{
	public:
		termp() {};
		termp(const std::string& t) {term = t; count = 0;}
		unsigned int getcount() const {return count;}
		const std::string& getterm() const {return term;}
		void increment() {++count;}
		void print();
	private:
		std::string term;
		unsigned int count;
};*/

// A term paired with an unsigned counter.
struct docterm
{
	// The term text.
	std::string term;
	// Counter kept alongside the term (presumably its number of
	// occurrences -- confirm against the code that fills it in).
	unsigned count;
};

// Statistics record for a term: one map plus four scalar fields.
// termshort carries the same four scalar fields without the map.
struct termstat
{
	// NOTE(review): no template arguments are visible on this std::map;
	// presumably per-class counts for the term -- confirm key/value types.
	std::map ccounts;
	// Unsigned counters; their exact meaning is not established in this
	// header (dcount/tcount are presumably document/term totals -- confirm).
	unsigned int count;
	unsigned int dcount;
	unsigned int tcount;
	// Presumably the inverse document frequency of the term -- confirm.
	float idf;
};

// Scalar-only term statistics: the same four numeric fields as termstat,
// without its ccounts map. Produced by stattoshort().
struct termshort
{
	// Unsigned counters; exact meaning is not established in this header
	// (dcount/tcount are presumably document/term totals -- confirm).
	unsigned count;
	unsigned dcount;
	unsigned tcount;
	// Presumably the inverse document frequency -- confirm.
	float idf;
};

// Per-document record. Offers term bookkeeping (addterm / getcount /
// increment) over a private term map, and exposes label arrays and a
// term counter as public data. All member functions are only declared
// here; their definitions live outside this header.
class docitem
{
	public:
		// Default constructor (defined elsewhere; which members it
		// initializes cannot be seen from this header).
		docitem();
		//docitem(const docitem& old);
		// Takes a term by const reference; presumably registers it in
		// the private term map -- confirm in the definition.
		void addterm(const std::string& t);
		// Returns an unsigned count for term t. Not declared const, so
		// the declaration alone does not rule out modifying the object.
		unsigned int getcount(const std::string& t);
		// Presumably bumps the count stored for term t -- confirm.
		void increment(const std::string& t);
		//void stats();
		
		// NOTE(review): no template arguments are visible on this
		// std::map -- confirm the key/value types.
		std::map charterms;
		// One string per class type. classtypes is not defined in this
		// header (presumably it comes from classifier.h). classification
		// is presumably the assigned label and realclass the reference
		// label -- confirm.
		std::string classification[classtypes];
		std::string realclass[classtypes];
		unsigned int termcount;
		
	private:
		// Backing store for the term methods above.
		// NOTE(review): template arguments are not visible here either.
		std::map terms;
};

// Per-class statistics record: a map, several unsigned counters, a flag,
// a fixed-size neighbor array and a dlib trainer object.
struct classstat
{
	// NOTE(review): no template arguments are visible on this std::map
	// -- confirm the key/value types.
	std::map charterms;
	unsigned int termsize;
	unsigned int dcount;
	// An unsigned value, not the struct named cluster declared later in
	// this header; presumably an identifier of the cluster this class
	// belongs to -- confirm.
	unsigned int cluster;
	unsigned int termcount;
	// Boolean flag; its purpose is not evident from this header.
	bool seen;
	
	double point;
	// Fixed-size array of pairs. neark is not defined in this header
	// (presumably it comes from classifier.h), and the pair's template
	// arguments are not visible.
	std::pair neighbors[neark];
	// dlib trainer object.
	// NOTE(review): no template arguments are visible on this type either
	// -- confirm the kernel type it is instantiated with.
	dlib::svm_pegasos svmtrainer;
	//dlib::probabilistic_decision_function pfunct_type;
};

// Cluster record: a list of members, two double-valued means and a count.
struct cluster
{
	// NOTE(review): no template arguments are visible on this std::list;
	// presumably it holds the classes assigned to the cluster -- confirm
	// the element type.
	std::list classes;
	// Presumably the means of the member classes' point values and of
	// some per-class quantity, respectively -- confirm where they are set.
	double meanpoint;
	double meanclass;
	unsigned int count;
};

//Not pretty, but bundling this shared state into a single object helps
//abstract some things out so that dlib can be used more easily.
//
//Holds three maps plus document and term counters. The constructor
//zeroes both counters so a freshly constructed object never exposes
//indeterminate values; the maps start out empty.
class globals
{
	public:
		globals() : doccount(0), termcount(0) {}
		// NOTE(review): no template arguments are visible on the three
		// std::map members below -- confirm their key/value types.
		std::map gterms;
		//std::list docs;
		std::map docclasses;
		std::map clusters;
		// Counters, both starting at zero.
		unsigned int doccount;
		unsigned int termcount;
};

// Builds a termshort from a termstat. Declaration only; the definition
// lives outside this header. Presumably copies the four scalar fields the
// two structs share (count, dcount, tcount, idf) -- confirm. The argument
// is taken by non-const reference, so the declaration alone does not rule
// out modification of old.
termshort stattoshort(termstat& old);

// Equality comparison for two std::pair values, declared at global scope.
// Declaration only; the definition lives outside this header.
// NOTE(review): the pair's template arguments are not visible here --
// confirm which pair type this overload targets and how it is meant to
// differ from the comparison std::pair already provides.
bool operator== (const std::pair& left, const std::pair& right);

#endif