/* * docitem.h * classifier * * daniel wojcik * */ #ifndef doc_h_ #define doc_h_ #include #include #include "classifier.h" /*class termp { public: termp() {}; termp(const std::string& t) {term = t; count = 0;} unsigned int getcount() const {return count;} const std::string& getterm() const {return term;} void increment() {++count;} void print(); private: std::string term; unsigned int count; };*/ struct docterm { std::string term; unsigned int count; }; struct termstat { std::map ccounts; unsigned int count; unsigned int dcount; unsigned int tcount; float idf; }; struct termshort { unsigned int count; unsigned int dcount; unsigned int tcount; float idf; }; class docitem { public: docitem(); //docitem(const docitem& old); void addterm(const std::string& t); unsigned int getcount(const std::string& t); void increment(const std::string& t); //void stats(); std::map charterms; std::string classification[classtypes]; std::string realclass[classtypes]; unsigned int termcount; private: std::map terms; }; struct classstat { std::map charterms; unsigned int termsize; unsigned int dcount; unsigned int cluster; unsigned int termcount; bool seen; double point; std::pair neighbors[neark]; dlib::svm_pegasos svmtrainer; //dlib::probabilistic_decision_function pfunct_type; }; struct cluster { std::list classes; double meanpoint; double meanclass; unsigned int count; }; //terrible coding, but necessary to help abstract some //things out in order to make use of dlib more easily. class globals { public: globals() {}; std::map gterms; //std::list docs; std::map docclasses; std::map clusters; unsigned int doccount; unsigned int termcount; }; termshort stattoshort(termstat& old); bool operator== (const std::pair& left, const std::pair& right); #endif