/* * DocItem.h * Classifier * * Daniel Wojcik * */ #ifndef doc_h_ #define doc_h_ #include #include #include "classifier.h" /*class TermP { public: TermP() {}; TermP(const std::string& t) {term = t; count = 0;} unsigned int getCount() const {return count;} const std::string& getTerm() const {return term;} void increment() {++count;} void print(); private: std::string term; unsigned int count; };*/ struct DocTerm { std::string term; unsigned int count; }; struct TermStat { std::map cCounts; unsigned int count; unsigned int dCount; unsigned int tCount; float idf; }; struct TermShort { unsigned int count; unsigned int dCount; unsigned int tCount; float idf; }; class DocItem { public: DocItem(); //DocItem(const DocItem& old); void addTerm(const std::string& t); unsigned int getCount(const std::string& t); void increment(const std::string& t); //void stats(); std::map charTerms; std::string classification[classTypes]; std::string realClass[classTypes]; unsigned int termCount; private: std::map terms; }; struct ClassStat { std::map charTerms; unsigned int termSize; unsigned int dCount; unsigned int cluster; unsigned int termCount; bool seen; double point; std::pair neighbors[nearK]; dlib::svm_pegasos svmTrainer; //dlib::probabilistic_decision_function pfunct_type; }; struct Cluster { std::list classes; double meanPoint; double meanClass; unsigned int count; }; //Terrible coding, but necessary to help abstract some //things out in order to make use of dlib more easily. class Globals { public: Globals() {}; std::map gTerms; //std::list docs; std::map docClasses; std::map clusters; unsigned int docCount; unsigned int termCount; }; TermShort statToShort(TermStat& old); bool operator== (const std::pair& left, const std::pair& right); #endif