00001 #ifndef WIBBLE_REGEXP_H
00002 #define WIBBLE_REGEXP_H
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
#include <wibble/exception.h>

#include <sys/types.h>
#include <regex.h>

#include <cstddef>
#include <iterator>
#include <string>
00027
00028 namespace wibble {
00029 namespace exception {
00030
00032
00033 class Regexp : public wibble::exception::Generic
00034 {
00035 protected:
00036 int m_code;
00037 std::string m_message;
00038
00039 public:
00040 Regexp(const regex_t& re, int code, const std::string& context)
00041 throw ();
00042 ~Regexp() throw () {}
00043
00045 virtual int code() const throw () { return m_code; }
00046
00047 virtual const char* type() const throw () { return "Regexp"; }
00048 virtual std::string desc() const throw () { return m_message; }
00049 };
00050
00051 }
00052
00053 class Regexp
00054 {
00055 protected:
00056 regex_t re;
00057 regmatch_t* pmatch;
00058 int nmatch;
00059 std::string lastMatch;
00060
00061 public:
00062 Regexp(const std::string& expr, int match_count = 0, int flags = 0) throw (wibble::exception::Regexp);
00063 ~Regexp() throw ();
00064
00065 bool match(const std::string& str, int flags = 0) throw (wibble::exception::Regexp);
00066
00067 std::string operator[](int idx) throw (wibble::exception::OutOfRange);
00068
00069 size_t matchStart(int idx) throw (wibble::exception::OutOfRange);
00070 size_t matchEnd(int idx) throw (wibble::exception::OutOfRange);
00071 size_t matchLength(int idx) throw (wibble::exception::OutOfRange);
00072 };
00073
00074 class ERegexp : public Regexp
00075 {
00076 public:
00077 ERegexp(const std::string& expr, int match_count = 0, int flags = 0) throw (wibble::exception::Regexp)
00078 : Regexp(expr, match_count, flags | REG_EXTENDED) {}
00079 };
00080
00081 class Tokenizer
00082 {
00083 const std::string& str;
00084 wibble::Regexp re;
00085
00086 public:
00087 class const_iterator
00088 {
00089 Tokenizer& tok;
00090 size_t beg, end;
00091 public:
00092 typedef std::string value_type;
00093 typedef ptrdiff_t difference_type;
00094 typedef value_type *pointer;
00095 typedef value_type &reference;
00096 typedef std::forward_iterator_tag iterator_category;
00097 const_iterator(Tokenizer& tok) : tok(tok), beg(0), end(0) { operator++(); }
00098 const_iterator(Tokenizer& tok, bool) : tok(tok), beg(tok.str.size()), end(tok.str.size()) {}
00099
00100 const_iterator& operator++();
00101
00102 std::string operator*() const
00103 {
00104 return tok.str.substr(beg, end-beg);
00105 }
00106 bool operator==(const const_iterator& ti) const
00107 {
00108 return beg == ti.beg && end == ti.end;
00109 }
00110 bool operator!=(const const_iterator& ti) const
00111 {
00112 return beg != ti.beg || end != ti.end;
00113 }
00114 };
00115
00116 Tokenizer(const std::string& str, const std::string& re, int flags)
00117 : str(str), re(re, 1, flags) {}
00118
00119 const_iterator begin() { return const_iterator(*this); }
00120 const_iterator end() { return const_iterator(*this, false); }
00121 };
00122
00136 class Splitter
00137 {
00138 wibble::Regexp re;
00139
00140 public:
00145
00146 class const_iterator
00147 {
00148 wibble::Regexp& re;
00149 std::string cur;
00150 std::string next;
00151
00152 public:
00153 const_iterator(wibble::Regexp& re, const std::string& str) : re(re), next(str) { ++*this; }
00154 const_iterator(wibble::Regexp& re) : re(re) {}
00155
00156 const_iterator& operator++();
00157
00158 const std::string& operator*() const
00159 {
00160 return cur;
00161 }
00162 const std::string* operator->() const
00163 {
00164 return &cur;
00165 }
00166 bool operator==(const const_iterator& ti) const
00167 {
00168 return cur == ti.cur && next == ti.next;
00169 }
00170 bool operator!=(const const_iterator& ti) const
00171 {
00172 return cur != ti.cur || next != ti.next;
00173 }
00174 };
00175
00179 Splitter(const std::string& re, int flags)
00180 : re(re, 1, flags) {}
00181
00185 const_iterator begin(const std::string& str) { return const_iterator(re, str); }
00186 const_iterator end() { return const_iterator(re); }
00187 };
00188
00189 }
00190
00191
00192 #endif