libstdc++
regex_executor.h
Go to the documentation of this file.
00001 // class template regex -*- C++ -*-
00002 
00003 // Copyright (C) 2013-2018 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the
00007 // terms of the GNU General Public License as published by the
00008 // Free Software Foundation; either version 3, or (at your option)
00009 // any later version.
00010 
00011 // This library is distributed in the hope that it will be useful,
00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014 // GNU General Public License for more details.
00015 
00016 // Under Section 7 of GPL version 3, you are granted additional
00017 // permissions described in the GCC Runtime Library Exception, version
00018 // 3.1, as published by the Free Software Foundation.
00019 
00020 // You should have received a copy of the GNU General Public License and
00021 // a copy of the GCC Runtime Library Exception along with this program;
00022 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00023 // <http://www.gnu.org/licenses/>.
00024 
00025 /**
00026  *  @file bits/regex_executor.h
00027  *  This is an internal header file, included by other library headers.
00028  *  Do not attempt to use it directly. @headername{regex}
00029  */
00030 
00031 // FIXME convert comments to doxygen format.
00032 
00033 namespace std _GLIBCXX_VISIBILITY(default)
00034 {
00035 _GLIBCXX_BEGIN_NAMESPACE_VERSION
00036 
00037 namespace __detail
00038 {
00039   /**
00040    * @addtogroup regex-detail
00041    * @{
00042    */
00043 
00044   /**
00045    * @brief Takes a regex and an input string and does the matching.
00046    *
00047    * The %_Executor class has two modes: DFS mode and BFS mode, controlled
00048    * by the template parameter %__dfs_mode.
00049    */
00050   template<typename _BiIter, typename _Alloc, typename _TraitsT,
00051            bool __dfs_mode>
00052     class _Executor
00053     {
00054       using __search_mode = integral_constant<bool, __dfs_mode>;
00055       using __dfs = true_type;
00056       using __bfs = false_type;
00057 
00058       enum class _Match_mode : unsigned char { _Exact, _Prefix };
00059 
00060     public:
00061       typedef typename iterator_traits<_BiIter>::value_type _CharT;
00062       typedef basic_regex<_CharT, _TraitsT>                 _RegexT;
00063       typedef std::vector<sub_match<_BiIter>, _Alloc>       _ResultsVec;
00064       typedef regex_constants::match_flag_type              _FlagT;
00065       typedef typename _TraitsT::char_class_type            _ClassT;
00066       typedef _NFA<_TraitsT>                                _NFAT;
00067 
00068     public:
00069       _Executor(_BiIter         __begin,
00070                 _BiIter         __end,
00071                 _ResultsVec&    __results,
00072                 const _RegexT&  __re,
00073                 _FlagT          __flags)
00074       : _M_begin(__begin),
00075       _M_end(__end),
00076       _M_re(__re),
00077       _M_nfa(*__re._M_automaton),
00078       _M_results(__results),
00079       _M_rep_count(_M_nfa.size()),
00080       _M_states(_M_nfa._M_start(), _M_nfa.size()),
00081       _M_flags((__flags & regex_constants::match_prev_avail)
00082                ? (__flags
00083                   & ~regex_constants::match_not_bol
00084                   & ~regex_constants::match_not_bow)
00085                : __flags)
00086       { }
00087 
00088       // Set matched when string exactly matches the pattern.
00089       bool
00090       _M_match()
00091       {
00092         _M_current = _M_begin;
00093         return _M_main(_Match_mode::_Exact);
00094       }
00095 
00096       // Set matched when some prefix of the string matches the pattern.
00097       bool
00098       _M_search_from_first()
00099       {
00100         _M_current = _M_begin;
00101         return _M_main(_Match_mode::_Prefix);
00102       }
00103 
00104       bool
00105       _M_search();
00106 
00107     private:
00108       void
00109       _M_rep_once_more(_Match_mode __match_mode, _StateIdT);
00110 
00111       void
00112       _M_handle_repeat(_Match_mode, _StateIdT);
00113 
00114       void
00115       _M_handle_subexpr_begin(_Match_mode, _StateIdT);
00116 
00117       void
00118       _M_handle_subexpr_end(_Match_mode, _StateIdT);
00119 
00120       void
00121       _M_handle_line_begin_assertion(_Match_mode, _StateIdT);
00122 
00123       void
00124       _M_handle_line_end_assertion(_Match_mode, _StateIdT);
00125 
00126       void
00127       _M_handle_word_boundary(_Match_mode, _StateIdT);
00128 
00129       void
00130       _M_handle_subexpr_lookahead(_Match_mode, _StateIdT);
00131 
00132       void
00133       _M_handle_match(_Match_mode, _StateIdT);
00134 
00135       void
00136       _M_handle_backref(_Match_mode, _StateIdT);
00137 
00138       void
00139       _M_handle_accept(_Match_mode, _StateIdT);
00140 
00141       void
00142       _M_handle_alternative(_Match_mode, _StateIdT);
00143 
00144       void
00145       _M_dfs(_Match_mode __match_mode, _StateIdT __start);
00146 
00147       bool
00148       _M_main(_Match_mode __match_mode)
00149       { return _M_main_dispatch(__match_mode, __search_mode{}); }
00150 
00151       bool
00152       _M_main_dispatch(_Match_mode __match_mode, __dfs);
00153 
00154       bool
00155       _M_main_dispatch(_Match_mode __match_mode, __bfs);
00156 
00157       bool
00158       _M_is_word(_CharT __ch) const
00159       {
00160         static const _CharT __s[2] = { 'w' };
00161         return _M_re._M_automaton->_M_traits.isctype
00162           (__ch, _M_re._M_automaton->_M_traits.lookup_classname(__s, __s+1));
00163       }
00164 
00165       bool
00166       _M_at_begin() const
00167       {
00168         return _M_current == _M_begin
00169           && !(_M_flags & (regex_constants::match_not_bol
00170                            | regex_constants::match_prev_avail));
00171       }
00172 
00173       bool
00174       _M_at_end() const
00175       {
00176         return _M_current == _M_end
00177           && !(_M_flags & regex_constants::match_not_eol);
00178       }
00179 
00180       bool
00181       _M_word_boundary() const;
00182 
00183       bool
00184       _M_lookahead(_StateIdT __next);
00185 
00186        // Holds additional information used in BFS-mode.
00187       template<typename _SearchMode, typename _ResultsVec>
00188         struct _State_info;
00189 
00190       template<typename _ResultsVec>
00191         struct _State_info<__bfs, _ResultsVec>
00192         {
00193           explicit
00194           _State_info(_StateIdT __start, size_t __n)
00195           : _M_visited_states(new bool[__n]()), _M_start(__start)
00196           { }
00197 
00198           bool _M_visited(_StateIdT __i)
00199           {
00200             if (_M_visited_states[__i])
00201               return true;
00202             _M_visited_states[__i] = true;
00203             return false;
00204           }
00205 
00206           void _M_queue(_StateIdT __i, const _ResultsVec& __res)
00207           { _M_match_queue.emplace_back(__i, __res); }
00208 
00209           // Dummy implementations for BFS mode.
00210           _BiIter* _M_get_sol_pos() { return nullptr; }
00211 
00212           // Saves states that need to be considered for the next character.
00213           vector<pair<_StateIdT, _ResultsVec>>  _M_match_queue;
00214           // Indicates which states are already visited.
00215           unique_ptr<bool[]>                    _M_visited_states;
00216           // To record current solution.
00217           _StateIdT _M_start;
00218         };
00219 
00220       template<typename _ResultsVec>
00221         struct _State_info<__dfs, _ResultsVec>
00222         {
00223           explicit
00224           _State_info(_StateIdT __start, size_t) : _M_start(__start)
00225           { }
00226 
00227           // Dummy implementations for DFS mode.
00228           bool _M_visited(_StateIdT) const { return false; }
00229           void _M_queue(_StateIdT, const _ResultsVec&) { }
00230 
00231           _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
00232 
00233           // To record current solution.
00234           _StateIdT _M_start;
00235           _BiIter   _M_sol_pos;
00236         };
00237 
00238     public:
00239       _ResultsVec                                           _M_cur_results;
00240       _BiIter                                               _M_current;
00241       _BiIter                                               _M_begin;
00242       const _BiIter                                         _M_end;
00243       const _RegexT&                                        _M_re;
00244       const _NFAT&                                          _M_nfa;
00245       _ResultsVec&                                          _M_results;
00246       vector<pair<_BiIter, int>>                            _M_rep_count;
00247       _State_info<__search_mode, _ResultsVec>               _M_states;
00248       _FlagT                                                _M_flags;
00249       // Do we have a solution so far?
00250       bool                                                  _M_has_sol;
00251     };
00252 
00253  //@} regex-detail
00254 } // namespace __detail
00255 _GLIBCXX_END_NAMESPACE_VERSION
00256 } // namespace std
00257 
00258 #include <bits/regex_executor.tcc>