tst.ipp 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281
  1. /*=============================================================================
  2. Copyright (c) 2001-2003 Joel de Guzman
  3. http://spirit.sourceforge.net/
  4. Use, modification and distribution is subject to the Boost Software
  5. License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
  6. http://www.boost.org/LICENSE_1_0.txt)
  7. =============================================================================*/
  8. #ifndef BOOST_SPIRIT_TST_IPP
  9. #define BOOST_SPIRIT_TST_IPP
  10. ///////////////////////////////////////////////////////////////////////////////
  11. #include <boost/move/unique_ptr.hpp>
  12. #include <boost/spirit/home/classic/core/assert.hpp>
  13. ///////////////////////////////////////////////////////////////////////////////
  14. namespace boost { namespace spirit {
  15. BOOST_SPIRIT_CLASSIC_NAMESPACE_BEGIN
  16. namespace impl
  17. {
  18. ///////////////////////////////////////////////////////////////////////////////
  19. //
  20. // tst class
  21. //
  22. // Ternary Search Tree implementation. The data structure is faster than
  23. // hashing for many typical search problems especially when the search
  24. // interface is iterator based. Searching for a string of length k in a
  25. // ternary search tree with n strings will require at most O(log n+k)
  26. // character comparisons. TSTs are many times faster than hash tables
  27. // for unsuccessful searches since mismatches are discovered earlier
  28. // after examining only a few characters. Hash tables always examine an
  29. // entire key when searching.
  30. //
  31. // For details see http://www.cs.princeton.edu/~rs/strings/.
  32. //
  33. // *** This is a low level class and is
  34. // not meant for public consumption ***
  35. //
  36. ///////////////////////////////////////////////////////////////////////////////
  37. template <typename T, typename CharT>
  38. struct tst_node
  39. {
  40. tst_node(CharT value_)
  41. : value(value_)
  42. , left(0)
  43. , right(0)
  44. { middle.link = 0; }
  45. ~tst_node()
  46. {
  47. delete left;
  48. delete right;
  49. if (value)
  50. delete middle.link;
  51. else
  52. delete middle.data;
  53. }
  54. tst_node*
  55. clone() const
  56. {
  57. boost::movelib::unique_ptr<tst_node> copy(new tst_node(value));
  58. if (left)
  59. copy->left = left->clone();
  60. if (right)
  61. copy->right = right->clone();
  62. if (value && middle.link)
  63. {
  64. copy->middle.link = middle.link->clone();
  65. }
  66. else
  67. {
  68. boost::movelib::unique_ptr<T> mid_data(new T(*middle.data));
  69. copy->middle.data = mid_data.release();
  70. }
  71. return copy.release();
  72. }
  73. union center {
  74. tst_node* link;
  75. T* data;
  76. };
  77. CharT value;
  78. tst_node* left;
  79. center middle;
  80. tst_node* right;
  81. };
  82. ///////////////////////////////////////////////////////////////////////////
  83. template <typename T, typename CharT>
  84. class tst
  85. {
  86. public:
  87. struct search_info
  88. {
  89. T* data;
  90. std::size_t length;
  91. };
  92. tst()
  93. : root(0) {}
  94. tst(tst const& other)
  95. : root(other.root ? other.root->clone() : 0) {}
  96. ~tst()
  97. { delete root; }
  98. tst&
  99. operator=(tst const& other)
  100. {
  101. if (this != &other)
  102. {
  103. node_t* new_root = other.root ? other.root->clone() : 0;
  104. delete root;
  105. root = new_root;
  106. }
  107. return *this;
  108. }
  109. template <typename IteratorT>
  110. T* add(IteratorT first, IteratorT const& last, T const& data)
  111. {
  112. if (first == last)
  113. return 0;
  114. node_t** np = &root;
  115. CharT ch = *first;
  116. BOOST_SPIRIT_ASSERT((first == last || ch != 0)
  117. && "Won't add string containing null character");
  118. for (;;)
  119. {
  120. if (*np == 0 || ch == 0)
  121. {
  122. node_t* right = 0;
  123. if (np != 0)
  124. right = *np;
  125. *np = new node_t(ch);
  126. if (right)
  127. (**np).right = right;
  128. }
  129. if (ch < (**np).value)
  130. {
  131. np = &(**np).left;
  132. }
  133. else
  134. {
  135. if (ch == (**np).value)
  136. {
  137. if (ch == 0)
  138. {
  139. if ((**np).middle.data == 0)
  140. {
  141. (**np).middle.data = new T(data);
  142. return (**np).middle.data;
  143. }
  144. else
  145. {
  146. // re-addition is disallowed
  147. return 0;
  148. }
  149. }
  150. ++first;
  151. ch = (first == last) ? CharT(0) : *first;
  152. BOOST_SPIRIT_ASSERT((first == last || ch != 0)
  153. && "Won't add string containing null character");
  154. np = &(**np).middle.link;
  155. }
  156. else
  157. {
  158. np = &(**np).right;
  159. }
  160. }
  161. }
  162. }
  163. template <typename ScannerT>
  164. search_info find(ScannerT const& scan) const
  165. {
  166. search_info result = { 0, 0 };
  167. if (scan.at_end()) {
  168. return result;
  169. }
  170. typedef typename ScannerT::iterator_t iterator_t;
  171. node_t* np = root;
  172. CharT ch = *scan;
  173. iterator_t save = scan.first;
  174. iterator_t latest = scan.first;
  175. std::size_t latest_len = 0;
  176. while (np)
  177. {
  178. if (ch < np->value) // => go left!
  179. {
  180. if (np->value == 0)
  181. {
  182. result.data = np->middle.data;
  183. if (result.data)
  184. {
  185. latest = scan.first;
  186. latest_len = result.length;
  187. }
  188. }
  189. np = np->left;
  190. }
  191. else if (ch == np->value) // => go middle!
  192. {
  193. // Matching the null character is not allowed.
  194. if (np->value == 0)
  195. {
  196. result.data = np->middle.data;
  197. if (result.data)
  198. {
  199. latest = scan.first;
  200. latest_len = result.length;
  201. }
  202. break;
  203. }
  204. ++scan;
  205. ch = scan.at_end() ? CharT(0) : *scan;
  206. np = np->middle.link;
  207. ++result.length;
  208. }
  209. else // (ch > np->value) => go right!
  210. {
  211. if (np->value == 0)
  212. {
  213. result.data = np->middle.data;
  214. if (result.data)
  215. {
  216. latest = scan.first;
  217. latest_len = result.length;
  218. }
  219. }
  220. np = np->right;
  221. }
  222. }
  223. if (result.data == 0)
  224. {
  225. scan.first = save;
  226. }
  227. else
  228. {
  229. scan.first = latest;
  230. result.length = latest_len;
  231. }
  232. return result;
  233. }
  234. private:
  235. typedef tst_node<T, CharT> node_t;
  236. node_t* root;
  237. };
  238. ///////////////////////////////////////////////////////////////////////////////
  239. } // namespace impl
  240. BOOST_SPIRIT_CLASSIC_NAMESPACE_END
  241. }} // namespace boost::spirit
  242. #endif