uuid_x86.ipp 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. /*
  2. * Copyright Andrey Semashev 2013, 2022, 2024.
  3. * Distributed under the Boost Software License, Version 1.0.
  4. * (See accompanying file LICENSE_1_0.txt or copy at
  5. * https://www.boost.org/LICENSE_1_0.txt)
  6. */
  7. /*!
  8. * \file uuid/detail/uuid_x86.ipp
  9. *
  10. * \brief This header contains optimized SSE implementation of \c boost::uuid operations.
  11. */
  12. #ifndef BOOST_UUID_DETAIL_UUID_X86_IPP_INCLUDED_
  13. #define BOOST_UUID_DETAIL_UUID_X86_IPP_INCLUDED_
  14. #include <boost/uuid/detail/endian.hpp>
  15. #include <cstdint>
  16. #if defined(BOOST_UUID_REPORT_IMPLEMENTATION)
  17. #include <boost/config/pragma_message.hpp>
  18. #if defined(BOOST_UUID_USE_AVX10_1)
  19. BOOST_PRAGMA_MESSAGE( "Using uuid_x86.ipp, AVX10.1" )
  20. #elif defined(BOOST_UUID_USE_SSE41)
  21. BOOST_PRAGMA_MESSAGE( "Using uuid_x86.ipp, SSE4.1" )
  22. #elif defined(BOOST_UUID_USE_SSE3)
  23. BOOST_PRAGMA_MESSAGE( "Using uuid_x86.ipp, SSE3" )
  24. #else
  25. BOOST_PRAGMA_MESSAGE( "Using uuid_x86.ipp, SSE2" )
  26. #endif
  27. #endif // #if defined(BOOST_UUID_REPORT_IMPLEMENTATION)
  28. // MSVC does not always have immintrin.h (at least, not up to MSVC 10), so include the appropriate header for each instruction set
  29. #if defined(BOOST_UUID_USE_AVX10_1)
  30. #include <immintrin.h>
  31. #elif defined(BOOST_UUID_USE_SSE41)
  32. #include <smmintrin.h>
  33. #elif defined(BOOST_UUID_USE_SSE3)
  34. #include <pmmintrin.h>
  35. #else
  36. #include <emmintrin.h>
  37. #endif
  38. namespace boost {
  39. namespace uuids {
  40. namespace detail {
  41. BOOST_FORCEINLINE __m128i load_unaligned_si128(const std::uint8_t* p) noexcept
  42. {
  43. return _mm_loadu_si128(reinterpret_cast< const __m128i* >(p));
  44. }
  45. BOOST_FORCEINLINE void compare(uuid const& lhs, uuid const& rhs, std::uint32_t& cmp, std::uint32_t& rcmp) noexcept
  46. {
  47. __m128i mm_left = uuids::detail::load_unaligned_si128(lhs.data);
  48. __m128i mm_right = uuids::detail::load_unaligned_si128(rhs.data);
  49. // To emulate lexicographical_compare behavior we have to perform two comparisons - the forward and reverse one.
  50. // Then we know which bytes are equivalent and which ones are different, and for those different the comparison results
  51. // will be opposite. Then we'll be able to find the first differing comparison result (for both forward and reverse ways),
  52. // and depending on which way it is for, this will be the result of the operation. There are a few notes to consider:
  53. //
  54. // 1. Due to little endian byte order the first bytes go into the lower part of the xmm registers,
  55. // so the comparison results in the least significant bits will actually be the most signigicant for the final operation result.
  56. // This means we have to determine which of the comparison results have the least significant bit on, and this is achieved with
  57. // the "(x - 1) ^ x" trick. With BMI, this will produce a single blsmsk instruction.
  58. // 2. Because there is only signed byte comparison until AVX-512, we have to invert byte comparison results whenever signs of the
  59. // corresponding bytes are different. I.e. in signed comparison it's -1 < 1, but in unsigned it is the opposite (255 > 1). To do
  60. // that we XOR left and right, making the most significant bit of each byte 1 if the signs are different, and later apply this mask
  61. // with another XOR to the comparison results.
  62. // 3. Until AVX-512, there is only pcmpgtb instruction that compares for "greater" relation, so we swap the arguments to get what we need.
  63. #if defined(BOOST_UUID_USE_AVX10_1)
  64. __mmask16 k_cmp = _mm_cmplt_epu8_mask(mm_left, mm_right);
  65. __mmask16 k_rcmp = _mm_cmplt_epu8_mask(mm_right, mm_left);
  66. cmp = static_cast< std::uint32_t >(_cvtmask16_u32(k_cmp));
  67. rcmp = static_cast< std::uint32_t >(_cvtmask16_u32(k_rcmp));
  68. #else // defined(BOOST_UUID_USE_AVX10_1)
  69. const __m128i mm_signs_mask = _mm_xor_si128(mm_left, mm_right);
  70. __m128i mm_cmp = _mm_cmpgt_epi8(mm_right, mm_left), mm_rcmp = _mm_cmpgt_epi8(mm_left, mm_right);
  71. mm_cmp = _mm_xor_si128(mm_signs_mask, mm_cmp);
  72. mm_rcmp = _mm_xor_si128(mm_signs_mask, mm_rcmp);
  73. cmp = static_cast< std::uint32_t >(_mm_movemask_epi8(mm_cmp));
  74. rcmp = static_cast< std::uint32_t >(_mm_movemask_epi8(mm_rcmp));
  75. #endif // defined(BOOST_UUID_USE_AVX10_1)
  76. cmp = (cmp - 1u) ^ cmp;
  77. rcmp = (rcmp - 1u) ^ rcmp;
  78. }
  79. } // namespace detail
  80. inline bool uuid::is_nil() const noexcept
  81. {
  82. __m128i mm = uuids::detail::load_unaligned_si128(data);
  83. #if defined(BOOST_UUID_USE_SSE41)
  84. return _mm_test_all_zeros(mm, mm) != 0;
  85. #else
  86. mm = _mm_cmpeq_epi32(mm, _mm_setzero_si128());
  87. return _mm_movemask_epi8(mm) == 0xFFFF;
  88. #endif
  89. }
  90. inline void uuid::swap(uuid& rhs) noexcept
  91. {
  92. __m128i mm_this = uuids::detail::load_unaligned_si128(data);
  93. __m128i mm_rhs = uuids::detail::load_unaligned_si128(rhs.data);
  94. _mm_storeu_si128(reinterpret_cast< __m128i* >(rhs.data+0), mm_this);
  95. _mm_storeu_si128(reinterpret_cast< __m128i* >(data+0), mm_rhs);
  96. }
  97. inline bool operator== (uuid const& lhs, uuid const& rhs) noexcept
  98. {
  99. __m128i mm_left = uuids::detail::load_unaligned_si128(lhs.data);
  100. __m128i mm_right = uuids::detail::load_unaligned_si128(rhs.data);
  101. #if defined(BOOST_UUID_USE_SSE41)
  102. __m128i mm = _mm_xor_si128(mm_left, mm_right);
  103. return _mm_test_all_zeros(mm, mm) != 0;
  104. #else
  105. __m128i mm_cmp = _mm_cmpeq_epi32(mm_left, mm_right);
  106. return _mm_movemask_epi8(mm_cmp) == 0xFFFF;
  107. #endif
  108. }
  109. inline bool operator< (uuid const& lhs, uuid const& rhs) noexcept
  110. {
  111. std::uint32_t cmp, rcmp;
  112. uuids::detail::compare(lhs, rhs, cmp, rcmp);
  113. return cmp < rcmp;
  114. }
  #if defined(BOOST_UUID_HAS_THREE_WAY_COMPARISON)
  //! Three-way comparison of \a lhs and \a rhs, decided by comparing the two masks produced by \c detail::compare.
  inline std::strong_ordering operator<=> (uuid const& lhs, uuid const& rhs) noexcept
  {
      std::uint32_t forward_mask;
      std::uint32_t reverse_mask;
      uuids::detail::compare(lhs, rhs, forward_mask, reverse_mask);

      return forward_mask <=> reverse_mask;
  }
  #endif // defined(BOOST_UUID_HAS_THREE_WAY_COMPARISON)
  123. } // namespace uuids
  124. } // namespace boost
  125. #endif // BOOST_UUID_DETAIL_UUID_X86_IPP_INCLUDED_