/*
 * Distributed under the Boost Software License, Version 1.0.
 * (See accompanying file LICENSE_1_0.txt or copy at
 * http://www.boost.org/LICENSE_1_0.txt)
 *
 * Copyright (c) 2020 Andrey Semashev
 */
/*!
 * \file atomic/detail/fence_arch_ops_gcc_x86.hpp
 *
 * This header contains implementation of the \c fence_arch_operations struct.
 */
#ifndef BOOST_ATOMIC_DETAIL_FENCE_ARCH_OPS_GCC_X86_HPP_INCLUDED_
#define BOOST_ATOMIC_DETAIL_FENCE_ARCH_OPS_GCC_X86_HPP_INCLUDED_

#include <boost/memory_order.hpp>
#include <boost/atomic/detail/config.hpp>
#include <boost/atomic/detail/header.hpp>

#ifdef BOOST_HAS_PRAGMA_ONCE
#pragma once
#endif
namespace boost {
namespace atomics {
namespace detail {
  24. //! Fence operations for x86
  25. struct fence_arch_operations_gcc_x86
  26. {
  27. static BOOST_FORCEINLINE void thread_fence(memory_order order) BOOST_NOEXCEPT
  28. {
  29. if (order == memory_order_seq_cst)
  30. {
  31. // We could generate mfence for a seq_cst fence here, but a dummy lock-prefixed instruction is enough
  32. // and is faster than mfence on most modern x86 CPUs (as of 2020).
  33. // Note that we want to apply the atomic operation on any location so that:
  34. // - It is not shared with other threads. A variable on the stack suits this well.
  35. // - It is likely in cache. Being close to the top of the stack fits this well.
  36. // - It does not alias existing data on the stack, so that we don't introduce a false data dependency.
  37. // See some performance data here: https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
  38. // Unfortunately, to make tools like valgrind happy, we have to initialize the dummy, which is
  39. // otherwise not needed.
  40. unsigned char dummy = 0u;
  41. __asm__ __volatile__ ("lock; notb %0" : "+m" (dummy) : : "memory");
  42. }
  43. else if ((static_cast< unsigned int >(order) & (static_cast< unsigned int >(memory_order_acquire) | static_cast< unsigned int >(memory_order_release))) != 0u)
  44. {
  45. __asm__ __volatile__ ("" ::: "memory");
  46. }
  47. }
  48. static BOOST_FORCEINLINE void signal_fence(memory_order order) BOOST_NOEXCEPT
  49. {
  50. if (order != memory_order_relaxed)
  51. __asm__ __volatile__ ("" ::: "memory");
  52. }
  53. };
  54. typedef fence_arch_operations_gcc_x86 fence_arch_operations;
} // namespace detail
} // namespace atomics
} // namespace boost

#include <boost/atomic/detail/footer.hpp>

#endif // BOOST_ATOMIC_DETAIL_FENCE_ARCH_OPS_GCC_X86_HPP_INCLUDED_