//===-- tsan_rtl_access.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
// Definitions of memory access and function entry/exit entry points.
//===----------------------------------------------------------------------===//

#include "tsan_rtl.h"

namespace __tsan {

ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState* thr, uptr pc,
                                             uptr addr, uptr size,
                                             AccessType typ) {
  DCHECK(size == 1 || size == 2 || size == 4 || size == 8);
  if (!kCollectHistory)
    return true;
  EventAccess* ev;
  if (UNLIKELY(!TraceAcquire(thr, &ev)))
    return false;
  u64 size_log = size == 1 ? 0 : size == 2 ? 1 : size == 4 ? 2 : 3;
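  // The delta to the previous PC is biased by half of the representable
  // range so that both forward and backward jumps fit into kPCBits;
  // deltas outside that window fall through to the extended event below.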
  uptr pc_delta = pc - thr->trace_prev_pc + (1 << (EventAccess::kPCBits - 1));
  thr->trace_prev_pc = pc;
  if (LIKELY(pc_delta < (1 << EventAccess::kPCBits))) {
    ev->is_access = 1;
    ev->is_read = !!(typ & kAccessRead);
    ev->is_atomic = !!(typ & kAccessAtomic);
    ev->size_log = size_log;
    ev->pc_delta = pc_delta;
    DCHECK_EQ(ev->pc_delta, pc_delta);
    ev->addr = CompressAddr(addr);
    TraceRelease(thr, ev);
    return true;
  }
  auto* evex = reinterpret_cast<EventAccessExt*>(ev);
  evex->is_access = 0;
  evex->is_func = 0;
  evex->type = EventType::kAccessExt;
  evex->is_read = !!(typ & kAccessRead);
  evex->is_atomic = !!(typ & kAccessAtomic);
  evex->size_log = size_log;
  // Note: this is important, see comment in EventAccessExt.
  evex->_ = 0;
  evex->addr = CompressAddr(addr);
  evex->pc = pc;
  TraceRelease(thr, evex);
  return true;
}

ALWAYS_INLINE
bool TryTraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
                               AccessType typ) {
  if (!kCollectHistory)
    return true;
  EventAccessRange* ev;
  if (UNLIKELY(!TraceAcquire(thr, &ev)))
    return false;
  thr->trace_prev_pc = pc;
  ev->is_access = 0;
  ev->is_func = 0;
  ev->type = EventType::kAccessRange;
  ev->is_read = !!(typ & kAccessRead);
  ev->is_free = !!(typ & kAccessFree);
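  // The range size does not fit into a single bitfield, so it is split
  // across size_lo (low bits) and size_hi (remaining high bits) below.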
  ev->size_lo = size;
  ev->pc = CompressAddr(pc);
  ev->addr = CompressAddr(addr);
  ev->size_hi = size >> EventAccessRange::kSizeLoBits;
  TraceRelease(thr, ev);
  return true;
}

void TraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
                            AccessType typ) {
  if (LIKELY(TryTraceMemoryAccessRange(thr, pc, addr, size, typ)))
    return;
  TraceSwitchPart(thr);
  UNUSED bool res = TryTraceMemoryAccessRange(thr, pc, addr, size, typ);
  DCHECK(res);
}

void TraceFunc(ThreadState* thr, uptr pc) {
  if (LIKELY(TryTraceFunc(thr, pc)))
    return;
  TraceSwitchPart(thr);
  UNUSED bool res = TryTraceFunc(thr, pc);
  DCHECK(res);
}

NOINLINE void TraceRestartFuncEntry(ThreadState* thr, uptr pc) {
  TraceSwitchPart(thr);
  FuncEntry(thr, pc);
}

NOINLINE void TraceRestartFuncExit(ThreadState* thr) {
  TraceSwitchPart(thr);
  FuncExit(thr);
}

void TraceMutexLock(ThreadState* thr, EventType type, uptr pc, uptr addr,
                    StackID stk) {
  DCHECK(type == EventType::kLock || type == EventType::kRLock);
  if (!kCollectHistory)
    return;
  EventLock ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = type;
  ev.pc = CompressAddr(pc);
  ev.stack_lo = stk;
  ev.stack_hi = stk >> EventLock::kStackIDLoBits;
  ev._ = 0;
  ev.addr = CompressAddr(addr);
  TraceEvent(thr, ev);
}

void TraceMutexUnlock(ThreadState* thr, uptr addr) {
  if (!kCollectHistory)
    return;
  EventUnlock ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = EventType::kUnlock;
  ev._ = 0;
  ev.addr = CompressAddr(addr);
  TraceEvent(thr, ev);
}

void TraceTime(ThreadState* thr) {
  if (!kCollectHistory)
    return;
  FastState fast_state = thr->fast_state;
  EventTime ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = EventType::kTime;
  ev.sid = static_cast<u64>(fast_state.sid());
  ev.epoch = static_cast<u64>(fast_state.epoch());
  ev._ = 0;
  TraceEvent(thr, ev);
}

NOINLINE void DoReportRace(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                           Shadow old,
                           AccessType typ) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
  // For the free shadow markers the first element (that contains kFreeSid)
  // triggers the race, but the second element contains info about the freeing
  // thread, so take that one.
  if (old.sid() == kFreeSid)
    old = Shadow(LoadShadow(&shadow_mem[1]));
  // This prevents trapping on this address in future.
  for (uptr i = 0; i < kShadowCnt; i++)
    StoreShadow(&shadow_mem[i], i == 0 ? Shadow::kRodata : Shadow::kEmpty);
  // See the comment in MemoryRangeFreed as to why the slot is locked
  // for free memory accesses. ReportRace must not be called with
  // the slot locked because of the fork. But MemoryRangeFreed is not
  // called during fork because fork sets ignore_reads_and_writes,
  // so simply unlocking the slot should be fine.
  if (typ & kAccessSlotLocked)
    SlotUnlock(thr);
  ReportRace(thr, shadow_mem, cur, Shadow(old), typ);
  if (typ & kAccessSlotLocked)
    SlotLock(thr);
}

#if !TSAN_VECTORIZE
ALWAYS_INLINE
bool ContainsSameAccess(RawShadow* s, Shadow cur, int unused0, int unused1,
                        AccessType typ) {
  for (uptr i = 0; i < kShadowCnt; i++) {
    auto old = LoadShadow(&s[i]);
    if (!(typ & kAccessRead)) {
      if (old == cur.raw())
        return true;
      continue;
    }
    auto masked = static_cast<RawShadow>(static_cast<u32>(old) |
                                         static_cast<u32>(Shadow::kRodata));
    if (masked == cur.raw())
      return true;
    if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
      if (old == Shadow::kRodata)
        return true;
    }
  }
  return false;
}

ALWAYS_INLINE
bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                int unused0, int unused1, AccessType typ) {
  bool stored = false;
  for (uptr idx = 0; idx < kShadowCnt; idx++) {
    RawShadow* sp = &shadow_mem[idx];
    Shadow old(LoadShadow(sp));
    if (LIKELY(old.raw() == Shadow::kEmpty)) {
      if (!(typ & kAccessCheckOnly) && !stored)
        StoreShadow(sp, cur.raw());
      return false;
    }
    if (LIKELY(!(cur.access() & old.access())))
      continue;
    if (LIKELY(cur.sid() == old.sid())) {
      if (!(typ & kAccessCheckOnly) &&
          LIKELY(cur.access() == old.access() && old.IsRWWeakerOrEqual(typ))) {
        StoreShadow(sp, cur.raw());
        stored = true;
      }
      continue;
    }
    if (LIKELY(old.IsBothReadsOrAtomic(typ)))
      continue;
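    // Happens-before check against the per-thread vector clock: if our clock
    // for old.sid() already covers old.epoch(), the old access is ordered
    // before the current one and cannot race with it.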
    if (LIKELY(thr->clock.Get(old.sid()) >= old.epoch()))
      continue;
    DoReportRace(thr, shadow_mem, cur, old, typ);
    return true;
  }
  // We did not find any races and had already stored
  // the current access info, so we are done.
  if (LIKELY(stored))
    return false;
  // Choose a random candidate slot and replace it.
  uptr index =
      atomic_load_relaxed(&thr->trace_pos) / sizeof(Event) % kShadowCnt;
  StoreShadow(&shadow_mem[index], cur.raw());
  return false;
}

# define LOAD_CURRENT_SHADOW(cur, shadow_mem) UNUSED int access = 0, shadow = 0

#else /* !TSAN_VECTORIZE */

ALWAYS_INLINE
bool ContainsSameAccess(RawShadow* unused0, Shadow unused1, m128 shadow,
                        m128 access, AccessType typ) {
  // Note: we could check if there is a larger access of the same type,
  // e.g. we just allocated/memset-ed a block (so it contains 8 byte writes)
  // and now do smaller reads/writes; these can also be considered as "same
  // access". However, it will make the check more expensive, so it's unclear
  // if it's worth it. But this would conserve trace space, so it's useful
  // besides the potential speed up.
  if (!(typ & kAccessRead)) {
    const m128 same = _mm_cmpeq_epi32(shadow, access);
    return _mm_movemask_epi8(same);
  }
  // For reads we need to reset the read bit in the shadow,
  // because we need to match a read with both reads and writes.
  // Shadow::kRodata has only the read bit set, so it does what we want.
  // We also abuse it for the rodata check to save a few cycles
  // since we already loaded Shadow::kRodata into a register.
  // Reads from rodata can't race.
  // Measurements show that they can be 10-20% of all memory accesses.
  // Shadow::kRodata has epoch 0, which cannot appear in shadow normally
  // (thread epochs start from 1). So the same read bit mask
  // serves as a rodata indicator.
  const m128 read_mask = _mm_set1_epi32(static_cast<u32>(Shadow::kRodata));
  const m128 masked_shadow = _mm_or_si128(shadow, read_mask);
  m128 same = _mm_cmpeq_epi32(masked_shadow, access);
  // Range memory accesses check Shadow::kRodata before calling this,
  // Shadow::kRodata is not possible for free memory access
  // and Go does not use Shadow::kRodata.
  if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
    const m128 ro = _mm_cmpeq_epi32(shadow, read_mask);
    same = _mm_or_si128(ro, same);
  }
  return _mm_movemask_epi8(same);
}

NOINLINE void DoReportRaceV(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                            u32 race_mask, m128 shadow, AccessType typ) {
  // race_mask indicates which of the shadow elements raced with the current
  // access. Extract that element.
  CHECK_NE(race_mask, 0);
  u32 old;
  // Note: _mm_extract_epi32 index must be a constant value.
  switch (__builtin_ffs(race_mask) / 4) {
    case 0:
      old = _mm_extract_epi32(shadow, 0);
      break;
    case 1:
      old = _mm_extract_epi32(shadow, 1);
      break;
    case 2:
      old = _mm_extract_epi32(shadow, 2);
      break;
    case 3:
      old = _mm_extract_epi32(shadow, 3);
      break;
  }
  Shadow prev(static_cast<RawShadow>(old));
  // For the free shadow markers the first element (that contains kFreeSid)
  // triggers the race, but the second element contains info about the freeing
  // thread, so take that one.
  if (prev.sid() == kFreeSid)
    prev = Shadow(static_cast<RawShadow>(_mm_extract_epi32(shadow, 1)));
  DoReportRace(thr, shadow_mem, cur, prev, typ);
}

ALWAYS_INLINE
bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                m128 shadow, m128 access, AccessType typ) {
  // Note: empty/zero slots don't intersect with any access.
  const m128 zero = _mm_setzero_si128();
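  // Shadow word layout, as implied by the masks below: bits 0-7 hold the
  // access mask within the 8-byte cell, bits 8-15 the sid, bits 16-29 the
  // epoch, and bits 30-31 the is_read/is_atomic flags.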
  const m128 mask_access = _mm_set1_epi32(0x000000ff);
  const m128 mask_sid = _mm_set1_epi32(0x0000ff00);
  const m128 mask_read_atomic = _mm_set1_epi32(0xc0000000);
  const m128 access_and = _mm_and_si128(access, shadow);
  const m128 access_xor = _mm_xor_si128(access, shadow);
  const m128 intersect = _mm_and_si128(access_and, mask_access);
  const m128 not_intersect = _mm_cmpeq_epi32(intersect, zero);
  const m128 not_same_sid = _mm_and_si128(access_xor, mask_sid);
  const m128 same_sid = _mm_cmpeq_epi32(not_same_sid, zero);
  const m128 both_read_or_atomic = _mm_and_si128(access_and, mask_read_atomic);
  const m128 no_race =
      _mm_or_si128(_mm_or_si128(not_intersect, same_sid), both_read_or_atomic);
  const int race_mask = _mm_movemask_epi8(_mm_cmpeq_epi32(no_race, zero));
  if (UNLIKELY(race_mask))
    goto SHARED;

STORE : {
  if (typ & kAccessCheckOnly)
    return false;
  // We could also replace different sid's if access is the same,
  // rw weaker and happens before. However, just checking access below
  // is not enough because we also need to check that !both_read_or_atomic
  // (reads from different sids can be concurrent).
  // Theoretically we could replace smaller accesses with larger accesses,
  // but it's unclear if it's worth doing.
  const m128 mask_access_sid = _mm_set1_epi32(0x0000ffff);
  const m128 not_same_sid_access = _mm_and_si128(access_xor, mask_access_sid);
  const m128 same_sid_access = _mm_cmpeq_epi32(not_same_sid_access, zero);
  const m128 access_read_atomic =
      _mm_set1_epi32((typ & (kAccessRead | kAccessAtomic)) << 30);
  const m128 rw_weaker =
      _mm_cmpeq_epi32(_mm_max_epu32(shadow, access_read_atomic), shadow);
  const m128 rewrite = _mm_and_si128(same_sid_access, rw_weaker);
  const int rewrite_mask = _mm_movemask_epi8(rewrite);
  int index = __builtin_ffs(rewrite_mask);
  if (UNLIKELY(index == 0)) {
    const m128 empty = _mm_cmpeq_epi32(shadow, zero);
    const int empty_mask = _mm_movemask_epi8(empty);
    index = __builtin_ffs(empty_mask);
    if (UNLIKELY(index == 0))
      index = (atomic_load_relaxed(&thr->trace_pos) / 2) % 16;
  }
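  // __builtin_ffs returns a 1-based bit position in the byte-granular
  // movemask, so dividing by 4 maps it to one of the four 32-bit shadow
  // slots (the random fallback above is likewise a byte index in 0..15).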
  StoreShadow(&shadow_mem[index / 4], cur.raw());
  // We could zero other slots determined by rewrite_mask.
  // That would help other threads to evict better slots,
  // but it's unclear if it's worth it.
  return false;
}

SHARED:
  m128 thread_epochs = _mm_set1_epi32(0x7fffffff);
  // Need to unwind this because _mm_extract_epi8/_mm_insert_epi32
  // indexes must be constants.
# define LOAD_EPOCH(idx)                                                      \
    if (LIKELY(race_mask & (1 << (idx * 4)))) {                               \
      u8 sid = _mm_extract_epi8(shadow, idx * 4 + 1);                         \
      u16 epoch = static_cast<u16>(thr->clock.Get(static_cast<Sid>(sid)));    \
      thread_epochs = _mm_insert_epi32(thread_epochs, u32(epoch) << 16, idx); \
    }
  LOAD_EPOCH(0);
  LOAD_EPOCH(1);
  LOAD_EPOCH(2);
  LOAD_EPOCH(3);
# undef LOAD_EPOCH
  const m128 mask_epoch = _mm_set1_epi32(0x3fff0000);
  const m128 shadow_epochs = _mm_and_si128(shadow, mask_epoch);
  const m128 concurrent = _mm_cmplt_epi32(thread_epochs, shadow_epochs);
  const int concurrent_mask = _mm_movemask_epi8(concurrent);
  if (LIKELY(concurrent_mask == 0))
    goto STORE;

  DoReportRaceV(thr, shadow_mem, cur, concurrent_mask, shadow, typ);
  return true;
}

# define LOAD_CURRENT_SHADOW(cur, shadow_mem)                          \
    const m128 access = _mm_set1_epi32(static_cast<u32>((cur).raw())); \
    const m128 shadow = _mm_load_si128(reinterpret_cast<m128*>(shadow_mem))
#endif

char* DumpShadow(char* buf, RawShadow raw) {
  if (raw == Shadow::kEmpty) {
    internal_snprintf(buf, 64, "0");
    return buf;
  }
  Shadow s(raw);
  AccessType typ;
  s.GetAccess(nullptr, nullptr, &typ);
  internal_snprintf(buf, 64, "{tid=%u@%u access=0x%x typ=%x}",
                    static_cast<u32>(s.sid()), static_cast<u32>(s.epoch()),
                    s.access(), static_cast<u32>(typ));
  return buf;
}

// TryTrace* and TraceRestart* functions allow turning the memory access and
// func entry/exit callbacks into leaf functions with all associated
// performance benefits. These hottest callbacks do only 2 slow path calls:
// report a race and trace part switching. Race reporting is easy to turn into
// a tail call: we just always return from the runtime after reporting a race.
// But trace part switching is harder because it needs to be in the middle of
// callbacks. To turn it into a tail call we immediately return after
// TraceRestart* functions, but TraceRestart* functions themselves recurse into
// the callback after switching the trace part. As a result the hottest
// callbacks contain only tail calls, which effectively makes them leaf
// functions (can use all registers, no frame setup, etc).
NOINLINE void TraceRestartMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size, AccessType typ) {
  TraceSwitchPart(thr);
  MemoryAccess(thr, pc, addr, size, typ);
}

ALWAYS_INLINE USED void MemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                     uptr size, AccessType typ) {
  RawShadow* shadow_mem = MemToShadow(addr);
  UNUSED char memBuf[4][64];
  DPrintf2("#%d: Access: %d@%d %p/%zd typ=0x%x {%s, %s, %s, %s}\n", thr->tid,
           static_cast<int>(thr->fast_state.sid()),
           static_cast<int>(thr->fast_state.epoch()), (void*)addr, size,
           static_cast<int>(typ), DumpShadow(memBuf[0], shadow_mem[0]),
           DumpShadow(memBuf[1], shadow_mem[1]),
           DumpShadow(memBuf[2], shadow_mem[2]),
           DumpShadow(memBuf[3], shadow_mem[3]));

  FastState fast_state = thr->fast_state;
  Shadow cur(fast_state, addr, size, typ);

  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  if (!TryTraceMemoryAccess(thr, pc, addr, size, typ))
    return TraceRestartMemoryAccess(thr, pc, addr, size, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr, AccessType typ);

NOINLINE
void RestartMemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                           AccessType typ) {
  TraceSwitchPart(thr);
  MemoryAccess16(thr, pc, addr, typ);
}

ALWAYS_INLINE USED void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                                       AccessType typ) {
  const uptr size = 16;
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
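  // Presumably a 16-byte access covers two full 8-byte shadow cells
  // (kShadowCell), so it is modeled as two accesses of size 8 at offset 0,
  // one per cell (the second is handled at the SECOND label below).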
  Shadow cur(fast_state, 0, 8, typ);
  RawShadow* shadow_mem = MemToShadow(addr);
  bool traced = false;
  {
    LOAD_CURRENT_SHADOW(cur, shadow_mem);
    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
      goto SECOND;
    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
      return RestartMemoryAccess16(thr, pc, addr, typ);
    traced = true;
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
  }
SECOND:
  shadow_mem += kShadowCnt;
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartMemoryAccess16(thr, pc, addr, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

NOINLINE
void RestartUnalignedMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                  uptr size, AccessType typ) {
  TraceSwitchPart(thr);
  UnalignedMemoryAccess(thr, pc, addr, size, typ);
}

ALWAYS_INLINE USED void UnalignedMemoryAccess(ThreadState* thr, uptr pc,
                                              uptr addr, uptr size,
                                              AccessType typ) {
  DCHECK_LE(size, 8);
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  RawShadow* shadow_mem = MemToShadow(addr);
  bool traced = false;
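  // size1 is the part of the access that falls into the first shadow cell;
  // any remainder (size2 below) is checked as a second access at offset 0
  // of the following cell.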
  uptr size1 = Min<uptr>(size, RoundUp(addr + 1, kShadowCell) - addr);
  {
    Shadow cur(fast_state, addr, size1, typ);
    LOAD_CURRENT_SHADOW(cur, shadow_mem);
    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
      goto SECOND;
    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
      return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
    traced = true;
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
  }
SECOND:
  uptr size2 = size - size1;
  if (LIKELY(size2 == 0))
    return;
  shadow_mem += kShadowCnt;
  Shadow cur(fast_state, 0, size2, typ);
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

void ShadowSet(RawShadow* p, RawShadow* end, RawShadow v) {
  DCHECK_LE(p, end);
  DCHECK(IsShadowMem(p));
  DCHECK(IsShadowMem(end));
  UNUSED const uptr kAlign = kShadowCnt * kShadowSize;
  DCHECK_EQ(reinterpret_cast<uptr>(p) % kAlign, 0);
  DCHECK_EQ(reinterpret_cast<uptr>(end) % kAlign, 0);
#if !TSAN_VECTORIZE
  for (; p < end; p += kShadowCnt) {
    p[0] = v;
    for (uptr i = 1; i < kShadowCnt; i++) p[i] = Shadow::kEmpty;
  }
#else
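  // One 16-byte store per shadow cell: the vector holds {v, kEmpty, kEmpty,
  // kEmpty}, i.e. only the first slot is populated, mirroring the scalar
  // loop above.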
  m128 vv = _mm_setr_epi32(
      static_cast<u32>(v), static_cast<u32>(Shadow::kEmpty),
      static_cast<u32>(Shadow::kEmpty), static_cast<u32>(Shadow::kEmpty));
  m128* vp = reinterpret_cast<m128*>(p);
  m128* vend = reinterpret_cast<m128*>(end);
  for (; vp < vend; vp++) _mm_store_si128(vp, vv);
#endif
}

static void MemoryRangeSet(uptr addr, uptr size, RawShadow val) {
  if (size == 0)
    return;
  DCHECK_EQ(addr % kShadowCell, 0);
  DCHECK_EQ(size % kShadowCell, 0);
  // If a user passes some insane arguments (memset(0)),
  // let it just crash as usual.
  if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
    return;
  RawShadow* begin = MemToShadow(addr);
  RawShadow* end = begin + size / kShadowCell * kShadowCnt;
  // Don't want to touch lots of shadow memory.
  // If a program maps 10MB stack, there is no need to reset the whole range.
  // UnmapOrDie/MmapFixedNoReserve does not work on Windows.
  if (SANITIZER_WINDOWS ||
      size <= common_flags()->clear_shadow_mmap_threshold) {
    ShadowSet(begin, end, val);
    return;
  }
  // The region is big, reset only beginning and end.
  const uptr kPageSize = GetPageSizeCached();
  // Set at least first kPageSize/2 to page boundary.
  RawShadow* mid1 =
      Min(end, reinterpret_cast<RawShadow*>(RoundUp(
                   reinterpret_cast<uptr>(begin) + kPageSize / 2, kPageSize)));
  ShadowSet(begin, mid1, val);
  // Reset middle part.
  RawShadow* mid2 = RoundDown(end, kPageSize);
  if (mid2 > mid1) {
    if (!MmapFixedSuperNoReserve((uptr)mid1, (uptr)mid2 - (uptr)mid1))
      Die();
  }
  // Set the ending.
  ShadowSet(mid2, end, val);
}

void MemoryResetRange(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  uptr addr1 = RoundDown(addr, kShadowCell);
  uptr size1 = RoundUp(size + addr - addr1, kShadowCell);
  MemoryRangeSet(addr1, size1, Shadow::kEmpty);
}

void MemoryRangeFreed(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  // Callers must lock the slot to ensure synchronization with the reset.
  // The problem with "freed" memory is that it's not "monotonic"
  // with respect to bug detection: freed memory is bad to access,
  // but then if the heap block is reallocated later, it's good to access.
  // As a result, a garbage "freed" shadow can lead to a false positive
  // if it happens to match a real free in the thread trace,
  // but the heap block was reallocated before the current memory access,
  // so it's still good to access. It's not the case with data races.
  DCHECK(thr->slot_locked);
  DCHECK_EQ(addr % kShadowCell, 0);
  size = RoundUp(size, kShadowCell);
  // Processing more than 1k (2k of shadow) is expensive,
  // can cause excessive memory consumption (the user does not necessarily
  // touch the whole range) and is most likely unnecessary.
  size = Min<uptr>(size, 1024);
  const AccessType typ = kAccessWrite | kAccessFree | kAccessSlotLocked |
                         kAccessCheckOnly | kAccessNoRodata;
  TraceMemoryAccessRange(thr, pc, addr, size, typ);
  RawShadow* shadow_mem = MemToShadow(addr);
  Shadow cur(thr->fast_state, 0, kShadowCell, typ);
#if TSAN_VECTORIZE
  const m128 access = _mm_set1_epi32(static_cast<u32>(cur.raw()));
  const m128 freed = _mm_setr_epi32(
      static_cast<u32>(Shadow::FreedMarker()),
      static_cast<u32>(Shadow::FreedInfo(cur.sid(), cur.epoch())), 0, 0);
  for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
    const m128 shadow = _mm_load_si128((m128*)shadow_mem);
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
    _mm_store_si128((m128*)shadow_mem, freed);
  }
#else
  for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, 0, 0, typ)))
      return;
    StoreShadow(&shadow_mem[0], Shadow::FreedMarker());
    StoreShadow(&shadow_mem[1], Shadow::FreedInfo(cur.sid(), cur.epoch()));
    StoreShadow(&shadow_mem[2], Shadow::kEmpty);
    StoreShadow(&shadow_mem[3], Shadow::kEmpty);
  }
#endif
}

void MemoryRangeImitateWrite(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  DCHECK_EQ(addr % kShadowCell, 0);
  size = RoundUp(size, kShadowCell);
  TraceMemoryAccessRange(thr, pc, addr, size, kAccessWrite);
  Shadow cur(thr->fast_state, 0, 8, kAccessWrite);
  MemoryRangeSet(addr, size, cur.raw());
}

void MemoryRangeImitateWriteOrResetRange(ThreadState* thr, uptr pc, uptr addr,
                                         uptr size) {
  if (thr->ignore_reads_and_writes == 0)
    MemoryRangeImitateWrite(thr, pc, addr, size);
  else
    MemoryResetRange(thr, pc, addr, size);
}

ALWAYS_INLINE
bool MemoryAccessRangeOne(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                          AccessType typ) {
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return false;
  return CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

template <bool is_read>
NOINLINE void RestartMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size) {
  TraceSwitchPart(thr);
  MemoryAccessRangeT<is_read>(thr, pc, addr, size);
}

template <bool is_read>
void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  const AccessType typ =
      (is_read ? kAccessRead : kAccessWrite) | kAccessNoRodata;
  RawShadow* shadow_mem = MemToShadow(addr);
  DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_read=%d\n", thr->tid,
           (void*)pc, (void*)addr, (int)size, is_read);

#if SANITIZER_DEBUG
  if (!IsAppMem(addr)) {
    Printf("Access to non app mem %zx\n", addr);
    DCHECK(IsAppMem(addr));
  }
  if (!IsAppMem(addr + size - 1)) {
    Printf("Access to non app mem %zx\n", addr + size - 1);
    DCHECK(IsAppMem(addr + size - 1));
  }
  if (!IsShadowMem(shadow_mem)) {
    Printf("Bad shadow addr %p (%zx)\n", static_cast<void*>(shadow_mem), addr);
    DCHECK(IsShadowMem(shadow_mem));
  }
  if (!IsShadowMem(shadow_mem + size * kShadowCnt - 1)) {
    Printf("Bad shadow addr %p (%zx)\n",
           static_cast<void*>(shadow_mem + size * kShadowCnt - 1),
           addr + size - 1);
    DCHECK(IsShadowMem(shadow_mem + size * kShadowCnt - 1));
  }
#endif

  // Access to .rodata section, no races here.
  // Measurements show that it can be 10-20% of all memory accesses.
  // Check here once to not check for every access separately.
  // Note: we could (and should) do this only for the is_read case
  // (writes shouldn't go to .rodata). But it happens in Chromium tests:
  // https://bugs.chromium.org/p/chromium/issues/detail?id=1275581#c19
  // Details are unknown since it happens only on CI machines.
  if (*shadow_mem == Shadow::kRodata)
    return;

  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;

  if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartMemoryAccessRange<is_read>(thr, pc, addr, size);

  if (UNLIKELY(addr % kShadowCell)) {
    // Handle unaligned beginning, if any.
    uptr size1 = Min(size, RoundUp(addr, kShadowCell) - addr);
    size -= size1;
    Shadow cur(fast_state, addr, size1, typ);
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
    shadow_mem += kShadowCnt;
  }
  // Handle middle part, if any.
  Shadow cur(fast_state, 0, kShadowCell, typ);
  for (; size >= kShadowCell; size -= kShadowCell, shadow_mem += kShadowCnt) {
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
  }
  // Handle ending, if any.
  if (UNLIKELY(size)) {
    Shadow cur(fast_state, 0, size, typ);
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
  }
}

template void MemoryAccessRangeT<true>(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size);
template void MemoryAccessRangeT<false>(ThreadState* thr, uptr pc, uptr addr,
                                        uptr size);

}  // namespace __tsan

#if !SANITIZER_GO
// Must be included in this file to make sure everything is inlined.
# include "tsan_interface.inc"
#endif