parallel_move_resolver.cc
// Copyright (c) 2023, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

#include "vm/compiler/backend/parallel_move_resolver.h"

namespace dart {

// Simple dynamically allocated array of fixed length.
template <typename Subclass, typename Element>
class FixedArray {
 public:
  static Subclass& Allocate(intptr_t length) {
    static_assert(Utils::IsAligned(alignof(Subclass), alignof(Element)));
    auto result =
        reinterpret_cast<void*>(Thread::Current()->zone()->AllocUnsafe(
            sizeof(Subclass) + length * sizeof(Element)));
    return *new (result) Subclass(length);
  }

  intptr_t length() const { return length_; }

  Element& operator[](intptr_t i) {
    ASSERT(0 <= i && i < length_);
    return data()[i];
  }

  const Element& operator[](intptr_t i) const {
    ASSERT(0 <= i && i < length_);
    return data()[i];
  }

  Element* data() { return OPEN_ARRAY_START(Element, Element); }
  const Element* data() const { return OPEN_ARRAY_START(Element, Element); }

  Element* begin() { return data(); }
  const Element* begin() const { return data(); }

  Element* end() { return data() + length_; }
  const Element* end() const { return data() + length_; }

 protected:
  explicit FixedArray(intptr_t length) : length_(length) {}

 private:
  intptr_t length_;

  DISALLOW_COPY_AND_ASSIGN(FixedArray);
};
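
// Note on layout: |FixedArray::Allocate| performs a single zone allocation of
// sizeof(Subclass) + length * sizeof(Element) bytes, so the elements are
// stored inline directly after the object itself. For example,
// MoveSchedule::Allocate(3) below reserves space for one MoveSchedule header
// followed by three ParallelMoveResolver::Op slots.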

class MoveSchedule : public FixedArray<MoveSchedule, ParallelMoveResolver::Op> {
 public:
  // Converts the given list of |ParallelMoveResolver::Op| operations
  // into a |MoveSchedule| and filters out all |kNop| operations.
  static const MoveSchedule& From(
      const GrowableArray<ParallelMoveResolver::Op>& ops) {
    intptr_t count = 0;
    for (const auto& op : ops) {
      if (op.kind != ParallelMoveResolver::OpKind::kNop) count++;
    }

    MoveSchedule& result = MoveSchedule::Allocate(count);
    intptr_t i = 0;
    for (const auto& op : ops) {
      if (op.kind != ParallelMoveResolver::OpKind::kNop) {
        result[i++] = op;
      }
    }
    return result;
  }

 private:
  friend class FixedArray<MoveSchedule, ParallelMoveResolver::Op>;

  explicit MoveSchedule(intptr_t length) : FixedArray(length) {}

  DISALLOW_COPY_AND_ASSIGN(MoveSchedule);
};

static uword RegMaskBit(Register reg) {
  return ((reg) != kNoRegister) ? (1 << (reg)) : 0;
}

void ParallelMoveResolver::Resolve(ParallelMoveInstr* parallel_move) {
  ASSERT(moves_.is_empty());

  // Build up a worklist of moves.
  BuildInitialMoveList(parallel_move);

  const InstructionSource& move_source = InstructionSource(
      TokenPosition::kParallelMove, parallel_move->inlining_id());
  for (intptr_t i = 0; i < moves_.length(); ++i) {
    const MoveOperands& move = moves_[i];
    // Skip constants to perform them last. They don't block other moves
    // and skipping such moves with register destinations keeps those
    // registers free for the whole algorithm.
    if (!move.IsEliminated() && !move.src().IsConstant()) {
      PerformMove(move_source, i);
    }
  }

  // Perform the moves with constant sources.
  for (const auto& move : moves_) {
    if (!move.IsEliminated()) {
      ASSERT(move.src().IsConstant());
      scheduled_ops_.Add({OpKind::kMove, move});
    }
  }
  moves_.Clear();

  // Schedule is ready. Update parallel move itself.
  parallel_move->set_move_schedule(MoveSchedule::From(scheduled_ops_));
  scheduled_ops_.Clear();
}

void ParallelMoveResolver::BuildInitialMoveList(
    ParallelMoveInstr* parallel_move) {
  // Perform a linear sweep of the moves to add them to the initial list of
  // moves to perform, ignoring any move that is redundant (the source is
  // the same as the destination, the destination is ignored and
  // unallocated, or the move was already eliminated).
  for (int i = 0; i < parallel_move->NumMoves(); i++) {
    MoveOperands* move = parallel_move->MoveOperandsAt(i);
    if (!move->IsRedundant()) moves_.Add(*move);
  }
}

void ParallelMoveResolver::PerformMove(const InstructionSource& source,
                                       int index) {
  // Each call to this function performs a move and deletes it from the move
  // graph. We first recursively perform any move blocking this one. We
  // mark a move as "pending" on entry to PerformMove in order to detect
  // cycles in the move graph. We use operand swaps to resolve cycles,
  // which means that a call to PerformMove could change any source operand
  // in the move graph.
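  //
  // A small worked example (with hypothetical registers): suppose the
  // worklist holds the cycle {R1 -> R2, R2 -> R1}. Performing the first
  // move marks it pending (its destination R2 is saved in a local) and
  // recurses into R2 -> R1, whose destination R1 is blocked only by the
  // pending first move. A pending blocker signals a cycle, so a swap of
  // R2 and R1 is scheduled and the first move's source is rewritten to
  // R2, turning it into R2 -> R2, which is then eliminated as redundant.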

  ASSERT(!moves_[index].IsPending());
  ASSERT(!moves_[index].IsRedundant());

  // Clear this move's destination to indicate a pending move. The actual
  // destination is saved in a stack-allocated local. Recursion may allow
  // multiple moves to be pending.
  ASSERT(!moves_[index].src().IsInvalid());
  Location destination = moves_[index].MarkPending();

  // Perform a depth-first traversal of the move graph to resolve
  // dependencies. Any unperformed, unpending move with a source the same
  // as this one's destination blocks this one, so recursively perform all
  // such moves.
  for (int i = 0; i < moves_.length(); ++i) {
    const MoveOperands& other_move = moves_[i];
    if (other_move.Blocks(destination) && !other_move.IsPending()) {
      // Though PerformMove can change any source operand in the move graph,
      // this call cannot create a blocking move via a swap (this loop does
      // not miss any). Assume there is a non-blocking move with source A
      // and this move is blocked on source B and there is a swap of A and
      // B. Then A and B must be involved in the same cycle (or they would
      // not be swapped). Since this move's destination is B and there is
      // only a single incoming edge to an operand, this move must also be
      // involved in the same cycle. In that case, the blocking move will
      // be created but will be "pending" when we return from PerformMove.
      PerformMove(source, i);
    }
  }

  // We are about to resolve this move and don't need it marked as
  // pending, so restore its destination.
  moves_[index].ClearPending(destination);

  // This move's source may have changed due to swaps to resolve cycles and
  // so it may now be the last move in the cycle. If so remove it.
  if (moves_[index].src().Equals(destination)) {
    moves_[index].Eliminate();
    return;
  }

  // The move may be blocked on at most one pending move, in which case
  // we have a cycle. Search for such a blocking move and perform a swap to
  // resolve it.
  for (auto& other_move : moves_) {
    if (other_move.Blocks(destination)) {
      ASSERT(other_move.IsPending());
      AddSwapToSchedule(index);
      return;
    }
  }

  // This move is not blocked.
  AddMoveToSchedule(index);
}

void ParallelMoveResolver::AddMoveToSchedule(int index) {
  auto& move = moves_[index];
  scheduled_ops_.Add({OpKind::kMove, move});
  move.Eliminate();
}

void ParallelMoveResolver::AddSwapToSchedule(int index) {
  auto& move = moves_[index];
  const auto source = move.src();
  const auto destination = move.dest();

  scheduled_ops_.Add({OpKind::kSwap, move});

  // The swap of source and destination has executed a move from source to
  // destination.
  move.Eliminate();

  // Any unperformed (including pending) move with a source of either
  // this move's source or destination needs to have its source
  // changed to reflect the state of affairs after the swap.
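  // For example (hypothetical registers): after swapping R3 and R1, the old
  // value of R1 lives in R3 and vice versa, so a remaining move that read
  // R1 is rewritten to read R3, and a move that read R3 now reads R1.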
  for (auto& other_move : moves_) {
    if (other_move.Blocks(source)) {
      other_move.set_src(destination);
    } else if (other_move.Blocks(destination)) {
      other_move.set_src(source);
    }
  }
}

void ParallelMoveEmitter::EmitNativeCode() {
  const auto& move_schedule = parallel_move_->move_schedule();
  for (intptr_t i = 0; i < move_schedule.length(); i++) {
    current_move_ = i;
    const auto& op = move_schedule[i];
    switch (op.kind) {
      case ParallelMoveResolver::OpKind::kNop:
        // |MoveSchedule::From| is expected to filter nops.
        UNREACHABLE();
        break;
      case ParallelMoveResolver::OpKind::kMove:
        EmitMove(op.operands);
        break;
      case ParallelMoveResolver::OpKind::kSwap:
        EmitSwap(op.operands);
        break;
    }
  }
}

void ParallelMoveEmitter::EmitMove(const MoveOperands& move) {
  Location src = move.src();
  Location dst = move.dest();
#if defined(TARGET_ARCH_RISCV32) || defined(TARGET_ARCH_RISCV64)
  dst = compiler_->RebaseIfImprovesAddressing(dst);
  src = compiler_->RebaseIfImprovesAddressing(src);
#endif
  ParallelMoveEmitter::TemporaryAllocator temp(this, /*blocked=*/kNoRegister);
  compiler_->EmitMove(dst, src, &temp);
#if defined(DEBUG)
  // Allocating a scratch register here may cause stack spilling. Neither the
  // source nor destination register should be SP-relative in that case.
  for (const Location& loc : {dst, src}) {
    ASSERT(!temp.DidAllocateTemporary() || !loc.HasStackIndex() ||
           loc.base_reg() != SPREG);
  }
#endif
}

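// Returns true if |loc| may be used as a scratch location: no remaining op
// in the schedule still reads it (as a source, or as the destination of a
// swap), and a later plain move overwrites it anyway.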
bool ParallelMoveEmitter::IsScratchLocation(Location loc) {
  const auto& move_schedule = parallel_move_->move_schedule();
  for (intptr_t i = current_move_; i < move_schedule.length(); i++) {
    const auto& op = move_schedule[i];
    if (op.operands.src().Equals(loc) ||
        (op.kind == ParallelMoveResolver::OpKind::kSwap &&
         op.operands.dest().Equals(loc))) {
      return false;
    }
  }

  for (intptr_t i = current_move_ + 1; i < move_schedule.length(); i++) {
    const auto& op = move_schedule[i];
    if (op.kind == ParallelMoveResolver::OpKind::kMove &&
        op.operands.dest().Equals(loc)) {
      return true;
    }
  }

  return false;
}

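// Picks a scratch register in [first_free_register, last_free_register] that
// is not in |blocked_mask|, preferring one whose current value is not needed
// by the remaining schedule (see IsScratchLocation above) so that no spill is
// required; otherwise it falls back to any unblocked register and sets
// |*spilled| so the caller saves and restores it around the use.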
intptr_t ParallelMoveEmitter::AllocateScratchRegister(
    Location::Kind kind,
    uword blocked_mask,
    intptr_t first_free_register,
    intptr_t last_free_register,
    bool* spilled) {
  COMPILE_ASSERT(static_cast<intptr_t>(sizeof(blocked_mask)) * kBitsPerByte >=
                 kNumberOfFpuRegisters);
  COMPILE_ASSERT(static_cast<intptr_t>(sizeof(blocked_mask)) * kBitsPerByte >=
                 kNumberOfCpuRegisters);
  intptr_t scratch = -1;
  for (intptr_t reg = first_free_register; reg <= last_free_register; reg++) {
    if ((((1 << reg) & blocked_mask) == 0) &&
        IsScratchLocation(Location::MachineRegisterLocation(kind, reg))) {
      scratch = reg;
      break;
    }
  }

  if (scratch == -1) {
    *spilled = true;
    for (intptr_t reg = first_free_register; reg <= last_free_register; reg++) {
      if (((1 << reg) & blocked_mask) == 0) {
        scratch = reg;
        break;
      }
    }
  } else {
    *spilled = false;
  }

  return scratch;
}

ParallelMoveEmitter::ScratchFpuRegisterScope::ScratchFpuRegisterScope(
    ParallelMoveEmitter* emitter,
    FpuRegister blocked)
    : emitter_(emitter), reg_(kNoFpuRegister), spilled_(false) {
  uword blocked_mask =
      ((blocked != kNoFpuRegister) ? 1 << blocked : 0) | 1 << FpuTMP;
  reg_ = static_cast<FpuRegister>(
      emitter_->AllocateScratchRegister(Location::kFpuRegister, blocked_mask, 0,
                                        kNumberOfFpuRegisters - 1, &spilled_));

  if (spilled_) {
    emitter->SpillFpuScratch(reg_);
  }
}

ParallelMoveEmitter::ScratchFpuRegisterScope::~ScratchFpuRegisterScope() {
  if (spilled_) {
    emitter_->RestoreFpuScratch(reg_);
  }
}

ParallelMoveEmitter::TemporaryAllocator::TemporaryAllocator(
    ParallelMoveEmitter* emitter,
    Register blocked)
    : emitter_(emitter),
      blocked_(blocked),
      reg_(kNoRegister),
      spilled_(false) {}

Register ParallelMoveEmitter::TemporaryAllocator::AllocateTemporary() {
  ASSERT(reg_ == kNoRegister);

  uword blocked_mask = RegMaskBit(blocked_) | kReservedCpuRegisters;
  if (emitter_->compiler_->intrinsic_mode()) {
    // Block additional registers that must be preserved for intrinsics.
    blocked_mask |= RegMaskBit(ARGS_DESC_REG);
#if !defined(TARGET_ARCH_IA32)
    // Need to preserve CODE_REG to be able to store the PC marker
    // and load the pool pointer.
    blocked_mask |= RegMaskBit(CODE_REG);
#endif
  }
  reg_ = static_cast<Register>(
      emitter_->AllocateScratchRegister(Location::kRegister, blocked_mask, 0,
                                        kNumberOfCpuRegisters - 1, &spilled_));

  if (spilled_) {
    emitter_->SpillScratch(reg_);
  }

  DEBUG_ONLY(allocated_ = true;)
  return reg_;
}

void ParallelMoveEmitter::TemporaryAllocator::ReleaseTemporary() {
  if (spilled_) {
    emitter_->RestoreScratch(reg_);
  }
  reg_ = kNoRegister;
}

ParallelMoveEmitter::ScratchRegisterScope::ScratchRegisterScope(
    ParallelMoveEmitter* emitter,
    Register blocked)
    : allocator_(emitter, blocked) {
  reg_ = allocator_.AllocateTemporary();
}

ParallelMoveEmitter::ScratchRegisterScope::~ScratchRegisterScope() {
  allocator_.ReleaseTemporary();
}

template <>
void FlowGraphSerializer::WriteTrait<const MoveSchedule*>::Write(
    FlowGraphSerializer* s,
    const MoveSchedule* schedule) {
  ASSERT(schedule != nullptr);
  const intptr_t len = schedule->length();
  s->Write<intptr_t>(len);
  for (intptr_t i = 0; i < len; ++i) {
    const auto& op = (*schedule)[i];
    s->Write<uint8_t>(static_cast<uint8_t>(op.kind));
    op.operands.Write(s);
  }
}

template <>
const MoveSchedule* FlowGraphDeserializer::ReadTrait<const MoveSchedule*>::Read(
    FlowGraphDeserializer* d) {
  const intptr_t len = d->Read<intptr_t>();
  MoveSchedule& schedule = MoveSchedule::Allocate(len);
  for (intptr_t i = 0; i < len; ++i) {
    schedule[i].kind =
        static_cast<ParallelMoveResolver::OpKind>(d->Read<uint8_t>());
    schedule[i].operands = MoveOperands(d);
  }
  return &schedule;
}

}  // namespace dart