From 9d90a9b79e3c0fa46b21162bc041a4869751ee1d Mon Sep 17 00:00:00 2001 From: Shubham Verma Date: Mon, 11 Apr 2022 16:53:29 -0400 Subject: [PATCH] LoopVersioner changes for OffHeap support Teaches LoopVersioner about dataAddr loads Whenever we see a dataAddrPointer; all tests gets generated, for null and array type, to use the child of the aloadi tree. Fix `depsForLoopEntryPrep` tree pattern matching to allow for array accesses to the 1st element using the dataAddrPointer, as that case does not have an offset tree. Fixing `requiresPrivatization()` to return false for dataAddr nodes as they are internal-pointers. Getting `childInRequiredForm` for the offheap case accessed the wrong node with an extra `getFirstChild()` at the end. Signed-off-by: Abdulrahman Alattas --- compiler/optimizer/LoopVersioner.cpp | 195 +++++++++++++++++---------- 1 file changed, 127 insertions(+), 68 deletions(-) diff --git a/compiler/optimizer/LoopVersioner.cpp b/compiler/optimizer/LoopVersioner.cpp index 0d353d03cdd..19ea5661532 100644 --- a/compiler/optimizer/LoopVersioner.cpp +++ b/compiler/optimizer/LoopVersioner.cpp @@ -8023,6 +8023,9 @@ bool TR_LoopVersioner::requiresPrivatization(TR::Node *node) if (!node->getOpCode().hasSymbolReference()) return false; + if (node->isDataAddrPointer()) + return false; + if (node->getOpCodeValue() == TR::loadaddr || node->getOpCode().isTreeTop()) return false; @@ -8194,19 +8197,26 @@ bool TR_LoopVersioner::depsForLoopEntryPrep( // If this is an indirect access // if (node->isInternalPointer() || - (((node->getOpCode().isIndirect() && node->getOpCode().hasSymbolReference() && !node->getSymbolReference()->getSymbol()->isStatic()) || node->getOpCode().isArrayLength()) && - !node->getFirstChild()->isInternalPointer())) + (((node->getOpCode().isIndirect() && node->getOpCode().hasSymbolReference() && + !node->getSymbolReference()->getSymbol()->isStatic()) || + node->getOpCode().isArrayLength()) && !node->getFirstChild()->isInternalPointer())) { - if (!node->getFirstChild()->isThisPointer()) + TR::Node *firstChild = node->getFirstChild(); + if (!firstChild->isThisPointer()) { - dumpOptDetailsCreatingTest("null", node->getFirstChild()); - TR::Node *ifacmpeqNode = TR::Node::createif(TR::ifacmpeq, node->getFirstChild(), TR::Node::aconst(node, 0), _exitGotoTarget); + // if we are dealing with dataAddr load use the child for null check. + // We can reach here either with dataAddr load or with parent of dataAddr load. + if (firstChild->isDataAddrPointer()) + firstChild = firstChild->getFirstChild(); // Array base is the child of dataAddr load + + dumpOptDetailsCreatingTest("null", firstChild); + TR::Node *ifacmpeqNode = TR::Node::createif(TR::ifacmpeq, firstChild, TR::Node::aconst(node, 0), _exitGotoTarget); LoopEntryPrep *nullTestPrep = addLoopEntryPrepDep(LoopEntryPrep::TEST, ifacmpeqNode, deps, visited); if (nullTestPrep == NULL) { - dumpOptDetailsFailedToCreateTest("null", node->getFirstChild()); + dumpOptDetailsFailedToCreateTest("null", firstChild); return false; } @@ -8214,7 +8224,6 @@ bool TR_LoopVersioner::depsForLoopEntryPrep( _curLoop->_nullTestPreps.insert(std::make_pair(refExpr, nullTestPrep)); } - TR::Node *firstChild = node->getFirstChild(); bool instanceOfReqd = true; TR_OpaqueClassBlock *otherClassObject = NULL; TR::Node *duplicateClassPtr = NULL; @@ -8324,21 +8333,21 @@ bool TR_LoopVersioner::depsForLoopEntryPrep( { if (otherClassObject) { - dumpOptDetailsCreatingTest("type", node->getFirstChild()); - TR::Node *instanceofNode = TR::Node::createWithSymRef(TR::instanceof, 2, 2, node->getFirstChild(), duplicateClassPtr, comp()->getSymRefTab()->findOrCreateInstanceOfSymbolRef(comp()->getMethodSymbol())); + dumpOptDetailsCreatingTest("type", firstChild); + TR::Node *instanceofNode = TR::Node::createWithSymRef(TR::instanceof, 2, 2, firstChild, duplicateClassPtr, comp()->getSymRefTab()->findOrCreateInstanceOfSymbolRef(comp()->getMethodSymbol())); TR::Node *ificmpeqNode = TR::Node::createif(TR::ificmpeq, instanceofNode, TR::Node::create(node, TR::iconst, 0, 0), _exitGotoTarget); if (addLoopEntryPrepDep(LoopEntryPrep::TEST, ificmpeqNode, deps, visited) == NULL) { - dumpOptDetailsFailedToCreateTest("type", node->getFirstChild()); + dumpOptDetailsFailedToCreateTest("type", firstChild); return false; } } else if (testIsArray) { #ifdef J9_PROJECT_SPECIFIC - dumpOptDetailsCreatingTest("array type", node->getFirstChild()); + dumpOptDetailsCreatingTest("array type", firstChild); - TR::Node *vftLoad = TR::Node::createWithSymRef(TR::aloadi, 1, 1, node->getFirstChild(), comp()->getSymRefTab()->findOrCreateVftSymbolRef()); + TR::Node *vftLoad = TR::Node::createWithSymRef(TR::aloadi, 1, 1, firstChild, comp()->getSymRefTab()->findOrCreateVftSymbolRef()); //TR::Node *componentTypeLoad = TR::Node::create(TR::aloadi, 1, vftLoad, comp()->getSymRefTab()->findOrCreateArrayComponentTypeSymbolRef()); TR::Node *classFlag = NULL; if (comp()->target().is32Bit()) @@ -8355,7 +8364,7 @@ bool TR_LoopVersioner::depsForLoopEntryPrep( TR::Node *cmp = TR::Node::createif(TR::ificmpne, andNode, andConstNode, _exitGotoTarget); if (addLoopEntryPrepDep(LoopEntryPrep::TEST, cmp, deps, visited) == NULL) { - dumpOptDetailsFailedToCreateTest("array type", node->getFirstChild()); + dumpOptDetailsFailedToCreateTest("array type", firstChild); return false; } #endif @@ -8376,81 +8385,131 @@ bool TR_LoopVersioner::depsForLoopEntryPrep( // This is an array access; so we need to insert explicit // checks to mimic the bounds check for the access. // - TR::Node *offset = node->getFirstChild()->getSecondChild(); TR::Node *childInRequiredForm = NULL; TR::Node *indexNode = NULL; - - int32_t headerSize = static_cast(TR::Compiler->om.contiguousArrayHeaderSizeInBytes()); - static struct temps - { - TR::ILOpCodes addOp; - TR::ILOpCodes constOp; - int32_t k; - } - a[] = - {{TR::iadd, TR::iconst, headerSize}, - {TR::isub, TR::iconst, -headerSize}, - {TR::ladd, TR::lconst, headerSize}, - {TR::lsub, TR::lconst, -headerSize}}; - - for (int32_t index = sizeof(a)/sizeof(temps) - 1; index >= 0; --index) - { - if (offset->getOpCodeValue() == a[index].addOp && - offset->getSecondChild()->getOpCodeValue() == a[index].constOp && - offset->getSecondChild()->getInt() == a[index].k) - { - childInRequiredForm = offset->getFirstChild(); - break; - } - } - - TR::DataType type = offset->getType(); + TR::DataType type = TR::NoType; // compute the right shift width - // int32_t dataWidth = TR::Symbol::convertTypeToSize(node->getDataType()); - if (comp()->useCompressedPointers() && - node->getDataType() == TR::Address) + if (comp()->useCompressedPointers() && node->getDataType() == TR::Address) dataWidth = TR::Compiler->om.sizeofReferenceField(); - int32_t shiftWidth = TR::TransformUtil::convertWidthToShift(dataWidth); - if (childInRequiredForm) + // Holds the array node, or the dataAddr node for off heap + TR::Node *arrayBaseAddressNode = NULL; + + if (node->getFirstChild()->isDataAddrPointer()) + { + // This protects the case when off heap allocation is enabled and array + // access is for the first element in the array, eliminating offset tree. + // *loadi <== node + // aloadi dataAddr_ptr (dataAddrPointer sharedMemory ) + // aload objt_ptr + + // If array access does not have offset tree, its accessing the first element using the dataAddrPointer + arrayBaseAddressNode = node->getFirstChild(); + indexNode = TR::Node::iconst(node, 0); + type = TR::Int32; + } + else { - if (childInRequiredForm->getOpCodeValue() == TR::ishl || childInRequiredForm->getOpCodeValue() == TR::lshl) + int32_t headerSize = static_cast(TR::Compiler->om.contiguousArrayHeaderSizeInBytes()); + arrayBaseAddressNode = node->getFirstChild()->getFirstChild(); + TR::Node *offset = node->getFirstChild()->getSecondChild(); + type = offset->getType(); + + /* childInRequiredForm is expected to be shift/multiply node. When + off heap allocation is enabled and the sibling is dataAddr pointer + node, instead of using objt_ptr + header_size we load address of first + array element from dataAddr field in the header and add the offset. */ + if (arrayBaseAddressNode->isDataAddrPointer()) { - if (childInRequiredForm->getSecondChild()->getOpCode().isLoadConst() && - (childInRequiredForm->getSecondChild()->getInt() == shiftWidth)) - indexNode = childInRequiredForm->getFirstChild(); + // This is what trees will look like with dataAddr load + // aloadi < node + // aladd + // ==>aload dataAddr_ptr (dataAddrPointer sharedMemory ) + // shl/mul < childInRequiredForm/offset + childInRequiredForm = node->getFirstChild()->getSecondChild(); } - else if (childInRequiredForm->getOpCodeValue() == TR::imul || childInRequiredForm->getOpCodeValue() == TR::lmul) + else { - if (childInRequiredForm->getSecondChild()->getOpCode().isLoadConst() && - (childInRequiredForm->getSecondChild()->getInt() == dataWidth)) - indexNode = childInRequiredForm->getFirstChild(); + // aloadi < node + // aladd + // ==>aload objt_ptr + // add < offset + // shl/mul < childInRequiredForm + // const array_header + static struct temps + { + TR::ILOpCodes addOp; + TR::ILOpCodes constOp; + int32_t k; + } + a[] = + {{TR::iadd, TR::iconst, headerSize}, + {TR::isub, TR::iconst, -headerSize}, + {TR::ladd, TR::lconst, headerSize}, + {TR::lsub, TR::lconst, -headerSize}}; + for (int32_t index = sizeof(a)/sizeof(temps) - 1; index >= 0; --index) + { + if (offset->getOpCodeValue() == a[index].addOp && + offset->getSecondChild()->getOpCodeValue() == a[index].constOp && + offset->getSecondChild()->getInt() == a[index].k) + { + childInRequiredForm = offset->getFirstChild(); + break; + } + } } - } + int32_t shiftWidth = TR::TransformUtil::convertWidthToShift(dataWidth); - if (!indexNode) - { - if (!childInRequiredForm) + if (childInRequiredForm) { - TR::Node *constNode = TR::Node::create(node, type.isInt32() ? TR::iconst : TR::lconst, 0, 0); - indexNode = TR::Node::create(type.isInt32() ? TR::isub : TR::lsub, 2, offset, constNode); - if (type.isInt64()) - constNode->setLongInt((int64_t)headerSize); - else - constNode->setInt((int64_t)headerSize); + if (childInRequiredForm->getOpCodeValue() == TR::ishl || childInRequiredForm->getOpCodeValue() == TR::lshl) + { + if (childInRequiredForm->getSecondChild()->getOpCode().isLoadConst() && + (childInRequiredForm->getSecondChild()->getInt() == shiftWidth)) + indexNode = childInRequiredForm->getFirstChild(); + } + else if (childInRequiredForm->getOpCodeValue() == TR::imul || childInRequiredForm->getOpCodeValue() == TR::lmul) + { + if (childInRequiredForm->getSecondChild()->getOpCode().isLoadConst() && + (childInRequiredForm->getSecondChild()->getInt() == dataWidth)) + indexNode = childInRequiredForm->getFirstChild(); + } } - else - indexNode = childInRequiredForm; + + if (!indexNode) + { + if (!childInRequiredForm) + { + // We don't need to subtract the header size when we are using dataAddr pointer node + // because we already have a pointer to the first element in the array. + // For Reference see Walker.cpp:calculateElementAddressInContiguousArray + if (arrayBaseAddressNode->isDataAddrPointer()) + indexNode = offset; + else + { + TR::Node *constNode = TR::Node::create(node, type.isInt32() ? TR::iconst : TR::lconst, 0, 0); + indexNode = TR::Node::create(type.isInt32() ? TR::isub : TR::lsub, 2, offset, constNode); + if (type.isInt64()) + constNode->setLongInt((int64_t)headerSize); + else + constNode->setInt((int64_t)headerSize); + } + } + else + indexNode = childInRequiredForm; - indexNode = TR::Node::create(type.isInt32() ? TR::iushr : TR::lushr, 2, indexNode, - TR::Node::create(node, TR::iconst, 0, shiftWidth)); + indexNode = TR::Node::create(type.isInt32() ? TR::iushr : TR::lushr, 2, indexNode, + TR::Node::create(node, TR::iconst, 0, shiftWidth)); + } } - TR::Node *base = node->getFirstChild()->getFirstChild(); + TR::Node *base = arrayBaseAddressNode; + if (base->isDataAddrPointer()) + base = base->getFirstChild(); TR::Node *arrayLengthNode = TR::Node::create(TR::arraylength, 1, base); arrayLengthNode->setArrayStride(dataWidth);