Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(Closes #1431) Initial changes to add the canonical loop trans abstract class #2190

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/psyclone/psyir/transformations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@
import LoopTiling2DTrans
from psyclone.psyir.transformations.loop_trans import LoopTrans
from psyclone.psyir.transformations.nan_test_trans import NanTestTrans
from psyclone.psyir.transformations.omp_canonical_loop_trans import \
OMPCanonicalLoopTrans
from psyclone.psyir.transformations.omp_loop_trans import OMPLoopTrans
from psyclone.psyir.transformations.omp_target_trans import OMPTargetTrans
from psyclone.psyir.transformations.omp_taskwait_trans import OMPTaskwaitTrans
Expand Down Expand Up @@ -116,6 +118,7 @@
'LoopTiling2DTrans',
'LoopTrans',
'NanTestTrans',
'OMPCanonicalLoopTrans',
'OMPLoopTrans',
'OMPTaskwaitTrans',
'OMPTargetTrans',
Expand Down
277 changes: 277 additions & 0 deletions src/psyclone/psyir/transformations/omp_canonical_loop_trans.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,277 @@
# -----------------------------------------------------------------------------
# BSD 3-Clause License
#
# Copyright (c) 2017-2022, Science and Technology Facilities Council.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
# Authors: A. B. G. Chalk, STFC Daresbury Lab

''' Base OpenMP Loop transformation to validate directives that require
canonical loop form.'''

import abc

from psyclone.core import VariablesAccessInfo, Signature
from psyclone.psyir.nodes import (
Reference, BinaryOperation, Literal)
from psyclone.psyir.symbols import DataSymbol, INTEGER_TYPE, ScalarType
from psyclone.psyir.transformations.parallel_loop_trans import \
ParallelLoopTrans

class OMPCanonicalLoopTrans(ParallelLoopTrans, metaclass=abc.ABCMeta):

def _check_valid_format(self, node, loop_var):
'''
TODO
'''
# We know node contains a Reference to loop_var to reach this call.

if isinstance(node, Reference):
return True

# All other allowed cases are binary operations
if not isinstance(node, BinaryOperation):
return False
# We allow var + a2, a2 + var
if (node.operator == BinaryOperation.Operator.ADD and
isinstance(node.children[0], Reference) and
isinstance(node.children[1], Reference)):
return True

# We allow var - a2 and a2 - var
if (node.operator == BinaryOperation.Operator.SUB and
isinstance(node.children[0], Reference) and
isinstance(node.children[1], Reference)):
return True

# We allow var * a1 and a1 * var
if (node.operator == BinaryOperation.Operator.MUL and
isinstance(node.children[0], Reference) and
isinstance(node.children[1], Reference)):
return True

# Simple cases analysed, remaining allowed cases:
# a1 * var + a2 - Handled
# a2 + a1 * var - Handled
# a1 * var - a2
# a2 - a1 * var
# var * a1 + a2 - Handled
# a2 + var * a1 - Handled
# var * a1 - a2
# a2 - var * a1

# Check for a1 * var + a2 - temporary version. Quick test
# seems to show this is
# BinaryOperation[ADD:BinaryOperation[MUL:a1, var],a2]
# We allow the inner binary operation to be a1*var or var*a1
# I think this also handles a2 + (a1 * var)
if (node.operator == BinaryOperation.Operator.ADD and
isinstance(node.children[1], Reference) and
node.children[1].symbol != loop_var and
isinstance(node.children[0], BinaryOperation) and
node.children[1].operator == BinaryOperation.Operator.MUL and
(node.children[0].children[0].symbol == loop_var or
node.children[0].children[1].symbol == loop_var)):
return True

# Check for a2 - a1*var or a2 - var*a1
if (node.operator == BinaryOperation.Operator.SUB and
isinstance(node.children[0], Reference) and
node.children[0].symbol != loop_var and
isinstance(node.children[1], BinaryOperation) and
node.children[1].operator == BinaryOperation.Operator.MUL and
(node.children[1].children[0].symbol == loop_var or
node.children[1].children[1].symbol == loop_var)):
return True

# Check for a1*var - a2 or var*a1 - a2
if (node.operator == BinaryOperation.Operator.SUB and
isinstance(node.children[1], Reference) and
node.children[1].symbol != loop_var and
isinstance(node.children[0], BinaryOperation) and
node.children[0].operator == BinaryOperation.Operator.MUL and
(node.children[0].children[0].symbol == loop_var or
node.children[0].children[1].symbol == loop_var)):
return True
Copy link
Collaborator

@hiker hiker Jun 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These tests still don't feel right. Too many very similar tests, and I am not even sure that they are complete (what about -a1\*var-a2 or +a1\*var+a2, which from memory adds another UnaryOperation node).
SymPy offers a polynomial class, which gives you access to the coefficients, and for the example above at least it will convert them all to a similar structure:

>>> sp.Poly(a1 * var - a2, var).all_coeffs()
[a1, -a2]
>>> sp.Poly(-a1 * var - a2, var).all_coeffs()
[-a1, -a2]
>>> sp.Poly(a2 - a1 * var,var).all_coeffs()
[-a1, a2]

etc. Unfortunately, it does still first simplify the expressions, so

>>> sp.Poly(a2 - a1 * var + 2*var,var).all_coeffs()
[2 - a1, a2]

Could we perhaps in a case like this rewrite the PSyIR with the equivalent expression (psyad is already doing this when expanding terms). Or you could additionally count the number of references and symbols in the original PSyIR, which should be less than or equal to 4? I checked, even in the SymPy internal representation the -a1 is a special node (multiplication of a1 by -1, so that doesn't really help).

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I looked again and my interpretation of the OpenMP standard was also wrong, in that a1 and a2 simply need to be "loop invariant integer expressions", and not simply variables.
I think I should have already checked that any Reference objects in these terms are loop invariant in validate before reaching this function, so what actually needs to be done is:

  1. Check we have some structure that could be handled as one of the allowed structures.
  2. Check that the a1 and a2 expressions (if present) are integer expressions.
    I wonder if 2 would have been handled already during input/tree construction or if we can just ignore it.
    I don't have a good idea how to check the structure conforms though, maybe the sp.Poly class you recommend will work straight out the box? But I'm not clear how to use this - could you provide a quick example?

I'm trying to work out what this actually disallows, and I think any operation involving all integer values/references, and only +-* operations is allowed (since you could transform x*y*var_outer into a1*var_outer since a1=x*y. Equally division (maybe any integer-safe) operations should be ok provided they are not performed on var_outer. Does that make sense or have I missed something?



# Any other structure is not allowed
return False


def validate(self, node, options=None):
'''
Perform validation checks before applying the transformation

:param node: the node we are checking.
:type node: :py:class:`psyclone.psyir.nodes.Node`
:param options: a dictionary with options for transformations.\
This transform supports "collapse", which is the\
number of nested loops to collapse.
:type options: Optional[Dict[str, Any]]
:param int options["collapse"]: number of nested loops to collapse \
or None.
:param bool options["force"]: whether to force parallelisation of the \
target loop (i.e. ignore any dependence analysis).

:raises TransformationError: if the \
:py:class:`psyclone.psyir.nodes.Loop` loop iterates over \
colours.
:raises TransformationError: if 'collapse' is supplied with an \
invalid number of loops.
:raises TransformationError: if there is a data dependency that \
prevents the parallelisation of the loop unless \
`options["force"]` is True.

'''
super().validate(node, options=options)

# We need to check the loop is of the canonical loop form.
# For a loop to satisfy the OpenMP canonical loop form, the following
# must be true:
# 1. Loop variable must be an integer type.
# 2. The start and stop conditions must either be a variable of a type
# compatible to the loop variable, and either invariant with
# respect to the outermost associated loop, or one of a few
# specific formats based upon the outermost associated loop, with
# two values that are invariant with respect to the outermost
# associated loop
# 3. The step must be invariant with respect to the outermost
# associated loop, and an integer type.

# The allowed formats are:
# var-outer
# var-outer + a2
# a2 + var-outer
# var-outer - a2
# a2 - var-outer
# a1 * var-outer
# a1 * var-outer + a2
# a2 + a1 * var-outer
# a1 * var-outer - a2
# a2 - a1 * var-outer
# var-outer * a1
# var-outer * a1 + a2
# a2 + var-outer * a1
# var-outer * a1 - a2
# a2 - var-outer * a1

outer_loop_var = node.variable
# I believe the outmost associated loop should always be node
# and any sub nodes we don't need to check according to the
# OpenMP manual - even in the case where we use collapse.

# TODO This appears to be false, and depends if we allow collapse.
# For now, it only applies to the immediate child, but if we allow
# collapse we need to check all loops we're collapsing.

# TODO All of the expressions here must be integer expressions
# and we should probably check this explicitly if possible.

# Check the loop variable is an integer type
if (not isinstance(node.variable, (ScalarType, DataSymbol)) or
node.variable.datatype.intrinsic !=
ScalarType.Intrinsic.INTEGER):
assert False # TODO Throw error message

all_var_accesses = VariablesAccessInfo(node.children[3])
# Find all of the References in the start condition.
start_refs = node.start_expr.walk(Reference)
# For each reference, if its not the node.variable check it is not
# modified inside the loop.
contains_outermost_loop_var = False
start_var_accesses = VariablesAccessInfo(node.children[0])
for ref in start_refs:
if ref.symbol is not outer_loop_var:
# Have to check if the variable is written to in the loop.
# If so we assume it is invariant w.r.t the outer loop.
sig, _ = ref.get_signature_and_indices()
all_ref_accesses = all_var_accesses.get(sig)
start_ref_accesses = start_var_accesses.get(sig)
if (all_ref_accesses is not None and
all_ref_accesses.is_written()
and start_ref_accesses is not None
and start_ref_accesses.is_read()):
assert False
else:
contains_outermost_loop_var = True

# If the loop variable exists in the start condition, we need to check
# the format is one of the allowed formats.
if contains_outermost_loop_var:
if not self._check_valid_format(node.start_expr, outer_loop_var):
assert False # TODO Throw error message

# Same check for the stop conditions
stop_refs = node.stop_expr.walk(Reference)
stop_var_accesses = VariablesAccessInfo(node.children[1])
# For each reference, if its not the node.variable check it is not
# modified inside the loop.
contains_outermost_loop_var = False
for ref in stop_refs:
if ref.symbol is not outer_loop_var:
# Have to check if the variable is written to in the loop.
# If so we assume it is invariant w.r.t the outer loop.
sig, _ = ref.get_signature_and_indices()
all_ref_accesses = all_var_accesses.get(sig)
stop_ref_accesses = stop_var_accesses.get(sig)
if (all_ref_accesses is not None and
all_ref_accesses.is_written()
and stop_ref_accesses is not None
and stop_ref_accesses.is_read()):
assert False
else:
contains_outermost_loop_var = True

# If the loop variable exists in the start condition, we need to check
# the format is one of the allowed formats.
if contains_outermost_loop_var:
if not self._check_valid_format(node.stop_expr, outer_loop_var):
assert False # TODO Throw error message

# Step must be a loop invariantinteger expression
step_refs = node.step_expr.walk(Reference)
step_var_accesses = VariablesAccessInfo(node.children[2])
# For each reference, if its not the node.variable check it is not
# modified inside the loop.
for ref in step_refs:
if ref.symbol is not outer_loop_var:
# Have to check if the variable is written to in the loop.
# If so we assume it is invariant w.r.t the outer loop.
sig, _ = ref.get_signature_and_indices()
all_ref_accesses = all_var_accesses.get(sig)
step_ref_accesses = step_var_accesses.get(sig)
if (all_ref_accesses is not None and
all_ref_accesses.is_written()
and step_ref_accesses is not None
and step_ref_accesses.is_read()):
assert False
else:
# Step cannot access outer loop variable I think
assert False
6 changes: 3 additions & 3 deletions src/psyclone/psyir/transformations/omp_loop_trans.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@
Routine, OMPDoDirective, OMPLoopDirective, OMPParallelDoDirective,
OMPTeamsDistributeParallelDoDirective, OMPScheduleClause)
from psyclone.psyir.symbols import DataSymbol, INTEGER_TYPE
from psyclone.psyir.transformations.parallel_loop_trans import \
ParallelLoopTrans
from psyclone.psyir.transformations.omp_canonical_loop_trans import \
OMPCanonicalLoopTrans

MAP_STR_TO_LOOP_DIRECTIVES = {
"do": OMPDoDirective,
Expand All @@ -52,7 +52,7 @@
VALID_OMP_DIRECTIVES = list(MAP_STR_TO_LOOP_DIRECTIVES.keys())


class OMPLoopTrans(ParallelLoopTrans):
class OMPLoopTrans(OMPCanonicalLoopTrans):
'''
Adds an OpenMP directive to parallelise this loop. It can insert different
directives such as "omp do/for", "omp parallel do/for", "omp teams
Expand Down
4 changes: 3 additions & 1 deletion src/psyclone/transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@
from psyclone.psyir.symbols import ArgumentInterface, DataSymbol, \
DeferredType, INTEGER_TYPE, ScalarType, Symbol, SymbolError
from psyclone.psyir.transformations.loop_trans import LoopTrans
from psyclone.psyir.transformations.omp_canonical_loop_trans import \
OMPCanonicalLoopTrans
from psyclone.psyir.transformations.omp_loop_trans import OMPLoopTrans
from psyclone.psyir.transformations.parallel_loop_trans import \
ParallelLoopTrans
Expand Down Expand Up @@ -101,7 +103,7 @@ def check_intergrid(node):
f" is such a kernel.")


class OMPTaskloopTrans(ParallelLoopTrans):
class OMPTaskloopTrans(OMPCanonicalLoopTrans):
'''
Adds an OpenMP taskloop directive to a loop. Only one of grainsize or
num_tasks must be specified.
Expand Down
Loading