# Source code for mars.tensor.base.isin

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

from ... import opcodes as OperandDef
from ...core import recursive_tile
from ...serialization.serializables import KeyField, BoolField
from ...utils import has_unknown_shape
from ..operands import TensorOperand, TensorOperandMixin
from ..datasource import tensor as astensor
from ..array_utils import as_same_device, device
from ..core import TensorOrder
from .ravel import ravel


class TensorIsIn(TensorOperand, TensorOperandMixin):
    """Operand computing element-wise membership of one tensor in another.

    The operand takes two inputs, ``[element, test_elements]``, and produces
    one output with the shape of ``element``. The output dtype defaults to
    ``bool`` when none is supplied.
    """

    _op_type_ = OperandDef.ISIN

    # Serialized fields: the two inputs plus the flags that are forwarded
    # unchanged to ``xp.isin`` in ``execute``.
    _element = KeyField('element')
    _test_elements = KeyField('test_elements')
    _assume_unique = BoolField('assume_unique')
    _invert = BoolField('invert')

    def __init__(self, assume_unique=None, invert=None, dtype=None, **kw):
        # Use a boolean dtype unless the caller asked for something else.
        if dtype is None:
            dtype = np.dtype(bool)
        super().__init__(_assume_unique=assume_unique, _invert=invert,
                         dtype=dtype, **kw)

    @property
    def element(self):
        """First input: the tensor whose values are tested."""
        return self._element

    @property
    def test_elements(self):
        """Second input: the values to test against."""
        return self._test_elements

    @property
    def assume_unique(self):
        """Flag passed as ``assume_unique`` to ``xp.isin``."""
        return self._assume_unique

    @property
    def invert(self):
        """Flag passed as ``invert`` to ``xp.isin``."""
        return self._invert

    def _set_inputs(self, inputs):
        super()._set_inputs(inputs)
        # Keep the named fields in sync with the positional inputs.
        first, second = self._inputs[0], self._inputs[1]
        self._element = first
        self._test_elements = second

    def __call__(self, element, test_elements):
        """Build the output tensor for ``element`` and ``test_elements``.

        Both arguments are converted with ``astensor``; ``test_elements`` is
        additionally flattened with ``ravel``. The result has the shape of
        the converted ``element`` and C order.
        """
        element = astensor(element)
        test_elements = ravel(astensor(test_elements))

        return self.new_tensor([element, test_elements], element.shape,
                               order=TensorOrder.C_ORDER)

    @classmethod
    def tile(cls, op):
        """Split the operand into one chunk operand per chunk of ``element``.

        ``test_elements`` is needed as a single chunk. When it has a
        different chunk count it is rechunked to its full length; if its
        shape is unknown, a bare ``yield`` is issued first (presumably so the
        tiling framework can resolve the shape -- confirm against the tiler).
        """
        source = op.element
        candidates = op.test_elements
        result = op.outputs[0]

        if len(candidates.chunks) != 1:
            if has_unknown_shape(candidates):
                yield
            rechunked = candidates.rechunk(len(candidates))
            candidates = yield from recursive_tile(rechunked)
        candidates_chunk = candidates.chunks[0]

        # Every chunk of ``element`` is paired with the single
        # ``test_elements`` chunk; the output chunk mirrors the input chunk's
        # shape and index.
        out_chunks = [
            op.copy().reset_key().new_chunk(
                [src_chunk, candidates_chunk], shape=src_chunk.shape,
                index=src_chunk.index, order=result.order)
            for src_chunk in source.chunks
        ]

        tiled_op = op.copy()
        return tiled_op.new_tensors(
            [source, candidates], result.shape, order=result.order,
            chunks=out_chunks, nsplits=source.nsplits)

    @classmethod
    def execute(cls, ctx, op):
        """Run ``xp.isin`` on the chunk data and store the result in ``ctx``."""
        chunk_data = [ctx[inp.key] for inp in op.inputs]
        (element, test_elements), device_id, xp = as_same_device(
            chunk_data, device=op.device, ret_extra=True)

        with device(device_id):
            membership = xp.isin(element, test_elements,
                                 assume_unique=op.assume_unique,
                                 invert=op.invert)
            ctx[op.outputs[0].key] = membership


def isin(element, test_elements, assume_unique=False, invert=False):
    """
    Calculates `element in test_elements`, broadcasting over `element` only.
    Returns a boolean array of the same shape as `element` that is True
    where an element of `element` is in `test_elements` and False otherwise.

    Parameters
    ----------
    element : array_like
        Input tensor.
    test_elements : array_like
        The values against which to test each value of `element`.
        This argument is flattened if it is a tensor or array_like.
        See notes for behavior with non-array-like parameters.
    assume_unique : bool, optional
        If True, the input tensors are both assumed to be unique, which
        can speed up the calculation.  Default is False.
    invert : bool, optional
        If True, the values in the returned tensor are inverted, as if
        calculating `element not in test_elements`. Default is False.
        ``mt.isin(a, b, invert=True)`` is equivalent to (but faster
        than) ``mt.invert(mt.isin(a, b))``.

    Returns
    -------
    isin : Tensor, bool
        Has the same shape as `element`. The values `element[isin]`
        are in `test_elements`.

    See Also
    --------
    in1d : Flattened version of this function.

    Notes
    -----
    `isin` is an element-wise function version of the python keyword `in`.
    ``isin(a, b)`` is roughly equivalent to
    ``mt.array([item in b for item in a])`` if `a` and `b` are 1-D sequences.

    `element` and `test_elements` are converted to tensors if they are not
    already. If `test_elements` is a set (or other non-sequence collection)
    it will be converted to an object tensor with one element, rather than a
    tensor of the values contained in `test_elements`. This is a consequence
    of the `tensor` constructor's way of handling non-sequence collections.
    Converting the set to a list usually gives the desired behavior.

    Examples
    --------
    >>> import mars.tensor as mt

    >>> element = 2*mt.arange(4).reshape((2, 2))
    >>> element.execute()
    array([[0, 2],
           [4, 6]])
    >>> test_elements = [1, 2, 4, 8]
    >>> mask = mt.isin(element, test_elements)
    >>> mask.execute()
    array([[False,  True],
           [ True, False]])
    >>> element[mask].execute()
    array([2, 4])
    >>> mask = mt.isin(element, test_elements, invert=True)
    >>> mask.execute()
    array([[ True, False],
           [False,  True]])
    >>> element[mask].execute()
    array([0, 6])

    Because of how `array` handles sets, the following does not
    work as expected:

    >>> test_set = {1, 2, 4, 8}
    >>> mt.isin(element, test_set).execute()
    array([[False, False],
           [False, False]])

    Casting the set to a list gives the expected result:

    >>> mt.isin(element, list(test_set)).execute()
    array([[False,  True],
           [ True, False]])
    """
    # The operand's __call__ converts both arguments to tensors and flattens
    # `test_elements` before building the output tensor.
    op = TensorIsIn(assume_unique=assume_unique, invert=invert)
    return op(element, test_elements)