#pragma once

#include <stddef.h>
#include <stdint.h>   // int32_t / uint32_t
#include <stdio.h>    // printf
#include <stdlib.h>   // exit, malloc/free
#include <string.h>   // memcpy

#include <algorithm>  // std::min
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>

namespace dynasty {
namespace common {

// Batch / Channel / Height / Width view of a shape with up to four dimensions.
struct BCHW {
  uint32_t b_;
  uint32_t c_;
  uint32_t h_;
  uint32_t w_;

  BCHW() : b_(0), c_(0), h_(0), w_(0) {}

  // Missing trailing dimensions default to 1.
  BCHW(const std::vector<int32_t>& shape) {
    b_ = shape.size() > 0 ? static_cast<uint32_t>(shape.at(0)) : 1;
    c_ = shape.size() > 1 ? static_cast<uint32_t>(shape.at(1)) : 1;
    h_ = shape.size() > 2 ? static_cast<uint32_t>(shape.at(2)) : 1;
    w_ = shape.size() > 3 ? static_cast<uint32_t>(shape.at(3)) : 1;
  }

  // Number of elements in one batch (c * h * w).
  size_t getBatchSize() const {
    return c_ * h_ * w_;
  }

  // Number of elements in one channel (h * w).
  size_t getChannelSize() const {
    return h_ * w_;
  }

  // Total number of elements (b * c * h * w).
  size_t getSize() const {
    return b_ * c_ * h_ * w_;
  }
};
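
// Usage sketch (illustrative only, not part of the API):
//   BCHW bchw(std::vector<int32_t>{2, 3, 224, 224});
//   bchw.getChannelSize();  // 224 * 224
//   bchw.getBatchSize();    // 3 * 224 * 224
//   bchw.getSize();         // 2 * 3 * 224 * 224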

//--------------------------------------------------------------------
// Tensor Shape
//--------------------------------------------------------------------
class TensorShape : private std::vector<int32_t> {
  // Negative numbers denote unknown symbolic dimensions; each negative
  // number represents a unique symbolic dimension.
  // Private inheritance is used to prevent confusion between the number of
  // elements and the number of dimensions (std::vector::size() vs. Size()).
 public:
  TensorShape() = default;

  TensorShape(const TensorShape& /*other*/) = default;
  TensorShape& operator=(const TensorShape& /*other*/) = default;

  TensorShape(TensorShape&& /*other*/) = default;
  TensorShape& operator=(TensorShape&& /*other*/) = default;

  TensorShape(const std::vector<int32_t>& dims) : std::vector<int32_t>(dims) {}

  TensorShape(std::vector<int32_t>&& dims) : std::vector<int32_t>(std::move(dims)) {}

  TensorShape(const std::initializer_list<int32_t>& dims) : std::vector<int32_t>(dims) {}

  // Build a shape from the half-open dimension range [start, end) of <dims>.
  TensorShape(const std::vector<int32_t>& dims, size_t start, size_t end) {
    assign(dims.begin() + start, dims.begin() + end);
  }

  /**
     Return the dimension specified by <idx>.
  */
  const int32_t& operator[](size_t idx) const {
    return std::vector<int32_t>::operator[](idx);
  }

  int32_t& operator[](size_t idx) {
    return std::vector<int32_t>::operator[](idx);
  }

  bool operator==(const TensorShape& other) const noexcept {
    auto thisVector = static_cast<const std::vector<int32_t>*>(this);
    auto otherVector = static_cast<const std::vector<int32_t>*>(&other);
    return *thisVector == *otherVector;
  }

  bool operator!=(const TensorShape& other) const noexcept {
    return !(*this == other);
  }

  // Number of dimensions (rank) of this shape.
  size_t NumDimensions() const noexcept {
    return size();
  }

  /**
     Copy the dimensions into <dims>, writing at most <num_dims> entries.
  */
  void CopyDims(int32_t* dims, size_t num_dims) const {
    memcpy(dims, data(), sizeof(value_type) * std::min(num_dims, NumDimensions()));
  }

  /**
     Return the underlying vector representation.
  */
  const std::vector<int32_t>& GetDims() const { return *this; }

  /**
   * Return the total number of elements. Returns 1 for an empty (rank 0) TensorShape.
   *
   * May return a negative value if the shape contains unknown symbolic (negative) dimensions.
   */
  int32_t Size() const {
    size_t arraySize = size();
    return SizeHelper(0, arraySize);
  }

  /**
     Return the total number of elements up to (but not including) the specified dimension.
     If the dimension interval is empty (dimension == 0), return 1.
     @param dimension Return size up to this dimension. Value must be between 0 and this->NumDimensions(), inclusive.
  */
  int32_t SizeToDimension(size_t dimension) const {
    const size_t num_dims = size();
    if (dimension > num_dims) {
      printf("SizeToDimension: incorrect dimension\n");
      exit(-1);
    }
    return SizeHelper(0, dimension);
  }

  /**
     Return the total number of elements from the specified dimension to the end of the tensor shape.
     If the dimension interval is empty (dimension == this->NumDimensions()), return 1.
     @param dimension Return size from this dimension to the end. Value must be between 0 and this->NumDimensions(),
     inclusive.
  */
  int32_t SizeFromDimension(size_t dimension) const {
    const size_t num_dims = size();
    if (dimension > num_dims) {
      printf("SizeFromDimension: incorrect dimension\n");
      exit(-1);
    }
    return SizeHelper(dimension, num_dims);
  }
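
  // Worked example (follows directly from SizeHelper's half-open interval):
  //   for a shape {2, 3, 4, 5}:
  //     SizeToDimension(2)   == 2 * 3 == 6
  //     SizeFromDimension(2) == 4 * 5 == 20
  //     SizeToDimension(0)   == 1   (empty interval)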

  std::string ToString() const {
    std::string result;

    result.append("{");
    bool first = true;
    for (auto dim : (*this)) {
      if (!first) {
        result.append(",");
      }
      result.append(std::to_string(dim));
      first = false;
    }
    result.append("}");

    return result;
  }

  /**
     Calculate the product of the dimensions in the half-open interval [start, end).
     Assumes start and end are between 0 and this->NumDimensions(), inclusive, and that
     start <= end. Returns 1 for an empty interval.
  */
  int32_t SizeHelper(size_t start, size_t end) const {
    // The result is used for memory allocations, so callers should validate that it does not overflow.
    int32_t size = 1;
    for (size_t i = start; i < end; i++) {
      size *= (*this)[i];
    }
    return size;
  }

  /**
     An empty shape or a 1-D shape of {1} is regarded as a scalar tensor.
  */
  bool IsScalar() const {
    size_t len = size();
    return len == 0 || (len == 1 && operator[](0) == 1);
  }

  BCHW GetBCHW() const {
    return BCHW(GetDims());
  }
};
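
// Usage sketch (illustrative only, not part of the API):
//   TensorShape shape({1, 3, 224, 224});
//   shape.NumDimensions();   // 4
//   shape.Size();            // 1 * 3 * 224 * 224 = 150528
//   shape.ToString();        // "{1,3,224,224}"
//   shape.GetBCHW().c_;      // 3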

//---------------------------------------------------------------------------------------
// Tensor
// . Placeholder for a piece of memory, with additional shape information.
// . Abstract class with virtual functions host() and device().
//---------------------------------------------------------------------------------------
using Deletor = void(*)(void*);
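
// Example deleters (illustrative): a tensor that owns a malloc'ed buffer can pass
// ::free, while a tensor that merely views memory owned elsewhere can pass nullptr,
// in which case ~Tensor() leaves the buffer untouched.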

class Tensor : public std::enable_shared_from_this<Tensor> {
 public:
  Tensor() = default;
  Tensor(const TensorShape& shape, void* p_data, Deletor deletor)
      : p_data_(p_data), shape_(shape), deletor_(deletor) {}
  virtual ~Tensor() {
    if (deletor_) deletor_(p_data_);
  }

  // Force use through (shared) pointers once the tensor has been created.
  Tensor(const Tensor& other) = delete;
  Tensor(Tensor&& other) = delete;
  Tensor& operator=(const Tensor& other) = delete;  // copy assignment
  Tensor& operator=(Tensor&& other) = delete;       // move assignment

  /**
     Returns the shape of the tensor.
  */
  const TensorShape& Shape() const noexcept { return shape_; }

  /**
     May return nullptr if the tensor size is zero.
  */
  template<typename T>
  T* MutableData() {
    return reinterpret_cast<T*>(p_data_);
  }

  template<typename T>
  std::pair<T*, size_t> MutableDataAndSize() {
    return std::make_pair(MutableData<T>(), Size());
  }

  template<typename T>
  const T* Data() const {
    return reinterpret_cast<const T*>(p_data_);
  }

  template<typename T>
  std::pair<const T*, size_t> DataAndSize() const {
    return std::make_pair(Data<T>(), Size());
  }

  // Total element count. Note that shape_.Size() is signed and may be negative for
  // symbolic shapes, in which case the conversion to size_t wraps around.
  size_t Size() const { return shape_.Size(); }
  const std::vector<int32_t>& GetShapeVector() const { return shape_.GetDims(); }

  virtual std::shared_ptr<Tensor> host() = 0;
  virtual std::shared_ptr<Tensor> device() = 0;

 protected:
  void* p_data_ {};
  TensorShape shape_ {};
  Deletor deletor_ {};
};

//----------------------------------------------------------------------------------
// CPU Tensor
//-----------------------------------------------------------------------------------
template <typename T>
class CPUTensor : public Tensor {
 public:
  CPUTensor() = default;
  CPUTensor(const TensorShape& shape, void* p_data, Deletor deletor = nullptr)
      : Tensor(shape, p_data, deletor) {}
  virtual ~CPUTensor() = default;

  std::shared_ptr<Tensor> host() override;
  std::shared_ptr<Tensor> device() override;
};
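
// Usage sketch (illustrative only; host()/device() are only declared here and are
// presumably defined in backend-specific source files):
//   auto* buf = static_cast<float*>(malloc(sizeof(float) * 150528));
//   auto tensor = std::make_shared<CPUTensor<float>>(
//       TensorShape({1, 3, 224, 224}), buf, ::free);
//   float* data = tensor->MutableData<float>();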

//----------------------------------------------------------------------------------
// CUDA Tensor
//-----------------------------------------------------------------------------------
template <typename T>
class CUDATensor : public Tensor {
 public:
  CUDATensor() = default;
  CUDATensor(const TensorShape& shape, void* p_data, Deletor deletor = nullptr)
      : Tensor(shape, p_data, deletor) {}
  virtual ~CUDATensor() = default;

  std::shared_ptr<Tensor> host() override;
  std::shared_ptr<Tensor> device() override;
};

//----------------------------------------------------------------------------------
// OpenCL Tensor
//-----------------------------------------------------------------------------------
template <typename T>
class OpenCLTensor : public Tensor {
 public:
  OpenCLTensor() = default;
  OpenCLTensor(const TensorShape& shape, void* p_data, Deletor deletor = nullptr)
      : Tensor(shape, p_data, deletor) {}
  virtual ~OpenCLTensor() = default;

  std::shared_ptr<Tensor> host() override;
  std::shared_ptr<Tensor> device() override;
};

}  // namespace common
}  // namespace dynasty