@@ -1,3149 +0,0 @@
-"use strict";
-/**
- * @license
- * Copyright 2017 Google Inc. All Rights Reserved.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * =============================================================================
- */
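-// Downlevel helpers (__extends, __awaiter, __generator) emitted by the
-// TypeScript compiler for ES5 output.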
-var __extends = (this && this.__extends) || (function () {
-    var extendStatics = function (d, b) {
-        extendStatics = Object.setPrototypeOf ||
-            ({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||
-            function (d, b) { for (var p in b) if (b.hasOwnProperty(p)) d[p] = b[p]; };
-        return extendStatics(d, b);
-    };
-    return function (d, b) {
-        extendStatics(d, b);
-        function __() { this.constructor = d; }
-        d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());
-    };
-})();
-var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
-    return new (P || (P = Promise))(function (resolve, reject) {
-        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
-        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
-        function step(result) { result.done ? resolve(result.value) : new P(function (resolve) { resolve(result.value); }).then(fulfilled, rejected); }
-        step((generator = generator.apply(thisArg, _arguments || [])).next());
-    });
-};
-var __generator = (this && this.__generator) || function (thisArg, body) {
-    var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;
-    return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
-    function verb(n) { return function (v) { return step([n, v]); }; }
-    function step(op) {
-        if (f) throw new TypeError("Generator is already executing.");
-        while (_) try {
-            if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
-            if (y = 0, t) op = [op[0] & 2, t.value];
-            switch (op[0]) {
-                case 0: case 1: t = op; break;
-                case 4: _.label++; return { value: op[1], done: false };
-                case 5: _.label++; y = op[1]; op = [0]; continue;
-                case 7: op = _.ops.pop(); _.trys.pop(); continue;
-                default:
-                    if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
-                    if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
-                    if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
-                    if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
-                    if (t[2]) _.ops.pop();
-                    _.trys.pop(); continue;
-            }
-            op = body.call(thisArg, _);
-        } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
-        if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
-    }
-};
-Object.defineProperty(exports, "__esModule", { value: true });
-var seedrandom = require("seedrandom");
-var engine_1 = require("../../engine");
-var environment_1 = require("../../environment");
-var log_1 = require("../../log");
-var array_ops_util = require("../../ops/array_ops_util");
-var axis_util = require("../../ops/axis_util");
-var broadcast_util = require("../../ops/broadcast_util");
-var complex_ops_1 = require("../../ops/complex_ops");
-var concat_util = require("../../ops/concat_util");
-var erf_util = require("../../ops/erf_util");
-var gather_nd_util = require("../../ops/gather_nd_util");
-var ops = require("../../ops/ops");
-var ops_1 = require("../../ops/ops");
-var scatter_nd_util = require("../../ops/scatter_nd_util");
-var selu_util = require("../../ops/selu_util");
-var slice_util_1 = require("../../ops/slice_util");
-var tensor_1 = require("../../tensor");
-var types_1 = require("../../types");
-var util = require("../../util");
-var util_1 = require("../../util");
-var backend_1 = require("../backend");
-var backend_util = require("../backend_util");
-var complex_util = require("../complex_util");
-var non_max_suppression_impl_1 = require("../non_max_suppression_impl");
-var split_shared_1 = require("../split_shared");
-var tile_impl_1 = require("../tile_impl");
-var topk_impl_1 = require("../topk_impl");
-var where_impl_1 = require("../where_impl");
-var cpu_util_1 = require("./cpu_util");
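-// Maps a fused-activation name ('linear' | 'relu' | 'elu' | 'relu6' | 'prelu')
-// onto the corresponding backend kernel; used by the fused ops below.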
-function mapActivation(backend, x, activation, preluActivationWeights) {
-    if (activation === 'linear') {
-        return backend.linear(x);
-    }
-    else if (activation === 'relu') {
-        return backend.relu(x);
-    }
-    else if (activation === 'elu') {
-        return backend.elu(x);
-    }
-    else if (activation === 'relu6') {
-        return backend.relu6(x);
-    }
-    else if (activation === 'prelu') {
-        return backend.prelu(x, preluActivationWeights);
-    }
-    throw new Error("Activation " + activation + " has not been implemented for the CPU backend.");
-}
-var MathBackendCPU = /** @class */ (function (_super) {
-    __extends(MathBackendCPU, _super);
-    function MathBackendCPU() {
-        var _this = _super.call(this) || this;
-        _this.blockSize = 48;
-        _this.firstUse = true;
-        _this.data = new backend_1.DataStorage(_this, engine_1.ENGINE);
-        return _this;
-    }
-    MathBackendCPU.prototype.write = function (values, shape, dtype) {
-        if (this.firstUse) {
-            this.firstUse = false;
-            if (environment_1.env().get('IS_NODE')) {
-                log_1.warn('\n============================\n' +
-                    'Hi there 👋. Looks like you are running TensorFlow.js in ' +
-                    'Node.js. To speed things up dramatically, install our node ' +
-                    'backend, which binds to TensorFlow C++, by running ' +
-                    'npm i @tensorflow/tfjs-node, ' +
-                    'or npm i @tensorflow/tfjs-node-gpu if you have CUDA. ' +
-                    'Then call require(\'@tensorflow/tfjs-node\'); (-gpu ' +
-                    'suffix for CUDA) at the start of your program. ' +
-                    'Visit https://github.com/tensorflow/tfjs-node for more details.' +
-                    '\n============================');
-            }
-        }
-        var dataId = {};
-        this.data.set(dataId, { values: values, dtype: dtype });
-        return dataId;
-    };
-    MathBackendCPU.prototype.move = function (dataId, values, shape, dtype) {
-        this.data.set(dataId, { values: values, dtype: dtype });
-    };
-    MathBackendCPU.prototype.numDataIds = function () {
-        return this.data.numDataIds();
-    };
-    MathBackendCPU.prototype.read = function (dataId) {
-        return __awaiter(this, void 0, void 0, function () {
-            return __generator(this, function (_a) {
-                return [2 /*return*/, this.readSync(dataId)];
-            });
-        });
-    };
-    MathBackendCPU.prototype.readSync = function (dataId) {
-        var _a = this.data.get(dataId), dtype = _a.dtype, complexTensors = _a.complexTensors;
-        if (dtype === 'complex64') {
-            var realValues = this.readSync(complexTensors.real.dataId);
-            var imagValues = this.readSync(complexTensors.imag.dataId);
-            return complex_util.mergeRealAndImagArrays(realValues, imagValues);
-        }
-        return this.data.get(dataId).values;
-    };
-    MathBackendCPU.prototype.bufferSync = function (t) {
-        var data = this.readSync(t.dataId);
-        var decodedData = data;
-        if (t.dtype === 'string') {
-            try {
-                // Decode the bytes into string.
-                decodedData = data.map(function (d) { return util.decodeString(d); });
-            }
-            catch (_a) {
-                throw new Error('Failed to decode encoded string bytes into utf-8');
-            }
-        }
-        return ops_1.buffer(t.shape, t.dtype, decodedData);
-    };
-    MathBackendCPU.prototype.makeOutput = function (values, shape, dtype) {
-        var dataId = this.write(values, shape, dtype);
-        return engine_1.ENGINE.makeTensorFromDataId(dataId, shape, dtype, this);
-    };
-    MathBackendCPU.prototype.disposeData = function (dataId) {
-        if (this.data.has(dataId)) {
-            var complexTensors = this.data.get(dataId).complexTensors;
-            if (complexTensors != null) {
-                complexTensors.real.dispose();
-                complexTensors.imag.dispose();
-            }
-            this.data.delete(dataId);
-        }
-    };
-    MathBackendCPU.prototype.time = function (f) {
-        return __awaiter(this, void 0, void 0, function () {
-            var start, kernelMs;
-            return __generator(this, function (_a) {
-                start = util_1.now();
-                f();
-                kernelMs = util_1.now() - start;
-                return [2 /*return*/, { kernelMs: kernelMs }];
-            });
-        });
-    };
-    MathBackendCPU.prototype.memory = function () {
-        return {
-            // Unreliable due to automatic gc. The numbers above are cumulative.
-            unreliable: true,
-            reasons: ['The reported memory is an upper bound. Due to automatic garbage ' +
-                    'collection, the true allocated memory may be less.']
-        };
-    };
-    MathBackendCPU.prototype.complex = function (real, imag) {
-        var result = this.makeOutput(null, real.shape, 'complex64');
-        var resultData = this.data.get(result.dataId);
-        // The backend owns the reference to the underlying real and imaginary
-        // clones. These will explicitly get disposed when the complex tensor is
-        // disposed.
-        resultData.complexTensors = {
-            real: engine_1.ENGINE.keep(real.clone()),
-            imag: engine_1.ENGINE.keep(imag.clone())
-        };
-        return result;
-    };
-    MathBackendCPU.prototype.real = function (input) {
-        var resultData = this.data.get(input.dataId);
-        return resultData.complexTensors.real.clone();
-    };
-    MathBackendCPU.prototype.imag = function (input) {
-        var resultData = this.data.get(input.dataId);
-        return resultData.complexTensors.imag.clone();
-    };
-    MathBackendCPU.prototype.slice = function (x, begin, size) {
-        cpu_util_1.assertNotComplex(x, 'slice');
-        var isContinous = slice_util_1.isSliceContinous(x.shape, begin, size);
-        if (isContinous) {
-            var flatOffset = slice_util_1.computeFlatOffset(begin, x.strides);
-            var length_1 = util.sizeFromShape(size);
-            var vals = this.readSync(x.dataId);
-            return ops_1.tensor(vals.subarray(flatOffset, flatOffset + length_1), size, x.dtype);
-        }
-        var buffer = ops.buffer(size, x.dtype);
-        var xBuf = this.bufferSync(x);
-        for (var i = 0; i < buffer.size; ++i) {
-            var loc = buffer.indexToLoc(i);
-            var xLoc = loc.map(function (idx, j) { return idx + begin[j]; });
-            buffer.values[i] = xBuf.get.apply(xBuf, xLoc);
-        }
-        return buffer.toTensor();
-    };
-    MathBackendCPU.prototype.stridedSlice = function (x, begin, end, strides) {
-        cpu_util_1.assertNotComplex(x, 'stridedSlice');
-        var outShape = slice_util_1.computeOutShape(begin, end, strides);
-        if (outShape.some(function (axis) { return axis === 0; })) {
-            return ops.tensor([], outShape);
-        }
-        var buffer = ops.buffer(outShape, x.dtype);
-        var xBuf = this.bufferSync(x);
-        for (var i = 0; i < buffer.size; i++) {
-            var loc = buffer.indexToLoc(i);
-            var newLoc = new Array(loc.length);
-            for (var j = 0; j < newLoc.length; j++) {
-                newLoc[j] = loc[j] * strides[j] + begin[j];
-            }
-            buffer.set.apply(buffer, [xBuf.get.apply(xBuf, newLoc)].concat(loc));
-        }
-        return buffer.toTensor();
-    };
-    MathBackendCPU.prototype.diag = function (x) {
-        var xVals = this.readSync(x.dataId);
-        var buffer = ops.buffer([x.size, x.size], x.dtype);
-        var vals = buffer.values;
-        for (var i = 0; i < xVals.length; i++) {
-            vals[i * x.size + i] = xVals[i];
-        }
-        return buffer.toTensor();
-    };
-    MathBackendCPU.prototype.unstack = function (x, axis) {
-        var num = x.shape[axis];
-        var outShape = new Array(x.rank - 1);
-        var outIndex = 0;
-        for (var i = 0; i < x.rank; i++) {
-            if (i !== axis) {
-                outShape[outIndex++] = x.shape[i];
-            }
-        }
-        var begin = new Array(x.rank).fill(0);
-        var size = x.shape.slice();
-        size[axis] = 1;
-        var res = new Array(num);
-        for (var i = 0; i < res.length; i++) {
-            begin[axis] = i;
-            res[i] = this.slice(x, begin, size).reshape(outShape);
-        }
-        return res;
-    };
-    MathBackendCPU.prototype.reverse = function (x, axis) {
-        cpu_util_1.assertNotComplex(x, 'reverse');
-        var buffer = ops.buffer(x.shape, x.dtype);
-        var xBuf = this.bufferSync(x);
-        var _loop_1 = function (i) {
-            var outLoc = buffer.indexToLoc(i);
-            var inLoc = outLoc.slice();
-            axis.forEach(function (ax) { return inLoc[ax] = x.shape[ax] - 1 - inLoc[ax]; });
-            buffer.set.apply(buffer, [xBuf.get.apply(xBuf, inLoc)].concat(outLoc));
-        };
-        for (var i = 0; i < buffer.size; i++) {
-            _loop_1(i);
-        }
-        return buffer.toTensor();
-    };
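-    // concat reshapes every input to 2-D, concatenates along the columns, then
-    // restores the final output shape; complex64 inputs are handled
-    // component-wise via recursive calls on their real and imaginary parts.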
-    MathBackendCPU.prototype.concat = function (tensors, axis) {
-        var _this = this;
-        if (tensors[0].dtype === 'complex64') {
-            var reals = tensors.map(function (t) { return complex_ops_1.real(t); });
-            var imags = tensors.map(function (t) { return complex_ops_1.imag(t); });
-            return complex_ops_1.complex(this.concat(reals, axis), this.concat(imags, axis));
-        }
-        var tensors2D = tensors.map(function (t) {
-            var innerSize = util.sizeFromShape(t.shape.slice(axis));
-            return t.as2D(-1, innerSize);
-        });
-        var outShape = concat_util.computeOutShape(tensors2D.map(function (t) { return t.shape; }), 1 /* axis */);
-        var values = ops.buffer(outShape, tensors[0].dtype)
-            .values;
-        if (tensors2D[0].shape[0] === 1) {
-            // Use built-in TypedArray.set() method for speed.
-            var offset_1 = 0;
-            tensors2D.forEach(function (t) {
-                values.set(_this.readSync(t.dataId), offset_1);
-                offset_1 += t.size;
-            });
-        }
-        else {
-            var colOffset_1 = 0;
-            tensors2D.forEach(function (t) {
-                var tVals = _this.readSync(t.dataId);
-                var tIdx = 0;
-                for (var row = 0; row < t.shape[0]; ++row) {
-                    var resIdx = row * outShape[1] + colOffset_1;
-                    for (var col = 0; col < t.shape[1]; ++col) {
-                        values[resIdx + col] = tVals[tIdx++];
-                    }
-                }
-                colOffset_1 += t.shape[1];
-            });
-        }
-        var finalOutShape = concat_util.computeOutShape(tensors.map(function (t) { return t.shape; }), axis);
-        return ops_1.tensor(values, finalOutShape, tensors[0].dtype);
-    };
-    MathBackendCPU.prototype.neg = function (x) {
-        cpu_util_1.assertNotComplex(x, 'neg');
-        return this.multiply(ops.scalar(-1), x);
-    };
-    MathBackendCPU.prototype.add = function (a, b) {
-        if (a.dtype === 'complex64' || b.dtype === 'complex64') {
-            return this.broadcastedBinaryComplexOp(a.cast('complex64'), b.cast('complex64'), function (aReal, aImag, bReal, bImag) {
-                return { real: aReal + bReal, imag: aImag + bImag };
-            });
-        }
-        return this.broadcastedBinaryOp(a, b, types_1.upcastType(a.dtype, b.dtype), function (aValue, bValue) { return aValue + bValue; });
-    };
-    MathBackendCPU.prototype.addN = function (tensors) {
-        var _this = this;
-        cpu_util_1.assertNotComplex(tensors, 'addN');
-        var vals = tensors.map(function (t) { return _this.readSync(t.dataId); });
-        var result = ops.buffer(tensors[0].shape, tensors[0].dtype);
-        var resultVals = result.values;
-        for (var i = 0; i < tensors.length; i++) {
-            var currVals = vals[i];
-            for (var j = 0; j < resultVals.length; j++) {
-                resultVals[j] += currVals[j];
-            }
-        }
-        return result.toTensor();
-    };
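-    // softmax subtracts the per-axis max from the logits before
-    // exponentiating -- the standard trick to avoid overflow in exp().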
-    MathBackendCPU.prototype.softmax = function (logits, dim) {
-        var axes = util.parseAxisParam([dim], logits.shape);
-        var maxLogit = this.max(logits, axes);
-        var expandedShape = axis_util.expandShapeToKeepDim(maxLogit.shape, axes);
-        var a = this.subtract(logits, maxLogit.reshape(expandedShape));
-        var b = this.exp(a);
-        var sumExp = this.sum(b, axes).reshape(expandedShape);
-        return this.realDivide(b, sumExp);
-    };
-    MathBackendCPU.prototype.subtract = function (a, b) {
-        if (a.dtype === 'complex64' || b.dtype === 'complex64') {
-            return this.broadcastedBinaryComplexOp(a.cast('complex64'), b.cast('complex64'), function (aReal, aImag, bReal, bImag) {
-                return { real: aReal - bReal, imag: aImag - bImag };
-            });
-        }
-        return this.broadcastedBinaryOp(a, b, types_1.upcastType(a.dtype, b.dtype), function (aValue, bValue) { return aValue - bValue; });
-    };
-    MathBackendCPU.prototype.pow = function (a, b) {
-        cpu_util_1.assertNotComplex([a, b], 'pow');
-        return this.broadcastedBinaryOp(a, b, a.dtype, function (aValue, bValue) { return Math.pow(aValue, bValue); });
-    };
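-    // batchMatMul: blocked (tiled) triple loop over [leftDim, rightDim,
-    // sharedDim] with blockSize = 48 so each tile of a, b and the output stays
-    // cache-resident; transposes are handled purely through the stride triples
-    // computed below, so the data is never physically rearranged.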
-    MathBackendCPU.prototype.batchMatMul = function (a, b, transposeA, transposeB) {
-        cpu_util_1.assertNotComplex([a, b], 'matMul');
-        var sharedDim = transposeA ? a.shape[1] : a.shape[2];
-        var leftDim = transposeA ? a.shape[2] : a.shape[1];
-        var rightDim = transposeB ? b.shape[1] : b.shape[2];
-        var batchDim = a.shape[0];
-        var aValues = this.readSync(a.dataId);
-        var bValues = this.readSync(b.dataId);
-        var _a = transposeA ?
-            [a.strides[0], 1, a.strides[1]] :
-            [a.strides[0], a.strides[1], 1], aBatch = _a[0], aOuterStep = _a[1], aInnerStep = _a[2];
-        var _b = transposeB ?
-            [1, b.strides[1], b.strides[0]] :
-            [b.strides[1], 1, b.strides[0]], bInnerStep = _b[0], bOuterStep = _b[1], bBatch = _b[2];
-        var size = leftDim * rightDim;
-        var result = ops_1.buffer([batchDim, leftDim, rightDim], a.dtype);
-        var resVals = result.values;
-        var blockSize = this.blockSize;
-        for (var b_1 = 0; b_1 < batchDim; b_1++) {
-            for (var i0 = 0; i0 < leftDim; i0 += blockSize) {
-                for (var j0 = 0; j0 < rightDim; j0 += blockSize) {
-                    for (var k0 = 0; k0 < sharedDim; k0 += blockSize) {
-                        // for when blockSize doesn't evenly divide the input
-                        var iBlock = Math.min(i0 + blockSize, leftDim);
-                        var jBlock = Math.min(j0 + blockSize, rightDim);
-                        var kBlock = Math.min(k0 + blockSize, sharedDim);
-                        for (var i = i0; i < iBlock; i++) {
-                            for (var j = j0; j < jBlock; j++) {
-                                var sum = 0.0;
-                                for (var k = k0; k < kBlock; k++) {
-                                    sum += aValues[b_1 * aBatch + i * aOuterStep + k * aInnerStep] *
-                                        bValues[k * bInnerStep + j * bOuterStep + b_1 * bBatch];
-                                }
-                                resVals[b_1 * size + (i * rightDim + j)] += sum;
-                            }
-                        }
-                    }
-                }
-            }
-        }
-        return result.toTensor();
-    };
-    MathBackendCPU.prototype.fusedBatchMatMul = function (_a) {
-        var a = _a.a, b = _a.b, transposeA = _a.transposeA, transposeB = _a.transposeB, bias = _a.bias, activation = _a.activation, preluActivationWeights = _a.preluActivationWeights;
-        var result = this.batchMatMul(a, b, transposeA, transposeB);
-        if (bias) {
-            result = this.add(result, bias);
-        }
-        if (activation) {
-            result =
-                mapActivation(this, result, activation, preluActivationWeights);
-        }
-        return result;
-    };
-    MathBackendCPU.prototype.multiply = function (a, b) {
-        if (a.dtype === 'complex64' || b.dtype === 'complex64') {
-            return this.broadcastedBinaryComplexOp(a.cast('complex64'), b.cast('complex64'), function (aReal, aImag, bReal, bImag) {
-                return {
-                    real: aReal * bReal - aImag * bImag,
-                    imag: aReal * bImag + aImag * bReal
-                };
-            });
-        }
-        return this.broadcastedBinaryOp(a, b, types_1.upcastType(a.dtype, b.dtype), function (aValue, bValue) { return aValue * bValue; });
-    };
-    MathBackendCPU.prototype.realDivide = function (a, b) {
-        cpu_util_1.assertNotComplex([a, b], 'realDivide');
-        var op = function (a, b) { return a / b; };
-        var outputDtype = 'float32';
-        return this.broadcastedBinaryOp(a, b, outputDtype, op);
-    };
-    MathBackendCPU.prototype.floorDiv = function (a, b) {
-        cpu_util_1.assertNotComplex([a, b], 'floorDiv');
-        var op = function (a, b) { return Math.floor(a / b); };
-        var outputDtype = 'int32';
-        return this.broadcastedBinaryOp(a, b, outputDtype, op);
-    };
-    MathBackendCPU.prototype.sum = function (x, axes) {
-        cpu_util_1.assertNotComplex(x, 'sum');
-        axis_util.assertAxesAreInnerMostDims('sum', axes, x.rank);
-        var _a = axis_util.computeOutAndReduceShapes(x.shape, axes), outShape = _a[0], reduceShape = _a[1];
-        var resultDtype = types_1.upcastType(x.dtype, 'int32');
-        var result = ops.zeros(outShape, resultDtype);
-        var reduceSize = util.sizeFromShape(reduceShape);
-        var vals = this.readSync(result.dataId);
-        var aVals = this.readSync(x.dataId);
-        for (var i = 0; i < vals.length; ++i) {
-            var offset = i * reduceSize;
-            var sum = 0;
-            for (var j = 0; j < reduceSize; ++j) {
-                sum += aVals[offset + j];
-            }
-            vals[i] = sum;
-        }
-        return result;
-    };
-    MathBackendCPU.prototype.prod = function (x, axes) {
-        cpu_util_1.assertNotComplex(x, 'prod');
-        var _a = axis_util.computeOutAndReduceShapes(x.shape, axes), outShape = _a[0], reduceShape = _a[1];
-        var resultDtype = types_1.upcastType(x.dtype, 'int32');
-        var result = ops.zeros(outShape, resultDtype);
-        var reduceSize = util.sizeFromShape(reduceShape);
-        var vals = this.readSync(result.dataId);
-        var aVals = this.readSync(x.dataId);
-        for (var i = 0; i < vals.length; ++i) {
-            var offset = i * reduceSize;
-            var prod = 1;
-            for (var j = 0; j < reduceSize; ++j) {
-                prod *= aVals[offset + j];
-            }
-            vals[i] = prod;
-        }
-        return result;
-    };
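-    // unsortedSegmentSum builds a 0/1 mask per segment id and reduces with
-    // mul + sum; simple, but costs O(numSegments * x.size).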
-    MathBackendCPU.prototype.unsortedSegmentSum = function (x, segmentIds, numSegments) {
-        cpu_util_1.assertNotComplex(x, 'unsortedSegmentSum');
-        var res = [];
-        // Reshape the segment id's so that they can be broadcast with
-        // x. The new shape should be [segmentIds.shape, 1, ..., 1]
-        var numIters = x.rank - segmentIds.rank;
-        for (var i = 0; i < numIters; ++i) {
-            segmentIds = segmentIds.expandDims(i + 1);
-        }
-        for (var i = 0; i < numSegments; ++i) {
-            var segmentId = ops.scalar(i, 'int32');
-            var mask = ops.equal(segmentId, segmentIds).asType('float32');
-            var sum = mask.mul(x).sum(0);
-            res.push(sum);
-        }
-        return ops.stack(res);
-    };
-    MathBackendCPU.prototype.argMin = function (x, axis) {
-        cpu_util_1.assertNotComplex(x, 'argMin');
-        var axes = [axis];
-        axis_util.assertAxesAreInnerMostDims('argMin', axes, x.rank);
-        var _a = axis_util.computeOutAndReduceShapes(x.shape, axes), outShape = _a[0], reduceShape = _a[1];
-        var result = ops.zeros(outShape, 'int32');
-        var reduceSize = util.sizeFromShape(reduceShape);
-        var vals = this.readSync(result.dataId);
-        var aVals = this.readSync(x.dataId);
-        for (var i = 0; i < vals.length; ++i) {
-            var offset = i * reduceSize;
-            var min = aVals[offset];
-            var minIndex = 0;
-            for (var j = 0; j < reduceSize; ++j) {
-                var value = aVals[offset + j];
-                if (value < min) {
-                    min = value;
-                    minIndex = j;
-                }
-            }
-            vals[i] = minIndex;
-        }
-        return result;
-    };
-    MathBackendCPU.prototype.argMax = function (x, axis) {
-        cpu_util_1.assertNotComplex(x, 'argMax');
-        var axes = [axis];
-        axis_util.assertAxesAreInnerMostDims('argMax', axes, x.rank);
-        var _a = axis_util.computeOutAndReduceShapes(x.shape, axes), outShape = _a[0], reduceShape = _a[1];
-        var result = ops.zeros(outShape, 'int32');
-        var reduceSize = util.sizeFromShape(reduceShape);
-        var vals = this.readSync(result.dataId);
-        var aVals = this.readSync(x.dataId);
-        for (var i = 0; i < vals.length; ++i) {
-            var offset = i * reduceSize;
-            var max = aVals[offset];
-            var maxIndex = 0;
-            for (var j = 0; j < reduceSize; ++j) {
-                var value = aVals[offset + j];
-                if (value > max) {
-                    max = value;
-                    maxIndex = j;
-                }
-            }
-            vals[i] = maxIndex;
-        }
-        return result;
-    };
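-    // cumsum only supports the innermost axis on this backend; other axes are
-    // rejected with the error below.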
-    MathBackendCPU.prototype.cumsum = function (x, axis, exclusive, reverse) {
-        cpu_util_1.assertNotComplex(x, 'cumsum');
-        if (axis !== x.rank - 1) {
-            throw new Error("backend.cumsum in CPU expects an inner-most axis=" + (x.rank - 1) + " " +
-                ("but got axis=" + axis));
-        }
-        var resultDtype = types_1.upcastType(x.dtype, 'int32');
-        var result = ops.zeros(x.shape, resultDtype);
-        var vals = this.readSync(result.dataId);
-        var aVals = this.readSync(x.dataId);
-        var finalDim = x.shape[x.rank - 1];
-        var indexAdjuster = reverse ?
-            function (i, j) { return i + finalDim - j - 1; } :
-            function (i, j) { return i + j; };
-        for (var i = 0; i < aVals.length; i += finalDim) {
-            for (var j = 0; j < finalDim; j++) {
-                var idx = indexAdjuster(i, j);
-                if (j === 0) {
-                    vals[idx] = exclusive ? 0 : aVals[idx];
-                }
-                else {
-                    var prevIdx = indexAdjuster(i, j - 1);
-                    vals[idx] = exclusive ? aVals[prevIdx] + vals[prevIdx] :
-                        aVals[idx] + vals[prevIdx];
-                }
-            }
-        }
-        return result;
-    };
-    MathBackendCPU.prototype.equal = function (a, b) {
-        cpu_util_1.assertNotComplex([a, b], 'equal');
-        return this.broadcastedBinaryOp(a, b, 'bool', function (aVal, bVal) {
-            return (aVal === bVal) ? 1 : 0;
-        });
-    };
-    MathBackendCPU.prototype.notEqual = function (a, b) {
-        cpu_util_1.assertNotComplex([a, b], 'notEqual');
-        return this.broadcastedBinaryOp(a, b, 'bool', function (aVal, bVal) {
-            return (aVal !== bVal) ? 1 : 0;
-        });
-    };
-    MathBackendCPU.prototype.less = function (a, b) {
-        cpu_util_1.assertNotComplex([a, b], 'less');
-        return this.broadcastedBinaryOp(a, b, 'bool', function (aVal, bVal) {
-            return (aVal < bVal) ? 1 : 0;
-        });
-    };
-    MathBackendCPU.prototype.lessEqual = function (a, b) {
-        cpu_util_1.assertNotComplex([a, b], 'lessEqual');
-        return this.broadcastedBinaryOp(a, b, 'bool', function (aVal, bVal) {
-            return (aVal <= bVal) ? 1 : 0;
-        });
-    };
-    MathBackendCPU.prototype.greater = function (a, b) {
-        cpu_util_1.assertNotComplex([a, b], 'greater');
-        return this.broadcastedBinaryOp(a, b, 'bool', function (aVal, bVal) {
-            return (aVal > bVal) ? 1 : 0;
-        });
-    };
-    MathBackendCPU.prototype.greaterEqual = function (a, b) {
-        cpu_util_1.assertNotComplex([a, b], 'greaterEqual');
-        return this.broadcastedBinaryOp(a, b, 'bool', function (aVal, bVal) {
-            return (aVal >= bVal) ? 1 : 0;
-        });
-    };
-    MathBackendCPU.prototype.logicalNot = function (x) {
-        cpu_util_1.assertNotComplex(x, 'logicalNot');
-        var values = this.readSync(x.dataId);
-        var newValues = new Uint8Array(values.length);
-        for (var i = 0; i < values.length; ++i) {
-            newValues[i] = values[i] ? 0 : 1;
-        }
-        return this.makeOutput(newValues, x.shape, 'bool');
-    };
-    MathBackendCPU.prototype.logicalAnd = function (a, b) {
-        cpu_util_1.assertNotComplex([a, b], 'logicalAnd');
-        return this.broadcastedBinaryOp(a, b, 'bool', function (aVal, bVal) {
-            return aVal && bVal;
-        });
-    };
-    MathBackendCPU.prototype.logicalOr = function (a, b) {
-        cpu_util_1.assertNotComplex([a, b], 'logicalOr');
-        return this.broadcastedBinaryOp(a, b, 'bool', function (aVal, bVal) {
-            return aVal || bVal;
-        });
-    };
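-    // select: when condition is rank-1 and a is higher-rank, each condition
-    // value covers offset = sizeFromShape(a.shape.slice(1)) consecutive
-    // elements of a and b; otherwise condition is matched element-wise.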
-    MathBackendCPU.prototype.select = function (condition, a, b) {
-        cpu_util_1.assertNotComplex([condition, a, b], 'select');
-        var values = this.readSync(condition.dataId);
-        var aValues = this.readSync(a.dataId);
-        var bValues = this.readSync(b.dataId);
-        var result = ops.zeros(a.shape, types_1.upcastType(a.dtype, b.dtype));
-        var newValues = this.readSync(result.dataId);
-        var index = 0;
-        var offset = condition.rank === 0 || condition.rank > 1 || a.rank === 1 ?
-            1 :
-            util.sizeFromShape(a.shape.slice(1));
-        for (var i = 0; i < values.length; i++) {
-            for (var j = 0; j < offset; j++) {
-                if (values[i] === 1) {
-                    newValues[index++] = aValues[i];
-                }
-                else {
-                    newValues[index++] = bValues[i];
-                }
-            }
-        }
-        return result;
-    };
-    MathBackendCPU.prototype.where = function (condition) {
-        cpu_util_1.assertNotComplex([condition], 'where');
-        var condVals = this.readSync(condition.dataId);
-        return where_impl_1.whereImpl(condition.shape, condVals);
-    };
-    MathBackendCPU.prototype.topk = function (x, k, sorted) {
-        cpu_util_1.assertNotComplex(x, 'topk');
-        var xVals = this.readSync(x.dataId);
-        return topk_impl_1.topkImpl(xVals, x.shape, x.dtype, k, sorted);
-    };
-    MathBackendCPU.prototype.min = function (x, axes) {
-        cpu_util_1.assertNotComplex(x, 'min');
-        axis_util.assertAxesAreInnerMostDims('min', axes, x.rank);
-        var _a = axis_util.computeOutAndReduceShapes(x.shape, axes), outShape = _a[0], reduceShape = _a[1];
-        var result = ops.zeros(outShape, x.dtype);
-        var reduceSize = util.sizeFromShape(reduceShape);
-        var vals = this.readSync(result.dataId);
-        var aVals = this.readSync(x.dataId);
-        for (var i = 0; i < vals.length; ++i) {
-            var offset = i * reduceSize;
-            var min = aVals[offset];
-            for (var j = 0; j < reduceSize; ++j) {
-                var value = aVals[offset + j];
-                if (value < min) {
-                    min = value;
-                }
-            }
-            vals[i] = min;
-        }
-        return result;
-    };
-    MathBackendCPU.prototype.minimum = function (a, b) {
-        cpu_util_1.assertNotComplex([a, b], 'minimum');
-        return this.broadcastedBinaryOp(a, b, a.dtype, function (aVal, bVal) { return Math.min(aVal, bVal); });
-    };
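-    // mod follows Python semantics: the result takes the sign of the divisor
-    // b, unlike JavaScript's native % operator.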
-    MathBackendCPU.prototype.mod = function (a, b) {
-        cpu_util_1.assertNotComplex([a, b], 'mod');
-        return this.broadcastedBinaryOp(a, b, a.dtype, function (aVal, bVal) {
-            var rem = aVal % bVal;
-            if ((aVal < 0 && bVal < 0) || (aVal >= 0 && bVal >= 0)) {
-                return rem;
-            }
-            else {
-                return (rem + bVal) % bVal;
-            }
-        });
-    };
-    MathBackendCPU.prototype.max = function (x, axes) {
-        cpu_util_1.assertNotComplex(x, 'max');
-        axis_util.assertAxesAreInnerMostDims('max', axes, x.rank);
-        var _a = axis_util.computeOutAndReduceShapes(x.shape, axes), outShape = _a[0], reduceShape = _a[1];
-        var result = ops.zeros(outShape, x.dtype);
-        var reduceSize = util.sizeFromShape(reduceShape);
-        var vals = this.readSync(result.dataId);
-        var aVals = this.readSync(x.dataId);
-        for (var i = 0; i < vals.length; ++i) {
-            var offset = i * reduceSize;
-            var max = aVals[offset];
-            for (var j = 0; j < reduceSize; ++j) {
-                var value = aVals[offset + j];
-                if (value > max) {
-                    max = value;
-                }
-            }
-            vals[i] = max;
-        }
-        return result;
-    };
-    MathBackendCPU.prototype.maximum = function (a, b) {
-        cpu_util_1.assertNotComplex([a, b], 'maximum');
-        return this.broadcastedBinaryOp(a, b, a.dtype, function (aVal, bVal) { return Math.max(aVal, bVal); });
-    };
-    MathBackendCPU.prototype.all = function (x, axes) {
-        cpu_util_1.assertNotComplex(x, 'all');
-        axis_util.assertAxesAreInnerMostDims('all', axes, x.rank);
-        var _a = axis_util.computeOutAndReduceShapes(x.shape, axes), outShape = _a[0], reduceShape = _a[1];
-        var result = ops.zeros(outShape, x.dtype);
-        var reduceSize = util.sizeFromShape(reduceShape);
-        var vals = this.readSync(result.dataId);
-        var aVals = this.readSync(x.dataId);
-        for (var i = 0; i < vals.length; ++i) {
-            var offset = i * reduceSize;
-            var all = aVals[offset];
-            for (var j = 0; j < reduceSize; ++j) {
-                var value = aVals[offset + j];
-                all = all && value;
-            }
-            vals[i] = all;
-        }
-        return result;
-    };
-    MathBackendCPU.prototype.any = function (x, axes) {
-        cpu_util_1.assertNotComplex(x, 'any');
-        axis_util.assertAxesAreInnerMostDims('any', axes, x.rank);
-        var _a = axis_util.computeOutAndReduceShapes(x.shape, axes), outShape = _a[0], reduceShape = _a[1];
-        var result = ops.zeros(outShape, x.dtype);
-        var reduceSize = util.sizeFromShape(reduceShape);
-        var vals = this.readSync(result.dataId);
-        var aVals = this.readSync(x.dataId);
-        for (var i = 0; i < vals.length; ++i) {
-            var offset = i * reduceSize;
-            var anyVal = aVals[offset];
-            for (var j = 0; j < reduceSize; ++j) {
-                var value = aVals[offset + j];
-                anyVal = anyVal || value;
-            }
-            vals[i] = anyVal;
-        }
-        return result;
-    };
-    MathBackendCPU.prototype.squaredDifference = function (a, b) {
-        cpu_util_1.assertNotComplex([a, b], 'squaredDifference');
-        return this.broadcastedBinaryOp(a, b, a.dtype, function (aVal, bVal) {
-            var diff = aVal - bVal;
-            return diff * diff;
-        });
-    };
-    MathBackendCPU.prototype.ceil = function (x) {
-        cpu_util_1.assertNotComplex(x, 'ceil');
-        var values = this.readSync(x.dataId);
-        var newValues = new Float32Array(values.length);
-        for (var i = 0; i < values.length; ++i) {
-            newValues[i] = Math.ceil(values[i]);
-        }
-        return this.makeOutput(newValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.floor = function (x) {
-        cpu_util_1.assertNotComplex(x, 'floor');
-        var values = this.readSync(x.dataId);
-        var newValues = new Float32Array(values.length);
-        for (var i = 0; i < values.length; ++i) {
-            newValues[i] = Math.floor(values[i]);
-        }
-        return this.makeOutput(newValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.sign = function (x) {
-        cpu_util_1.assertNotComplex(x, 'sign');
-        var values = this.readSync(x.dataId);
-        var newValues = new Float32Array(values.length);
-        for (var i = 0; i < values.length; ++i) {
-            if (values[i] < 0) {
-                newValues[i] = -1;
-            }
-            else if (values[i] > 0) {
-                newValues[i] = 1;
-            }
-            else {
-                newValues[i] = 0;
-            }
-        }
-        return this.makeOutput(newValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.isNaN = function (x) {
-        cpu_util_1.assertNotComplex(x, 'isNaN');
-        var values = this.readSync(x.dataId);
-        var newValues = new Uint8Array(values.length);
-        for (var i = 0; i < values.length; ++i) {
-            if (Number.isNaN(values[i])) {
-                newValues[i] = 1;
-            }
-        }
-        return this.makeOutput(newValues, x.shape, 'bool');
-    };
-    MathBackendCPU.prototype.isInf = function (x) {
-        cpu_util_1.assertNotComplex(x, 'isInf');
-        var values = this.readSync(x.dataId);
-        var newValues = new Uint8Array(values.length);
-        for (var i = 0; i < values.length; ++i) {
-            if (Math.abs(values[i]) === Infinity) {
-                newValues[i] = 1;
-            }
-        }
-        return this.makeOutput(newValues, x.shape, 'bool');
-    };
-    MathBackendCPU.prototype.isFinite = function (x) {
-        cpu_util_1.assertNotComplex(x, 'isFinite');
-        var values = this.readSync(x.dataId);
-        var newValues = new Uint8Array(values.length);
-        for (var i = 0; i < values.length; ++i) {
-            if (Number.isFinite(values[i])) {
-                newValues[i] = 1;
-            }
-        }
-        return this.makeOutput(newValues, x.shape, 'bool');
-    };
-    MathBackendCPU.prototype.round = function (x) {
-        cpu_util_1.assertNotComplex(x, 'round');
-        var values = this.readSync(x.dataId);
-        var newValues = new Float32Array(values.length);
-        for (var i = 0; i < values.length; ++i) {
-            // The algorithm is based on banker's rounding.
-            var base = Math.floor(values[i]);
-            if (values[i] - base < 0.5) {
-                newValues[i] = Math.floor(values[i]);
-            }
-            else if (values[i] - base > 0.5) {
-                newValues[i] = Math.ceil(values[i]);
-            }
-            else {
-                if (base % 2.0 === 0.0) {
-                    newValues[i] = base;
-                }
-                else {
-                    newValues[i] = base + 1.0;
-                }
-            }
-        }
-        return this.makeOutput(newValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.exp = function (x) {
-        cpu_util_1.assertNotComplex(x, 'exp');
-        var values = this.readSync(x.dataId);
-        var newValues = new Float32Array(values.length);
-        for (var i = 0; i < values.length; ++i) {
-            newValues[i] = Math.exp(values[i]);
-        }
-        return this.makeOutput(newValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.expm1 = function (x) {
-        cpu_util_1.assertNotComplex(x, 'expm1');
-        var values = this.readSync(x.dataId);
-        var newValues = new Float32Array(values.length);
-        for (var i = 0; i < values.length; ++i) {
-            newValues[i] = Math.expm1(values[i]);
-        }
-        return this.makeOutput(newValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.log = function (x) {
-        cpu_util_1.assertNotComplex(x, 'log');
-        var values = this.readSync(x.dataId);
-        var newValues = new Float32Array(values.length);
-        for (var i = 0; i < values.length; ++i) {
-            var value = values[i];
-            newValues[i] = Math.log(value);
-        }
-        return this.makeOutput(newValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.log1p = function (x) {
-        cpu_util_1.assertNotComplex(x, 'log1p');
-        var values = this.readSync(x.dataId);
-        var newValues = new Float32Array(values.length);
-        for (var i = 0; i < values.length; ++i) {
-            var value = values[i];
-            newValues[i] = Math.log1p(value);
-        }
-        return this.makeOutput(newValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.sqrt = function (x) {
-        cpu_util_1.assertNotComplex(x, 'sqrt');
-        var values = this.readSync(x.dataId);
-        var newValues = new Float32Array(values.length);
-        for (var i = 0; i < values.length; ++i) {
-            var value = values[i];
-            newValues[i] = Math.sqrt(value);
-        }
-        return this.makeOutput(newValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.rsqrt = function (x) {
-        cpu_util_1.assertNotComplex(x, 'rsqrt');
-        var values = this.readSync(x.dataId);
-        var newValues = new Float32Array(values.length);
-        for (var i = 0; i < values.length; ++i) {
-            var value = values[i];
-            newValues[i] = 1 / Math.sqrt(value);
-        }
-        return this.makeOutput(newValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.reciprocal = function (x) {
-        cpu_util_1.assertNotComplex(x, 'reciprocal');
-        var values = this.readSync(x.dataId);
-        var newValues = new Float32Array(values.length);
-        for (var i = 0; i < values.length; ++i) {
-            newValues[i] = 1 / values[i];
-        }
-        return this.makeOutput(newValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.linear = function (x) {
-        return x;
-    };
-    MathBackendCPU.prototype.relu = function (x) {
-        cpu_util_1.assertNotComplex(x, 'relu');
-        var res = ops.zeros(x.shape, x.dtype);
-        var resVals = this.readSync(res.dataId);
-        var inVals = this.readSync(x.dataId);
-        for (var i = 0; i < inVals.length; ++i) {
-            resVals[i] = Math.max(0, inVals[i]);
-        }
-        return res;
-    };
-    MathBackendCPU.prototype.relu6 = function (x) {
-        cpu_util_1.assertNotComplex(x, 'relu6');
-        var res = ops.zeros(x.shape, x.dtype);
-        var resVals = this.readSync(res.dataId);
-        var inVals = this.readSync(x.dataId);
-        for (var i = 0; i < inVals.length; ++i) {
-            resVals[i] = Math.min(Math.max(0, inVals[i]), 6);
-        }
-        return res;
-    };
-    MathBackendCPU.prototype.prelu = function (x, a) {
-        cpu_util_1.assertNotComplex([x, a], 'prelu');
-        return this.broadcastedBinaryOp(x, a, x.dtype, function (xValue, aValue) { return xValue < 0 ? aValue * xValue : xValue; });
-    };
-    MathBackendCPU.prototype.elu = function (x) {
-        cpu_util_1.assertNotComplex(x, 'elu');
-        var resultValues = new Float32Array(x.size);
-        var values = this.readSync(x.dataId);
-        for (var i = 0; i < values.length; ++i) {
-            var v = values[i];
-            if (v >= 0) {
-                resultValues[i] = v;
-            }
-            else {
-                resultValues[i] = (Math.exp(v) - 1);
-            }
-        }
-        return this.makeOutput(resultValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.eluDer = function (dy, y) {
-        cpu_util_1.assertNotComplex([dy, y], 'eluDer');
-        var resultValues = new Float32Array(y.size);
-        var values = this.readSync(y.dataId);
-        var dyValues = this.readSync(dy.dataId);
-        for (var i = 0; i < values.length; ++i) {
-            var v = values[i];
-            if (v >= 1) {
-                resultValues[i] = dyValues[i];
-            }
-            else {
-                resultValues[i] = dyValues[i] * (v + 1);
-            }
-        }
-        return this.makeOutput(resultValues, y.shape, 'float32');
-    };
-    MathBackendCPU.prototype.selu = function (x) {
-        cpu_util_1.assertNotComplex(x, 'selu');
-        // Stable and Attracting Fixed Point (0, 1) for Normalized Weights.
-        // see: https://arxiv.org/abs/1706.02515
-        var scaleAlpha = selu_util.SELU_SCALEALPHA;
-        var scale = selu_util.SELU_SCALE;
-        var resultValues = new Float32Array(x.size);
-        var values = this.readSync(x.dataId);
-        for (var i = 0; i < values.length; ++i) {
-            var v = values[i];
-            if (v >= 0) {
-                resultValues[i] = scale * v;
-            }
-            else {
-                resultValues[i] = scaleAlpha * (Math.exp(v) - 1);
-            }
-        }
-        return this.makeOutput(resultValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.clip = function (x, min, max) {
-        cpu_util_1.assertNotComplex(x, 'clip');
-        var resultValues = new Float32Array(x.size);
-        var values = this.readSync(x.dataId);
-        for (var i = 0; i < values.length; ++i) {
-            var v = values[i];
-            resultValues[i] = v > max ? max : (v < min ? min : v);
-        }
-        return this.makeOutput(resultValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.abs = function (x) {
-        var resultValues = new Float32Array(x.size);
-        var values = this.readSync(x.dataId);
-        for (var i = 0; i < values.length; ++i) {
-            resultValues[i] = Math.abs(values[i]);
-        }
-        return this.makeOutput(resultValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.complexAbs = function (x) {
-        var resultValues = new Float32Array(x.size);
-        var values = this.readSync(x.dataId);
-        for (var i = 0; i < x.size; ++i) {
-            var real_1 = values[i * 2];
-            var imag_1 = values[i * 2 + 1];
-            resultValues[i] = Math.hypot(real_1, imag_1);
-        }
-        return this.makeOutput(resultValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.int = function (x) {
-        cpu_util_1.assertNotComplex(x, 'int');
-        var resultValues = new Int32Array(x.size);
-        var values = this.readSync(x.dataId);
-        for (var i = 0; i < values.length; ++i) {
-            resultValues[i] = values[i];
-        }
-        return this.makeOutput(resultValues, x.shape, 'int32');
-    };
-    MathBackendCPU.prototype.sigmoid = function (x) {
-        cpu_util_1.assertNotComplex(x, 'sigmoid');
-        var resultValues = new Float32Array(x.size);
-        var values = this.readSync(x.dataId);
-        for (var i = 0; i < values.length; ++i) {
-            resultValues[i] = 1 / (1 + Math.exp(-values[i]));
-        }
-        return this.makeOutput(resultValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.softplus = function (x) {
-        cpu_util_1.assertNotComplex(x, 'softplus');
-        // mirrors the implementation of tf.nn.softplus: https://goo.gl/vkcvwX
-        // epsilon is the difference between 1.0 and the next representable float.
-        // For a single precision 32 bit float this should be 2^-23, see:
-        // https://math.byu.edu/~schow/work/IEEEFloatingPoint.htm
-        var epsilon = 1.1920928955078125e-7;
-        var threshold = Math.log(epsilon) + 2.0;
-        var resultValues = new Float32Array(x.size);
-        var values = this.readSync(x.dataId);
-        for (var i = 0; i < values.length; ++i) {
-            // Value above which exp(x) may overflow, but softplus(x) == x
-            // is within machine epsilon.
-            var tooLarge = values[i] > -threshold;
-            // Value below which exp(x) may underflow, but softplus(x) == exp(x)
-            // is within machine epsilon.
-            var tooSmall = values[i] < threshold;
-            var expX = Math.exp(values[i]);
-            var result = void 0;
-            if (tooSmall) {
-                result = expX;
-            }
-            else if (tooLarge) {
-                result = values[i];
-            }
-            else {
-                result = Math.log(1.0 + expX);
-            }
-            resultValues[i] = result;
-        }
-        return this.makeOutput(resultValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.sin = function (x) {
-        cpu_util_1.assertNotComplex(x, 'sin');
-        var resultValues = new Float32Array(x.size);
-        var values = this.readSync(x.dataId);
-        for (var i = 0; i < values.length; ++i) {
-            resultValues[i] = Math.sin(values[i]);
-        }
-        return this.makeOutput(resultValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.cos = function (x) {
-        cpu_util_1.assertNotComplex(x, 'cos');
-        var resultValues = new Float32Array(x.size);
-        var values = this.readSync(x.dataId);
-        for (var i = 0; i < values.length; ++i) {
-            resultValues[i] = Math.cos(values[i]);
-        }
-        return this.makeOutput(resultValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.tan = function (x) {
-        cpu_util_1.assertNotComplex(x, 'tan');
-        var resultValues = new Float32Array(x.size);
-        var values = this.readSync(x.dataId);
-        for (var i = 0; i < values.length; ++i) {
-            resultValues[i] = Math.tan(values[i]);
-        }
-        return this.makeOutput(resultValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.asin = function (x) {
-        cpu_util_1.assertNotComplex(x, 'asin');
-        var resultValues = new Float32Array(x.size);
-        var values = this.readSync(x.dataId);
-        for (var i = 0; i < values.length; ++i) {
-            resultValues[i] = Math.asin(values[i]);
-        }
-        return this.makeOutput(resultValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.acos = function (x) {
-        cpu_util_1.assertNotComplex(x, 'acos');
-        var resultValues = new Float32Array(x.size);
-        var values = this.readSync(x.dataId);
-        for (var i = 0; i < values.length; ++i) {
-            resultValues[i] = Math.acos(values[i]);
-        }
-        return this.makeOutput(resultValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.atan = function (x) {
-        cpu_util_1.assertNotComplex(x, 'atan');
-        var resultValues = new Float32Array(x.size);
-        var values = this.readSync(x.dataId);
-        for (var i = 0; i < values.length; ++i) {
-            resultValues[i] = Math.atan(values[i]);
-        }
-        return this.makeOutput(resultValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.atan2 = function (a, b) {
-        cpu_util_1.assertNotComplex([a, b], 'atan2');
-        return this.broadcastedBinaryOp(a, b, a.dtype, function (aValue, bValue) { return Math.atan2(aValue, bValue); });
-    };
-    MathBackendCPU.prototype.sinh = function (x) {
-        cpu_util_1.assertNotComplex(x, 'sinh');
-        var resultValues = new Float32Array(x.size);
-        var values = this.readSync(x.dataId);
-        for (var i = 0; i < values.length; ++i) {
-            resultValues[i] = Math.sinh(values[i]);
-        }
-        return this.makeOutput(resultValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.cosh = function (x) {
-        cpu_util_1.assertNotComplex(x, 'cosh');
-        var resultValues = new Float32Array(x.size);
-        var values = this.readSync(x.dataId);
-        for (var i = 0; i < values.length; ++i) {
-            resultValues[i] = Math.cosh(values[i]);
-        }
-        return this.makeOutput(resultValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.tanh = function (x) {
-        cpu_util_1.assertNotComplex(x, 'tanh');
-        var resultValues = new Float32Array(x.size);
-        var values = this.readSync(x.dataId);
-        for (var i = 0; i < values.length; ++i) {
-            resultValues[i] = util.tanh(values[i]);
-        }
-        return this.makeOutput(resultValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.asinh = function (x) {
-        cpu_util_1.assertNotComplex(x, 'asinh');
-        var resultValues = new Float32Array(x.size);
-        var values = this.readSync(x.dataId);
-        for (var i = 0; i < values.length; ++i) {
-            resultValues[i] = Math.asinh(values[i]);
-        }
-        return this.makeOutput(resultValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.acosh = function (x) {
-        cpu_util_1.assertNotComplex(x, 'acosh');
-        var resultValues = new Float32Array(x.size);
-        var values = this.readSync(x.dataId);
-        for (var i = 0; i < values.length; ++i) {
-            resultValues[i] = Math.acosh(values[i]);
-        }
-        return this.makeOutput(resultValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.atanh = function (x) {
-        cpu_util_1.assertNotComplex(x, 'atanh');
-        var resultValues = new Float32Array(x.size);
-        var values = this.readSync(x.dataId);
-        for (var i = 0; i < values.length; ++i) {
-            resultValues[i] = Math.atanh(values[i]);
-        }
-        return this.makeOutput(resultValues, x.shape, 'float32');
-    };
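-    // erf evaluates a five-term polynomial approximation of the error
-    // function (the Abramowitz & Stegun style rational approximation, with
-    // constants taken from erf_util); odd symmetry is restored via the sign.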
-    MathBackendCPU.prototype.erf = function (x) {
-        cpu_util_1.assertNotComplex(x, 'erf');
-        var resultValues = new Float32Array(x.size);
-        var values = this.readSync(x.dataId);
-        var p = erf_util.ERF_P;
-        var a1 = erf_util.ERF_A1;
-        var a2 = erf_util.ERF_A2;
-        var a3 = erf_util.ERF_A3;
-        var a4 = erf_util.ERF_A4;
-        var a5 = erf_util.ERF_A5;
-        for (var i = 0; i < values.length; ++i) {
-            var sign = Math.sign(values[i]);
-            var v = Math.abs(values[i]);
-            var t = 1.0 / (1.0 + p * v);
-            resultValues[i] = sign *
-                (1.0 -
-                    (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t *
-                        Math.exp(-v * v));
-        }
-        return this.makeOutput(resultValues, x.shape, 'float32');
-    };
-    MathBackendCPU.prototype.step = function (x, alpha) {
-        if (alpha === void 0) { alpha = 0; }
-        cpu_util_1.assertNotComplex(x, 'step');
-        var resultValues = new Float32Array(x.size);
-        var values = this.readSync(x.dataId);
-        for (var i = 0; i < values.length; ++i) {
-            var value = values[i];
-            if (isNaN(value)) {
-                resultValues[i] = NaN;
-            }
-            else {
-                resultValues[i] = value > 0 ? 1 : alpha;
-            }
-        }
-        return this.makeOutput(resultValues, x.shape, 'float32');
-    };
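-    // The fused* ops compose an existing kernel with an optional bias add and
-    // activation, mirroring fusedBatchMatMul above.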
-    MathBackendCPU.prototype.fusedConv2d = function (_a) {
-        var input = _a.input, filter = _a.filter, convInfo = _a.convInfo, bias = _a.bias, activation = _a.activation, preluActivationWeights = _a.preluActivationWeights;
-        var result = this.conv2d(input, filter, convInfo);
-        if (bias) {
-            result = this.add(result, bias);
-        }
-        if (activation) {
-            result =
-                mapActivation(this, result, activation, preluActivationWeights);
-        }
-        return result;
-    };
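-    // conv2d supports the channelsLast and channelsFirst layouts by
-    // precomputing per-dimension strides for x and y; padded (out-of-bounds)
-    // filter taps are skipped via the continue guards inside the loops.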
- MathBackendCPU.prototype.conv2d = function (x, filter, convInfo) {
- cpu_util_1.assertNotComplex([x, filter], 'conv2d');
- var filterHeight = convInfo.filterHeight;
- var filterWidth = convInfo.filterWidth;
- var dilationHeight = convInfo.dilationHeight;
- var dilationWidth = convInfo.dilationWidth;
- var padLeft = convInfo.padInfo.left;
- var padTop = convInfo.padInfo.top;
- var isChannelsLast = convInfo.dataFormat === 'channelsLast';
- var y = ops.buffer(convInfo.outShape, x.dtype);
- var xBatchStride = x.strides[0];
- var xRowStride = isChannelsLast ? x.strides[1] : x.strides[2];
- var xColStride = isChannelsLast ? x.strides[2] : 1;
- var xChannelStride = isChannelsLast ? 1 : x.strides[1];
- var yBatchStride = y.strides[0];
- var yRowStride = isChannelsLast ? y.strides[1] : y.strides[2];
- var yColStride = isChannelsLast ? y.strides[2] : 1;
- var yChannelStride = isChannelsLast ? 1 : y.strides[1];
- var xVals = this.readSync(x.dataId);
- var wVals = this.readSync(filter.dataId);
- var yVals = y.values;
- for (var b = 0; b < convInfo.batchSize; ++b) {
- var xOffset1 = b * xBatchStride;
- var yOffset1 = b * yBatchStride;
- for (var yR = 0; yR < convInfo.outHeight; ++yR) {
- var yOffset2 = yOffset1 + yR * yRowStride;
- var xRCorner = yR * convInfo.strideHeight - padTop;
- for (var wR = 0; wR < filterHeight; wR++) {
- var xR = xRCorner + wR * dilationHeight;
- if (xR < 0 || xR >= convInfo.inHeight) {
- continue;
- }
- var wOffset1 = wR * filter.strides[0];
- var xOffset2 = xOffset1 + xR * xRowStride;
- for (var yC = 0; yC < convInfo.outWidth; ++yC) {
- var yOffset3 = yOffset2 + yC * yColStride;
- var xCCorner = yC * convInfo.strideWidth - padLeft;
- for (var wC = 0; wC < filterWidth; wC++) {
- var xC = xCCorner + wC * dilationWidth;
- if (xC < 0 || xC >= convInfo.inWidth) {
- continue;
- }
- var wOffset2 = wOffset1 + wC * filter.strides[1];
- var xOffset3 = xOffset2 + xC * xColStride;
- var wOffset3 = wOffset2;
- for (var d1 = 0; d1 < convInfo.inChannels; ++d1) {
- var xVal = xVals[xOffset3 + d1 * xChannelStride];
- for (var d2 = 0; d2 < convInfo.outChannels; ++d2) {
- yVals[yOffset3 + d2 * yChannelStride] +=
- xVal * wVals[wOffset3 + d2];
- }
- wOffset3 += convInfo.outChannels;
- }
- }
- }
- }
- }
- }
- return y.toTensor();
- };
- MathBackendCPU.prototype.conv3d = function (x, filter, convInfo) {
- var filterDepth = convInfo.filterDepth;
- var filterHeight = convInfo.filterHeight;
- var filterWidth = convInfo.filterWidth;
- var dilationDepth = convInfo.dilationDepth;
- var dilationHeight = convInfo.dilationHeight;
- var dilationWidth = convInfo.dilationWidth;
- var padFront = convInfo.padInfo.front;
- var padLeft = convInfo.padInfo.left;
- var padTop = convInfo.padInfo.top;
- var y = ops.buffer(convInfo.outShape, x.dtype);
- var xVals = this.readSync(x.dataId);
- var wVals = this.readSync(filter.dataId);
- var yVals = y.values;
- for (var b = 0; b < convInfo.batchSize; ++b) {
- var xOffset1 = b * x.strides[0];
- var yOffset1 = b * y.strides[0];
- for (var yF = 0; yF < convInfo.outDepth; ++yF) {
- var yOffset2 = yOffset1 + yF * y.strides[1];
- var xFCorner = yF * convInfo.strideDepth - padFront;
- for (var wF = 0; wF < filterDepth; wF++) {
- var xF = xFCorner + wF * dilationDepth;
- if (xF < 0 || xF >= convInfo.inDepth) {
- continue;
- }
- var wOffset1 = wF * filter.strides[0];
- var xOffset2 = xOffset1 + xF * x.strides[1];
- for (var yR = 0; yR < convInfo.outHeight; ++yR) {
- var yOffset3 = yOffset2 + yR * y.strides[2];
- var xRCorner = yR * convInfo.strideHeight - padTop;
- for (var wR = 0; wR < filterHeight; wR++) {
- var xR = xRCorner + wR * dilationHeight;
- if (xR < 0 || xR >= convInfo.inHeight) {
- continue;
- }
- var wOffset2 = wOffset1 + wR * filter.strides[1];
- var xOffset3 = xOffset2 + xR * x.strides[2];
- for (var yC = 0; yC < convInfo.outWidth; ++yC) {
- var yOffset4 = yOffset3 + yC * convInfo.outChannels;
- var xCCorner = yC * convInfo.strideWidth - padLeft;
- for (var wC = 0; wC < filterWidth; wC++) {
- var xC = xCCorner + wC * dilationWidth;
- if (xC < 0 || xC >= convInfo.inWidth) {
- continue;
- }
- var wOffset3 = wOffset2 + wC * filter.strides[2];
- var xOffset4 = xOffset3 + xC * convInfo.inChannels;
- var wOffset4 = wOffset3;
- for (var d1 = 0; d1 < convInfo.inChannels; ++d1) {
- var xVal = xVals[xOffset4 + d1];
- for (var d2 = 0; d2 < convInfo.outChannels; ++d2) {
- yVals[yOffset4 + d2] += xVal * wVals[wOffset4 + d2];
- }
- wOffset4 += convInfo.outChannels;
- }
- }
- }
- }
- }
- }
- }
- }
- return y.toTensor();
- };
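- // Gradient w.r.t. the input: a "full" correlation of dy with the
- // 180-degree-flipped filter, hence the (filterHeight - 1 - wR) and
- // (filterWidth - 1 - wC) indexing below.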
- MathBackendCPU.prototype.conv2dDerInput = function (dy, filter, convInfo) {
- cpu_util_1.assertNotComplex([dy, filter], 'conv2dDerInput');
- var dx = ops.buffer(convInfo.inShape, 'float32');
- var dxValues = dx.values;
- var dyValues = this.readSync(dy.dataId);
- var fltValues = this.readSync(filter.dataId);
- var _a = filter.strides, fltS0 = _a[0], fltS1 = _a[1], fltS2 = _a[2];
- var batchSize = convInfo.batchSize, filterHeight = convInfo.filterHeight, filterWidth = convInfo.filterWidth, inChannels = convInfo.inChannels, inHeight = convInfo.inHeight, inWidth = convInfo.inWidth, outChannels = convInfo.outChannels, outHeight = convInfo.outHeight, outWidth = convInfo.outWidth, strideHeight = convInfo.strideHeight, strideWidth = convInfo.strideWidth, dataFormat = convInfo.dataFormat;
- var topPad = filterHeight - 1 - convInfo.padInfo.top;
- var leftPad = filterWidth - 1 - convInfo.padInfo.left;
- var isChannelsLast = dataFormat === 'channelsLast';
- var xBatchStride = dx.strides[0];
- var xRowStride = isChannelsLast ? dx.strides[1] : dx.strides[2];
- var xColStride = isChannelsLast ? dx.strides[2] : 1;
- var xChannelStride = isChannelsLast ? 1 : dx.strides[1];
- var yBatchStride = dy.strides[0];
- var yRowStride = isChannelsLast ? dy.strides[1] : dy.strides[2];
- var yColStride = isChannelsLast ? dy.strides[2] : 1;
- var yChannelStride = isChannelsLast ? 1 : dy.strides[1];
- for (var b = 0; b < batchSize; ++b) {
- for (var d1 = 0; d1 < inChannels; ++d1) {
- for (var xR = 0; xR < inHeight; ++xR) {
- var xRCorner = xR - topPad;
- var xRMin = Math.max(0, Math.ceil(xRCorner / strideHeight));
- var yRMax = Math.min(outHeight, (filterHeight + xRCorner) / strideHeight);
- for (var xC = 0; xC < inWidth; ++xC) {
- var xCCorner = xC - leftPad;
- var xCMin = Math.max(0, Math.ceil(xCCorner / strideWidth));
- var yCMax = Math.min(outWidth, (filterWidth + xCCorner) / strideWidth);
- var dotProd = 0;
- for (var yR = xRMin; yR < yRMax; ++yR) {
- var wR = yR * strideHeight - xRCorner;
- for (var yC = xCMin; yC < yCMax; ++yC) {
- var wC = yC * strideWidth - xCCorner;
- var dyOffset = yBatchStride * b + yRowStride * yR + yColStride * yC;
- var fltOffset = fltS0 * (filterHeight - 1 - wR) +
- fltS1 * (filterWidth - 1 - wC) + fltS2 * d1;
- for (var d2 = 0; d2 < outChannels; ++d2) {
- var pixel = dyValues[dyOffset + yChannelStride * d2];
- var weight = fltValues[fltOffset + d2];
- dotProd += pixel * weight;
- }
- }
- }
- var dxOffset = xBatchStride * b + xRowStride * xR +
- xColStride * xC + xChannelStride * d1;
- dxValues[dxOffset] = dotProd;
- }
- }
- }
- }
- return dx.toTensor();
- };
- MathBackendCPU.prototype.conv3dDerInput = function (dy, filter, convInfo) {
- var dx = ops.buffer(convInfo.inShape, 'float32');
- var dxValues = dx.values;
- var _a = dx.strides, dxS0 = _a[0], dxS1 = _a[1], dxS2 = _a[2], dxS3 = _a[3];
- var dyValues = this.readSync(dy.dataId);
- var _b = dy.strides, dyS0 = _b[0], dyS1 = _b[1], dyS2 = _b[2], dyS3 = _b[3];
- var fltValues = this.readSync(filter.dataId);
- var _c = filter.strides, fltS0 = _c[0], fltS1 = _c[1], fltS2 = _c[2], fltS3 = _c[3];
- var batchSize = convInfo.batchSize, filterDepth = convInfo.filterDepth, filterHeight = convInfo.filterHeight, filterWidth = convInfo.filterWidth, inChannels = convInfo.inChannels, inDepth = convInfo.inDepth, inHeight = convInfo.inHeight, inWidth = convInfo.inWidth, outChannels = convInfo.outChannels, outDepth = convInfo.outDepth, outHeight = convInfo.outHeight, outWidth = convInfo.outWidth, strideDepth = convInfo.strideDepth, strideHeight = convInfo.strideHeight, strideWidth = convInfo.strideWidth;
- var frontPad = filterDepth - 1 - convInfo.padInfo.front;
- var topPad = filterHeight - 1 - convInfo.padInfo.top;
- var leftPad = filterWidth - 1 - convInfo.padInfo.left;
- for (var b = 0; b < batchSize; ++b) {
- for (var d1 = 0; d1 < inChannels; ++d1) {
- // Frames of depth
- for (var xF = 0; xF < inDepth; ++xF) {
- var xFCorner = xF - frontPad;
- var xFMin = Math.max(0, Math.ceil(xFCorner / strideDepth));
- var yFMax = Math.min(outDepth, (filterDepth + xFCorner) / strideDepth);
- // Rows as per standard 2d matrix notation
- for (var xR = 0; xR < inHeight; ++xR) {
- var xRCorner = xR - topPad;
- var xRMin = Math.max(0, Math.ceil(xRCorner / strideHeight));
- var yRMax = Math.min(outHeight, (filterHeight + xRCorner) / strideHeight);
- // Columns as per standard 2d matrix notation
- for (var xC = 0; xC < inWidth; ++xC) {
- var xCCorner = xC - leftPad;
- var xCMin = Math.max(0, Math.ceil(xCCorner / strideWidth));
- var yCMax = Math.min(outWidth, (filterWidth + xCCorner) / strideWidth);
- var dotProd = 0;
- for (var yF = xFMin; yF < yFMax; ++yF) {
- var wF = yF * strideDepth - xFCorner;
- for (var yR = xRMin; yR < yRMax; ++yR) {
- var wR = yR * strideHeight - xRCorner;
- for (var yC = xCMin; yC < yCMax; ++yC) {
- var wC = yC * strideWidth - xCCorner;
- var dyOffset = dyS0 * b + dyS1 * yF + dyS2 * yR + dyS3 * yC;
- var fltOffset = fltS0 * (filterDepth - 1 - wF) +
- fltS1 * (filterHeight - 1 - wR) +
- fltS2 * (filterWidth - 1 - wC) + fltS3 * d1;
- for (var d2 = 0; d2 < outChannels; ++d2) {
- var pixel = dyValues[dyOffset + d2];
- var weight = fltValues[fltOffset + d2];
- dotProd += pixel * weight;
- }
- }
- }
- }
- dxValues[dxS0 * b + dxS1 * xF + dxS2 * xR + dxS3 * xC + d1] =
- dotProd;
- }
- }
- }
- }
- }
- return dx.toTensor();
- };
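- // Gradient w.r.t. the filter: each weight dW[wR, wC, d1, d2] is the dot
- // product of the input values it touched with the corresponding dy values,
- // summed over the batch.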
- MathBackendCPU.prototype.conv2dDerFilter = function (x, dy, convInfo) {
- cpu_util_1.assertNotComplex([x, dy], 'conv2dDerFilter');
- var strideHeight = convInfo.strideHeight;
- var strideWidth = convInfo.strideWidth;
- var filterHeight = convInfo.filterHeight;
- var filterWidth = convInfo.filterWidth;
- var isChannelsLast = convInfo.dataFormat === 'channelsLast';
- var dW = ops.buffer(convInfo.filterShape, 'float32');
- var leftPad = convInfo.padInfo.left;
- var topPad = convInfo.padInfo.top;
- var xBuf = this.bufferSync(x);
- var dyBuf = this.bufferSync(dy);
- for (var wR = 0; wR < filterHeight; ++wR) {
- var yRMin = Math.max(0, Math.ceil((topPad - wR) / strideHeight));
- var yRMax = Math.min(convInfo.outHeight, (convInfo.inHeight + topPad - wR) / strideHeight);
- for (var wC = 0; wC < filterWidth; ++wC) {
- var yCMin = Math.max(0, Math.ceil((leftPad - wC) / strideWidth));
- var yCMax = Math.min(convInfo.outWidth, (convInfo.inWidth + leftPad - wC) / strideWidth);
- for (var d1 = 0; d1 < convInfo.inChannels; ++d1) {
- for (var d2 = 0; d2 < convInfo.outChannels; ++d2) {
- // Need to convolve.
- var dotProd = 0;
- for (var b = 0; b < convInfo.batchSize; ++b) {
- for (var yR = yRMin; yR < yRMax; ++yR) {
- var xR = wR + yR * strideHeight - topPad;
- for (var yC = yCMin; yC < yCMax; ++yC) {
- var xC = wC + yC * strideWidth - leftPad;
- if (isChannelsLast) {
- dotProd +=
- xBuf.get(b, xR, xC, d1) * dyBuf.get(b, yR, yC, d2);
- }
- else {
- dotProd +=
- xBuf.get(b, d1, xR, xC) * dyBuf.get(b, d2, yR, yC);
- }
- }
- }
- }
- dW.set(dotProd, wR, wC, d1, d2);
- }
- }
- }
- }
- return dW.toTensor();
- };
- MathBackendCPU.prototype.conv3dDerFilter = function (x, dy, convInfo) {
- var strideDepth = convInfo.strideDepth;
- var strideHeight = convInfo.strideHeight;
- var strideWidth = convInfo.strideWidth;
- var filterDepth = convInfo.filterDepth;
- var filterHeight = convInfo.filterHeight;
- var filterWidth = convInfo.filterWidth;
- var dw = ops.buffer(convInfo.filterShape, 'float32');
- var dwValues = dw.values;
- var _a = dw.strides, dwS0 = _a[0], dwS1 = _a[1], dwS2 = _a[2], dwS3 = _a[3];
- var dyValues = this.readSync(dy.dataId);
- var _b = dy.strides, dyS0 = _b[0], dyS1 = _b[1], dyS2 = _b[2], dyS3 = _b[3];
- var xValues = this.readSync(x.dataId);
- var _c = x.strides, xS0 = _c[0], xS1 = _c[1], xS2 = _c[2], xS3 = _c[3];
- var frontPad = convInfo.padInfo.front;
- var leftPad = convInfo.padInfo.left;
- var topPad = convInfo.padInfo.top;
- for (var wF = 0; wF < filterDepth; ++wF) {
- var yFMin = Math.max(0, Math.ceil((frontPad - wF) / strideDepth));
- var yFMax = Math.min(convInfo.outDepth, (convInfo.inDepth + frontPad - wF) / strideDepth);
- var wOffset1 = wF * dwS0;
- for (var wR = 0; wR < filterHeight; ++wR) {
- var yRMin = Math.max(0, Math.ceil((topPad - wR) / strideHeight));
- var yRMax = Math.min(convInfo.outHeight, (convInfo.inHeight + topPad - wR) / strideHeight);
- var wOffset2 = wR * dwS1 + wOffset1;
- for (var wC = 0; wC < filterWidth; ++wC) {
- var yCMin = Math.max(0, Math.ceil((leftPad - wC) / strideWidth));
- var yCMax = Math.min(convInfo.outWidth, (convInfo.inWidth + leftPad - wC) / strideWidth);
- var wOffset3 = wC * dwS2 + wOffset2;
- for (var d1 = 0; d1 < convInfo.inChannels; ++d1) {
- var wOffset4 = d1 * dwS3 + wOffset3;
- for (var d2 = 0; d2 < convInfo.outChannels; ++d2) {
- var dotProd = 0;
- for (var b = 0; b < convInfo.batchSize; ++b) {
- var xOffset1 = b * xS0;
- var yOffset1 = b * dyS0;
- for (var yF = yFMin; yF < yFMax; ++yF) {
- var xF = wF + yF * strideDepth - frontPad;
- var xOffset2 = xF * xS1 + xOffset1;
- var yOffset2 = yF * dyS1 + yOffset1;
- for (var yR = yRMin; yR < yRMax; ++yR) {
- var xR = wR + yR * strideHeight - topPad;
- var xOffset3 = xR * xS2 + xOffset2;
- var yOffset3 = yR * dyS2 + yOffset2;
- for (var yC = yCMin; yC < yCMax; ++yC) {
- var xC = wC + yC * strideWidth - leftPad;
- var xOffset4 = xC * xS3 + xOffset3;
- var yOffset4 = yC * dyS3 + yOffset3;
- dotProd +=
- xValues[xOffset4 + d1] * dyValues[yOffset4 + d2];
- }
- }
- }
- }
- dwValues[wOffset4 + d2] = dotProd;
- }
- }
- }
- }
- }
- return dw.toTensor();
- };
- MathBackendCPU.prototype.fusedDepthwiseConv2D = function (_a) {
- var input = _a.input, filter = _a.filter, convInfo = _a.convInfo, bias = _a.bias, activation = _a.activation, preluActivationWeights = _a.preluActivationWeights;
- var result = this.depthwiseConv2D(input, filter, convInfo);
- if (bias) {
- result = this.add(result, bias);
- }
- if (activation) {
- result =
- mapActivation(this, result, activation, preluActivationWeights);
- }
- return result;
- };
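- // Depthwise conv2d: each input channel d1 is convolved with its own set of
- // chMul filters, producing output channel d1 * chMul + q.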
- MathBackendCPU.prototype.depthwiseConv2D = function (x, filter, convInfo) {
- cpu_util_1.assertNotComplex([x, filter], 'depthwiseConv2D');
- var filterHeight = convInfo.filterHeight;
- var filterWidth = convInfo.filterWidth;
- var dilationHeight = convInfo.dilationHeight;
- var dilationWidth = convInfo.dilationWidth;
- var padLeft = convInfo.padInfo.left;
- var padTop = convInfo.padInfo.top;
- var chMul = convInfo.outChannels / convInfo.inChannels;
- var y = ops.buffer(convInfo.outShape, x.dtype);
- var xVals = this.readSync(x.dataId);
- var wVals = this.readSync(filter.dataId);
- var yVals = y.values;
- for (var b = 0; b < convInfo.batchSize; ++b) {
- var xOffset1 = b * x.strides[0];
- var yOffset1 = b * y.strides[0];
- for (var yR = 0; yR < convInfo.outHeight; ++yR) {
- var yOffset2 = yOffset1 + yR * y.strides[1];
- var xRCorner = yR * convInfo.strideHeight - padTop;
- for (var wR = 0; wR < filterHeight; ++wR) {
- var xR = xRCorner + wR * dilationHeight;
- if (xR < 0 || xR >= convInfo.inHeight) {
- continue;
- }
- var wOffset1 = wR * filter.strides[0];
- var xOffset2 = xOffset1 + xR * x.strides[1];
- for (var yC = 0; yC < convInfo.outWidth; ++yC) {
- var yOffset3 = yOffset2 + yC * y.strides[2];
- var xCCorner = yC * convInfo.strideWidth - padLeft;
- for (var wC = 0; wC < filterWidth; ++wC) {
- var xC = xCCorner + wC * dilationWidth;
- if (xC < 0 || xC >= convInfo.inWidth) {
- continue;
- }
- var wOffset2 = wOffset1 + wC * filter.strides[1];
- var xOffset3 = xOffset2 + xC * convInfo.inChannels;
- var yOffset4 = yOffset3;
- var wOffset3 = wOffset2;
- for (var d1 = 0; d1 < convInfo.inChannels; ++d1) {
- var xVal = xVals[xOffset3 + d1];
- for (var q = 0; q < chMul; ++q) {
- yVals[yOffset4 + q] += xVal * wVals[wOffset3 + q];
- }
- yOffset4 += chMul;
- wOffset3 += chMul;
- }
- }
- }
- }
- }
- }
- return y.toTensor();
- };
- MathBackendCPU.prototype.depthwiseConv2DDerInput = function (dy, filter, convInfo) {
- cpu_util_1.assertNotComplex([dy, filter], 'depthwiseConv2DDerInput');
- var dx = ops.buffer(convInfo.inShape, 'float32');
- var dxValues = dx.values;
- var _a = dx.strides, dxS0 = _a[0], dxS1 = _a[1], dxS2 = _a[2];
- var dyValues = this.readSync(dy.dataId);
- var _b = dy.strides, dyS0 = _b[0], dyS1 = _b[1], dyS2 = _b[2];
- var fltValues = this.readSync(filter.dataId);
- var _c = filter.strides, fltS0 = _c[0], fltS1 = _c[1], fltS2 = _c[2];
- var batchSize = convInfo.batchSize, filterHeight = convInfo.filterHeight, filterWidth = convInfo.filterWidth, inChannels = convInfo.inChannels, inHeight = convInfo.inHeight, inWidth = convInfo.inWidth, outChannels = convInfo.outChannels, outHeight = convInfo.outHeight, outWidth = convInfo.outWidth, strideHeight = convInfo.strideHeight, strideWidth = convInfo.strideWidth;
- var topPad = filterHeight - 1 - convInfo.padInfo.top;
- var leftPad = filterWidth - 1 - convInfo.padInfo.left;
- var chMul = outChannels / inChannels;
- for (var b = 0; b < batchSize; ++b) {
- for (var d1 = 0; d1 < inChannels; ++d1) {
- for (var xR = 0; xR < inHeight; ++xR) {
- var xRCorner = xR - topPad;
- var xRMin = Math.max(0, Math.ceil(xRCorner / strideHeight));
- var yRMax = Math.min(outHeight, (filterHeight + xRCorner) / strideHeight);
- for (var xC = 0; xC < inWidth; ++xC) {
- var xCCorner = xC - leftPad;
- var xCMin = Math.max(0, Math.ceil(xCCorner / strideWidth));
- var yCMax = Math.min(outWidth, (filterWidth + xCCorner) / strideWidth);
- var dotProd = 0;
- for (var yR = xRMin; yR < yRMax; ++yR) {
- var wR = yR * strideHeight - xRCorner;
- for (var yC = xCMin; yC < yCMax; ++yC) {
- var wC = yC * strideWidth - xCCorner;
- var dyOffset = dyS0 * b + dyS1 * yR + dyS2 * yC;
- var fltOffset = fltS0 * (filterHeight - 1 - wR) +
- fltS1 * (filterWidth - 1 - wC) + fltS2 * d1;
- for (var dm = 0; dm < chMul; ++dm) {
- var d2 = d1 * chMul + dm;
- var pixel = dyValues[dyOffset + d2];
- var weight = fltValues[fltOffset + dm];
- dotProd += pixel * weight;
- }
- }
- }
- dxValues[dxS0 * b + dxS1 * xR + dxS2 * xC + d1] = dotProd;
- }
- }
- }
- }
- return dx.toTensor();
- };
- MathBackendCPU.prototype.depthwiseConv2DDerFilter = function (x, dy, convInfo) {
- cpu_util_1.assertNotComplex([x, dy], 'depthwiseConv2DDerFilter');
- var strideHeight = convInfo.strideHeight;
- var strideWidth = convInfo.strideWidth;
- var filterHeight = convInfo.filterHeight;
- var filterWidth = convInfo.filterWidth;
- var dW = ops.buffer(convInfo.filterShape, 'float32');
- var leftPad = convInfo.padInfo.left;
- var topPad = convInfo.padInfo.top;
- var chMul = convInfo.outChannels / convInfo.inChannels;
- var xBuf = this.bufferSync(x);
- var dyBuf = this.bufferSync(dy);
- for (var wR = 0; wR < filterHeight; ++wR) {
- var yRMin = Math.max(0, Math.ceil((topPad - wR) / strideHeight));
- var yRMax = Math.min(convInfo.outHeight, (convInfo.inHeight + topPad - wR) / strideHeight);
- for (var wC = 0; wC < filterWidth; ++wC) {
- var yCMin = Math.max(0, Math.ceil((leftPad - wC) / strideWidth));
- var yCMax = Math.min(convInfo.outWidth, (convInfo.inWidth + leftPad - wC) / strideWidth);
- for (var d2 = 0; d2 < convInfo.outChannels; ++d2) {
- var d1 = Math.trunc(d2 / chMul);
- var dm = d2 % chMul;
- var dotProd = 0;
- for (var b = 0; b < convInfo.batchSize; ++b) {
- for (var yR = yRMin; yR < yRMax; ++yR) {
- var xR = wR + yR * strideHeight - topPad;
- for (var yC = yCMin; yC < yCMax; ++yC) {
- var xC = wC + yC * strideWidth - leftPad;
- dotProd += xBuf.get(b, xR, xC, d1) * dyBuf.get(b, yR, yC, d2);
- }
- }
- }
- dW.set(dotProd, wR, wC, d1, dm);
- }
- }
- }
- return dW.toTensor();
- };
- MathBackendCPU.prototype.tile = function (x, reps) {
- cpu_util_1.assertNotComplex(x, 'tile');
- return tile_impl_1.tile(this.bufferSync(x), reps);
- };
- MathBackendCPU.prototype.pad = function (x, paddings, constantValue) {
- cpu_util_1.assertNotComplex(x, 'pad');
- var outShape = paddings.map(function (p, i) { return p[0] /* beforePad */ + x.shape[i] + p[1] /* afterPad */; });
- var start = paddings.map(function (p) { return p[0]; });
- var xBuffer = this.bufferSync(x);
- var buffer = ops.buffer(outShape, x.dtype);
- if (constantValue !== 0) {
- buffer.values.fill(constantValue);
- }
- for (var i = 0; i < x.size; i++) {
- var coords = xBuffer.indexToLoc(i);
- var outCoords = coords.map(function (c, i) { return c + start[i]; });
- buffer.set.apply(buffer, [xBuffer.get.apply(xBuffer, coords)].concat(outCoords));
- }
- return buffer.toTensor();
- };
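- // Transpose: permute each element's coordinates and copy it to the
- // corresponding index in the output buffer.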
- MathBackendCPU.prototype.transpose = function (x, perm) {
- cpu_util_1.assertNotComplex(x, 'transpose');
- var newShape = new Array(x.rank);
- for (var i = 0; i < newShape.length; i++) {
- newShape[i] = x.shape[perm[i]];
- }
- var values = this.readSync(x.dataId);
- var result = ops_1.buffer(newShape, x.dtype);
- var xBuf = this.bufferSync(x);
- for (var i = 0; i < x.size; ++i) {
- var loc = xBuf.indexToLoc(i);
- // Permute location.
- var newLoc = new Array(loc.length);
- for (var i_1 = 0; i_1 < newLoc.length; i_1++) {
- newLoc[i_1] = loc[perm[i_1]];
- }
- var newIndex = result.locToIndex(newLoc);
- result.values[newIndex] = values[i];
- }
- return result.toTensor();
- };
- MathBackendCPU.prototype.gather = function (x, indices, axis) {
- cpu_util_1.assertNotComplex([x, indices], 'gather');
- var newShape = x.shape.slice();
- var indicesValues = this.readSync(indices.dataId);
- newShape[axis] = indicesValues.length;
- var result = ops_1.buffer(newShape, x.dtype);
- var xBuf = this.bufferSync(x);
- for (var i = 0; i < result.size; ++i) {
- var newLoc = result.indexToLoc(i);
- var originalLoc = newLoc.slice();
- originalLoc[axis] = indicesValues[newLoc[axis]];
- var originalIndex = xBuf.locToIndex(originalLoc);
- result.values[i] = xBuf.values[originalIndex];
- }
- return result.toTensor();
- };
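- // batchToSpaceND is expressed as reshape -> transpose -> reshape -> slice;
- // spaceToBatchND below is the mirror-image pad -> reshape -> transpose ->
- // reshape.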
- MathBackendCPU.prototype.batchToSpaceND = function (x, blockShape, crops) {
- cpu_util_1.assertNotComplex([x], 'batchToSpaceND');
- var prod = blockShape.reduce(function (a, b) { return a * b; });
- var reshaped = array_ops_util.getReshaped(x.shape, blockShape, prod);
- var permuted = array_ops_util.getPermuted(reshaped.length, blockShape.length);
- var reshapedPermuted = array_ops_util.getReshapedPermuted(x.shape, blockShape, prod);
- var sliceBeginCoords = array_ops_util.getSliceBeginCoords(crops, blockShape.length);
- var sliceSize = array_ops_util.getSliceSize(reshapedPermuted, crops, blockShape.length);
- return x.reshape(reshaped)
- .transpose(permuted)
- .reshape(reshapedPermuted)
- .slice(sliceBeginCoords, sliceSize);
- };
- MathBackendCPU.prototype.spaceToBatchND = function (x, blockShape, paddings) {
- cpu_util_1.assertNotComplex([x], 'spaceToBatchND');
- var prod = blockShape.reduce(function (a, b) { return a * b; });
- var completePaddings = [[0, 0]];
- completePaddings.push.apply(completePaddings, paddings);
- for (var i = 1 + blockShape.length; i < x.shape.length; ++i) {
- completePaddings.push([0, 0]);
- }
- var paddedX = x.pad(completePaddings);
- var reshapedPaddedShape = array_ops_util.getReshaped(paddedX.shape, blockShape, prod, false);
- var permutedReshapedPaddedPermutation = array_ops_util.getPermuted(reshapedPaddedShape.length, blockShape.length, false);
- var flattenShape = array_ops_util.getReshapedPermuted(paddedX.shape, blockShape, prod, false);
- return paddedX.reshape(reshapedPaddedShape)
- .transpose(permutedReshapedPaddedPermutation)
- .reshape(flattenShape);
- };
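- // Shared 2D pooling kernel: tracks a running max for 'max' pooling and a
- // running sum plus count (so padding is excluded) for 'avg' pooling.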
- MathBackendCPU.prototype.pool = function (x, convInfo, poolType) {
- cpu_util_1.assertNotComplex(x, 'pool');
- var strideHeight = convInfo.strideHeight;
- var strideWidth = convInfo.strideWidth;
- var dilationHeight = convInfo.dilationHeight;
- var dilationWidth = convInfo.dilationWidth;
- var effectiveFilterHeight = convInfo.effectiveFilterHeight;
- var effectiveFilterWidth = convInfo.effectiveFilterWidth;
- var padTop = convInfo.padInfo.top;
- var padLeft = convInfo.padInfo.left;
- var initialValue = (poolType === 'max' ? Number.NEGATIVE_INFINITY :
- Number.POSITIVE_INFINITY);
- var xValues = this.readSync(x.dataId);
- var output = ops.buffer(convInfo.outShape, x.dtype);
- var outputVals = output.values;
- var outputBatchStrides = convInfo.outShape[1] * convInfo.outShape[2] * convInfo.outShape[3];
- var outputRowStrides = convInfo.outShape[2] * convInfo.outShape[3];
- var outputColStrides = convInfo.outShape[3];
- for (var b = 0; b < convInfo.batchSize; ++b) {
- var outputBatchOffset = b * outputBatchStrides;
- var inputBatchOffset = b * x.strides[0];
- for (var d = 0; d < convInfo.inChannels; ++d) {
- for (var yR = 0; yR < convInfo.outHeight; ++yR) {
- var xRCorner = yR * strideHeight - padTop;
- var xRMin = Math.max(0, xRCorner);
- var xRMax = Math.min(convInfo.inHeight, effectiveFilterHeight + xRCorner);
- var outputRowOffset = outputBatchOffset + yR * outputRowStrides;
- for (var yC = 0; yC < convInfo.outWidth; ++yC) {
- var xCCorner = yC * strideWidth - padLeft;
- var xCMin = Math.max(0, xCCorner);
- var xCMax = Math.min(convInfo.inWidth, effectiveFilterWidth + xCCorner);
- var minMaxValue = initialValue;
- var avgValue = 0;
- var count = 0;
- for (var xR = xRMin; xR < xRMax; xR += dilationHeight) {
- var xROffset = inputBatchOffset + xR * x.strides[1];
- for (var xC = xCMin; xC < xCMax; xC += dilationWidth) {
- var xCOffset = xROffset + xC * x.strides[2];
- var pixel = xValues[xCOffset + d];
- if (poolType === 'max' && pixel > minMaxValue) {
- minMaxValue = pixel;
- }
- else if (poolType === 'avg') {
- avgValue += pixel;
- count++;
- }
- }
- if (isNaN(minMaxValue)) {
- break;
- }
- }
- var outputOffset = outputRowOffset + yC * outputColStrides + d;
- outputVals[outputOffset] =
- poolType === 'avg' ? avgValue / count : minMaxValue;
- }
- }
- }
- }
- return output.toTensor();
- };
- MathBackendCPU.prototype.maxPool = function (x, convInfo) {
- return this.pool(x, convInfo, 'max');
- };
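- // Records, for every output pixel, the flattened filter-window position of
- // the max value; maxPoolBackprop uses these positions to route gradients.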
- MathBackendCPU.prototype.maxPoolPositions = function (x, convInfo) {
- var maxPositions = ops.buffer(convInfo.outShape, 'int32');
- var strideHeight = convInfo.strideHeight;
- var strideWidth = convInfo.strideWidth;
- var dilationHeight = convInfo.dilationHeight;
- var dilationWidth = convInfo.dilationWidth;
- var effectiveFilterHeight = convInfo.effectiveFilterHeight;
- var effectiveFilterWidth = convInfo.effectiveFilterWidth;
- var padTop = convInfo.padInfo.top;
- var padLeft = convInfo.padInfo.left;
- var xBuf = this.bufferSync(x);
- for (var b = 0; b < convInfo.batchSize; ++b) {
- for (var d = 0; d < convInfo.inChannels; ++d) {
- for (var yR = 0; yR < convInfo.outHeight; ++yR) {
- var xRCorner = yR * strideHeight - padTop;
- var xRMin = xRCorner;
- while (xRMin < 0) {
- xRMin += dilationHeight;
- }
- // const xRMin = Math.max(0, xRCorner);
- var xRMax = Math.min(convInfo.inHeight, effectiveFilterHeight + xRCorner);
- for (var yC = 0; yC < convInfo.outWidth; ++yC) {
- var xCCorner = yC * strideWidth - padLeft;
- var xCMin = xCCorner;
- while (xCMin < 0) {
- xCMin += dilationWidth;
- }
- var xCMax = Math.min(convInfo.inWidth, effectiveFilterWidth + xCCorner);
- var maxValue = Number.NEGATIVE_INFINITY;
- var maxPosition = -1;
- for (var xR = xRMin; xR < xRMax; xR += dilationHeight) {
- var wR = xR - xRCorner;
- for (var xC = xCMin; xC < xCMax; xC += dilationWidth) {
- var wC = xC - xCCorner;
- var pixel = xBuf.get(b, xR, xC, d);
- if (pixel > maxValue) {
- maxValue = pixel;
- maxPosition = wR * effectiveFilterWidth + wC;
- }
- }
- }
- maxPositions.set(maxPosition, b, yR, yC, d);
- }
- }
- }
- }
- return maxPositions.toTensor();
- };
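- // Max-pool backprop: dy flows only to the input position that won the max;
- // each candidate window position is compared against the flipped maxPos
- // index recorded above.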
- MathBackendCPU.prototype.maxPoolBackprop = function (dy, x, y, convInfo) {
- cpu_util_1.assertNotComplex([x, y], 'maxPoolBackprop');
- var maxPositions = this.maxPoolPositions(x, convInfo);
- var strideHeight = convInfo.strideHeight;
- var strideWidth = convInfo.strideWidth;
- var dilationHeight = convInfo.dilationHeight;
- var dilationWidth = convInfo.dilationWidth;
- var effectiveFilterHeight = convInfo.effectiveFilterHeight;
- var effectiveFilterWidth = convInfo.effectiveFilterWidth;
- var padLeft = effectiveFilterWidth - 1 - convInfo.padInfo.left;
- var padTop = effectiveFilterHeight - 1 - convInfo.padInfo.top;
- var dx = ops.buffer(x.shape, 'float32');
- var maxPosBuf = this.bufferSync(maxPositions);
- var dyBuf = this.bufferSync(dy);
- for (var b = 0; b < convInfo.batchSize; ++b) {
- for (var d = 0; d < convInfo.inChannels; ++d) {
- for (var dxR = 0; dxR < convInfo.inHeight; ++dxR) {
- for (var dxC = 0; dxC < convInfo.inWidth; ++dxC) {
- // Shader code begins.
- var dyRCorner = dxR - padTop;
- var dyCCorner = dxC - padLeft;
- var dotProd = 0;
- for (var wR = 0; wR < effectiveFilterHeight; wR += dilationHeight) {
- var dyR = (dyRCorner + wR) / strideHeight;
- if (dyR < 0 || dyR >= convInfo.outHeight ||
- Math.floor(dyR) !== dyR) {
- continue;
- }
- for (var wC = 0; wC < effectiveFilterWidth; wC += dilationWidth) {
- var dyC = (dyCCorner + wC) / strideWidth;
- if (dyC < 0 || dyC >= convInfo.outWidth ||
- Math.floor(dyC) !== dyC) {
- continue;
- }
- var maxPos = effectiveFilterHeight * effectiveFilterWidth -
- 1 - maxPosBuf.get(b, dyR, dyC, d);
- var curPos = wR * effectiveFilterWidth + wC;
- var mask = maxPos === curPos ? 1 : 0;
- if (mask === 0) {
- continue;
- }
- var pixel = dyBuf.get(b, dyR, dyC, d);
- dotProd += pixel * mask;
- }
- }
- dx.set(dotProd, b, dxR, dxC, d);
- }
- }
- }
- }
- return dx.toTensor();
- };
- MathBackendCPU.prototype.avgPoolBackprop = function (dy, x, convInfo) {
- cpu_util_1.assertNotComplex([dy, x], 'avgPoolBackprop');
- var strideHeight = convInfo.strideHeight;
- var strideWidth = convInfo.strideWidth;
- var filterHeight = convInfo.filterHeight;
- var filterWidth = convInfo.filterWidth;
- var dilationHeight = convInfo.dilationHeight;
- var dilationWidth = convInfo.dilationWidth;
- var effectiveFilterHeight = convInfo.effectiveFilterHeight;
- var effectiveFilterWidth = convInfo.effectiveFilterWidth;
- var padLeft = effectiveFilterWidth - 1 - convInfo.padInfo.left;
- var padTop = effectiveFilterHeight - 1 - convInfo.padInfo.top;
- var dx = ops.buffer(x.shape, 'float32');
- var avgMultiplier = 1 / (filterHeight * filterWidth);
- var dyBuf = this.bufferSync(dy);
- for (var b = 0; b < convInfo.batchSize; ++b) {
- for (var d = 0; d < convInfo.inChannels; ++d) {
- for (var dxR = 0; dxR < convInfo.inHeight; ++dxR) {
- for (var dxC = 0; dxC < convInfo.inWidth; ++dxC) {
- // Shader code begins.
- var dyRCorner = dxR - padTop;
- var dyCCorner = dxC - padLeft;
- var dotProd = 0;
- for (var wR = 0; wR < effectiveFilterHeight; wR += dilationHeight) {
- var dyR = (dyRCorner + wR) / strideHeight;
- if (dyR < 0 || dyR >= convInfo.outHeight ||
- Math.floor(dyR) !== dyR) {
- continue;
- }
- for (var wC = 0; wC < effectiveFilterWidth; wC += dilationWidth) {
- var dyC = (dyCCorner + wC) / strideWidth;
- if (dyC < 0 || dyC >= convInfo.outWidth ||
- Math.floor(dyC) !== dyC) {
- continue;
- }
- var pixel = dyBuf.get(b, dyR, dyC, d);
- dotProd += pixel;
- }
- }
- dx.set(dotProd * avgMultiplier, b, dxR, dxC, d);
- }
- }
- }
- }
- return dx.toTensor();
- };
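- // 3D analogue of pool(): same max/avg bookkeeping over an extra depth axis,
- // with NaN short-circuiting out of all three window loops.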
- MathBackendCPU.prototype.pool3d = function (x, convInfo, poolType) {
- cpu_util_1.assertNotComplex(x, 'pool3d');
- var strideDepth = convInfo.strideDepth;
- var strideHeight = convInfo.strideHeight;
- var strideWidth = convInfo.strideWidth;
- var dilationDepth = convInfo.dilationDepth;
- var dilationHeight = convInfo.dilationHeight;
- var dilationWidth = convInfo.dilationWidth;
- var effectiveFilterDepth = convInfo.effectiveFilterDepth;
- var effectiveFilterHeight = convInfo.effectiveFilterHeight;
- var effectiveFilterWidth = convInfo.effectiveFilterWidth;
- var padFront = convInfo.padInfo.front;
- var padTop = convInfo.padInfo.top;
- var padLeft = convInfo.padInfo.left;
- var initialValue = (poolType === 'max' ? Number.NEGATIVE_INFINITY :
- Number.POSITIVE_INFINITY);
- var xValues = this.readSync(x.dataId);
- var output = ops.buffer(convInfo.outShape, x.dtype);
- var outputVals = output.values;
- var outputBatchStrides = convInfo.outShape[1] * convInfo.outShape[2] *
- convInfo.outShape[3] * convInfo.outShape[4];
- var outputDepthStrides = convInfo.outShape[2] * convInfo.outShape[3] * convInfo.outShape[4];
- var outputRowStrides = convInfo.outShape[3] * convInfo.outShape[4];
- var outputColStrides = convInfo.outShape[4];
- for (var batch = 0; batch < convInfo.batchSize; ++batch) {
- var outputBatchOffset = batch * outputBatchStrides;
- var inputBatchOffset = batch * x.strides[0];
- for (var channel = 0; channel < convInfo.inChannels; ++channel) {
- for (var yDepth = 0; yDepth < convInfo.outDepth; ++yDepth) {
- var xDepthCorner = yDepth * strideDepth - padFront;
- var xDepthMin = xDepthCorner;
- while (xDepthMin < 0) {
- xDepthMin += dilationDepth;
- }
- var xDepthMax = Math.min(convInfo.inDepth, effectiveFilterDepth + xDepthCorner);
- var outputDepthOffset = outputBatchOffset + yDepth * outputDepthStrides;
- for (var yRow = 0; yRow < convInfo.outHeight; ++yRow) {
- var xRowCorner = yRow * strideHeight - padTop;
- var xRowMin = xRowCorner;
- while (xRowMin < 0) {
- xRowMin += dilationHeight;
- }
- var xRowMax = Math.min(convInfo.inHeight, effectiveFilterHeight + xRowCorner);
- var outputRowOffset = outputDepthOffset + yRow * outputRowStrides;
- for (var yCol = 0; yCol < convInfo.outWidth; ++yCol) {
- var xColCorner = yCol * strideWidth - padLeft;
- var xColMin = xColCorner;
- while (xColMin < 0) {
- xColMin += dilationWidth;
- }
- var xColMax = Math.min(convInfo.inWidth, effectiveFilterWidth + xColCorner);
- // Shader code begins
- var outputColOffset = outputRowOffset + yCol * outputColStrides;
- var minMaxValue = initialValue;
- var avgValue = 0;
- var count = 0;
- for (var xDepth = xDepthMin; xDepth < xDepthMax; xDepth += dilationDepth) {
- var xDepthOffset = inputBatchOffset + xDepth * x.strides[1];
- for (var xRow = xRowMin; xRow < xRowMax; xRow += dilationHeight) {
- var xRowOffset = xDepthOffset + xRow * x.strides[2];
- for (var xCol = xColMin; xCol < xColMax; xCol += dilationWidth) {
- var xColOffset = xRowOffset + xCol * x.strides[3];
- var pixel = xValues[xColOffset + channel];
- if (poolType === 'max' && pixel > minMaxValue) {
- minMaxValue = pixel;
- }
- else if (poolType === 'avg') {
- avgValue += pixel;
- count++;
- }
- if (isNaN(minMaxValue)) {
- break;
- }
- }
- if (isNaN(minMaxValue)) {
- break;
- }
- }
- if (isNaN(minMaxValue)) {
- break;
- }
- }
- var outputOffset = outputColOffset + channel;
- outputVals[outputOffset] =
- poolType === 'avg' ? avgValue / count : minMaxValue;
- }
- }
- }
- }
- }
- return output.toTensor();
- };
- MathBackendCPU.prototype.avgPool3d = function (x, convInfo) {
- cpu_util_1.assertNotComplex(x, 'avgPool3d');
- return this.pool3d(x, convInfo, 'avg').toFloat();
- };
- MathBackendCPU.prototype.avgPool3dBackprop = function (dy, x, convInfo) {
- cpu_util_1.assertNotComplex([dy, x], 'avgPool3dBackprop');
- var strideDepth = convInfo.strideDepth;
- var strideHeight = convInfo.strideHeight;
- var strideWidth = convInfo.strideWidth;
- var filterDepth = convInfo.filterDepth;
- var filterHeight = convInfo.filterHeight;
- var filterWidth = convInfo.filterWidth;
- var dilationDepth = convInfo.dilationDepth;
- var dilationHeight = convInfo.dilationHeight;
- var dilationWidth = convInfo.dilationWidth;
- var effectiveFilterDepth = convInfo.effectiveFilterDepth;
- var effectiveFilterHeight = convInfo.effectiveFilterHeight;
- var effectiveFilterWidth = convInfo.effectiveFilterWidth;
- var padFront = effectiveFilterDepth - 1 - convInfo.padInfo.front;
- var padLeft = effectiveFilterWidth - 1 - convInfo.padInfo.left;
- var padTop = effectiveFilterHeight - 1 - convInfo.padInfo.top;
- var dx = ops.buffer(x.shape, 'float32');
- var avgMultiplier = 1 / (filterDepth * filterHeight * filterWidth);
- var dyBuf = this.bufferSync(dy);
- for (var batch = 0; batch < convInfo.batchSize; ++batch) {
- for (var channel = 0; channel < convInfo.inChannels; ++channel) {
- for (var dxDepth = 0; dxDepth < convInfo.inDepth; ++dxDepth) {
- for (var dxRow = 0; dxRow < convInfo.inHeight; ++dxRow) {
- for (var dxCol = 0; dxCol < convInfo.inWidth; ++dxCol) {
- // Shader code begins.
- var dyDepthCorner = dxDepth - padFront;
- var dyRowCorner = dxRow - padTop;
- var dyColCorner = dxCol - padLeft;
- var dotProd = 0;
- for (var wDepth = 0; wDepth < effectiveFilterDepth; wDepth += dilationDepth) {
- var dyDepth = (dyDepthCorner + wDepth) / strideDepth;
- if (dyDepth < 0 || dyDepth >= convInfo.outDepth ||
- Math.floor(dyDepth) !== dyDepth) {
- continue;
- }
- for (var wRow = 0; wRow < effectiveFilterHeight; wRow += dilationHeight) {
- var dyRow = (dyRowCorner + wRow) / strideHeight;
- if (dyRow < 0 || dyRow >= convInfo.outHeight ||
- Math.floor(dyRow) !== dyRow) {
- continue;
- }
- for (var wCol = 0; wCol < effectiveFilterWidth; wCol += dilationWidth) {
- var dyCol = (dyColCorner + wCol) / strideWidth;
- if (dyCol < 0 || dyCol >= convInfo.outWidth ||
- Math.floor(dyCol) !== dyCol) {
- continue;
- }
- var pixel = dyBuf.get(batch, dyDepth, dyRow, dyCol, channel);
- dotProd += pixel;
- }
- }
- }
- dx.set(dotProd * avgMultiplier, batch, dxDepth, dxRow, dxCol, channel);
- }
- }
- }
- }
- }
- return dx.toTensor();
- };
- MathBackendCPU.prototype.maxPool3d = function (x, convInfo) {
- cpu_util_1.assertNotComplex(x, 'maxPool3d');
- return this.pool3d(x, convInfo, 'max').toFloat();
- };
- MathBackendCPU.prototype.maxPool3dPositions = function (x, convInfo) {
- var maxPositions = ops.buffer(convInfo.outShape, 'int32');
- var strideDepth = convInfo.strideDepth;
- var strideHeight = convInfo.strideHeight;
- var strideWidth = convInfo.strideWidth;
- var dilationDepth = convInfo.dilationDepth;
- var dilationHeight = convInfo.dilationHeight;
- var dilationWidth = convInfo.dilationWidth;
- var effectiveFilterDepth = convInfo.effectiveFilterDepth;
- var effectiveFilterHeight = convInfo.effectiveFilterHeight;
- var effectiveFilterWidth = convInfo.effectiveFilterWidth;
- var padFront = convInfo.padInfo.front;
- var padTop = convInfo.padInfo.top;
- var padLeft = convInfo.padInfo.left;
- var xBuf = this.bufferSync(x);
- for (var batch = 0; batch < convInfo.batchSize; ++batch) {
- for (var channel = 0; channel < convInfo.inChannels; ++channel) {
- for (var yDepth = 0; yDepth < convInfo.outDepth; ++yDepth) {
- var xDepthCorner = yDepth * strideDepth - padFront;
- var xDepthMin = xDepthCorner;
- while (xDepthMin < 0) {
- xDepthMin += dilationDepth;
- }
- var xDepthMax = Math.min(convInfo.inDepth, effectiveFilterDepth + xDepthCorner);
- for (var yRow = 0; yRow < convInfo.outHeight; ++yRow) {
- var xRowCorner = yRow * strideHeight - padTop;
- var xRowMin = xRowCorner;
- while (xRowMin < 0) {
- xRowMin += dilationHeight;
- }
- var xRowMax = Math.min(convInfo.inHeight, effectiveFilterHeight + xRowCorner);
- for (var yCol = 0; yCol < convInfo.outWidth; ++yCol) {
- var xColCorner = yCol * strideWidth - padLeft;
- var xColMin = xColCorner;
- while (xColMin < 0) {
- xColMin += dilationWidth;
- }
- var xColMax = Math.min(convInfo.inWidth, effectiveFilterWidth + xColCorner);
- // Shader code begins
- var maxValue = Number.NEGATIVE_INFINITY;
- var maxPosition = -1;
- for (var xDepth = xDepthMin; xDepth < xDepthMax; xDepth += dilationDepth) {
- var wDepth = xDepth - xDepthCorner;
- for (var xRow = xRowMin; xRow < xRowMax; xRow += dilationHeight) {
- var wRow = xRow - xRowCorner;
- for (var xCol = xColMin; xCol < xColMax; xCol += dilationWidth) {
- var wCol = xCol - xColCorner;
- var pixel = xBuf.get(batch, xDepth, xRow, xCol, channel);
- if (pixel >= maxValue) {
- maxValue = pixel;
- maxPosition = wDepth * effectiveFilterHeight *
- effectiveFilterWidth +
- wRow * effectiveFilterWidth + wCol;
- }
- }
- }
- }
- maxPositions.set(maxPosition, batch, yDepth, yRow, yCol, channel);
- }
- }
- }
- }
- }
- return maxPositions.toTensor();
- };
- MathBackendCPU.prototype.maxPool3dBackprop = function (dy, x, y, convInfo) {
- cpu_util_1.assertNotComplex([x, y], 'maxPool3dBackprop');
- var maxPositions = this.maxPool3dPositions(x, convInfo);
- var strideDepth = convInfo.strideDepth;
- var strideHeight = convInfo.strideHeight;
- var strideWidth = convInfo.strideWidth;
- var dilationDepth = convInfo.dilationDepth;
- var dilationHeight = convInfo.dilationHeight;
- var dilationWidth = convInfo.dilationWidth;
- var effectiveFilterDepth = convInfo.effectiveFilterDepth;
- var effectiveFilterHeight = convInfo.effectiveFilterHeight;
- var effectiveFilterWidth = convInfo.effectiveFilterWidth;
- var padFront = effectiveFilterDepth - 1 - convInfo.padInfo.front;
- var padLeft = effectiveFilterWidth - 1 - convInfo.padInfo.left;
- var padTop = effectiveFilterHeight - 1 - convInfo.padInfo.top;
- var dx = ops.buffer(x.shape, 'float32');
- var maxPosBuf = this.bufferSync(maxPositions);
- var dyBuf = this.bufferSync(dy);
- for (var batch = 0; batch < convInfo.batchSize; ++batch) {
- for (var channel = 0; channel < convInfo.inChannels; ++channel) {
- for (var dxDepth = 0; dxDepth < convInfo.inDepth; ++dxDepth) {
- for (var dxRow = 0; dxRow < convInfo.inHeight; ++dxRow) {
- for (var dxCol = 0; dxCol < convInfo.inWidth; ++dxCol) {
- // Shader code begins
- var dyDepthCorner = dxDepth - padFront;
- var dyRowCorner = dxRow - padTop;
- var dyColCorner = dxCol - padLeft;
- var dotProd = 0;
- for (var wDepth = 0; wDepth < effectiveFilterDepth; wDepth += dilationDepth) {
- var dyDepth = (dyDepthCorner + wDepth) / strideDepth;
- if (dyDepth < 0 || dyDepth >= convInfo.outDepth ||
- Math.floor(dyDepth) !== dyDepth) {
- continue;
- }
- for (var wRow = 0; wRow < effectiveFilterHeight; wRow += dilationHeight) {
- var dyRow = (dyRowCorner + wRow) / strideHeight;
- if (dyRow < 0 || dyRow >= convInfo.outHeight ||
- Math.floor(dyRow) !== dyRow) {
- continue;
- }
- for (var wCol = 0; wCol < effectiveFilterWidth; wCol += dilationWidth) {
- var dyCol = (dyColCorner + wCol) / strideWidth;
- if (dyCol < 0 || dyCol >= convInfo.outWidth ||
- Math.floor(dyCol) !== dyCol) {
- continue;
- }
- var maxPos = effectiveFilterDepth *
- effectiveFilterHeight * effectiveFilterWidth -
- 1 -
- maxPosBuf.get(batch, dyDepth, dyRow, dyCol, channel);
- var curPos = wDepth * effectiveFilterHeight * effectiveFilterWidth +
- wRow * effectiveFilterWidth + wCol;
- var mask = maxPos === curPos ? 1 : 0;
- if (mask === 0) {
- continue;
- }
- var pixel = dyBuf.get(batch, dyDepth, dyRow, dyCol, channel);
- dotProd += pixel * mask;
- }
- }
- }
- dx.set(dotProd, batch, dxDepth, dxRow, dxCol, channel);
- }
- }
- }
- }
- }
- return dx.toTensor();
- };
- MathBackendCPU.prototype.cast = function (x, dtype) {
- return backend_util.castTensor(x, dtype, this);
- };
- MathBackendCPU.prototype.reshape = function (x, shape) {
- return backend_util.reshapeTensor(x, shape);
- };
- MathBackendCPU.prototype.avgPool = function (x, convInfo) {
- cpu_util_1.assertNotComplex(x, 'avgPool');
- return this.pool(x, convInfo, 'avg').toFloat();
- };
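- // Bilinear resize: for each output pixel, find the four nearest source
- // pixels and blend them with the fractional row/column weights.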
- MathBackendCPU.prototype.resizeBilinear = function (x, newHeight, newWidth, alignCorners) {
- cpu_util_1.assertNotComplex(x, 'resizeBilinear');
- var _a = x.shape, batch = _a[0], oldHeight = _a[1], oldWidth = _a[2], numChannels = _a[3];
- var xValues = this.readSync(x.dataId);
- var result = new Float32Array(util.sizeFromShape([batch, newHeight, newWidth, numChannels]));
- var effectiveInputSize = [
- (alignCorners && newHeight > 1) ? oldHeight - 1 : oldHeight,
- (alignCorners && newWidth > 1) ? oldWidth - 1 : oldWidth
- ];
- var effectiveOutputSize = [
- (alignCorners && newHeight > 1) ? newHeight - 1 : newHeight,
- (alignCorners && newWidth > 1) ? newWidth - 1 : newWidth
- ];
- var outputIdx = 0;
- var effectiveRowSizeRatio = effectiveInputSize[0] / effectiveOutputSize[0];
- var effectiveColSizeRatio = effectiveInputSize[1] / effectiveOutputSize[1];
- for (var b = 0; b < batch; b++) {
- for (var r = 0; r < newHeight; r++) {
- var sourceFracRow = effectiveRowSizeRatio * r;
- var sourceRowFloor = Math.floor(sourceFracRow);
- var rowFrac = sourceFracRow - sourceRowFloor;
- var sourceRowCeil = Math.min(oldHeight - 1, Math.ceil(sourceFracRow));
- var topRowOffset = b * x.strides[0] + sourceRowFloor * x.strides[1];
- var botRowOffset = b * x.strides[0] + sourceRowCeil * x.strides[1];
- for (var c = 0; c < newWidth; c++) {
- var sourceFracCol = effectiveColSizeRatio * c;
- var sourceColFloor = Math.floor(sourceFracCol);
- var colFrac = sourceFracCol - sourceColFloor;
- var sourceColCeil = Math.min(oldWidth - 1, Math.ceil(sourceFracCol));
- var topLeftOffset = topRowOffset + sourceColFloor * x.strides[2];
- var botLeftOffset = botRowOffset + sourceColFloor * x.strides[2];
- var topRightOffset = topRowOffset + sourceColCeil * x.strides[2];
- var botRightOffset = botRowOffset + sourceColCeil * x.strides[2];
- for (var d = 0; d < numChannels; d++) {
- // Begin shader.
- // Compute the fractional index of the source.
- var topLeft = xValues[topLeftOffset + d];
- var bottomLeft = xValues[botLeftOffset + d];
- var topRight = xValues[topRightOffset + d];
- var bottomRight = xValues[botRightOffset + d];
- var top_1 = topLeft + (topRight - topLeft) * colFrac;
- var bottom = bottomLeft + (bottomRight - bottomLeft) * colFrac;
- var newValue = top_1 + (bottom - top_1) * rowFrac;
- result[outputIdx++] = newValue;
- }
- }
- }
- }
- return ops.tensor(result, [batch, newHeight, newWidth, numChannels]);
- };
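- // Backprop of bilinear resize: scatter each dy value onto the four source
- // pixels that produced it, weighted by the same interpolation coefficients.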
- MathBackendCPU.prototype.resizeBilinearBackprop = function (dy, x, alignCorners) {
- cpu_util_1.assertNotComplex([dy, x], 'resizeBilinearBackprop');
- var _a = x.shape, batch = _a[0], xHeight = _a[1], xWidth = _a[2], depth = _a[3];
- var _b = dy.shape, yHeight = _b[1], yWidth = _b[2];
- var output = new Float32Array(batch * xHeight * xWidth * depth);
- // In the backwards pass, we want to find the pixels that were generated
- // from each pixel in the input image in the forward pass and add the
- // corresponding coefficient from dy to the gradient (with some
- // interpolation).
- var effectiveXSize = [
- (alignCorners && yHeight > 1) ? xHeight - 1 : xHeight,
- (alignCorners && yWidth > 1) ? xWidth - 1 : xWidth
- ];
- var effectiveYSize = [
- (alignCorners && yHeight > 1) ? yHeight - 1 : yHeight,
- (alignCorners && yWidth > 1) ? yWidth - 1 : yWidth
- ];
- var heightScale = effectiveXSize[0] / effectiveYSize[0];
- var widthScale = effectiveXSize[1] / effectiveYSize[1];
- // Reference implementation
- // tslint:disable-next-line:max-line-length
- // https://github.com/tensorflow/tensorflow/blob/3039375c86a5bbc9610c7725dcaa95d635f87ba2/tensorflow/core/kernels/resize_bilinear_op.cc#L275
- var dyValues = this.readSync(dy.dataId);
- var offset = 0;
- for (var b = 0; b < batch; b++) {
- var bOffset = b * x.strides[0];
- for (var r = 0; r < yHeight; r++) {
- var dxR = r * heightScale;
- var topDxRIndex = Math.floor(dxR);
- var bottomDxRIndex = Math.min(Math.ceil(dxR), xHeight - 1);
- var topDxROffset = bOffset + topDxRIndex * x.strides[1];
- var bottomDxROffset = bOffset + bottomDxRIndex * x.strides[1];
- var dxRLerp = dxR - topDxRIndex;
- var inverseDxRLerp = 1.0 - dxRLerp;
- for (var c = 0; c < yWidth; c++) {
- var dxC = c * widthScale;
- var leftDxCIndex = Math.floor(dxC);
- var rightDxCIndex = Math.min(Math.ceil(dxC), xWidth - 1);
- var dxCLerp = dxC - leftDxCIndex;
- var inverseDxCLerp = 1.0 - dxCLerp;
- var topLeftRCOffset = topDxROffset + leftDxCIndex * x.strides[2];
- var topRightRCOffset = topDxROffset + rightDxCIndex * x.strides[2];
- var bottomLeftRCOffset = bottomDxROffset + leftDxCIndex * x.strides[2];
- var bottomRightRCOffset = bottomDxROffset + rightDxCIndex * x.strides[2];
- var inverseDxRLerpTimesInverseDxCLerp = inverseDxRLerp * inverseDxCLerp;
- var inverseDxRLerpTimesDxCLerp = inverseDxRLerp * dxCLerp;
- var dxRLerpTimesInverseDxCLerp = dxRLerp * inverseDxCLerp;
- var dxRLerpTimesDxCLerp = dxRLerp * dxCLerp;
- for (var d = 0; d < depth; d++) {
- var dyVal = dyValues[offset++];
- output[topLeftRCOffset + d] +=
- dyVal * inverseDxRLerpTimesInverseDxCLerp;
- output[topRightRCOffset + d] += dyVal * inverseDxRLerpTimesDxCLerp;
- output[bottomLeftRCOffset + d] +=
- dyVal * dxRLerpTimesInverseDxCLerp;
- output[bottomRightRCOffset + d] += dyVal * dxRLerpTimesDxCLerp;
- }
- }
- }
- }
- return ops.tensor4d(output, [batch, xHeight, xWidth, depth], x.dtype);
- };
- MathBackendCPU.prototype.resizeNearestNeighbor = function (x, newHeight, newWidth, alignCorners) {
- cpu_util_1.assertNotComplex(x, 'resizeNearestNeighbor');
- var _a = x.shape, batch = _a[0], oldHeight = _a[1], oldWidth = _a[2], numChannels = _a[3];
- var xValues = this.readSync(x.dataId);
- var output = new Float32Array(batch * newHeight * newWidth * numChannels);
- var effectiveInputSize = [
- (alignCorners && newHeight > 1) ? oldHeight - 1 : oldHeight,
- (alignCorners && newWidth > 1) ? oldWidth - 1 : oldWidth
- ];
- var effectiveOutputSize = [
- (alignCorners && newHeight > 1) ? newHeight - 1 : newHeight,
- (alignCorners && newWidth > 1) ? newWidth - 1 : newWidth
- ];
- var effectiveRowSizeRatio = effectiveInputSize[0] / effectiveOutputSize[0];
- var effectiveColSizeRatio = effectiveInputSize[1] / effectiveOutputSize[1];
- var outputOffset = 0;
- for (var b = 0; b < batch; b++) {
- var batchOffset = b * x.strides[0];
- for (var r = 0; r < newHeight; r++) {
- var sourceFracRow = effectiveRowSizeRatio * r;
- var sourceNearestRow = Math.min(oldHeight - 1, alignCorners ? Math.round(sourceFracRow) :
- Math.floor(sourceFracRow));
- var rowOffset = batchOffset + sourceNearestRow * x.strides[1];
- for (var c = 0; c < newWidth; c++) {
- var sourceFracCol = effectiveColSizeRatio * c;
- var sourceNearestCol = Math.min(oldWidth - 1, alignCorners ? Math.round(sourceFracCol) :
- Math.floor(sourceFracCol));
- var colOffset = rowOffset + sourceNearestCol * x.strides[2];
- for (var d = 0; d < numChannels; d++) {
- // Begin shader.
- // Compute the fractional index of the source.
- var newVal = xValues[colOffset + d];
- output[outputOffset++] = newVal;
- }
- }
- }
- }
- return ops.tensor(output, [batch, newHeight, newWidth, numChannels], x.dtype);
- };
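- // Backprop of nearest-neighbor resize: for each input pixel, scan a small
- // window of dy and accumulate the entries whose nearest source pixel is it.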
-    MathBackendCPU.prototype.resizeNearestNeighborBackprop = function (dy, x, alignCorners) {
-        cpu_util_1.assertNotComplex([dy, x], 'resizeNearestNeighborBackprop');
-        var _a = x.shape, batch = _a[0], xHeight = _a[1], xWidth = _a[2], depth = _a[3];
-        var _b = dy.shape, yHeight = _b[1], yWidth = _b[2];
-        var output = new Float32Array(batch * xHeight * xWidth * depth);
-        var dyValues = this.readSync(dy.dataId);
-        // In the backwards pass, we want to find the pixels that were
-        // generated from each pixel of the input image in the forward pass.
-        var effectiveXSize = [
-            (alignCorners && yHeight > 1) ? xHeight - 1 : xHeight,
-            (alignCorners && yWidth > 1) ? xWidth - 1 : xWidth
-        ];
-        var effectiveYSize = [
-            (alignCorners && yHeight > 1) ? yHeight - 1 : yHeight,
-            (alignCorners && yWidth > 1) ? yWidth - 1 : yWidth
-        ];
-        var heightScale = effectiveXSize[0] / effectiveYSize[0];
-        var widthScale = effectiveXSize[1] / effectiveYSize[1];
-        var invHeightScale = 1 / heightScale;
-        var invWidthScale = 1 / widthScale;
-        // This defines the size of the window of values around a particular
-        // index in dy that we want to search for contributions to dx.
-        var winHeight = (Math.ceil(invHeightScale) * 2) + 2;
-        var winWidth = (Math.ceil(invWidthScale) * 2) + 2;
-        // Loop over the output space.
-        for (var b = 0; b < batch; b++) {
-            var batchOffset = b * x.strides[0];
-            for (var r = 0; r < xHeight; r++) {
-                var rowOffset = batchOffset + r * x.strides[1];
-                // Compute bounds for where in dy we will look.
-                var startRLerp = Math.floor(r * invHeightScale);
-                var startDyR = Math.floor(startRLerp - (winHeight / 2));
-                for (var c = 0; c < xWidth; c++) {
-                    var colOffset = rowOffset + c * x.strides[2];
-                    // Compute bounds for where in dy we will look.
-                    var startCLerp = Math.floor(c * invWidthScale);
-                    var startDyC = Math.floor(startCLerp - (winWidth / 2));
-                    for (var d = 0; d < depth; d++) {
-                        var accum = 0;
-                        // Loop over dy.
-                        for (var dyRIndex = 0; dyRIndex < winHeight; dyRIndex++) {
-                            var dyR = dyRIndex + startDyR;
-                            // Guard against the window exceeding the bounds of dy.
-                            if (dyR < 0 || dyR >= yHeight) {
-                                continue;
-                            }
-                            // Index into dy with dy's own batch stride; x and dy
-                            // generally have different spatial sizes.
-                            var dyROffset = b * dy.strides[0] + dyR * dy.strides[1];
-                            var sourceFracRow = dyR * heightScale;
-                            var sourceNearestRow = Math.min(xHeight - 1, alignCorners ? Math.round(sourceFracRow) :
-                                Math.floor(sourceFracRow));
-                            if (r !== sourceNearestRow) {
-                                continue;
-                            }
-                            for (var dyCIndex = 0; dyCIndex < winWidth; dyCIndex++) {
-                                var dyC = dyCIndex + startDyC;
-                                // Guard against the window exceeding the bounds of dy.
-                                if (dyC < 0 || dyC >= yWidth) {
-                                    continue;
-                                }
-                                var dyCOffset = dyROffset + dyC * dy.strides[2];
-                                var sourceFracCol = dyC * widthScale;
-                                var sourceNearestCol = Math.min(xWidth - 1, alignCorners ? Math.round(sourceFracCol) :
-                                    Math.floor(sourceFracCol));
-                                if (c === sourceNearestCol) {
-                                    accum += dyValues[dyCOffset + d];
-                                }
-                            }
-                        }
-                        output[colOffset + d] = accum;
-                    }
-                }
-            }
-        }
-        return ops.tensor4d(output, x.shape, x.dtype);
-    };
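-    /**
-     * Computes offset + (x - mean) * scale / sqrt(variance + varianceEpsilon)
-     * elementwise, cycling through the (possibly shorter) mean, variance,
-     * scale and offset arrays to emulate broadcasting.
-     */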
-    MathBackendCPU.prototype.batchNormalization = function (x, mean, variance, varianceEpsilon, scale, offset) {
-        cpu_util_1.assertNotComplex([x, mean, variance, scale, offset], 'batchNorm');
-        var xVals = this.readSync(x.dataId);
-        var mVals = this.readSync(mean.dataId);
-        var varVals = this.readSync(variance.dataId);
-        var sVals = scale ? this.readSync(scale.dataId) : new Float32Array([1]);
-        var offVals = offset ? this.readSync(offset.dataId) : new Float32Array([0]);
-        var outVals = new Float32Array(xVals.length);
-        var offValsLength = offVals.length;
-        var sValsLength = sVals.length;
-        var varValsLength = varVals.length;
-        var mValsLength = mVals.length;
-        var offi = 0;
-        var mi = 0;
-        var si = 0;
-        var vi = 0;
-        for (var i = 0; i < xVals.length; ++i) {
-            outVals[i] = offVals[offi++] +
-                (xVals[i] - mVals[mi++]) * sVals[si++] /
-                    Math.sqrt(varVals[vi++] + varianceEpsilon);
-            if (offi >= offValsLength) {
-                offi = 0;
-            }
-            if (mi >= mValsLength) {
-                mi = 0;
-            }
-            if (si >= sValsLength) {
-                si = 0;
-            }
-            if (vi >= varValsLength) {
-                vi = 0;
-            }
-        }
-        return ops_1.tensor4d(outVals, x.shape);
-    };
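-    /**
-     * Local response normalization across channels:
-     * out = x * (bias + alpha * sum(x_j^2 over j within depthRadius)) ^ -beta.
-     */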
-    MathBackendCPU.prototype.localResponseNormalization4D = function (x, depthRadius, bias, alpha, beta) {
-        cpu_util_1.assertNotComplex(x, 'localResponseNormalization4D');
-        var channels = x.shape[3];
-        var maxD = channels - 1;
-        var xValues = this.readSync(x.dataId);
-        var size = x.size;
-        var result = new Float32Array(size);
-        function sumAcrossChannels(offset) {
-            var currentChannel = offset % channels;
-            var beginSumOffset = offset - currentChannel + Math.max(0, currentChannel - depthRadius);
-            var endSumOffset = offset - currentChannel +
-                Math.min(currentChannel + depthRadius, maxD);
-            var sum = 0.0;
-            for (; beginSumOffset <= endSumOffset; beginSumOffset++) {
-                var z = xValues[beginSumOffset];
-                sum += z * z;
-            }
-            return sum;
-        }
-        for (var offset = 0; offset < size; offset++) {
-            var sum = sumAcrossChannels(offset);
-            var val = xValues[offset] * Math.pow(bias + alpha * sum, -beta);
-            result[offset] = val;
-        }
-        return ops.tensor4d(result, x.shape);
-    };
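-    /**
-     * Gradient of local response normalization. For each position, dy is
-     * distributed over the input channels inside the depthRadius window that
-     * contributed to that position's normalization sum.
-     */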
-    MathBackendCPU.prototype.LRNGrad = function (dy, inputImage, outputImage, depthRadius, bias, alpha, beta) {
-        cpu_util_1.assertNotComplex(dy, 'LRNGrad');
-        var channels = dy.shape[3];
-        var dyValues = this.readSync(dy.dataId);
-        var inputImageValues = this.readSync(inputImage.dataId);
-        var outputImageValues = this.readSync(outputImage.dataId);
-        var result = new Float32Array(dy.size);
-        var size = dy.size;
-        for (var offset = 0; offset < size; offset++) {
-            var currentChannel = offset % channels;
-            var depthBegin = (offset - currentChannel) + Math.max(0, currentChannel - depthRadius);
-            var depthEnd = (offset - currentChannel) +
-                Math.min(channels, currentChannel + depthRadius + 1);
-            var norm = 0;
-            for (var k = depthBegin; k < depthEnd; k++) {
-                norm += Math.pow(inputImageValues[k], 2);
-            }
-            norm = alpha * norm + bias;
-            for (var k = depthBegin; k < depthEnd; k++) {
-                var dyi = -2 * alpha * beta * inputImageValues[k] *
-                    outputImageValues[offset] / norm;
-                if (offset === k) {
-                    dyi += Math.pow(norm, -beta);
-                }
-                dyi *= dyValues[offset];
-                result[k] += dyi;
-            }
-        }
-        return ops.tensor4d(result, dy.shape);
-    };
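-    /**
-     * Draws numSamples samples per batch row from a categorical distribution
-     * by inverting the CDF with a seeded uniform generator. For example,
-     * probabilities [0.2, 0.5, 0.3] give cdf [0.2, 0.7]; a draw r = 0.75
-     * falls past the last cdf entry and selects event 2.
-     */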
-    MathBackendCPU.prototype.multinomial = function (logits, normalized, numSamples, seed) {
-        cpu_util_1.assertNotComplex(logits, 'multinomial');
-        var probabilities = normalized ? logits : ops.softmax(logits);
-        var batchSize = probabilities.shape[0];
-        var numEvents = probabilities.shape[1];
-        var res = ops.zeros([batchSize, numSamples], 'int32');
-        var resVals = this.readSync(res.dataId);
-        var probVals = this.readSync(probabilities.dataId);
-        for (var b = 0; b < batchSize; ++b) {
-            var offset = b * numEvents;
-            // The cdf won't include the last event. It will be implicit if no
-            // other event happened.
-            var cdf = new Float32Array(numEvents - 1);
-            cdf[0] = probVals[offset];
-            for (var event_1 = 1; event_1 < cdf.length; ++event_1) {
-                cdf[event_1] = cdf[event_1 - 1] + probVals[offset + event_1];
-            }
-            var random = seedrandom.alea(seed.toString());
-            var outOffset = b * numSamples;
-            for (var sampleId = 0; sampleId < numSamples; ++sampleId) {
-                var r = random();
-                // Assume the last event happened by default.
-                resVals[outOffset + sampleId] = cdf.length;
-                for (var event_2 = 0; event_2 < cdf.length; event_2++) {
-                    if (r < cdf[event_2]) {
-                        resVals[outOffset + sampleId] = event_2;
-                        break;
-                    }
-                }
-            }
-        }
-        return res;
-    };
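-    /**
-     * Expands each index into a length-`depth` row that is offValue
-     * everywhere except onValue at the index; out-of-range indices leave the
-     * row entirely at offValue.
-     */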
-    MathBackendCPU.prototype.oneHot = function (indices, depth, onValue, offValue) {
-        cpu_util_1.assertNotComplex(indices, 'oneHot');
-        var res = new Float32Array(indices.size * depth);
-        res.fill(offValue);
-        var indicesVal = this.readSync(indices.dataId);
-        for (var event_3 = 0; event_3 < indices.size; ++event_3) {
-            if (indicesVal[event_3] >= 0 && indicesVal[event_3] < depth) {
-                res[event_3 * depth + indicesVal[event_3]] = onValue;
-            }
-        }
-        return ops.tensor2d(res, [indices.size, depth], 'int32');
-    };
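-    /**
-     * Delegates to the shared nonMaxSuppressionV3 implementation, which
-     * greedily keeps the highest-scoring boxes whose IoU with every already
-     * selected box stays below iouThreshold.
-     */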
-    MathBackendCPU.prototype.nonMaxSuppression = function (boxes, scores, maxOutputSize, iouThreshold, scoreThreshold) {
-        cpu_util_1.assertNotComplex(boxes, 'nonMaxSuppression');
-        var boxesVals = this.readSync(boxes.dataId);
-        var scoresVals = this.readSync(scores.dataId);
-        return non_max_suppression_impl_1.nonMaxSuppressionV3(boxesVals, scoresVals, maxOutputSize, iouThreshold, scoreThreshold);
-    };
-    MathBackendCPU.prototype.fft = function (x) {
-        return this.fftBatch(x, false);
-    };
-    MathBackendCPU.prototype.ifft = function (x) {
-        return this.fftBatch(x, true);
-    };
-    /**
-     * Calculates the FFT of the innermost dimension of each element of the
-     * batch tensor.
-     */
-    MathBackendCPU.prototype.fftBatch = function (x, inverse) {
-        var batch = x.shape[0];
-        var innerDim = x.shape[1];
-        // Collects real and imaginary values separately.
-        var realResult = ops.buffer(x.shape, 'float32');
-        var imagResult = ops.buffer(x.shape, 'float32');
-        var real = ops.real(x).as2D(batch, innerDim);
-        var imag = ops.imag(x).as2D(batch, innerDim);
-        for (var b = 0; b < batch; b++) {
-            // TODO: Support slice ops for complex type.
-            var r = real.slice([b, 0], [1, innerDim]);
-            var i = imag.slice([b, 0], [1, innerDim]);
-            var input = ops.complex(r, i);
-            // Run FFT by batch element.
-            var res = this.readSync(this.fftImpl(input, inverse).dataId);
-            for (var d = 0; d < innerDim; d++) {
-                var c = complex_util.getComplexWithIndex(res, d);
-                realResult.values[b * innerDim + d] = c.real;
-                imagResult.values[b * innerDim + d] = c.imag;
-            }
-        }
-        var t = ops.complex(realResult.toTensor(), imagResult.toTensor());
-        return t.as2D(batch, innerDim);
-    };
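-    /**
-     * Dispatches to the O(n log n) radix-2 FFT when the inner size is a
-     * power of 2, and otherwise falls back to the O(n^2) matrix
-     * multiplication DFT. The inverse transform is normalized by 1/n.
-     */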
-    MathBackendCPU.prototype.fftImpl = function (x, inverse) {
-        var x1D = x.as1D();
-        var n = x1D.size;
-        if (this.isExponentOf2(n)) {
-            var result = this.fftRadix2(x1D, n, inverse).as2D(x.shape[0], x.shape[1]);
-            if (inverse) {
-                result = ops.complex(ops.real(result).div(ops_1.scalar(n)), ops.imag(result).div(ops_1.scalar(n)));
-            }
-            return result;
-        }
-        else {
-            var data = this.readSync(x.dataId);
-            var rawOutput = this.fourierTransformByMatmul(data, n, inverse);
-            var output = complex_util.splitRealAndImagArrays(rawOutput);
-            return ops.complex(output.real, output.imag).as2D(x.shape[0], x.shape[1]);
-        }
-    };
-    MathBackendCPU.prototype.isExponentOf2 = function (size) {
-        return (size & size - 1) === 0;
-    };
-    // FFT using the Cooley-Tukey algorithm; valid only when the input size is
-    // a power of 2.
-    MathBackendCPU.prototype.fftRadix2 = function (input, size, inverse) {
-        if (size === 1) {
-            return input;
-        }
-        var data = this.readSync(input.dataId);
-        var half = size / 2;
-        var evenComplex = complex_util.complexWithEvenIndex(data);
-        var evenTensor = ops.complex(evenComplex.real, evenComplex.imag).as1D();
-        var oddComplex = complex_util.complexWithOddIndex(data);
-        var oddTensor = ops.complex(oddComplex.real, oddComplex.imag).as1D();
-        // Recursive call for each half of the original input.
-        evenTensor = this.fftRadix2(evenTensor, half, inverse);
-        oddTensor = this.fftRadix2(oddTensor, half, inverse);
-        var e = complex_util.exponents(size, inverse);
-        var exponent = ops.complex(e.real, e.imag).mul(oddTensor);
-        var addPart = evenTensor.add(exponent);
-        var subPart = evenTensor.sub(exponent);
-        var realTensor = ops.real(addPart).concat(ops.real(subPart));
-        var imagTensor = ops.imag(addPart).concat(ops.imag(subPart));
-        return ops.complex(realTensor, imagTensor).as1D();
-    };
-    // Calculates the Fourier transform by multiplying by the DFT sinusoid matrix.
-    MathBackendCPU.prototype.fourierTransformByMatmul = function (data, size, inverse) {
-        var ret = new Float32Array(size * 2);
-        // TODO: Use matmul instead once it supports complex64 type.
-        for (var r = 0; r < size; r++) {
-            var real_2 = 0.0;
-            var imag_2 = 0.0;
-            for (var c = 0; c < size; c++) {
-                var e = complex_util.exponent(r * c, size, inverse);
-                var term = complex_util.getComplexWithIndex(data, c);
-                real_2 += term.real * e.real - term.imag * e.imag;
-                imag_2 += term.real * e.imag + term.imag * e.real;
-            }
-            if (inverse) {
-                real_2 /= size;
-                imag_2 /= size;
-            }
-            complex_util.assignToTypedArray(ret, real_2, imag_2, r);
-        }
-        return ret;
-    };
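-    /**
-     * Rearranges depth into spatial blocks: an [N, H, W, C] input becomes
-     * [N, H*blockSize, W*blockSize, C/blockSize^2]. For example, blockSize 2
-     * turns a [1, 1, 1, 4] input into a [1, 2, 2, 1] output.
-     */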
-    MathBackendCPU.prototype.depthToSpace = function (x, blockSize, dataFormat) {
-        util.assert(dataFormat === 'NHWC', function () { return "Only NHWC dataFormat supported on CPU for depthToSpace. Got " + dataFormat; });
-        util.assert(blockSize > 1, function () {
-            return "blockSize should be > 1 for depthToSpace, but was: " + blockSize;
-        });
-        var batchSize = x.shape[0];
-        var inputHeight = x.shape[1];
-        var inputWidth = x.shape[2];
-        var inputDepth = x.shape[3];
-        var outputHeight = inputHeight * blockSize;
-        var outputWidth = inputWidth * blockSize;
-        var outputDepth = inputDepth / (blockSize * blockSize);
-        var xValues = this.readSync(x.dataId);
-        var result = new Float32Array(batchSize * outputHeight * outputWidth * outputDepth);
-        var outputIdx = 0;
-        for (var b = 0; b < batchSize; ++b) {
-            for (var h = 0; h < outputHeight; ++h) {
-                var inH = Math.floor(h / blockSize);
-                var offsetH = (h % blockSize);
-                for (var w = 0; w < outputWidth; ++w) {
-                    var inW = Math.floor(w / blockSize);
-                    var offsetW = (w % blockSize);
-                    var offsetD = (offsetH * blockSize + offsetW) * outputDepth;
-                    for (var d = 0; d < outputDepth; ++d) {
-                        var inD = d + offsetD;
-                        var inputIdx = inD + inputDepth * (inW + inputWidth * (inH + inputHeight * b));
-                        result[outputIdx++] = xValues[inputIdx];
-                    }
-                }
-            }
-        }
-        return ops.tensor4d(result, [batchSize, outputHeight, outputWidth, outputDepth]);
-    };
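-    /**
-     * Applies a scalar binary op over two tensors after broadcasting them to
-     * a common shape. The fast path runs when no dimension actually
-     * broadcasts; otherwise each output location is mapped back to its
-     * source indices in a and b.
-     */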
-    MathBackendCPU.prototype.broadcastedBinaryOp = function (a, b, dtype, op) {
-        var newShape = broadcast_util.assertAndGetBroadcastShape(a.shape, b.shape);
-        var result = ops.buffer(newShape, dtype);
-        var aVals = this.readSync(a.dataId);
-        var bVals = this.readSync(b.dataId);
-        var aBroadcastDims = broadcast_util.getBroadcastDims(a.shape, newShape);
-        var bBroadcastDims = broadcast_util.getBroadcastDims(b.shape, newShape);
-        var resVals = result.values;
-        if (aBroadcastDims.length + bBroadcastDims.length === 0) {
-            for (var i = 0; i < resVals.length; ++i) {
-                resVals[i] = op(aVals[i % aVals.length], bVals[i % bVals.length]);
-            }
-        }
-        else {
-            var aBuf = this.bufferSync(a);
-            var bBuf = this.bufferSync(b);
-            var _loop_2 = function (i) {
-                var loc = result.indexToLoc(i);
-                var aLoc = loc.slice(-a.rank);
-                aBroadcastDims.forEach(function (d) { return aLoc[d] = 0; });
-                var aIndex = aBuf.locToIndex(aLoc);
-                var bLoc = loc.slice(-b.rank);
-                bBroadcastDims.forEach(function (d) { return bLoc[d] = 0; });
-                var bIndex = bBuf.locToIndex(bLoc);
-                resVals[i] = op(aVals[aIndex], bVals[bIndex]);
-            };
-            for (var i = 0; i < resVals.length; ++i) {
-                _loop_2(i);
-            }
-        }
-        return result.toTensor();
-    };
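-    /**
-     * Complex variant of broadcastedBinaryOp: real and imaginary parts are
-     * stored interleaved, so element i reads values at 2*i and 2*i + 1 and
-     * the op receives (aReal, aImag, bReal, bImag).
-     */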
-    MathBackendCPU.prototype.broadcastedBinaryComplexOp = function (a, b, op) {
-        var newShape = broadcast_util.assertAndGetBroadcastShape(a.shape, b.shape);
-        var realResult = ops.buffer(newShape, 'float32');
-        var imagResult = ops.buffer(newShape, 'float32');
-        var aVals = this.readSync(a.dataId);
-        var bVals = this.readSync(b.dataId);
-        var aBroadcastDims = broadcast_util.getBroadcastDims(a.shape, newShape);
-        var bBroadcastDims = broadcast_util.getBroadcastDims(b.shape, newShape);
-        var realVals = realResult.values;
-        var imagVals = imagResult.values;
-        if (aBroadcastDims.length + bBroadcastDims.length === 0) {
-            for (var i = 0; i < realVals.length; i++) {
-                var aIdx = i % aVals.length;
-                var bIdx = i % bVals.length;
-                var result = op(aVals[aIdx * 2], aVals[aIdx * 2 + 1], bVals[bIdx * 2], bVals[bIdx * 2 + 1]);
-                realVals[i] = result.real;
-                imagVals[i] = result.imag;
-            }
-        }
-        else {
-            var aRealBuf = this.bufferSync(this.data.get(a.dataId).complexTensors.real);
-            var bRealBuf = this.bufferSync(this.data.get(b.dataId).complexTensors.real);
-            var _loop_3 = function (i) {
-                var loc = realResult.indexToLoc(i);
-                var aLoc = loc.slice(-a.rank);
-                aBroadcastDims.forEach(function (d) { return aLoc[d] = 0; });
-                var aIndex = aRealBuf.locToIndex(aLoc);
-                var bLoc = loc.slice(-b.rank);
-                bBroadcastDims.forEach(function (d) { return bLoc[d] = 0; });
-                var bIndex = bRealBuf.locToIndex(bLoc);
-                var opResult = op(aVals[aIndex * 2], aVals[aIndex * 2 + 1], bVals[bIndex * 2], bVals[bIndex * 2 + 1]);
-                realVals[i] = opResult.real;
-                imagVals[i] = opResult.imag;
-            };
-            for (var i = 0; i < realVals.length; i++) {
-                _loop_3(i);
-            }
-        }
-        return this.complex(realResult.toTensor(), imagResult.toTensor());
-    };
-    MathBackendCPU.prototype.split = function (x, sizeSplits, axis) {
-        return split_shared_1.split(x, sizeSplits, axis);
-    };
-    MathBackendCPU.prototype.dispose = function () { };
-    MathBackendCPU.prototype.floatPrecision = function () {
-        return 32;
-    };
-    /** Returns the float32 epsilon used for numerical stability. */
-    MathBackendCPU.prototype.epsilon = function () {
-        return backend_1.EPSILON_FLOAT32;
-    };
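-    /**
-     * Extracts numBoxes crops from a batch of images and resizes each to
-     * cropSize using bilinear or nearest-neighbor sampling. Box coordinates
-     * are normalized [y1, x1, y2, x2]; samples that fall outside the image
-     * are filled with extrapolationValue.
-     */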
-    MathBackendCPU.prototype.cropAndResize = function (images, boxes, boxIndex, cropSize, method, extrapolationValue) {
-        var _a = images.shape, batch = _a[0], imageHeight = _a[1], imageWidth = _a[2], numChannels = _a[3];
-        var numBoxes = boxes.shape[0];
-        var cropHeight = cropSize[0], cropWidth = cropSize[1];
-        var output = ops.buffer([numBoxes, cropHeight, cropWidth, numChannels], 'float32');
-        var boxVals = this.readSync(boxes.dataId);
-        var boxIndVals = this.readSync(boxIndex.dataId);
-        var imageVals = this.readSync(images.dataId);
-        var inStride = images.strides; // to calculate flat indexes into image
-        var outStride = output.strides; // to calculate flat indexes into output
-        // Reference implementation:
-        // tslint:disable-next-line:max-line-length
-        // https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/crop_and_resize_op.cc
-        for (var b = 0; b < numBoxes; b++) {
-            var startInd = b * 4;
-            var y1 = boxVals[startInd];
-            var x1 = boxVals[startInd + 1];
-            var y2 = boxVals[startInd + 2];
-            var x2 = boxVals[startInd + 3];
-            var bInd = boxIndVals[b];
-            if (bInd >= batch) {
-                continue;
-            }
-            var heightScale = (cropHeight > 1) ?
-                (y2 - y1) * (imageHeight - 1) / (cropHeight - 1) :
-                0;
-            var widthScale = (cropWidth > 1) ? (x2 - x1) * (imageWidth - 1) / (cropWidth - 1) : 0;
-            for (var y = 0; y < cropHeight; y++) {
-                var yInd = (cropHeight > 1) ?
-                    y1 * (imageHeight - 1) + y * heightScale :
-                    0.5 * (y1 + y2) * (imageHeight - 1);
-                if (yInd < 0 || yInd > imageHeight - 1) {
-                    for (var x = 0; x < cropWidth; x++) {
-                        for (var c = 0; c < numChannels; c++) {
-                            var ind = c + x * outStride[2] + y * outStride[1] + b * outStride[0];
-                            output.values[ind] = extrapolationValue;
-                        }
-                    }
-                    continue;
-                }
-                if (method === 'bilinear') {
-                    var topInd = Math.floor(yInd);
-                    var bottomInd = Math.ceil(yInd);
-                    var yLerp = yInd - topInd;
-                    for (var x = 0; x < cropWidth; x++) {
-                        var xInd = (cropWidth > 1) ?
-                            x1 * (imageWidth - 1) + x * widthScale :
-                            0.5 * (x1 + x2) * (imageWidth - 1);
-                        if (xInd < 0 || xInd > imageWidth - 1) {
-                            for (var c = 0; c < numChannels; c++) {
-                                var ind = c + x * outStride[2] + y * outStride[1] + b * outStride[0];
-                                output.values[ind] = extrapolationValue;
-                            }
-                            continue;
-                        }
-                        var leftInd = Math.floor(xInd);
-                        var rightInd = Math.ceil(xInd);
-                        var xLerp = xInd - leftInd;
-                        for (var c = 0; c < numChannels; c++) {
-                            var ind = c + leftInd * inStride[2] + topInd * inStride[1] +
-                                bInd * inStride[0];
-                            var topLeft = imageVals[ind];
-                            ind = c + rightInd * inStride[2] + topInd * inStride[1] +
-                                bInd * inStride[0];
-                            var topRight = imageVals[ind];
-                            ind = c + leftInd * inStride[2] + bottomInd * inStride[1] +
-                                bInd * inStride[0];
-                            var bottomLeft = imageVals[ind];
-                            ind = c + rightInd * inStride[2] + bottomInd * inStride[1] +
-                                bInd * inStride[0];
-                            var bottomRight = imageVals[ind];
-                            var top_2 = topLeft + (topRight - topLeft) * xLerp;
-                            var bottom = bottomLeft + (bottomRight - bottomLeft) * xLerp;
-                            ind = c + x * outStride[2] + y * outStride[1] + b * outStride[0];
-                            output.values[ind] = top_2 + ((bottom - top_2) * yLerp);
-                        }
-                    }
-                }
-                else { // method === 'nearest'
-                    for (var x = 0; x < cropWidth; ++x) {
-                        var xInd = (cropWidth > 1) ?
-                            x1 * (imageWidth - 1) + x * widthScale :
-                            0.5 * (x1 + x2) * (imageWidth - 1);
-                        if (xInd < 0 || xInd > imageWidth - 1) {
-                            for (var c = 0; c < numChannels; c++) {
-                                var ind = c + x * outStride[2] + y * outStride[1] + b * outStride[0];
-                                output.values[ind] = extrapolationValue;
-                            }
-                            continue;
-                        }
-                        var closestX = Math.round(xInd);
-                        var closestY = Math.round(yInd);
-                        for (var c = 0; c < numChannels; c++) {
-                            var inInd = c + closestX * inStride[2] +
-                                closestY * inStride[1] + bInd * inStride[0];
-                            var outInd = c + x * outStride[2] + y * outStride[1] + b * outStride[0];
-                            output.values[outInd] = imageVals[inInd];
-                        }
-                    }
-                }
-            }
-        }
-        return output.toTensor();
-    };
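-    /**
-     * Builds a dense tensor from (index, value) pairs by scattering the
-     * sparse values over a tensor pre-filled with defaultValue. Duplicate
-     * indices overwrite rather than sum (sumDupeIndices is false).
-     */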
-    MathBackendCPU.prototype.sparseToDense = function (sparseIndices, sparseValues, outputShape, defaultValue) {
-        var _a = scatter_nd_util.calculateShapes(sparseValues, sparseIndices, outputShape), sliceRank = _a.sliceRank, numUpdates = _a.numUpdates, sliceSize = _a.sliceSize, strides = _a.strides, outputSize = _a.outputSize;
-        var sumDupeIndices = false;
-        return this.scatter(sparseIndices, sparseValues, outputShape, outputSize, sliceSize, numUpdates, sliceRank, strides, defaultValue, sumDupeIndices);
-    };
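-    /**
-     * Gathers slices from x at the N-dimensional indices given by the last
-     * axis of `indices`, validating that each flattened index stays in range.
-     */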
-    MathBackendCPU.prototype.gatherND = function (x, indices) {
-        var indicesShape = indices.shape;
-        var sliceRank = indicesShape[indicesShape.length - 1];
-        var _a = gather_nd_util.prepareAndValidate(x, indices), resultShape = _a[0], numSlices = _a[1], sliceSize = _a[2], strides = _a[3];
-        if (numSlices === 0) {
-            return ops_1.tensor([], resultShape, x.dtype);
-        }
-        var buffer = new tensor_1.TensorBuffer([numSlices, sliceSize], x.dtype);
-        var indicesData = this.readSync(indices.dataId);
-        var xData = this.readSync(x.dataId);
-        for (var i = 0; i < numSlices; i++) {
-            var index = [];
-            var flattenIndex = 0;
-            for (var j = 0; j < sliceRank; j++) {
-                var dim = indicesData[i * sliceRank + j];
-                flattenIndex += dim * strides[j];
-                index.push(dim);
-            }
-            if (flattenIndex < 0 || flattenIndex >= x.size / sliceSize) {
-                throw new Error("Invalid indices: " + index + " does not index into " + x.shape);
-            }
-            for (var k = 0; k < sliceSize; k++) {
-                buffer.values[i * sliceSize + k] = xData[flattenIndex * sliceSize + k];
-            }
-        }
-        return buffer.toTensor().reshape(resultShape);
-    };
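-    /**
-     * Scatters updates into a zero-initialized tensor of the given shape.
-     * Unlike sparseToDense, duplicate indices are summed.
-     */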
-    MathBackendCPU.prototype.scatterND = function (indices, updates, shape) {
-        var _a = scatter_nd_util.calculateShapes(updates, indices, shape), sliceRank = _a.sliceRank, numUpdates = _a.numUpdates, sliceSize = _a.sliceSize, strides = _a.strides, outputSize = _a.outputSize;
-        var defaultValue = ops_1.scalar(0);
-        var sumDupeIndices = true;
-        return this.scatter(indices, updates, shape, outputSize, sliceSize, numUpdates, sliceRank, strides, defaultValue, sumDupeIndices);
-    };
-    MathBackendCPU.prototype.fill = function (shape, value, dtype) {
-        dtype = dtype || util_1.inferDtype(value);
-        var values = util_1.getArrayFromDType(dtype, util_1.sizeFromShape(shape));
-        values.fill(value);
-        return engine_1.ENGINE.makeTensor(values, shape, dtype, this);
-    };
-    MathBackendCPU.prototype.onesLike = function (x) {
-        if (x.dtype === 'string') {
-            throw new Error('onesLike is not supported for string tensors');
-        }
-        else {
-            return this.fill(x.shape, 1, x.dtype);
-        }
-    };
-    MathBackendCPU.prototype.zerosLike = function (x) {
-        var values = util_1.getArrayFromDType(x.dtype, util_1.sizeFromShape(x.shape));
-        return this.makeOutput(values, x.shape, x.dtype);
-    };
-    MathBackendCPU.prototype.linspace = function (start, stop, num) {
-        return backend_util.linspaceImpl(start, stop, num);
-    };
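-    /**
-     * Shared worker for scatterND and sparseToDense: flattens the output to
-     * [outputSize / sliceSize, sliceSize], fills it with defaultValue, then
-     * writes (or accumulates, when sumDupeIndices) each update slice at the
-     * row addressed by its index.
-     */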
-    MathBackendCPU.prototype.scatter = function (indices, updates, shape, outputSize, sliceSize, numUpdates, sliceRank, strides, defaultValue, sumDupeIndices) {
-        var flattenShape = [outputSize / sliceSize, sliceSize];
-        var indicesData = this.readSync(indices.dataId);
-        var updatesData = this.readSync(updates.dataId);
-        if (outputSize === 0) {
-            return ops_1.tensor([], shape, updates.dtype);
-        }
-        var buffer = new tensor_1.TensorBuffer(flattenShape, updates.dtype);
-        buffer.values.fill(this.readSync(defaultValue.dataId)[0]);
-        for (var i = 0; i < numUpdates; i++) {
-            var index = [];
-            var flattenIndex = 0;
-            for (var j = 0; j < sliceRank; j++) {
-                var dim = indicesData[i * sliceRank + j];
-                index.push(dim);
-                flattenIndex += dim * strides[j];
-            }
-            if (flattenIndex < 0 || flattenIndex >= outputSize / sliceSize) {
-                throw new Error("Invalid indices: " + index + " does not index into " + shape);
-            }
-            for (var k = 0; k < sliceSize; k++) {
-                if (sumDupeIndices) {
-                    buffer.values[flattenIndex * sliceSize + k] +=
-                        updatesData[i * sliceSize + k];
-                }
-                else {
-                    buffer.values[flattenIndex * sliceSize + k] = updates.rank === 0 ?
-                        updatesData[0] :
-                        updatesData[i * sliceSize + k];
-                }
-            }
-        }
-        return buffer.toTensor().reshape(shape);
-    };
-    return MathBackendCPU;
-}(backend_1.KernelBackend));
-exports.MathBackendCPU = MathBackendCPU;
-engine_1.ENGINE.registerBackend('cpu', function () { return new MathBackendCPU(); }, 1 /* priority */);
-//# sourceMappingURL=backend_cpu.js.map