/*******************************************************
* Copyright (c) 2014, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <gtest/gtest.h>
#include <arrayfire.h>
#include <vector>
#include <complex>
#include <testHelpers.hpp>
using namespace af;
using std::vector;
using std::abs;
// Runs the "generating arrays" documentation snippet and then verifies the
// shape, element type and (where deterministic) contents of what it created.
TEST(GettingStarted, SNIPPET_getting_started_gen)
{
    //! [ex_getting_started_gen]
    // Generate an array of size three filled with zeros.
    // If no data type is specified, ArrayFire defaults to f32.
    // The af::constant function generates the data on the device.
    array zeros = constant(0, 3);
    // Generate a 1x4 array of uniformly distributed [0,1] random numbers
    // The af::randu function generates the data on the device.
    array rand1 = randu(1, 4);
    // Generate a 2x2 array (or matrix, if you prefer) of random numbers
    // sampled from a normal distribution.
    // The af::randn function generates data on the device.
    array rand2 = randn(2, 2);
    // Generate a 3x3 identity matrix. The data is generated on the device.
    array iden = af::identity(3, 3);
    // Lastly, create a 2x1 array (column vector) of uniformly distributed
    // 32-bit complex numbers (c32 data type):
    array randcplx = randu(2, 1, c32);
    //! [ex_getting_started_gen]

    // The zero-filled vector must be f32 and contain exactly three zeros.
    {
        ASSERT_EQ(f32, zeros.type());
        ASSERT_EQ(3, zeros.elements());
        vector<float> output(zeros.elements());
        zeros.host(&output.front());
        for (size_t i = 0; i < output.size(); i++) ASSERT_FLOAT_EQ(0, output[i]);
    }

    // The random arrays have unpredictable values, so only their shape and
    // element type are checked.
    ASSERT_EQ(1, rand1.dims(0));
    ASSERT_EQ(4, rand1.dims(1));
    ASSERT_EQ(2, rand2.dims(0));
    ASSERT_EQ(2, rand2.dims(1));
    ASSERT_EQ(c32, randcplx.type());
    ASSERT_EQ(2, randcplx.dims(0));
    ASSERT_EQ(1, randcplx.dims(1));

    // Double precision is optional on some devices; skip when unsupported.
    if (!noDoubleTests<double>()) {
        array ones = constant(1, 3, 2, f64);
        ASSERT_EQ(f64, ones.type());
        vector<double> output(ones.elements());
        ones.host(&output.front());
        // Compare as doubles: ASSERT_FLOAT_EQ would narrow the values to float.
        for (size_t i = 0; i < output.size(); i++) ASSERT_DOUBLE_EQ(1, output[i]);
    }

    // The identity matrix must hold ones on the diagonal and zeros elsewhere.
    {
        ASSERT_EQ(f32, iden.type());
        const dim_t rows = iden.dims(0);
        const dim_t cols = iden.dims(1);
        vector<float> output(iden.elements());
        iden.host(&output.front());
        for (dim_t j = 0; j < cols; j++) {
            for (dim_t i = 0; i < rows; i++) {
                // ArrayFire stores data column-major: (i, j) lives at i + j * rows.
                const float expected = (i == j) ? 1.0f : 0.0f;
                ASSERT_FLOAT_EQ(expected, output[i + j * rows]);
            }
        }
    }
}
// Runs the "initializing arrays from host data" snippet and checks that the
// real-valued matrix comes back from the device unchanged.
TEST(GettingStarted, SNIPPET_getting_started_init)
{
    //! [ex_getting_started_init]
    // Create a six-element array on the host
    float hA[] = {0, 1, 2, 3, 4, 5};
    // Which can be copied into an ArrayFire Array using the pointer copy
    // constructor. Here we copy the data into a 2x3 matrix:
    array A(2, 3, hA);
    // ArrayFire provides a convenience function for printing af::array
    // objects in case you wish to see how the data is stored:
    af_print(A);
    // This technique can also be used to populate an array with complex
    // data (stored in {{real, imaginary}, {real, imaginary}, ... } format,
    // as found in C's complex.h and C++'s <complex>).
    // Below we create a 3x1 column vector of complex data values:
    array dB(3, 1, reinterpret_cast<cfloat*>(hA)); // 3x1 column vector of complex numbers
    af_print(dB);
    //! [ex_getting_started_init]

    // Copy A back to the host and compare element by element with the source.
    vector<float> roundTrip(A.elements());
    A.host(&roundTrip[0]);
    for (vector<float>::size_type idx = 0; idx != roundTrip.size(); ++idx) {
        ASSERT_FLOAT_EQ(hA[idx], roundTrip[idx]);
    }
}
// Runs the "printing arrays" snippet, then recomputes the printed expression
// on the host and compares it with the device result.
TEST(GettingStarted, SNIPPET_getting_started_print)
{
    //! [ex_getting_started_print]
    // Generate two arrays
    array a = randu(2, 2);
    array b = constant(1, 2, 1);
    // Print them to the console using af_print
    af_print(a);
    af_print(b);
    // Print the results of an expression involving arrays:
    af_print(a.col(0) + b + .4);
    //! [ex_getting_started_print]

    // Evaluate the same expression once more so it can be inspected on the host.
    array result = a.col(0) + b + 0.4;

    vector<float> hostA(a.elements());
    a.host(&hostA[0]);
    vector<float> hostB(b.elements());
    b.host(&hostB[0]);
    vector<float> hostResult(result.elements());
    result.host(&hostResult[0]);

    // Only the first hostB.size() entries of hostA are used; they are the
    // operands of a.col(0) in the expression above.
    for (unsigned row = 0; row < hostB.size(); ++row) {
        const double expected = hostA[row] + hostB[row] + 0.4;
        ASSERT_FLOAT_EQ(expected, hostResult[row]);
    }
}
// Runs the "array dimensions" and "array properties" snippets on a 4x5x2
// array and asserts every property that the snippets print.
TEST(GettingStarted, SNIPPET_getting_started_dims)
{
    //! [ex_getting_started_dims]
    // Create a 4x5x2 array of uniformly distributed random numbers
    array a = randu(4,5,2);
    // Determine the number of dimensions using the numdims() function:
    printf("numdims(a) %u\n", a.numdims()); // 3
    // We can also find the size of the individual dimensions using either
    // the `dims` function:
    printf("dims = [%lld %lld]\n", a.dims(0), a.dims(1)); // 4,5
    // Or the elements of a af::dim4 object:
    dim4 dims = a.dims();
    printf("dims = [%lld %lld]\n", dims[0], dims[1]); // 4,5
    //! [ex_getting_started_dims]

    //! [ex_getting_started_prop]
    // Get the type stored in the array. This will be one of the many
    // `af_dtype`s presented above:
    printf("underlying type: %d\n", a.type());
    // Arrays also have several convenience functions to determine if
    // an Array contains complex or real values:
    printf("is complex? %d is real? %d\n", a.iscomplex(), a.isreal());
    // if it is a column or row vector
    printf("is vector? %d column? %d row? %d\n", a.isvector(), a.iscolumn(), a.isrow());
    // and whether or not the array is empty and how much memory it takes on
    // the device:
    printf("empty? %d total elements: %lld bytes: %zu\n", a.isempty(), a.elements(), a.bytes());
    //! [ex_getting_started_prop]

    // Element type and real/complex classification.
    ASSERT_EQ(f32, a.type());
    ASSERT_TRUE(a.isreal());
    ASSERT_FALSE(a.iscomplex());

    // A 4x5x2 array is neither a vector, a column, a row nor empty.
    ASSERT_FALSE(a.isvector());
    ASSERT_FALSE(a.iscolumn());
    ASSERT_FALSE(a.isrow());
    ASSERT_FALSE(a.isempty());

    // Overall size: 4 * 5 * 2 single-precision elements.
    ASSERT_EQ(3u, a.numdims());
    ASSERT_EQ(40, a.elements());
    ASSERT_EQ(40 * sizeof(float), a.bytes());

    // Per-dimension sizes, through both the dim4 object and dims(i).
    ASSERT_EQ(4, dims[0]);
    ASSERT_EQ(4, a.dims(0));
    ASSERT_EQ(5, dims[1]);
    ASSERT_EQ(5, a.dims(1));
    ASSERT_EQ(2, dims[2]);
    ASSERT_EQ(2, a.dims(2));
}
// Documentation snippet for arithmetic on arrays. The test only prints its
// results through af_print and contains no assertions, so it passes as long
// as every expression evaluates without error.
TEST(GettingStarted, SNIPPET_getting_started_arith)
{
//! [ex_getting_started_arith]
// add a real constant matrix to a complex matrix built from sin(R)
array R = randu(3, 3);
af_print(constant(1, 3, 3) + af::complex(sin(R))); // will be c32
// rescale complex values to unit circle
array a = randn(5, c32);
af_print(a / abs(a));
// calculate L2 norm of vectors
array X = randn(3, 4);
af_print(sqrt(sum(pow(X, 2)))); // norm of every column vector
af_print(sqrt(sum(pow(X, 2), 0))); // same as above
af_print(sqrt(sum(pow(X, 2), 1))); // norm of every row vector
//! [ex_getting_started_arith]
}
// Documentation snippet showing how a CUDA allocation is handed to an
// af::array. The body is only compiled under nvcc (__CUDACC__); otherwise the
// test is empty. It makes no assertions.
TEST(GettingStarted, SNIPPET_getting_started_dev_ptr)
{
#ifdef __CUDACC__
    //! [ex_getting_started_dev_ptr]
    // Start with six floats on the host and load them into a 2x3 ArrayFire array
    float host_ptr[] = {0,1,2,3,4,5};
    array a(2, 3, host_ptr);
    // Allocate a CUDA buffer of the same size and fill it from the host data
    const size_t num_bytes = sizeof(host_ptr);
    float *device_ptr;
    cudaMalloc((void**)&device_ptr, num_bytes);
    cudaMemcpy(device_ptr, host_ptr, num_bytes, cudaMemcpyHostToDevice);
    // Wrap the CUDA-allocated device memory in an ArrayFire array:
    array b(2,3, device_ptr, afDevice); // Note: afDevice (default: afHost)
    // ArrayFire now owns `device_ptr` and frees it when `b` is destroyed,
    // so do not call cudaFree(device_ptr) yourself!
    //! [ex_getting_started_dev_ptr]
#endif //__CUDACC__
}
// Documentation snippet showing how to reach the data of an af::array through
// raw host and device pointers. The body is only compiled under nvcc
// (__CUDACC__); otherwise the test is empty. It prints values and makes no
// assertions.
// NOTE(review): the host buffer is released with delete[]; newer ArrayFire
// releases document af::freeHost() for this - confirm against the library
// version in use.
// NOTE(review): the OpenCL line uses cl_mem inside the CUDA-only section and
// no OpenCL header is visible among this file's includes - confirm it builds.
TEST(GettingStarted, SNIPPET_getting_started_ptr)
{
#ifdef __CUDACC__
    //! [ex_getting_started_ptr]
    // Create an array consisting of 3 random numbers
    array a = randu(3, f32);
    // Copy an array on the device to the host:
    float * host_a = a.host<float>();
    // access the host data as a normal array
    printf("host_a[2] = %g\n", host_a[2]); // last element
    // and free memory using delete:
    delete[] host_a;
    // Get access to the device memory for a CUDA kernel
    float * d_cuda = a.device<float>(); // no need to free this
    float value;
    cudaMemcpy(&value, d_cuda + 2, sizeof(float), cudaMemcpyDeviceToHost);
    printf("d_cuda[2] = %g\n", value);
    a.unlock(); // unlock to allow garbage collection if necessary
    // Because OpenCL uses references rather than pointers, accessing memory
    // is similar, but has a somewhat clunky syntax. For the C-API
    cl_mem d_opencl = (cl_mem) a.device<float>();
    // for the C++ API, you can just wrap this object into a cl::Buffer
    // after calling clRetainMemObject.
    a.unlock(); // device() locked the buffer again; unlock once finished with it
    //! [ex_getting_started_ptr]
#endif //__CUDACC__
}
// Documentation snippet: reads a single float out of a three-element random
// array with scalar<float>() and prints it. No assertions are made.
TEST(GettingStarted, SNIPPET_getting_started_scalar)
{
    //! [ex_getting_started_scalar]
    array a(randu(3));
    const float val = a.scalar<float>();
    printf("scalar value: %g\n", val);
    //! [ex_getting_started_scalar]
}
// Runs the "bitwise operations" snippet and checks the AND, OR and XOR
// results element by element against the same operators applied on the host.
TEST(GettingStarted, SNIPPET_getting_started_bit)
{
    //! [ex_getting_started_bit]
    int h_A[] = {1, 1, 0, 0, 4, 0, 0, 2, 0};
    int h_B[] = {1, 0, 1, 0, 1, 0, 1, 1, 1};
    array A = array(3, 3, h_A), B = array(3, 3, h_B);
    af_print(A); af_print(B);
    array A_and_B = A & B; af_print(A_and_B);
    array A_or_B = A | B; af_print(A_or_B);
    array A_xor_B = A ^ B; af_print(A_xor_B);
    //! [ex_getting_started_bit]

    // The results are copied into int buffers below, so they must be s32.
    ASSERT_EQ(s32, A_and_B.type());
    ASSERT_EQ(s32, A_or_B.type());
    ASSERT_EQ(s32, A_xor_B.type());

    vector<int> Andout(A_and_B.elements());
    vector<int> Orout(A_or_B.elements());
    vector<int> Xorout(A_xor_B.elements());

    // Guard the host-array indexing below against an unexpected result size.
    const size_t count = sizeof(h_A) / sizeof(h_A[0]);
    ASSERT_EQ(count, Andout.size());
    ASSERT_EQ(count, Orout.size());
    ASSERT_EQ(count, Xorout.size());

    A_and_B.host(&Andout.front());
    A_or_B.host(&Orout.front());
    A_xor_B.host(&Xorout.front());

    // Integer results are compared exactly; a float comparison is not needed.
    for (size_t i = 0; i < count; i++) {
        ASSERT_EQ(h_A[i] & h_B[i], Andout[i]);
        ASSERT_EQ(h_A[i] | h_B[i], Orout[i]);
        ASSERT_EQ(h_A[i] ^ h_B[i], Xorout[i]);
    }
}
// Runs the "constants" snippet: writes af::NaN into part of a random matrix
// and estimates pi from random points, then checks the estimate is within
// 0.005 of af::Pi.
TEST(GettingStarted, SNIPPET_getting_started_constants)
{
    //! [ex_getting_started_constants]
    // Replace every entry above .5 with NaN
    array A = randu(5,5);
    A(where(A > .5)) = af::NaN;
    // Monte Carlo estimate of pi: the fraction of random points (x, y) whose
    // distance from the origin is below 1, scaled by 4
    array x = randu(10e6);
    array y = randu(10e6);
    const float inside = sum<float>(hypot(x,y) < 1);
    double pi_est = 4 * inside / 10e6;
    const double error = fabs(Pi - pi_est);
    printf("estimation error: %g\n", error);
    //! [ex_getting_started_constants]

    ASSERT_LE(error, 0.005);
}
|