NumpyArray.h
Go to the documentation of this file.
1 // BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
2 
3 #ifndef AWKWARD_NUMPYARRAY_H_
4 #define AWKWARD_NUMPYARRAY_H_
5 
6 #include <string>
7 #include <memory>
8 #include <vector>
9 
10 #include "awkward/common.h"
11 #include "awkward/Slice.h"
12 #include "awkward/Content.h"
13 
14 namespace awkward {
// NOTE(review): this label opens the public section of the class whose constructor
// below is named NumpyForm. The class header itself (embedded line 18) is absent
// from this listing; the cross-reference notes at the end of the page describe the
// entity at NumpyArray.h:18 as "Form describing NumpyArray".
19  public:
// Constructor declaration for NumpyForm.
// Visible parameters: a has_identities flag, a util::Parameters map, a FormKey,
// the inner shape as a vector of int64_t, an item size, and a format string.
// NOTE(review): the declaration is cut off after `format,` -- embedded line 27,
// which should carry the remaining parameter(s) and the closing `);`, is missing
// from this listing. Recover it from the real header before relying on this
// signature.
21  NumpyForm(bool has_identities,
22  const util::Parameters& parameters,
23  const FormKey& form_key,
24  const std::vector<int64_t>& inner_shape,
25  int64_t itemsize,
26  const std::string& format,
28 
// Read-only accessors: each is a const member function that takes no arguments and
// returns by value. The names line up with the private members inner_shape_,
// itemsize_, format_ and dtype_ declared at the end of this class; presumably each
// returns the matching member, but the definitions are not in this header.
29  const std::vector<int64_t>
30  inner_shape() const;
31 
32  int64_t
33  itemsize() const;
34 
35  const std::string
36  format() const;
37 
// NOTE(review): the return-type line for dtype() (embedded line 38) is missing from
// this listing. The matching member is declared as `const util::dtype dtype_`, so
// the return type is presumably util::dtype -- confirm against the real header.
39  dtype() const;
40 
// Returns a string by value. No member named primitive_ is declared in this class,
// so how the value is produced cannot be determined from this header.
41  const std::string
42  primitive() const;
43 
// Overrides of base-class virtuals (each carries `override`). Only the signatures
// are declared here; the definitions live outside this header.
// type() takes a util::TypeStrs map (std::map<std::string, std::string> per the
// cross-reference notes) and returns a TypePtr (std::shared_ptr<Type>).
44  const TypePtr
45  type(const util::TypeStrs& typestrs) const override;
46 
47  const std::string
48  tostring() const override;
49 
50  const std::string
51  tojson(bool pretty, bool verbose) const override;
52 
// Writes into the caller-supplied ToJson builder (passed by non-const reference)
// instead of returning a value.
53  void
54  tojson_part(ToJson& builder, bool verbose) const override;
55 
// Overload of tojson_part with an extra `toplevel` flag. Unlike the two-argument
// form above it is not marked `override`.
56  void
57  tojson_part(ToJson& builder, bool verbose, bool toplevel) const;
58 
// Both functions below return a FormPtr (std::shared_ptr<Form>). with_form_key
// takes the FormKey to use; presumably it yields a copy carrying that key --
// confirm in the implementation.
59  const FormPtr
60  shallow_copy() const override;
61 
62  const FormPtr
63  with_form_key(const FormKey& form_key) const override;
64 
// Structural queries on the form. Every declaration in this run is a const member
// function marked `override`, i.e. it implements a virtual declared in a base
// class. Definitions are not in this header, so the values returned for a
// NumpyForm cannot be stated from here.
65  const std::string
66  purelist_parameter(const std::string& key) const override;
67 
68  bool
69  purelist_isregular() const override;
70 
71  int64_t
72  purelist_depth() const override;
73 
74  bool
75  dimension_optiontype() const override;
76 
// Returns a pair of two int64_t values; by the name, presumably (min, max) depth.
77  const std::pair<int64_t, int64_t>
78  minmax_depth() const override;
79 
// Returns a (bool, int64_t) pair; the meaning of each element is defined by the
// base-class contract, which is not visible here.
80  const std::pair<bool, int64_t>
81  branch_depth() const override;
82 
// Field/key interface (numfields, fieldindex, key, haskey, keys, istuple).
// NOTE(review): presumably these are the record-field queries shared by all forms;
// what a NumpyForm reports for them must be checked in the implementation.
83  int64_t
84  numfields() const override;
85 
86  int64_t
87  fieldindex(const std::string& key) const override;
88 
89  const std::string
90  key(int64_t fieldindex) const override;
91 
92  bool
93  haskey(const std::string& key) const override;
94 
95  const std::vector<std::string>
96  keys() const override;
97 
98  bool
99  istuple() const override;
100 
// Comparison against another form, passed as a FormPtr (std::shared_ptr<Form>).
// The four boolean flags select which aspects take part in the comparison
// (identities, parameters, form key, and a "compatibility" mode); the exact rules
// are in the implementation, not in this header.
101  bool
102  equal(const FormPtr& other,
103  bool check_identities,
104  bool check_parameters,
105  bool check_form_key,
106  bool compatibility_check) const override;
107 
// Field selection by one key or by a list of keys; both return a FormPtr and are
// overrides of base-class virtuals.
108  const FormPtr
109  getitem_field(const std::string& key) const override;
110 
111  const FormPtr
112  getitem_fields(const std::vector<std::string>& keys) const override;
113 
// Private data members. Three are declared const; itemsize_ is the only one that
// is not.
// NOTE(review): they mirror the constructor parameters inner_shape, itemsize and
// format. dtype_ has no visible counterpart because the end of the constructor's
// parameter list is missing from this listing.
114  private:
115  const std::vector<int64_t> inner_shape_;
116  int64_t itemsize_;
117  const std::string format_;
118  const util::dtype dtype_;
119  };
120 
// NOTE(review): this label opens the public section of the class whose constructors
// below are named NumpyArray. The class header (embedded line 138) is absent from
// this listing; the cross-reference notes describe the entity at NumpyArray.h:138
// as "a rectilinear numerical array that can be converted to and from NumPy".
139  public:
140 
// Main constructor declaration.
// Parameters as declared: identities (IdentitiesPtr), parameters (util::Parameters),
// ptr (type-erased std::shared_ptr<void> buffer handle), shape and strides (vectors
// of ssize_t), byteoffset and itemsize (ssize_t), a format string, and ptr_lib
// (kernel::lib).
// NOTE(review): the units of strides/byteoffset (bytes vs. items) are not stated in
// this listing -- confirm in the real header's documentation.
// NOTE(review): embedded line 183 (between `format,` and `ptr_lib`) is absent from
// this listing. It may hold a further parameter; the class does declare a
// `const util::dtype dtype_` member with no visible constructor argument. Verify
// against the real header.
175  NumpyArray(const IdentitiesPtr& identities,
176  const util::Parameters& parameters,
177  const std::shared_ptr<void>& ptr,
178  const std::vector<ssize_t>& shape,
179  const std::vector<ssize_t>& strides,
180  ssize_t byteoffset,
181  ssize_t itemsize,
182  const std::string& format,
184  const kernel::lib ptr_lib);
185 
// Converting constructors from the five index types (Index8, IndexU8, Index32,
// IndexU32, Index64). Each is `explicit`, so no implicit Index -> NumpyArray
// conversion is possible, and each takes its argument by (const) value rather than
// by reference.
187  explicit NumpyArray(const Index8 index);
189  explicit NumpyArray(const IndexU8 index);
191  explicit NumpyArray(const Index32 index);
193  explicit NumpyArray(const IndexU32 index);
195  explicit NumpyArray(const Index64 index);
196 
// Read-only accessors and simple queries. All are const member functions and none
// is marked `override`, so they are specific to this class. Several names match
// the private members (ptr_, ptr_lib_, shape_, strides_, byteoffset_, itemsize_,
// format_, dtype_); presumably those accessors return the matching member, but the
// definitions are not in this header.
198  const std::shared_ptr<void>
199  ptr() const;
200 
// NOTE(review): the return-type line for ptr_lib() (embedded line 202) is missing
// from this listing; the matching member is `const kernel::lib ptr_lib_`, so the
// return type is presumably kernel::lib -- confirm against the real header.
203  ptr_lib() const;
204 
// Returns an untyped raw pointer. NOTE(review): how it relates to ptr() and
// byteoffset() is not visible here -- check the implementation.
206  void*
207  data() const;
208 
220  const std::vector<ssize_t>
221  shape() const;
222 
233  const std::vector<ssize_t>
234  strides() const;
235 
246  ssize_t
247  byteoffset() const;
248 
253  ssize_t
254  itemsize() const;
255 
262  const std::string
263  format() const;
264 
// NOTE(review): the return-type line for dtype() (embedded line 267) is missing
// from this listing; the matching member is `const util::dtype dtype_`, so the
// return type is presumably util::dtype -- confirm against the real header.
268  dtype() const;
269 
274  ssize_t
275  ndim() const;
276 
281  bool
282  isempty() const;
283 
289  ssize_t
290  bytelength() const;
291 
// Returns a single uint8_t for the position `at`. NOTE(review): the origin that
// `at` is measured from is not stated in this listing.
296  uint8_t
297  getbyte(ssize_t at) const;
298 
// Returns a ContentPtr (std::shared_ptr<Content>); by its name, presumably a
// RegularArray-based equivalent of this array -- confirm in the implementation.
304  const ContentPtr
305  toRegularArray() const;
306 
// Overrides of base-class virtuals: every declaration in this run is marked
// `override`. Only signatures are declared here.
308  bool
309  isscalar() const override;
310 
312  const std::string
313  classname() const override;
314 
// The two setidentities overloads are the only non-const functions in this run:
// one takes no argument, the other takes the IdentitiesPtr to use.
315  void
316  setidentities() override;
317 
318  void
319  setidentities(const IdentitiesPtr& identities) override;
320 
321  const TypePtr
322  type(const util::TypeStrs& typestrs) const override;
323 
324  const FormPtr
325  form(bool materialize) const override;
326 
// NOTE(review): the return-type line for kernels() (embedded line 327) is missing
// from this listing -- recover it from the real header.
328  kernels() const override;
329 
// Output-parameter style: results are written into `out`, which is passed by
// non-const reference.
330  void
331  caches(std::vector<ArrayCachePtr>& out) const override;
332 
333  const std::string
334  tostring_part(const std::string& indent,
335  const std::string& pre,
336  const std::string& post) const override;
337 
338  void
339  tojson_part(ToJson& builder, bool include_beginendlist) const override;
340 
// Output-parameter style: fills/updates the caller's map.
// NOTE(review): std::map is used here but <map> is not among this header's direct
// includes (<string>, <memory>, <vector>); it is presumably pulled in transitively
// by one of the awkward/ headers.
341  void
342  nbytes_part(std::map<size_t, int64_t>& largest) const override;
343 
344  int64_t
345  length() const override;
346 
347  const ContentPtr
348  shallow_copy() const override;
349 
// The three flags select what is copied: arrays, indexes and identities
// respectively (per the parameter names).
350  const ContentPtr
351  deep_copy(bool copyarrays,
352  bool copyindexes,
353  bool copyidentities) const override;
354 
355  void
356  check_for_iteration() const override;
357 
// Element/slice selection interface. Every function here is a const override that
// returns a ContentPtr (std::shared_ptr<Content>).
358  const ContentPtr
359  getitem_nothing() const override;
360 
// NOTE(review): getitem_at / getitem_range each come with a `_nowrap` twin that
// has the same signature; presumably the `_nowrap` form skips index normalization
// -- confirm in the implementation.
361  const ContentPtr
362  getitem_at(int64_t at) const override;
363 
364  const ContentPtr
365  getitem_at_nowrap(int64_t at) const override;
366 
367  const ContentPtr
368  getitem_range(int64_t start, int64_t stop) const override;
369 
370  const ContentPtr
371  getitem_range_nowrap(int64_t start, int64_t stop) const override;
372 
// Field selection by one key or by several keys, each with an overload that also
// takes an `only_fields` Slice.
373  const ContentPtr
374  getitem_field(const std::string& key) const override;
375 
376  const ContentPtr
377  getitem_field(const std::string& key,
378  const Slice& only_fields) const override;
379 
380  const ContentPtr
381  getitem_fields(const std::vector<std::string>& keys) const override;
382 
383  const ContentPtr
384  getitem_fields(const std::vector<std::string>& keys,
385  const Slice& only_fields) const override;
386 
// General entry point taking a whole Slice.
387  const ContentPtr
388  getitem(const Slice& where) const override;
389 
// Step form: takes the first slice item (`head`), the remaining slice (`tail`) and
// an `advanced` Index64.
390  const ContentPtr
391  getitem_next(const SliceItemPtr& head,
392  const Slice& tail,
393  const Index64& advanced) const override;
394 
395  const ContentPtr
396  carry(const Index64& carry, bool allow_lazy) const override;
397 
// Structural and field queries; all are const overrides of base-class virtuals.
// The same names are declared on the NumpyForm class earlier in this header.
// Definitions are not in this header, so the values returned for a NumpyArray
// cannot be stated from here.
398  int64_t
399  purelist_depth() const override;
400 
401  const std::pair<int64_t, int64_t>
402  minmax_depth() const override;
403 
404  const std::pair<bool, int64_t>
405  branch_depth() const override;
406 
407  int64_t
408  numfields() const override;
409 
410  int64_t
411  fieldindex(const std::string& key) const override;
412 
413  const std::string
414  key(int64_t fieldindex) const override;
415 
416  bool
417  haskey(const std::string& key) const override;
418 
419  const std::vector<std::string>
420  keys() const override;
421 
422  bool
423  istuple() const override;
424 
// Array operations. Every function in this run is a const member function; all
// except as_unique_strings are marked `override`. Only signatures are declared in
// this header.
425  // operations
// Returns a string for the given `path`. NOTE(review): presumably an error
// description that is empty when the array is valid -- confirm in the
// implementation.
426  const std::string
427  validityerror(const std::string& path) const override;
428 
432  const ContentPtr
433  shallow_simplify() const override;
434 
// `axis` selects the dimension to operate on; `depth` is presumably the current
// nesting level supplied by the caller during recursion -- confirm.
435  const ContentPtr
436  num(int64_t axis, int64_t depth) const override;
437 
// Returns two results at once: an Index64 and a ContentPtr (per the name, offsets
// and the flattened content).
438  const std::pair<Index64, ContentPtr>
439  offsets_and_flattened(int64_t axis, int64_t depth) const override;
440 
441  bool
442  mergeable(const ContentPtr& other, bool mergebool) const override;
443 
444  bool
445  referentially_equal(const ContentPtr& other) const override;
446 
// Takes a ContentPtrVec, i.e. std::vector<std::shared_ptr<Content>>.
447  const ContentPtr
448  mergemany(const ContentPtrVec& others) const override;
449 
// Returns a SliceItemPtr (std::shared_ptr<SliceItem>) rather than a ContentPtr.
450  const SliceItemPtr
451  asslice() const override;
452 
453  const ContentPtr
454  fillna(const ContentPtr& value) const override;
455 
456  const ContentPtr
457  rpad(int64_t target, int64_t axis, int64_t depth) const override;
458 
459  const ContentPtr
460  rpad_and_clip(int64_t target,
461  int64_t axis,
462  int64_t depth) const override;
463 
// Reduction step driven by a Reducer ("abstract class for all reducer algorithms"
// per the cross-reference notes). starts/shifts/parents are Index64 bookkeeping
// arrays supplied by the caller; their exact contract is defined by the base
// class, not visible here.
464  const ContentPtr
465  reduce_next(const Reducer& reducer,
466  int64_t negaxis,
467  const Index64& starts,
468  const Index64& shifts,
469  const Index64& parents,
470  int64_t outlength,
471  bool mask,
472  bool keepdims) const override;
473 
// sort_next has the same parameters as argsort_next below, minus `shifts`.
474  const ContentPtr
475  sort_next(int64_t negaxis,
476  const Index64& starts,
477  const Index64& parents,
478  int64_t outlength,
479  bool ascending,
480  bool stable) const override;
481 
// Not an override: specific to this class. Takes an offsets Index64.
// NOTE(review): presumably treats the buffer as string data delimited by
// `offsets` -- confirm in the implementation.
482  const ContentPtr
483  as_unique_strings(const Index64& offsets) const;
484 
485  const ContentPtr
486  argsort_next(int64_t negaxis,
487  const Index64& starts,
488  const Index64& shifts,
489  const Index64& parents,
490  int64_t outlength,
491  bool ascending,
492  bool stable) const override;
493 
494  const ContentPtr
495  localindex(int64_t axis, int64_t depth) const override;
496 
// `recordlookup` is a util::RecordLookupPtr (std::shared_ptr<RecordLookup>) and
// `parameters` a util::Parameters map.
497  const ContentPtr
498  combinations(int64_t n,
499  bool replacement,
500  const util::RecordLookupPtr& recordlookup,
501  const util::Parameters& parameters,
502  int64_t axis,
503  int64_t depth) const override;
504 
// Contiguity helpers; neither is marked `override`, so they are specific to this
// class. contiguous() returns a NumpyArray by value rather than a ContentPtr.
// NOTE(review): the precise definition of "contiguous" used here (e.g. in terms of
// shape_/strides_) is in the implementation, not in this header.
517  bool
518  iscontiguous() const;
519 
533  const NumpyArray
534  contiguous() const;
535 
// Public getitem_next overloads, one per concrete slice-item type: SliceAt,
// SliceRange, SliceArray64, SliceField, SliceFields and SliceJagged64. Each takes
// the same (item, tail, advanced) triple as the SliceItemPtr overload declared
// earlier in this class, returns a ContentPtr, and is marked `override`.
// No public overload is declared here for SliceEllipsis or SliceNewAxis; those
// types appear only in the protected overloads further down.
538  const ContentPtr
539  getitem_next(const SliceAt& at,
540  const Slice& tail,
541  const Index64& advanced) const override;
542 
545  const ContentPtr
546  getitem_next(const SliceRange& range,
547  const Slice& tail,
548  const Index64& advanced) const override;
549 
552  const ContentPtr
553  getitem_next(const SliceArray64& array,
554  const Slice& tail,
555  const Index64& advanced) const override;
556 
559  const ContentPtr
560  getitem_next(const SliceField& field,
561  const Slice& tail,
562  const Index64& advanced) const override;
563 
566  const ContentPtr
567  getitem_next(const SliceFields& fields,
568  const Slice& tail,
569  const Index64& advanced) const override;
570 
573  const ContentPtr
574  getitem_next(const SliceJagged64& jagged,
575  const Slice& tail,
576  const Index64& advanced) const override;
577 
// Further const overrides of base-class virtuals.
// copy_to takes the target kernel::lib. The cross-reference notes describe the
// kernel-level copy_to as moving an array buffer from one library to another,
// "usually between main memory and a ..." (the note is truncated in this listing).
580  const ContentPtr
581  copy_to(kernel::lib ptr_lib) const override;
582 
// `name` names the target numeric type. The set of accepted names is not visible
// in this header.
583  const ContentPtr
584  numbers_to_type(const std::string& name) const override;
585 
587  bool
588  is_unique() const override;
589 
591  const ContentPtr
592  unique() const override;
593 
// Takes two Index64 arrays, `starts` and `stops`. NOTE(review): presumably they
// delimit the subranges being compared -- confirm in the implementation.
595  bool
596  is_subrange_equal(const Index64& starts, const Index64& stops) const override;
597 
// Protected helpers. Unlike the public interface, these return NumpyArray by value
// and none is marked `override`.
598  protected:
// Takes an Index64 named `bytepos`. NOTE(review): presumably per-element byte
// positions used while building a contiguous copy -- confirm in the implementation.
605  const NumpyArray
606  contiguous_next(const Index64& bytepos) const;
607 
// getitem_bystrides overload set: a generic entry taking a SliceItemPtr `head`,
// plus one overload each for SliceAt, SliceRange, SliceEllipsis and SliceNewAxis.
// All share the trailing (tail, length) parameters.
// NOTE(review): there is no SliceArray64 overload in this set; presumably array
// slices cannot be handled by this path and go through the protected getitem_next
// overloads instead -- confirm in the implementation.
615  const NumpyArray
616  getitem_bystrides(const SliceItemPtr& head,
617  const Slice& tail,
618  int64_t length) const;
619 
624  const NumpyArray
625  getitem_bystrides(const SliceAt& at,
626  const Slice& tail,
627  int64_t length) const;
628 
633  const NumpyArray
634  getitem_bystrides(const SliceRange& range,
635  const Slice& tail,
636  int64_t length) const;
637 
642  const NumpyArray
643  getitem_bystrides(const SliceEllipsis& ellipsis,
644  const Slice& tail,
645  int64_t length) const;
646 
651  const NumpyArray
652  getitem_bystrides(const SliceNewAxis& newaxis,
653  const Slice& tail,
654  int64_t length) const;
655 
// Protected getitem_next overload set. It is distinct from the public overrides of
// the same name: these return NumpyArray by value, are not marked `override`, and
// take four extra parameters (carry, length, stride, first) in addition to the
// slice item, tail and advanced.
// There is a generic entry taking a SliceItemPtr `head`, followed by one overload
// each for SliceAt, SliceRange, SliceEllipsis, SliceNewAxis and SliceArray64.
// NOTE(review): the meaning of `stride` (bytes vs. items) and of the `first` flag
// is not visible in this listing -- check the real header's documentation.
677  const NumpyArray
678  getitem_next(const SliceItemPtr& head,
679  const Slice& tail,
680  const Index64& carry,
681  const Index64& advanced,
682  int64_t length,
683  int64_t stride,
684  bool first) const;
685 
690  const NumpyArray
691  getitem_next(const SliceAt& at,
692  const Slice& tail,
693  const Index64& carry,
694  const Index64& advanced,
695  int64_t length,
696  int64_t stride,
697  bool first) const;
698 
703  const NumpyArray
704  getitem_next(const SliceRange& range,
705  const Slice& tail,
706  const Index64& carry,
707  const Index64& advanced,
708  int64_t length,
709  int64_t stride,
710  bool first) const;
711 
716  const NumpyArray
717  getitem_next(const SliceEllipsis& ellipsis,
718  const Slice& tail,
719  const Index64& carry,
720  const Index64& advanced,
721  int64_t length,
722  int64_t stride,
723  bool first) const;
724 
729  const NumpyArray
730  getitem_next(const SliceNewAxis& newaxis,
731  const Slice& tail,
732  const Index64& carry,
733  const Index64& advanced,
734  int64_t length,
735  int64_t stride,
736  bool first) const;
737 
742  const NumpyArray
743  getitem_next(const SliceArray64& array,
744  const Slice& tail,
745  const Index64& carry,
746  const Index64& advanced,
747  int64_t length,
748  int64_t stride,
749  bool first) const;
750 
// getitem_next_jagged overload set (const overrides returning ContentPtr). The
// three overloads differ only in the type of `slicecontent`: SliceArray64,
// SliceMissing64 or SliceJagged64. `slicestarts` and `slicestops` are Index64
// arrays, and `tail` is the remaining Slice.
751  const ContentPtr
752  getitem_next_jagged(const Index64& slicestarts,
753  const Index64& slicestops,
754  const SliceArray64& slicecontent,
755  const Slice& tail) const override;
756 
757  const ContentPtr
758  getitem_next_jagged(const Index64& slicestarts,
759  const Index64& slicestops,
760  const SliceMissing64& slicecontent,
761  const Slice& tail) const override;
762 
763  const ContentPtr
764  getitem_next_jagged(const Index64& slicestarts,
765  const Index64& slicestops,
766  const SliceJagged64& slicecontent,
767  const Slice& tail) const override;
768 
// JSON serialization helpers, one per value category (boolean, integer, real,
// complex, string). Each has the same (builder, include_beginendlist) parameters
// as the public tojson_part override and writes into the supplied ToJson builder.
// The integer, real and complex variants are member templates on a type T.
// NOTE(review): presumably tojson_part dispatches to one of these according to
// the array's element type -- confirm in the implementation.
770  void
771  tojson_boolean(ToJson& builder, bool include_beginendlist) const;
772 
774  template <typename T>
775  void
776  tojson_integer(ToJson& builder, bool include_beginendlist) const;
777 
779  template <typename T>
780  void
781  tojson_real(ToJson& builder, bool include_beginendlist) const;
782 
784  template <typename T>
785  void
786  tojson_complex(ToJson& builder, bool include_beginendlist) const;
787 
789  void
790  tojson_string(ToJson& builder, bool include_beginendlist) const;
791 
// Private implementation helpers. The templated ones take a typed `const T* data`
// pointer plus a `length`, and return a type-erased std::shared_ptr<void> (except
// subranges_equal, which returns bool).
792  private:
793 
// index_sort and array_sort share their parameters except that index_sort also
// takes `shifts`. These are the same parameter lists as the public argsort_next
// and sort_next (minus negaxis), so they are presumably the typed back ends of
// those operations -- confirm in the implementation.
796  template<typename T>
797  const std::shared_ptr<void> index_sort(const T* data,
798  int64_t length,
799  const Index64& starts,
800  const Index64& shifts,
801  const Index64& parents,
802  int64_t outlength,
803  bool ascending,
804  bool stable) const;
805 
806  template<typename T>
807  const std::shared_ptr<void> array_sort(const T* data,
808  int64_t length,
809  const Index64& starts,
810  const Index64& parents,
811  int64_t outlength,
812  bool ascending,
813  bool stable) const;
// Note that `outlength` is a non-const reference here (it is passed by value in
// the sort helpers above), so the callee can write a result length back to the
// caller.
814  template<typename T>
815  const std::shared_ptr<void> array_unique(const T* data,
816  int64_t length,
817  const Index64& starts,
818  const Index64& parents,
819  int64_t& outlength) const;
820 
// Both `outoffsets` and `outlength` are non-const references, i.e. writable by the
// callee; `offsets` is read-only input.
821  template<typename T>
822  const std::shared_ptr<void> string_unique(const T* data,
823  int64_t length,
824  const Index64& offsets,
825  Index64& outoffsets,
826  int64_t& outlength) const;
827 
// Same (starts, stops) inputs as the public is_subrange_equal, plus the typed
// data pointer and length.
828  template<typename T>
829  bool subranges_equal(const T* data,
830  int64_t length,
831  const Index64& starts,
832  const Index64& stops) const;
833 
// as_type takes the target type as a runtime util::dtype value.
834  template<typename T>
835  const std::shared_ptr<void> as_type(const T* data,
836  int64_t length,
837  const util::dtype dtype) const;
838 
// cast_to_type encodes both the source (FROM) and destination (TO) element types
// as template parameters; TO cannot be deduced from the arguments and must be
// given explicitly at the call site.
839  template<typename TO, typename FROM>
840  const std::shared_ptr<void> cast_to_type(const FROM* data,
841  int64_t length) const;
842 
// Non-template helpers returning ContentPtr; definitions are not in this header.
843  const ContentPtr
844  sort_data(bool ascending,
845  bool stable) const;
846 
847  const ContentPtr
848  unique_data() const;
849 
// Private data members. ptr_lib_, itemsize_, format_ and dtype_ are declared
// const; ptr_, shape_, strides_ and byteoffset_ are not.
// ptr_ is a type-erased shared buffer handle (std::shared_ptr<void>); the element
// type is therefore carried separately, by itemsize_, format_ and dtype_.
// NOTE(review): because of the const members, the implicitly generated copy and
// move assignment operators are deleted for this class.
851  std::shared_ptr<void> ptr_;
853  const kernel::lib ptr_lib_;
855  std::vector<ssize_t> shape_;
857  std::vector<ssize_t> strides_;
859  ssize_t byteoffset_;
861  const ssize_t itemsize_;
863  const std::string format_;
865  const util::dtype dtype_;
866 
867  };
868 }
869 
870 #endif // AWKWARD_NUMPYARRAY_H_
Represents a Python slice object (usual syntax: array[start:stop:step]).
Definition: Slice.h:93
#define LIBAWKWARD_EXPORT_SYMBOL
Definition: common.h:45
const std::string key(const RecordLookupPtr &recordlookup, int64_t fieldindex, int64_t numfields)
Returns the key associated with a field index, given a RecordLookup and a number of fields...
Abstract superclass of all array node types (flat hierarchy). Any Content can be nested within any ot...
Definition: Content.h:276
Definition: json.h:19
Definition: BitMaskedArray.h:15
A contiguous, one-dimensional array of integers used to represent data structures, rather than numerical data in the arrays themselves.
Definition: Index.h:16
Represents NumPy's newaxis marker (a.k.a. None), which prompts __getitem__ to insert a length-1 regul...
Definition: Slice.h:197
std::string name(Mapping a, V value)
Definition: datetime_util.h:39
Definition: Slice.h:384
Represents a SliceArrayOf, SliceMissingOf, or SliceJaggedOf with missing values: None (no equivalent ...
Definition: Slice.h:431
Represents a single string in a slice tuple, indicating that a RecordArray should be replaced by one ...
Definition: Slice.h:340
Abstract class for all reducer algorithms.
Definition: Reducer.h:20
ERROR combinations(kernel::lib ptr_lib, T *toindex, int64_t n, bool replacement, int64_t singlelen)
const std::vector< std::string > keys(const RecordLookupPtr &recordlookup, int64_t numfields)
Returns a given RecordLookup as keys or generates anonymous ones from a number of fields.
Form describing NumpyArray.
Definition: NumpyArray.h:18
std::shared_ptr< std::string > FormKey
Definition: Content.h:19
dtype
NumPy dtypes that can be interpreted within Awkward C++ (only the primitive, fixed-width types)...
Definition: util.h:26
ERROR unique(kernel::lib ptr_lib, T *toptr, int64_t length, int64_t *tolength)
Represents an array of integers in a slice (possibly converted from an array of booleans).
Definition: Slice.h:225
std::map< std::string, std::string > Parameters
Definition: util.h:165
ERROR copy_to(kernel::lib to_lib, kernel::lib from_lib, void *to_ptr, void *from_ptr, int64_t bytelength)
Internal function to copy an array buffer from library FROM to library TO, usually between main memory and a ...
std::shared_ptr< Form > FormPtr
Definition: Content.h:18
int64_t fieldindex(const RecordLookupPtr &recordlookup, const std::string &key, int64_t numfields)
Returns the field index associated with a key, given a RecordLookup and a number of fields...
Represents an array of nested lists, where the content may be SliceArrayOf, SliceMissingOf, or SliceJaggedOf (no equivalent in NumPy).
Definition: Slice.h:511
Represents a rectilinear numerical array that can be converted to and from NumPy without loss of info...
Definition: NumpyArray.h:138
std::vector< std::shared_ptr< Content > > ContentPtrVec
Definition: Content.h:16
Mapping::value_type::value_type value(Mapping a, const std::string &name)
Definition: datetime_util.h:49
A sequence of SliceItem objects representing a tuple passed to Python's __getitem__.
Definition: Slice.h:585
Represents a Python Ellipsis object (usual syntax: array[...]).
Definition: Slice.h:169
bool haskey(const RecordLookupPtr &recordlookup, const std::string &key, int64_t numfields)
Returns true if a RecordLookup has a given key; false otherwise.
std::shared_ptr< Type > TypePtr
Definition: Content.h:23
std::shared_ptr< RecordLookup > RecordLookupPtr
Definition: util.h:130
Represents an integer in a tuple of slices passed to __getitem__ in Python.
Definition: Slice.h:58
std::map< std::string, std::string > TypeStrs
Definition: util.h:215
Abstract superclass of all array node forms, which expresses the nesting structure without any large ...
Definition: Content.h:39
std::shared_ptr< Identities > IdentitiesPtr
Definition: Identities.h:16
std::shared_ptr< Content > ContentPtr
Definition: ArrayBuilder.h:16
std::shared_ptr< SliceItem > SliceItemPtr
Definition: Slice.h:15
lib
Definition: kernel-dispatch.h:20