MetricKnn API
Fast Similarity Search using the Metric Space Approach
mknn_dataset_loader.hpp
1 /*
2  * Copyright (C) 2012-2015, Juan Manuel Barrios <juanmanuel@barrios.cl>
3  * All rights reserved.
4  *
5  * This file is part of MetricKnn.
6  * MetricKnn is made available under the terms of the BSD 2-Clause License.
7  */
8 
9 #ifndef MKNN_DATASET_LOADER_HPP_
10 #define MKNN_DATASET_LOADER_HPP_
11 
12 #include "../metricknn_cpp.hpp"
13 
14 namespace mknn {
15 
20 public:
21 
27  virtual long long getNumObjects()= 0;
34  virtual void *getObject(long long pos)= 0;
35 
43  virtual void pushObject(void *object)= 0;
44 
45  virtual ~DatasetCustom() = 0;
46 
47 };
48 
/**
 * A dataset returned by DatasetLoader::Concatenate, i.e. the concatenation
 * of one or more datasets (the "subdatasets").
 */
52 class DatasetConcatenate: public Dataset {
53 public:
 /**
  * Returns the number of subdatasets.
  */
58  long long getNumSubDatasets();
 /**
  * Returns one of the subdatasets that produced this dataset.
  *
  * @param num_subdataset number identifying the subdataset to return
  *        (NOTE(review): valid range and numbering base are not visible
  *        here -- confirm against the implementation).
  */
66  Dataset getSubDataset(long long num_subdataset);
 /**
  * Given the number of an object, reports two numbers through the output
  * pointers: the number of the subdataset and, presumably (going by the
  * parameter name), the position of the object inside that subdataset.
  *
  * @param posObject number of the object in this dataset.
  * @param out_numSubdataset receives the number of the subdataset.
  * @param out_posObjectInSubdataset receives the second number
  *        (assumed: object position within the subdataset -- TODO confirm).
  */
76  void getDatasetObject(long long posObject, long long *out_numSubdataset,
77  long long *out_posObjectInSubdataset);
78 
79 };
80 
/**
 * A dataset returned by DatasetLoader::MultiObject, where each object is a
 * multi-object.
 */
84 class DatasetMultiObject: public Dataset {
85 public:
 /**
  * Returns the number of subdatasets.
  */
90  long long getNumSubDatasets();
 /**
  * Returns one of the subdatasets that produced this dataset.
  *
  * @param num_subdataset number identifying the subdataset to return
  *        (NOTE(review): valid range and numbering base are not visible
  *        here -- confirm against the implementation).
  */
98  Dataset getSubDataset(long long num_subdataset);
99 
100 };
101 
106 public:
107 
120  static Dataset Custom(DatasetCustom *custom_dataset,
121  bool delete_custom_dataset_on_dataset_release, Domain domain);
122 
139  static Dataset PointerArray(void **object_array, long long num_objects,
140  Domain domain);
141 
161  static Dataset PointerCompactVectors(void *vectors_header,
162  long long num_vectors, long long vector_num_dimensions,
163  const std::string vector_dimension_datatype);
164 
174  static Dataset ParseVectorFile(std::string filename,
175  const std::string datatype);
176 
184  static Dataset ParseStringsFile(std::string filename);
185 
195  const std::vector<Dataset> &subdatasets);
196 
205  static Dataset SubsetSegment(Dataset &superdataset, long long position_start,
206  long long length);
207 
216  static Dataset SubsetPositions(Dataset &superdataset, long long *positions,
217  long long num_positions);
229  static Dataset UniformRandomVectors(long long num_objects, long long dimension,
230  double dimension_minValueIncluded,
231  double dimension_maxValueNotIncluded, const std::string datatype);
232 
244  const std::vector<Dataset> &subdatasets);
245 
254  static Dataset Empty(Domain domain);
255 
256 };
257 
258 }
259 #endif
static Dataset ParseVectorFile(std::string filename, const std::string datatype)
Creates a new dataset by reading a text file with vectors.
static Dataset SubsetPositions(Dataset &superdataset, long long *positions, long long num_positions)
Creates a new dataset which is a subset of a bigger dataset.
static DatasetConcatenate Concatenate(const std::vector< Dataset > &subdatasets)
Creates a new dataset which is the concatenation of one or more datasets.
long long getNumSubDatasets()
Returns the number of subdatasets.
Abstract class that must be inherited to define a custom dataset.
Definition: mknn_dataset_loader.hpp:19
Dataset getSubDataset(long long num_subdataset)
Returns one of the subdatasets that produced this dataset.
Dataset getSubDataset(long long num_subdataset)
Returns one of the subdatasets that produced this dataset.
static Dataset ParseStringsFile(std::string filename)
Creates a new dataset by reading a text file with strings.
Represents a set of objects of any type.
Definition: mknn_dataset.hpp:23
A dataset returned by DatasetLoader::MultiObject.
Definition: mknn_dataset_loader.hpp:84
virtual void * getObject(long long pos)=0
Returns an object in the dataset.
static Dataset Empty(Domain domain)
Creates a new empty dataset that can dynamically grow as new objects are added.
Definition: mevaluation_answers.hpp:18
long long getNumSubDatasets()
Returns the number of subdatasets.
static Dataset PointerCompactVectors(void *vectors_header, long long num_vectors, long long vector_num_dimensions, const std::string vector_dimension_datatype)
Creates a new dataset from a data array.
A dataset returned by DatasetLoader::Concatenate.
Definition: mknn_dataset_loader.hpp:52
Different Loaders.
Definition: mknn_dataset_loader.hpp:105
static DatasetMultiObject MultiObject(const std::vector< Dataset > &subdatasets)
Creates a new dataset where each object is a multi-object.
void getDatasetObject(long long posObject, long long *out_numSubdataset, long long *out_posObjectInSubdataset)
Given the number of an object, returns two numbers: the number of the subdataset and the position of the object within that subdataset.
virtual void pushObject(void *object)=0
Adds an object to a dynamic dataset.
static Dataset PointerArray(void **object_array, long long num_objects, Domain domain)
Creates a new dataset from an array of objects.
static Dataset Custom(DatasetCustom *custom_dataset, bool delete_custom_dataset_on_dataset_release, Domain domain)
Creates a new custom dataset.
static Dataset UniformRandomVectors(long long num_objects, long long dimension, double dimension_minValueIncluded, double dimension_maxValueNotIncluded, const std::string datatype)
Creates a new dataset with random vectors of the given datatype.
virtual long long getNumObjects()=0
Returns the current size of the dataset.
A domain represents the type of objects that are contained in a dataset.
Definition: mknn_domain.hpp:33
static Dataset SubsetSegment(Dataset &superdataset, long long position_start, long long length)
Creates a new dataset which is a subset of a bigger dataset.
Powered by Download MetricKnn