Column Store
GenericGenerator.h
Go to the documentation of this file.
1 
2 #pragma once
3 #include <memory>
4 #include <queue>
5 #include <string>
6 #include <unordered_map>
7 
8 #include "GenericQueryBuilder.h"
9 #include "GroupByManager.h"
12 #include "interfaces/Validators.h"
13 
14 namespace GenericQuery {
27 
33 
36  std::vector<int> baseIndices;
37 
44  std::vector<int> indices;
46  int FKIndex;
56  std::unordered_map<JoinValue, DataRecord, JoinHash> joinMap;
57 
58  DataGeneraterJoin(std::string name, Metadata metadata)
59  : manager(name, metadata) {}
60  };
61 
62  bool _hasNext = false;
63 
65 
67  std::vector<DataGeneraterJoin> joins;
68 
75 
81  void _setNext();
82 
92 
93  public:
110  bool hasNext();
111 
117  DataRecord next();
118 };
119 
121  bool _hasNext = false;
122 
124 
125  public:
136  GenericDataGenerator generator(builder);
137 
138  auto generatorMetadata = generator.getMetadata();
139 
140  auto &aggregations = builder.aggregations;
141 
142  std::vector<ColumnStore::Column> columns;
143  for (auto &aggregate : aggregations)
144  columns.emplace_back(aggregate->getColumnName(),
146 
147  metadata = Metadata(new DataRecordMetadata(columns));
148 
149  if (!generator.hasNext()) return;
150 
151  for (auto &aggregate : aggregations)
152  aggregate->initialize(generatorMetadata);
153 
154  while (generator.hasNext()) {
155  auto record = generator.next();
156  for (auto &aggregate : aggregations) aggregate->addRecord(record);
157  }
158 
159  std::vector<ColumnStore::DataValue> values;
160 
161  for (auto &aggregate : aggregations)
162  values.emplace_back(aggregate->getValue());
163 
165  _hasNext = true;
166  }
173  bool hasNext() { return _hasNext; }
174 
181  _hasNext = false;
182  return record;
183  }
184 };
185 
187  std::queue<ColumnStore::DataRecord> records;
188 
189  public:
200  GenericDataGenerator generator(builder);
201 
202  auto generatorMetadata = generator.getMetadata();
203 
204  GroupByManager manager(builder.groupBys, generatorMetadata);
205 
206  std::vector<std::string> &groupBys = builder.groupBys;
207  auto &aggregations = builder.aggregations;
208 
209  std::vector<ColumnStore::Column> columns;
210 
211  for (auto &grp : groupBys)
212  columns.push_back(generatorMetadata->getColumn(grp));
213 
214  for (auto &aggregate : aggregations)
215  columns.emplace_back(aggregate->getColumnName(),
217 
218  metadata = Metadata(new DataRecordMetadata(columns));
219 
220  std::unordered_map<GroupByValue,
221  std::vector<ColumnStore::AggregatorQuery>, GroupHash>
222  map;
223 
224  for (auto &aggregate : aggregations)
225  aggregate->initialize(generatorMetadata);
226 
227  auto duplicateAggregators = [&]() {
228  std::vector<ColumnStore::AggregatorQuery> temp;
229  for (auto &agg : aggregations) temp.push_back(agg->clone());
230  return temp;
231  };
232 
233  while (generator.hasNext()) {
234  auto record = generator.next();
235  auto groupValue = manager.processRecord(record);
236  if (!map.count(groupValue))
237  map[groupValue] = duplicateAggregators();
238 
239  auto &aggregations1 = map[groupValue];
240 
241  for (auto &aggregate : aggregations1) aggregate->addRecord(record);
242  }
243 
244  for (auto &[groupValue, aggregations1] : map) {
245  std::vector<ColumnStore::DataValue> values = groupValue.values;
246  for (auto &aggregate : aggregations1)
247  values.emplace_back(aggregate->getValue());
248  records.emplace(values);
249  }
250  }
257  bool hasNext() { return !records.empty(); }
258 
265  auto temp = records.front();
266  records.pop();
267  return temp;
268  }
269 };
270 
271 } // namespace GenericQuery
GenericQuery::GenericDataAggregator::record
DataRecord record
Definition: GenericGenerator.h:123
GenericQuery::GenericGroupByAggregator::records
std::queue< ColumnStore::DataRecord > records
Definition: GenericGenerator.h:187
GenericQuery::GenericDataGenerator::_hasNext
bool _hasNext
Definition: GenericGenerator.h:62
ColumnStore::RecordValidator
std::shared_ptr< RecordValidatorInterface > RecordValidator
Shared Pointer for RecordValidator Interface.
Definition: Validators.h:39
ColumnStore::DataType
DataType
Different datatypes supported by this project.
Definition: Column.h:16
csv
Definition: CSVparser.cpp:6
GenericQuery::JoinValueManager::processValue
JoinValue processValue(DataValue &value)
Definition: GroupByManager.cpp:32
ColumnStore::DataRecordMetadata
Stores metadata information of the data record.
Definition: DataRecord.h:88
GenericQuery::GenericDataAggregator::_hasNext
bool _hasNext
Definition: GenericGenerator.h:121
GenericQuery::GenericQueryBuilder::baseSource
std::string baseSource
Base data source.
Definition: GenericQueryBuilder.h:73
GenericQuery::GenericDataGenerator::baseIndices
std::vector< int > baseIndices
List of column indices to extract values from base source records.
Definition: GenericGenerator.h:36
GenericQuery::GenericDataGenerator
Definition: GenericGenerator.h:28
GenericQuery::GenericQueryBuilder::generateMetadata
Metadata generateMetadata()
Generates metadata of result records using join and query information.
Definition: GenericQueryBuilder.cpp:95
GenericQuery::GenericDataAggregator::next
DataRecord next()
generates a new record
Definition: GenericGenerator.h:180
GenericQuery
Data Generator for GenericQueryBuilder.
Definition: GenericGenerator.cpp:11
GenericQuery::GenericDataGenerator::joins
std::vector< DataGeneraterJoin > joins
List of joins.
Definition: GenericGenerator.h:67
GenericQuery::GenericQueryBuilder::data_sources
std::unordered_map< std::string, DataSource > data_sources
Map from data source name to its data generator.
Definition: GenericQueryBuilder.h:66
GenericQuery::GroupByManager::processRecord
GroupByValue processRecord(DataRecord &record)
Definition: GroupByManager.cpp:67
ColumnStore::Metadata
std::shared_ptr< DataRecordMetadata > Metadata
Shared pointer to DataRecordMetadata.
Definition: DataRecord.h:208
GenericQueryBuilder.h
Generic Query Builder.
GenericQuery::GenericDataGenerator::baseSource
DataSource baseSource
Definition: GenericGenerator.h:32
ColumnStore::DataSource
std::shared_ptr< DataGeneratorInterface > DataSource
Shared pointer to the DataGeneratorInterface.
Definition: DataGeneratorInterface.h:73
GenericQuery::JoinValue::manager
JoinValueManager * manager
Definition: GroupByManager.h:58
GenericQuery::GenericDataAggregator::GenericDataAggregator
GenericDataAggregator(GenericQueryBuilder builder)
Construct a new Generic Data Aggregator object.
Definition: GenericGenerator.h:135
GenericQuery::GenericGroupByAggregator
Definition: GenericGenerator.h:186
GenericQuery::GroupByValue::values
std::vector< ColumnStore::DataValue > values
Definition: GroupByManager.h:47
GenericQuery::GenericQueryBuilder::generateRecordValidator
RecordValidator generateRecordValidator()
Creates the record validator.
Definition: GenericQueryBuilder.cpp:130
GenericQuery::GenericDataGenerator::builder
GenericQueryBuilder builder
builder object from which we are generating data
Definition: GenericGenerator.h:30
ColumnStore::DataRecord
Stores a row of data.
Definition: DataRecord.h:64
GenericQuery::GenericGroupByAggregator::hasNext
bool hasNext()
check if a record is available
Definition: GenericGenerator.h:257
GenericQuery::GenericDataGenerator::_getCandidateNext
DataRecord _getCandidateNext()
Gets the next candidate record.
Definition: GenericGenerator.cpp:50
GenericQuery::GenericDataGenerator::_setNext
void _setNext()
internal function to set the next valid record
Definition: GenericGenerator.cpp:31
GenericGenerator.h
GenericQuery::GenericGroupByAggregator::GenericGroupByAggregator
GenericGroupByAggregator(GenericQueryBuilder builder)
Construct a new Generic Group By Aggregator object.
Definition: GenericGenerator.h:199
GenericQuery::GenericDataAggregator::hasNext
bool hasNext()
check if a record is available
Definition: GenericGenerator.h:173
GroupByManager.h
GenericQuery::GroupByValue
Definition: GroupByManager.h:46
ColumnStore::DataGeneratorInterface::getMetadata
Metadata getMetadata() const
Returns the metadata used for processing queries (member of the interface for relational data sources).
Definition: DataGeneratorInterface.cpp:23
GenericQuery::JoinValue
Definition: GroupByManager.h:56
Validators.h
RecordValidator Interfaces and Implementations for Query Processing.
GenericQuery::GenericDataGenerator::GenericDataGenerator
GenericDataGenerator(GenericQueryBuilder builder)
Construct a new Generic Data Generator object.
Definition: GenericGenerator.cpp:84
GenericQuery::JoinValueManager
Definition: GroupByManager.h:35
GenericQuery::GenericDataGenerator::hasNext
bool hasNext()
check if a record is available
Definition: GenericGenerator.cpp:142
GenericQuery::GenericDataGenerator::recordValidator
RecordValidator recordValidator
Definition: GenericGenerator.h:31
GenericQuery::GroupByManager
Definition: GroupByManager.h:18
GenericQuery::GenericDataGenerator::DataGeneraterJoin::DataGeneraterJoin
DataGeneraterJoin(std::string name, Metadata metadata)
Definition: GenericGenerator.h:58
DataGeneratorInterface.h
Data Generator Interface.
ColumnStore::DataGeneratorInterface::metadata
Metadata metadata
metadata for processing queries
Definition: DataGeneratorInterface.h:27
csv::operator<<
std::ostream & operator<<(std::ostream &os, const Row &row)
Definition: CSVparser.cpp:250
GenericQuery::GenericDataGenerator::nextRecord
DataRecord nextRecord
Definition: GenericGenerator.h:64
GenericQuery::GenericDataGenerator::DataGeneraterJoin::FKIndex
int FKIndex
FK index in base data source.
Definition: GenericGenerator.h:46
GenericQuery::GenericQueryBuilder::groupBys
std::vector< std::string > groupBys
Definition: GenericQueryBuilder.h:100
ConditionQuery.h
Generic Condition Builder.
GenericQuery::GenericDataGenerator::DataGeneraterJoin::joinMap
std::unordered_map< JoinValue, DataRecord, JoinHash > joinMap
Definition: GenericGenerator.h:56
ColumnStore::DataValue
Implementation of a single data element.
Definition: DataRecord.h:28
GenericQuery::GenericQueryBuilder
Generic Query Builder Class.
Definition: GenericQueryBuilder.h:47
GenericQuery::GenericQueryBuilder::joins
std::vector< std::pair< std::string, Join > > joins
List of joins.
Definition: GenericQueryBuilder.h:85
GenericQuery::GenericGroupByAggregator::next
DataRecord next()
generates a new record
Definition: GenericGenerator.h:264
GenericQuery::GenericDataGenerator::DataGeneraterJoin
helper structure to efficiently process joins
Definition: GenericGenerator.h:42
ColumnStore::DataType::FLOAT
@ FLOAT
GenericQuery::GroupHash
Definition: GroupByManager.h:68
GenericQuery::GenericDataGenerator::DataGeneraterJoin::indices
std::vector< int > indices
indices of columns required for joined result
Definition: GenericGenerator.h:44
GenericQuery::GenericQueryBuilder::baseColumns
std::vector< std::string > baseColumns
list of columns from the base table required in result
Definition: GenericQueryBuilder.h:79
GenericQuery::GenericDataGenerator::next
DataRecord next()
generates a new record
Definition: GenericGenerator.cpp:149
GenericQuery::GenericDataAggregator
Definition: GenericGenerator.h:120
ColumnStore::DataGeneratorInterface
Interface for relational data sources.
Definition: DataGeneratorInterface.h:24
GenericQuery::GenericQueryBuilder::aggregations
std::vector< ColumnStore::AggregatorQuery > aggregations
Definition: GenericQueryBuilder.h:99
GenericQuery::GenericDataGenerator::_getNext
DataRecord _getNext()
internal function to process and generate next valid record
Definition: GenericGenerator.cpp:17
GenericQuery::GenericDataGenerator::DataGeneraterJoin::manager
JoinValueManager manager
Join value manager; presumably produces the JoinValue keys used by joinMap (the map of PK of join source to join records).
Definition: GenericGenerator.h:55