Column Store
PostgreSQLDataGenerator.h
Go to the documentation of this file.
1 #pragma once
2 
3 #include <iostream>
4 #include <pqxx/pqxx>
5 #include <queue>
6 #include <vector>
7 
10 #include "PostgreSQLMetaData.h"
11 
18 using Parser::Projection;
20 using Parser::Table;
21 using std::vector;
22 
23 namespace Postgres {
// Rows fetched from PostgreSQL that have not been handed out yet; load_into_queue() pushes onto it.
25  std::queue<DataRecord> data;
// Connection obtained from the PostgreSQLMetaData object given to the constructor.
26  pqxx::connection *conn;
// Name of the relation that is placed in the FROM clause of the generated queries.
27  std::string relation_name;
// Column names requested by the caller; when empty, the constructor takes every column of the relation.
30  vector<std::string> columns;
31 
32  pqxx::result get_rows_of_columns(pqxx::transaction_base &txn,
33  vector<ColumnStore::Column> columns) {
34  std::string sql = "SELECT ";
35  for (int i = 0; i < int(columns.size()); i++) {
36  std::string column_name = columns[i].name;
37  sql += (column_name + ",");
38  }
39  sql.pop_back();
40  sql += " FROM " + relation_name + " LIMIT " + std::to_string(batch_size) +
41  " OFFSET " + std::to_string(offset) + ";";
42  offset += batch_size;
43  return txn.exec(sql);
44  }
45 
46  void set_total_number_of_rows(pqxx::transaction_base &txn) {
47  std::string sql = "SELECT count(*) FROM " + relation_name + ";";
48  pqxx::result rows = txn.exec(sql);
49  total_number_of_rows = rows[0]["count"].as<int>();
50  }
51 
52  void load_into_queue(pqxx::transaction_base &txn,
53  vector<ColumnStore::Column> columns) {
54  pqxx::result rows_of_all_columns = get_rows_of_columns(txn, columns);
55  for (auto row : rows_of_all_columns) {
56  vector<DataValue> values;
57  for (int i = 0; i < int(columns.size()); i++) {
58  auto type = columns[i].type;
59  std::string column_name = columns[i].name;
60  if (type == DataType::INT) {
61  values.push_back(
62  DataValue((int)row[column_name].as<int>()));
63  } else if (type == DataType::FLOAT) {
64  values.push_back(
65  DataValue((float)row[column_name].as<float>()));
66  } else if (type == DataType::STRING) {
67  values.push_back(
68  DataValue((std::string)row[column_name].as<std::string>()));
69  }
70  }
71  data.push(values);
72  }
73  }
74 
75  public:
77  std::string r_name, vector<std::string> c = {},
78  int b_size = 5000) {
79  conn = postgresql_meta_data.get_connection();
80  pqxx::work txn{*conn};
81  relation_name = r_name;
82  columns = c;
83  batch_size = b_size;
84  offset = 0;
85  schema_meta_data = postgresql_meta_data.get_schema_meta_data();
86  try {
87  try {
88  vector<Table> tables = schema_meta_data.get_tables();
91  vector<ColumnStore::Column> metadata_columns;
92  if (int(columns.size()) == 0) {
93  auto temp = table.get_columns();
94  for (auto column : temp) metadata_columns.push_back(column);
95  } else {
96  for (int i = 0; i < int(columns.size()); i++) {
97  metadata_columns.push_back(table[columns[i]]);
98  }
99  }
100  metadata = Metadata(new DataRecordMetadata(metadata_columns));
101  load_into_queue(txn, metadata_columns);
102  } catch (const Parser::TableNotFoundException &e) {
103  vector<Projection> projections =
107  vector<ColumnStore::Column> metadata_columns;
108  if (int(columns.size()) == 0) {
109  vector<projection_column> projection_columns =
110  p.get_columns();
111  for (int i = 0; i < int(projection_columns.size()); i++) {
112  projection_column p_column = projection_columns[i];
113  ColumnStore::Column c = {
114  p_column.name, p_column.data_type, p_column.index};
115  metadata_columns.push_back(c);
116  }
117  } else {
118  for (int i = 0; i < int(columns.size()); i++) {
119  projection_column p_column = p[columns[i]];
120  ColumnStore::Column c = {
121  p_column.name, p_column.data_type, p_column.index};
122  metadata_columns.push_back(c);
123  }
124  }
125  metadata = Metadata(new DataRecordMetadata(metadata_columns));
126  load_into_queue(txn, metadata_columns);
127  } catch (const Parser::ProjectionNotFoundException &e) {
128  std::cout << e.what() << std::endl;
129  exit(1);
130  }
131  } catch (const std::exception &e) {
132  std::cout << e.what() << std::endl;
133  exit(1);
134  }
135  }
136 
138  if (data.size() == 0) {
139  pqxx::work txn{*conn};
140  try {
141  try {
142  vector<Table> tables = schema_meta_data.get_tables();
144  vector<ColumnStore::Column> metadata_columns;
145  if (int(columns.size()) == 0) {
146  for (auto c : table.get_columns())
147  metadata_columns.push_back(c);
148  } else {
149  for (int i = 0; i < int(columns.size()); i++) {
150  metadata_columns.push_back(table[columns[i]]);
151  }
152  }
153  load_into_queue(txn, metadata_columns);
154  } catch (const std::exception &e) {
155  vector<Projection> projections =
157  Projection p =
159  vector<projection_column> projection_columns =
160  p.get_columns();
161  vector<ColumnStore::Column> metadata_columns;
162  if (int(columns.size()) == 0) {
163  vector<projection_column> projection_columns =
164  p.get_columns();
165  for (int i = 0; i < int(projection_columns.size()); i++) {
166  projection_column p_column = projection_columns[i];
167  ColumnStore::Column c = {p_column.name,
168  p_column.data_type,
169  p_column.index};
170  metadata_columns.push_back(c);
171  }
172  } else {
173  for (int i = 0; i < int(columns.size()); i++) {
174  projection_column p_column = p[columns[i]];
175  ColumnStore::Column c = {p_column.name,
176  p_column.data_type,
177  p_column.index};
178  metadata_columns.push_back(c);
179  }
180  }
181  load_into_queue(txn, metadata_columns);
182  }
183  } catch (const std::exception &e) {
184  std::cout << "No relation " + relation_name << std::endl;
185  std::cerr << e.what() << std::endl;
186  exit(1);
187  }
188  }
189  auto d = data.front();
190  data.pop();
191  return d;
192  }
193 
194  void advance(int recordCount) {
195  while(data.size() && recordCount) {
196  recordCount--;
197  data.pop();
198  }
199  if(recordCount) {
200  offset += recordCount;
201  }
202  }
203 
204  bool hasNext() { return (offset < total_number_of_rows) || data.size(); }
205 };
206 }
Postgres::PostgreSQLMetaData::get_schema_meta_data
SchemaMetaData get_schema_meta_data() const
Definition: PostgreSQLMetaData.h:25
Postgres::PostgreSQLDataSource::hasNext
bool hasNext()
Definition: PostgreSQLDataGenerator.h:204
Parser::projection_column::data_type
DataType data_type
Definition: Projection.h:32
SchemaMetaData.h
Parser::SchemaMetaData
Definition: SchemaMetaData.h:14
Postgres::PostgreSQLDataSource::advance
void advance(int recordCount)
Definition: PostgreSQLDataGenerator.h:194
ColumnStore::DataType
DataType
Different datatypes supported by this project.
Definition: Column.h:16
Parser::SchemaMetaData::get_table
Table & get_table(string table_name)
Definition: SchemaMetaData.cpp:5
Postgres::PostgreSQLDataSource::offset
int offset
Definition: PostgreSQLDataGenerator.h:28
ColumnStore::DataRecordMetadata
Stores metadata information of the data record.
Definition: DataRecord.h:88
Postgres::PostgreSQLDataSource::next
DataRecord next()
Definition: PostgreSQLDataGenerator.h:137
Postgres
Definition: PostgreSQLDataGenerator.h:23
Postgres::PostgreSQLDataSource::data
std::queue< DataRecord > data
Definition: PostgreSQLDataGenerator.h:25
Postgres::PostgreSQLDataSource::set_total_number_of_rows
void set_total_number_of_rows(pqxx::transaction_base &txn)
Definition: PostgreSQLDataGenerator.h:46
ColumnStore::Column
Struct which maintains metadata of a single column.
Definition: Column.h:22
ColumnStore::Metadata
std::shared_ptr< DataRecordMetadata > Metadata
Shared pointer to DataRecordMetadata.
Definition: DataRecord.h:208
ColumnStore::DataSource
std::shared_ptr< DataGeneratorInterface > DataSource
Shared pointer to the DataGeneratorInterface.
Definition: DataGeneratorInterface.h:73
Parser::Projection
Definition: Projection.h:44
Postgres::PostgreSQLDataSource::get_rows_of_columns
pqxx::result get_rows_of_columns(pqxx::transaction_base &txn, vector< ColumnStore::Column > columns)
Definition: PostgreSQLDataGenerator.h:32
Postgres::PostgreSQLDataSource::PostgreSQLDataSource
PostgreSQLDataSource(PostgreSQLMetaData postgresql_meta_data, std::string r_name, vector< std::string > c={}, int b_size=5000)
Definition: PostgreSQLDataGenerator.h:76
Parser::Projection::get_columns
vector< projection_column > get_columns()
Definition: Projection.h:69
Postgres::PostgreSQLDataSource::conn
pqxx::connection * conn
Definition: PostgreSQLDataGenerator.h:26
Postgres::PostgreSQLDataSource::load_into_queue
void load_into_queue(pqxx::transaction_base &txn, vector< ColumnStore::Column > columns)
Definition: PostgreSQLDataGenerator.h:52
ColumnStore::DataRecord
Stores a row of data.
Definition: DataRecord.h:64
Postgres::PostgreSQLDataSource::schema_meta_data
SchemaMetaData schema_meta_data
Definition: PostgreSQLDataGenerator.h:29
Parser::ProjectionNotFoundException
Definition: Projection.h:18
Postgres::PostgreSQLDataSource::relation_name
std::string relation_name
Definition: PostgreSQLDataGenerator.h:27
Parser::SchemaMetaData::get_projection
Projection & get_projection(string projection_name)
Definition: SchemaMetaData.cpp:13
Parser::TableNotFoundException
Definition: Table.h:16
Parser::SchemaMetaData::get_projections
vector< Projection > & get_projections()
Definition: SchemaMetaData.h:31
Parser::projection_column
Definition: Projection.h:27
Parser::ProjectionNotFoundException::what
virtual const char * what() const
Definition: Projection.h:24
Postgres::PostgreSQLDataSource::columns
vector< std::string > columns
Definition: PostgreSQLDataGenerator.h:30
DataGeneratorInterface.h
Data Generator Interface.
Parser::Table
Definition: Table.h:25
ColumnStore::DataGeneratorInterface::metadata
Metadata metadata
metadata for processing queries
Definition: DataGeneratorInterface.h:27
Parser::projection_column::index
int index
Definition: Projection.h:33
PostgreSQLMetaData.h
ColumnStore::DataValue
Implementation of a single data element.
Definition: DataRecord.h:28
Postgres::PostgreSQLDataSource::batch_size
int batch_size
Definition: PostgreSQLDataGenerator.h:28
Postgres::PostgreSQLMetaData::get_connection
pqxx::connection * get_connection() const
Definition: PostgreSQLMetaData.h:21
Postgres::PostgreSQLDataSource::total_number_of_rows
int total_number_of_rows
Definition: PostgreSQLDataGenerator.h:28
Postgres::PostgreSQLDataSource
Definition: PostgreSQLDataGenerator.h:24
Parser::SchemaMetaData::get_tables
vector< Table > & get_tables()
Definition: SchemaMetaData.h:29
Postgres::PostgreSQLMetaData
Definition: PostgreSQLMetaData.h:13
Parser::projection_column::name
std::string name
Definition: Projection.h:28
ColumnStore::DataGeneratorInterface
Interface for relational data sources.
Definition: DataGeneratorInterface.h:24
Parser::Table::get_columns
std::vector< Parser::Column > & get_columns()
Definition: Table.h:41