@ -0,0 +1,128 @@ |
|||
/**
|
|||
* Copyright (c) 2018 Niklas Rosenstein |
|||
* MIT licensed. |
|||
* |
|||
* @description Tiny library to parse CSV files. |
|||
*/ |
|||
|
|||
#pragma once
|
|||
#include <cassert>
#include <cstddef>
#include <cstdio>
#include <functional>
#include <istream>
#include <string>
#include <utility>
#include <vector>
|
|||
|
|||
namespace niklasrosenstein {

/* Container type for all elements (fields) of a single CSV row. */
using csv_row = std::vector<std::string>;

/* Callback that feeds new data into the CSV parser. It receives a buffer
 * and the buffer's capacity and returns the number of bytes it stored in
 * the buffer. Returning 0 tells the parser that the input is exhausted. */
using csv_readdata_f = std::function<std::size_t(char* buffer, std::size_t bufsize)>;

/* Callback type for when a CSV row is available. Returning false makes
 * the parser stop immediately. */
using csv_rowdone_f = std::function<bool(csv_row const& row)>;

/* Dialect description for the parser. */
struct csv_info {
  char delim = ',';  // Character that separates two fields of a row.
  char quote = '"';  // Character that opens and closes a quoted section.
};

/* Parse CSV data served by the #readdata callback. The #rowdone callback
 * is called once for each row that has been parsed. Parsing stops when
 * #readdata returns no more data or #rowdone returns false.
 *
 * Inside a quoted section the delimiter and newlines are taken literally;
 * a doubled quote character produces one literal quote character. An
 * unquoted "\r" directly in front of a "\n" is dropped, so "\r\n" line
 * endings behave like "\n". A last row that is not terminated by a newline
 * is still delivered, including a trailing empty field.
 *
 * @param readdata The callback that provides data into the parser.
 * @param rowdone The callback that is invoked when a row of CSV data is
 *   available.
 * @param info The delimiter and quote characters to use.
 * @return false if #rowdone returned false for any row, true otherwise. */
inline bool csv_parse(
  csv_readdata_f const& readdata, csv_rowdone_f const& rowdone,
  csv_info const& info = {})
{
  enum { DEFAULT, QOPEN } state = DEFAULT;
  int c = 0, prev_c = EOF;
  std::string token;
  csv_row line;

  // True while characters have been consumed that do not yet belong to a
  // row handed to #rowdone. Needed to flush the last row at end of input
  // even when its final field is empty.
  bool row_pending = false;

  char buffer[BUFSIZ];
  std::size_t bufidx = 0;
  std::size_t bufsiz = 0;

  for (;;) {
    if (bufidx == bufsiz) {
      // Buffer exhausted, ask the callback for the next chunk.
      bufsiz = readdata(buffer, sizeof(buffer));
      bufidx = 0;
      if (bufsiz == 0)
        break;  // End of input.
    }
    prev_c = c;
    c = buffer[bufidx++];
    row_pending = true;

    switch (state) {
      case DEFAULT: {
        if (c == info.quote) {
          // A quote directly after a closing quote is an embedded quote,
          // eg. "Super ""simple"" API".
          if (prev_c == info.quote) {
            token.push_back(info.quote);
          }
          state = QOPEN;
        }
        else if (c == info.delim || c == '\n') {
          // Swallow the carriage return of a "\r\n" line ending. We can
          // only get here in DEFAULT state, so the '\r' was unquoted.
          if (c == '\n' && prev_c == '\r' && !token.empty() && token.back() == '\r') {
            token.pop_back();
          }
          line.push_back(std::move(token));
          token.clear();  // A moved-from string has an unspecified value.
          if (c == '\n') {
            if (!rowdone(line)) {
              return false;
            }
            line.clear();
            row_pending = false;
          }
        }
        else {
          token.push_back(static_cast<char>(c));
        }
        break;
      }  // DEFAULT

      case QOPEN: {
        if (c == info.quote) {
          state = DEFAULT;
        }
        else {
          token.push_back(static_cast<char>(c));
        }
        break;
      }  // QOPEN
    }  // switch
  }

  // Flush a last row that was not terminated by a newline and propagate
  // the callback's verdict like for every other row.
  if (row_pending) {
    line.push_back(std::move(token));
    return rowdone(line);
  }
  return true;
}

/* Wrapper for #csv_parse() that feeds data from a C file-like object.
 * #fp must be a valid stream opened for reading. */
inline bool csv_parse(
  std::FILE* fp, csv_rowdone_f const& rowdone,
  csv_info const& info = {})
{
  assert(fp != nullptr);
  auto readdata = [fp](char* buffer, std::size_t bufsize) -> std::size_t {
    return std::fread(buffer, 1, bufsize, fp);
  };
  return csv_parse(readdata, rowdone, info);
}

/* Wrapper for #csv_parse() that feeds data from a #std::istream. */
inline bool csv_parse(
  std::istream& stream, csv_rowdone_f const& rowdone,
  csv_info const& info = {})
{
  auto readdata = [&stream](char* buffer, std::size_t bufsize) -> std::size_t {
    stream.read(buffer, static_cast<std::streamsize>(bufsize));
    // gcount() reports how many characters the read() above extracted.
    return static_cast<std::size_t>(stream.gcount());
  };
  return csv_parse(readdata, rowdone, info);
}

} // namespace niklasrosenstein
|
@ -0,0 +1,46 @@ |
|||
/**
|
|||
* Copyright (c) 2018 Niklas Rosenstein |
|||
* MIT licensed. |
|||
*/ |
|||
|
|||
#include <iostream>
|
|||
#include <sstream>
|
|||
#include <benchmark/benchmark.h>
|
|||
#include <NiklasRosenstein/csv.hpp>
|
|||
|
|||
/* Short alias for the CSV library namespace; the benchmark code in this
 * file refers to the parser as nr::csv_parse and nr::csv_row. */
namespace nr = niklasrosenstein; |
|||
|
|||
|
|||
/* Writes #lines rows of pseudo-random CSV into #data. Every row consists of
 * three comma separated columns terminated by a newline: a raw rand() value,
 * a quoted value in the range [0, 100) and a value in the range [0, 3).
 * The C random number generator is seeded with 1 on every call. */
static void generate_csv(std::ostream& data, int lines) {
  srand(1);
  for (int remaining = lines; remaining > 0; --remaining) {
    // Draw the three values in column order so the sequence is fixed.
    int const first_column = rand();
    int const second_column = rand() % 100;
    int const third_column = rand() % 3;
    data << first_column;
    data << ",\"" << second_column << "\",";
    data << third_column << "\n";
  }
}
|||
|
|||
static void BM_Csv_ParseFile(benchmark::State& state) { |
|||
int const lines = state.range(0); |
|||
std::stringstream indata; |
|||
generate_csv(indata, lines); |
|||
|
|||
while (state.KeepRunning()) { |
|||
bool error = false; |
|||
int have_lines = 0; |
|||
nr::csv_parse(indata, [&](nr::csv_row const& row) -> bool { |
|||
if (row.size() != 3) { |
|||
error = true; |
|||
state.SkipWithError("a row did not have 3 columns"); |
|||
return false; |
|||
} |
|||
++have_lines; |
|||
return true; |
|||
}); |
|||
if (have_lines != lines) { |
|||
state.SkipWithError("could not read as many lines as generated"); |
|||
break; |
|||
} |
|||
indata.clear(); |
|||
indata.seekg(0); |
|||
} |
|||
} |
|||
BENCHMARK(BM_Csv_ParseFile)->Range(10, 10<<10); |