Browse Source

add csv.hpp and test

master
Niklas Rosenstein 3 years ago
parent
commit
fd7bdbb1b5
Signed by: NiklasRosenstein GPG Key ID: 06D269B33D25F6C6
2 changed files with 174 additions and 0 deletions
  1. + 128
    - 0
      include/NiklasRosenstein/csv.hpp
  2. + 46
    - 0
      tests/test_csv.cpp

+ 128
- 0
include/NiklasRosenstein/csv.hpp

@ -0,0 +1,128 @@
/**
* Copyright (c) 2018 Niklas Rosenstein
* MIT licensed.
*
* @description Tiny library to parse CSV files.
*/
#pragma once
#include <cassert>
#include <cstddef>
#include <cstdio>
#include <functional>
#include <istream>
#include <string>
#include <utility>
#include <vector>
namespace niklasrosenstein {

/* Container type holding all fields of a single CSV row. */
using csv_row = std::vector<std::string>;

/* Callback that supplies more input to the parser. It must write at most
 * #bufsize bytes into #buffer and return the number of bytes written;
 * returning 0 signals the end of the input. */
using csv_readdata_f = std::function<size_t(char* buffer, size_t bufsize)>;

/* Callback invoked for every parsed row. Return false to stop parsing. */
using csv_rowdone_f = std::function<bool(csv_row const& row)>;

/* Parser configuration. */
struct csv_info {
  char delim = ',';  // Field separator.
  char quote = '"';  // Character that opens and closes a quoted section.
};

/* Parse CSV data served by the #readdata callback. The #rowdone callback
 * is called for each row that has been parsed. Parsing stops when
 * #readdata returns no more data or #rowdone returns false.
 *
 * Rows are terminated by '\n'; a '\r' in front of it is not stripped and
 * ends up in the last field. Inside a quoted section, delimiters and
 * newlines are taken literally, and a doubled quote character yields one
 * literal quote character. A final row without a terminating newline is
 * still reported, including a trailing empty field (e.g. "a,b," yields
 * three fields).
 *
 * @param readdata The callback that provides data into the parser.
 * @param rowdone The callback that is invoked when a row of CSV data is
 *   available.
 * @param info The delimiter and quote characters to use.
 * @return false if #rowdone returned false for any row (parsing was
 *   aborted), true if the whole input was consumed. */
inline bool csv_parse(
    csv_readdata_f const& readdata, csv_rowdone_f const& rowdone,
    csv_info const& info = {})
{
  enum { DEFAULT, QOPEN } state = DEFAULT;
  // True when the previously consumed character closed a quoted section;
  // a quote directly following it is an embedded (doubled) quote.
  bool quote_just_closed = false;
  // True when at least one character was consumed since the last row was
  // reported, i.e. there is an unfinished row to flush at end of input.
  bool row_pending = false;
  std::string token;
  csv_row line;
  char buffer[BUFSIZ];
  size_t bufidx = 0;
  size_t bufsiz = 0;

  for (;;) {
    if (bufidx == bufsiz) {
      bufsiz = readdata(buffer, sizeof buffer);
      bufidx = 0;
      if (bufsiz == 0)
        break;  // End of input.
      assert(bufsiz <= sizeof buffer);
    }

    char const c = buffer[bufidx++];
    bool const after_closing_quote = quote_just_closed;
    quote_just_closed = false;
    row_pending = true;

    switch (state) {
      case DEFAULT: {
        if (c == info.quote) {
          if (after_closing_quote) {
            // Embedded quotes, eg. "Super ""simple"" API"
            token.push_back(info.quote);
          }
          state = QOPEN;
        }
        else if (c == info.delim || c == '\n') {
          line.push_back(std::move(token));
          token.clear();  // Moved-from strings have an unspecified value.
          if (c == '\n') {
            if (!rowdone(line)) {
              return false;
            }
            line.clear();
            row_pending = false;
          }
        }
        else {
          token.push_back(c);
        }
        break;
      }  // DEFAULT
      case QOPEN: {
        if (c == info.quote) {
          state = DEFAULT;
          quote_just_closed = true;
        }
        else {
          token.push_back(c);
        }
        break;
      }  // QOPEN
    }  // switch
  }

  // Flush a last row that was not terminated by a newline. The pending
  // token is pushed even when empty so that a trailing empty field is kept.
  if (row_pending) {
    line.push_back(std::move(token));
    return rowdone(line);
  }
  return true;
}

/* Wrapper for #csv_parse() that feeds data from a C file-like object.
 * Reads from #fp with fread() until it returns 0. */
inline bool csv_parse(
    FILE* fp, csv_rowdone_f const& rowdone,
    csv_info const& info = {})
{
  auto readdata = [fp](char* buffer, size_t bufsize) -> size_t {
    return std::fread(buffer, 1, bufsize, fp);
  };
  return csv_parse(readdata, rowdone, info);
}

/* Wrapper for #csv_parse() that feeds data from a #std::istream.
 * Reads from #stream until a read extracts no characters. */
inline bool csv_parse(
    std::istream& stream, csv_rowdone_f const& rowdone,
    csv_info const& info = {})
{
  auto readdata = [&stream](char* buffer, size_t bufsize) -> size_t {
    stream.read(buffer, static_cast<std::streamsize>(bufsize));
    return static_cast<size_t>(stream.gcount());
  };
  return csv_parse(readdata, rowdone, info);
}

}  // namespace niklasrosenstein

+ 46
- 0
tests/test_csv.cpp

@ -0,0 +1,46 @@
/**
* Copyright (c) 2018 Niklas Rosenstein
* MIT licensed.
*/
#include <iostream>
#include <sstream>
#include <benchmark/benchmark.h>
#include <NiklasRosenstein/csv.hpp>
namespace nr = niklasrosenstein;
/* Writes #lines rows of pseudo-random CSV to #data. Each row has three
 * columns: a raw rand() value, a quoted value in [0, 100) and a value in
 * [0, 3). The generator is re-seeded with 1 on every call. */
static void generate_csv(std::ostream& data, int lines) {
  srand(1);
  int row = 0;
  while (row < lines) {
    // Draw the values in column order so the output matches the sequence
    // produced by the seeded generator.
    int const first = rand();
    int const second = rand() % 100;
    int const third = rand() % 3;
    data << first << ",\"" << second << "\"," << third << "\n";
    ++row;
  }
}
/* Benchmark that parses generated CSV from an in-memory stream.
 * state.range(0) is the number of rows to generate. Every iteration parses
 * the whole stream, checks that each row has three columns and that the
 * row count matches, then rewinds the stream for the next iteration. */
static void BM_Csv_ParseFile(benchmark::State& state) {
  int const expected_rows = state.range(0);
  std::stringstream csv_text;
  generate_csv(csv_text, expected_rows);

  while (state.KeepRunning()) {
    bool bad_row = false;
    int parsed_rows = 0;

    // Counts well-formed rows; aborts the parse on the first malformed one.
    auto count_row = [&](nr::csv_row const& row) -> bool {
      if (row.size() == 3) {
        ++parsed_rows;
        return true;
      }
      bad_row = true;
      state.SkipWithError("a row did not have 3 columns");
      return false;
    };
    nr::csv_parse(csv_text, count_row);

    if (parsed_rows != expected_rows) {
      state.SkipWithError("could not read as many lines as generated");
      break;
    }

    // Reset the stream state flags and rewind so the next iteration can
    // read the same data again.
    csv_text.clear();
    csv_text.seekg(0);
  }
}
BENCHMARK(BM_Csv_ParseFile)->Range(10, 10<<10);

Loading…
Cancel
Save