CsvHelper.h
source: wtcpp/folder98/folder03/file04.md
#pragma once
#include <string.h>
#include <string>
#include <unordered_map>
#include <stdint.h>
#include <fstream>
#include <vector>
#include <sstream>
// Minimal sequential CSV reader.
// Typical flow: load_from_file() opens the file and indexes the header row,
// next_row() advances to the next non-empty data row, and the get_*() accessors
// convert a cell of the current row, addressed by column index or by column name.
class CsvReader
{
public:
    // item_splitter: delimiter string that separates cells (a comma by default).
    CsvReader(const char* item_splitter = ",");

public:
    // Opens `filename` and fills the column-name dictionary from its first line.
    // Header names are stored lower-cased with the characters < > " ' removed.
    bool load_from_file(const char* filename);

public:
    // Number of distinct column names registered from the header row.
    inline uint32_t col_count() const { return static_cast<uint32_t>(_fields_map.size()); }

    // Typed accessors by zero-based column index. The numeric getters return 0 and
    // get_string() returns "" when the column is rejected by check_cell().
    int32_t get_int32(int32_t col);
    uint32_t get_uint32(int32_t col);
    int64_t get_int64(int32_t col);
    uint64_t get_uint64(int32_t col);
    double get_double(int32_t col);
    // The returned pointer refers to storage of the current row; it is invalidated
    // by the next successful next_row().
    const char* get_string(int32_t col);

    // Typed accessors by column name. The name is looked up as-is in the dictionary
    // built by load_from_file(), whose keys are lower-case.
    int32_t get_int32(const char* field);
    uint32_t get_uint32(const char* field);
    int64_t get_int64(const char* field);
    uint64_t get_uint64(const char* field);
    double get_double(const char* field);
    const char* get_string(const char* field);

    // Advances to the next non-empty row; returns false when no such row is left.
    bool next_row();

    // Returns the registered column names joined with ',' in column order
    // ("" when no header has been loaded).
    //
    // The text is rebuilt from _fields_map on every call and kept in a per-instance
    // buffer. A function-local static would be shared by every CsvReader in the
    // process and would never reflect a later load_from_file().
    // The returned pointer stays valid until a later fields() call observes a
    // different header, or until this object is destroyed.
    const char* fields() const
    {
        // Find the highest column index so names can be placed by position;
        // unordered_map iteration order is unrelated to column order.
        int32_t max_col = -1;
        for (const auto& item : _fields_map)
        {
            if (item.second > max_col)
                max_col = item.second;
        }

        std::vector<const std::string*> ordered(static_cast<size_t>(max_col + 1), nullptr);
        for (const auto& item : _fields_map)
        {
            if (item.second >= 0)
                ordered[static_cast<size_t>(item.second)] = &item.first;
        }

        std::string joined;
        bool first = true;
        for (const std::string* name : ordered)
        {
            if (name == nullptr)
                continue; // index without a registered name (e.g. duplicated header name)
            if (!first)
                joined += ',';
            joined += *name;
            first = false;
        }

        // Only touch the buffer when the text changed, so pointers handed out
        // earlier stay usable while the header is unchanged.
        if (joined != _fields_text)
            _fields_text.swap(joined);
        return _fields_text.c_str();
    }

private:
    // True when `col` may be used to index the current row.
    bool check_cell(int32_t col);
    // Maps a column name to its index; INT_MAX when the name is unknown.
    int32_t get_col_by_filed(const char* field);

private:
    std::ifstream _ifs;                                     // input file stream
    char _buffer[1024];                                     // line buffer
    std::string _item_splitter;                             // cell delimiter
    std::unordered_map<std::string, int32_t> _fields_map;   // column name -> column index
    std::vector<std::string> _current_cells;                // cells of the current row
    mutable std::string _fields_text;                       // backing storage for fields()
};
CsvHelper.cpp
#include "CsvHelper.h"
#include <limits.h>
#include "../Share/StdUtils.hpp"
#include "../Share/StrUtil.hpp"
// Stores the cell delimiter. A null pointer falls back to "," because constructing
// a std::string from a null const char* is undefined behaviour.
CsvReader::CsvReader(const char* item_splitter /* = "," */)
    : _item_splitter(item_splitter != nullptr ? item_splitter : ",")
{}
// 加载CSV文件, 保存列名字典到 _fields_map 中
bool CsvReader::load_from_file(const char* filename)
{
if (!StdFile::exists(filename))
return false;
_ifs.open(filename);
_ifs.getline(_buffer, 1024);
//判断是不是UTF-8BOM 编码
static char flag[] = { (char)0xEF, (char)0xBB, (char)0xBF };
char* buf = _buffer;
if (memcmp(_buffer, flag, sizeof(char) * 3) == 0)
buf += 3;
std::string row = buf;
//替换掉一些字段的特殊符号
StrUtil::replace(row, "<", "");
StrUtil::replace(row, ">", "");
StrUtil::replace(row, "\"", "");
StrUtil::replace(row, "'", "");
//将字段名转成小写
StrUtil::toLowerCase(row);
StringVector fields = StrUtil::split(row, _item_splitter.c_str());
for (uint32_t i = 0; i < fields.size(); i++)
{
std::string field = StrUtil::trim(fields[i].c_str(), " ");
if (field.empty())
break;
_fields_map[field] = i;
}
return true;
}
// Reads the next non-empty line and splits it into _current_cells.
//
// Returns true when a row was loaded, false when no further row can be read
// (end of file, read error, or the stream was never opened). On false the
// previously loaded row is left untouched.
//
// The loop is driven by the result of std::getline rather than by eof(): a stream
// that has failbit set without eofbit (never opened, or a read that failed) never
// reports eof(), so polling eof() would spin forever. Reading into a std::string
// also removes the fixed-buffer limit on line length.
bool CsvReader::next_row()
{
    std::string line;
    while (std::getline(_ifs, line))
    {
        // Files with CRLF line endings leave a '\r' at the end of the line.
        if (!line.empty() && line[line.size() - 1] == '\r')
            line.erase(line.size() - 1);

        // Blank lines are skipped.
        if (line.empty())
            continue;

        _current_cells.clear();
        StrUtil::split(line.c_str(), _current_cells, _item_splitter.c_str());
        return true;
    }

    return false;
}
// Index-based typed accessors: each converts the cell at `col` of the current row.
// get_int32: parses the cell with strtol (base 10); yields 0 when check_cell() rejects `col`.
int32_t CsvReader::get_int32(int32_t col)
{
    return check_cell(col)
        ? static_cast<int32_t>(strtol(_current_cells[col].c_str(), nullptr, 10))
        : 0;
}
// Parses the cell at `col` with strtoul (base 10); yields 0 when check_cell() rejects `col`.
uint32_t CsvReader::get_uint32(int32_t col)
{
    return check_cell(col)
        ? static_cast<uint32_t>(strtoul(_current_cells[col].c_str(), nullptr, 10))
        : 0;
}
// Parses the cell at `col` with strtoll (base 10); yields 0 when check_cell() rejects `col`.
int64_t CsvReader::get_int64(int32_t col)
{
    return check_cell(col)
        ? static_cast<int64_t>(strtoll(_current_cells[col].c_str(), nullptr, 10))
        : 0;
}
// Parses the cell at `col` with strtoull (base 10); yields 0 when check_cell() rejects `col`.
uint64_t CsvReader::get_uint64(int32_t col)
{
    return check_cell(col)
        ? static_cast<uint64_t>(strtoull(_current_cells[col].c_str(), nullptr, 10))
        : 0;
}
// Parses the cell at `col` with strtod; yields 0 when check_cell() rejects `col`.
double CsvReader::get_double(int32_t col)
{
    return check_cell(col)
        ? strtod(_current_cells[col].c_str(), nullptr)
        : 0;
}
// Returns the raw text of the cell at `col`, or "" when check_cell() rejects `col`.
// The pointer refers to the string stored in _current_cells for the current row.
const char* CsvReader::get_string(int32_t col)
{
    return check_cell(col) ? _current_cells[col].c_str() : "";
}
// Name-based typed accessors: resolve the column name to an index, then delegate
// to the index-based overload of the same type.
// get_int32: int32 value of the cell in column `field`.
int32_t CsvReader::get_int32(const char* field)
{
    return get_int32(get_col_by_filed(field));
}
// uint32 value of the cell in column `field` (delegates to the index-based overload).
uint32_t CsvReader::get_uint32(const char* field)
{
    return get_uint32(get_col_by_filed(field));
}
// int64 value of the cell in column `field` (delegates to the index-based overload).
int64_t CsvReader::get_int64(const char* field)
{
    return get_int64(get_col_by_filed(field));
}
// uint64 value of the cell in column `field` (delegates to the index-based overload).
uint64_t CsvReader::get_uint64(const char* field)
{
    return get_uint64(get_col_by_filed(field));
}
// double value of the cell in column `field` (delegates to the index-based overload).
double CsvReader::get_double(const char* field)
{
    return get_double(get_col_by_filed(field));
}
// Raw text of the cell in column `field` (delegates to the index-based overload).
const char* CsvReader::get_string(const char* field)
{
    return get_string(get_col_by_filed(field));
}
bool CsvReader::check_cell(int32_t col)
{
if (col == INT_MAX )
return false;
if (col < 0 || col >= (int32_t)_fields_map.size())
return false;
return true;
}
// Looks up the column index registered for `field`.
//
// Returns the index stored in _fields_map, or INT_MAX when the name is not
// registered. A null `field` is also reported as INT_MAX, since building the
// std::string lookup key from a null pointer would be undefined behaviour.
// The lookup is an exact match; load_from_file() stores names in lower case.
int32_t CsvReader::get_col_by_filed(const char* field)
{
    if (field == nullptr)
        return INT_MAX;

    const auto it = _fields_map.find(field);
    return it != _fields_map.end() ? it->second : INT_MAX;
}