Qore CsvUtil Module Reference  1.10
AbstractCsvIterator.qc.dox.h
1 // -*- mode: c++; indent-tabs-mode: nil -*-
2 // Qore AbstractCsvIterator class definition
3 
4 /* AbstractCsvIterator.qc Copyright 2012 - 2022 Qore Technologies, s.r.o.
5 
6  Permission is hereby granted, free of charge, to any person obtaining a
7  copy of this software and associated documentation files (the "Software"),
8  to deal in the Software without restriction, including without limitation
9  the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  and/or sell copies of the Software, and to permit persons to whom the
11  Software is furnished to do so, subject to the following conditions:
12 
13  The above copyright notice and this permission notice shall be included in
14  all copies or substantial portions of the Software.
15 
16  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  DEALINGS IN THE SOFTWARE.
23 */
24 
25 // assume local var scope, do not use "$" for vars, members, and method calls
26 
// The CsvUtil namespace; all classes used in the CsvUtil module should be inside this namespace.
28 namespace CsvUtil {
30 
     // Abstract base class that allows abstract CSV data to be iterated.
     // Inherits publicly from Qore::AbstractIterator and, with protected access, from CsvHelper.
     // Protected methods below are each wrapped in their own "protected: ... public:" pair.
285 class AbstractCsvIterator : public Qore::AbstractIterator, protected CsvHelper {
286 
287 public:
288 protected:
     // Valid options for the object (a hash for quick lookups of valid keys).
     // Each value is a bitmask built from C_OPT1 / C_OPT2. Several options are listed under both
     // a hyphenated and an underscore spelling. "fields" and "headers" carry C_OPT1 only, and
     // "extended_record" carries C_OPT2 only.
     // NOTE(review): presumably C_OPT1 marks options accepted in single-type mode and C_OPT2
     // those accepted in multi-type mode - confirm where C_OPT1 / C_OPT2 are defined.
290  const Options = {
291  "compat_force_empty_string": C_OPT1|C_OPT2,
292  "date_format": C_OPT1|C_OPT2,
293  "date-format": C_OPT1|C_OPT2,
294  "encoding": C_OPT1|C_OPT2,
295  "eol": C_OPT1|C_OPT2,
296  "extended_record": C_OPT2,
297  "fields": C_OPT1,
298  "header-lines": C_OPT1|C_OPT2,
299  "header_lines": C_OPT1|C_OPT2,
300  "header-names": C_OPT1|C_OPT2,
301  "header_names": C_OPT1|C_OPT2,
302  "header_reorder": C_OPT1|C_OPT2,
303  "headers": C_OPT1,
304  "ignore-empty": C_OPT1|C_OPT2,
305  "ignore_empty": C_OPT1|C_OPT2,
306  "ignore-whitespace": C_OPT1|C_OPT2,
307  "ignore_whitespace": C_OPT1|C_OPT2,
308  "number_format": C_OPT1|C_OPT2,
309  "quote": C_OPT1|C_OPT2,
310  "separator": C_OPT1|C_OPT2,
311  "timezone": C_OPT1|C_OPT2,
312  "tolwr": C_OPT1|C_OPT2,
313  "verify-columns": C_OPT1|C_OPT2,
314  "verify_columns": C_OPT1|C_OPT2,
315  };
316 
317  // field separator (default: a single comma)
318  string separator = ",";
319 
320  // field content delimiter (default: a double quote character)
321  string quote = "\"";
322 
323  // number of header lines (default: 0)
324  softint headerLines = 0;
325 
326  // flag to use string names from the first header row if possible
327  bool headerNames = False;
328 
329  // True if empty lines should be ignored
330  bool ignoreEmptyLines = True;
331 
332  // Flag to trim the field content (trim leading and trailing whitespace) from unquoted fields
333  bool ignoreWhitespace = True;
334 
335  // the @ref Qore::TimeZone to use when parsing dates (default: current time zone)
336  *TimeZone timezone;
337 
338  // verify the column count for every row; if a row does not match, then throw a \c CSVFILEITERATOR-DATA-ERROR exception
339  bool checkElementCounts = False;
340 
341  // getRecord/getValue returns extended hash
342  bool extendedRecord = False;
343 
344  // force "*string" fields with no value to return an empty string rather than @ref nothing for backwards compatibility with very early versions of CsvUtil
345  bool compat_force_empty_string = False;
346 
347  // read ahead flag (declared without an initializer)
348  bool read_ahead;
349 
350  // column count for verifying column counts
351  int cc;
352 
353  // current record count for the index() method
354  int rc = 0;
355 
356  // to resolve record type by rules
357  hash<string, hash<string, list<hash<auto>>>> m_resolve_by_rule;
358 
359  // to resolve record type by number of fields
360  hash<string, list<string>> m_resolve_by_count;
361 
362  // list of idx to field transformations, in order of spec
363  hash<string, list<string>> m_resolve_by_idx;
364 
365  // fake specs based on the first non-header row
366  bool fakeHeaderNames;
367 
     // the eol marker, if any
369  *string eol;
370 
371  // data source iterator
372  AbstractLineIterator lineIterator;
373 
374 public:
375 
377 
     // Creates the AbstractCsvIterator with an option hash in single-type mode.
     //   li:   the line iterator supplying the input lines
     //   opts: optional option hash (see Options for the recognized keys)
384  constructor(AbstractLineIterator li, *hash<auto> opts);
385 
386 
388 
     // Creates the AbstractCsvIterator with an option hash in multi-type mode.
     //   li:   the line iterator supplying the input lines
     //   spec: the record specification
     //   opts: option hash (see Options for the recognized keys)
393  // NOTE: when declared as *hash then always calls this constructor
394  constructor(AbstractLineIterator li, hash<auto> spec, hash<auto> opts);
395 
396 
     // Processes common options and assigns internal fields.
     // NOTE(review): C_OPTx presumably selects which Options bitmask (C_OPT1 or C_OPT2) the
     // given keys are checked against - confirm in the implementation.
398 protected:
399  processCommonOptions(*hash<auto> opts, int C_OPTx);
400 public:
401 
402 
     // Processes the specification and assigns internal data for resolving record types.
404 protected:
405  processSpec(hash<auto> spec);
406 public:
407 
408 
     // Matches headers provided in the CSV header or in options; per the generated docs this is
     // never called in multi-type mode (the original description is truncated in this listing).
410 protected:
411  prepareFieldsFromHeaders(*list<auto> headers);
412 public:
413 
414 
     // NOTE(review): undocumented in this listing; presumably reports whether the iterator
     // currently points at a valid record (Qore::AbstractIterator contract) - confirm.
415  bool valid();
416 
417 
419 
     // Moves the current line / record position to the next line / record; returns False when
     // there are no more (the original description is truncated in this listing).
424  bool next();
425 
426 
428 
     // Reads a single row without moving the index position.
430  peek();
431 
432 
434 
     // Returns the given column value for the current row.
441  auto memberGate(string name);
442 
443 
445 
     // Returns the current record as a hash.
456  hash<auto> getValue();
457 
458 
460 
     // Returns the current record as a hash.
     // NOTE(review): "extended" presumably selects the extended hash form (compare the
     // extendedRecord member) - confirm in the implementation.
473  hash<auto> getRecord(bool extended);
474 
475 
477 
     // Returns the current record as a hash.
488  hash<auto> getRecord();
489 
490 
492 
     // NOTE(review): the generated member index describes "auto getRecordList()" ("Returns the
     // current record as a list") but no such declaration appears at this position in the
     // listing - confirm against the original AbstractCsvIterator.qc.
505 
506 
508 
     // Returns the current separator string.
515  string getSeparator();
516 
517 
519 
     // Returns the current quote string.
526  string getQuote();
527 
528 
     // Returns the description of the record type, if any.
530  *hash<string, AbstractDataField> getRecordType();
531 
532 
534 
     // Returns the current record headers or NOTHING if no headers have been detected or saved yet.
541  *list<string> getHeaders();
542 
543 
545 
     // Returns a list of headers for the given record or NOTHING if the record is not recognized.
550  *list<string> getHeaders(string type);
551 
552 
554 
     // Returns the row index being iterated, which does not necessarily correspond to the line
     // number (the original description is truncated in this listing).
565  int index();
566 
567 
569 
     // Returns the current iterator line number in the file (the first line is line 1) or 0 if
     // not pointing at a line (the original description is truncated in this listing).
582  int lineNumber();
583 
584 
586 
     // Returns the current line 'as it is', i.e. the original string.
595  string getRawLine();
596 
597 
599 
     // Returns the list of raw string values of the current line.
609  list<*string> getRawLineValues();
610 
611 
     // NOTE(review): undocumented in this listing; presumably converts the raw string "val"
     // according to the field description "fh" - confirm in the implementation.
612 protected:
613  auto handleType(hash<auto> fh, *string val);
614 public:
615 
616 
     // Reads a line and splits it by separator / quote into a list.
618 protected:
619  list<*string> getLineAndSplit();
620 public:
621 
622 
624 
     // Identifies the record type using identifyTypeImpl(); may be overridden if necessary.
631  string identifyType(list<auto> rec);
632 
633 
635 
     // Identifies an input record; per the generated docs this performs a lookup against
     // precalculated data (the original description is truncated in this listing).
642 protected:
643  *string identifyTypeImpl(list<auto> rec);
644 public:
645 
646 
     // Parses a line in the file and returns the processed fields (declared return type: hash<auto>).
648 protected:
649  hash<auto> parseLine();
650 public:
651 
652  }; // AbstractCsvIterator class
653 }; // CsvUtil namespace
the AbstractCsvIterator class is an abstract base class that allows abstract CSV data to be iterated
Definition: AbstractCsvIterator.qc.dox.h:285
prepareFieldsFromHeaders(*list< auto > headers)
match headers provided at csv header or in options, never called for multi-type because header_names ...
*hash< string, AbstractDataField > getRecordType()
Returns the description of the record type, if any.
string getQuote()
Returns the current quote string.
processSpec(hash< auto > spec)
process the specification and assign internal data for resolving
*string eol
the eol marker, if any
Definition: AbstractCsvIterator.qc.dox.h:369
auto memberGate(string name)
Returns the given column value for the current row.
list< *string > getLineAndSplit()
Read line split by separator/quote into list.
hash< auto > getRecord()
Returns the current record as a hash.
constructor(AbstractLineIterator li, *hash< auto > opts)
creates the AbstractCsvIterator with an option hash in single-type mode
string getRawLine()
Returns the current line 'as it is', i.e. the original string.
const Options
valid options for the object (a hash for quick lookups of valid keys)
Definition: AbstractCsvIterator.qc.dox.h:290
peek()
Reads a single row without moving the index position.
hash< auto > getValue()
Returns the current record as a hash.
string identifyType(list< auto > rec)
Identify a fixed-length line type using identifyTypeImpl(); may be overridden if necessary.
int index()
Returns the row index being iterated, which does not necessarily correspond to the line number when t...
*string identifyTypeImpl(list< auto > rec)
Identify an input record, given the raw line string. This method performs a lookup to a precalculated ...
int lineNumber()
Returns the current iterator line number in the file (the first line is line 1) or 0 if not pointing ...
hash< auto > parseLine()
Parses a line in the file and returns a processed list of the fields.
list< *string > getRawLineValues()
Returns the list of raw string values of the current line.
hash< auto > getRecord(bool extended)
Returns the current record as a hash.
processCommonOptions(*hash< auto > opts, int C_OPTx)
process common options and assign internal fields
*list< string > getHeaders()
Returns the current record headers or NOTHING if no headers have been detected or saved yet.
constructor(AbstractLineIterator li, hash< auto > spec, hash< auto > opts)
creates the AbstractCsvIterator with an option hash in multi-type mode
*list< string > getHeaders(string type)
Returns a list of headers for the given record or NOTHING if the record is not recognized.
auto getRecordList()
Returns the current record as a list.
string getSeparator()
Returns the current separator string.
bool next()
Moves the current line / record position to the next line / record; returns False if there are no mor...
const True
const False
string type(auto arg)
the CsvUtil namespace. All classes used in the CsvUtil module should be inside this namespace
Definition: AbstractCsvIterator.qc.dox.h:28