Cantera  2.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
csvdiff.cpp
1 /*
2  * csvdiff File1.csv File2.csv
3  *
4  * Compares the variable values in two Excel formatted
5  * comma separated files.
6  * The comparison is done using a weighted norm basis.
7  *
8  * The two files should be basically equal. However, File1.csv is
9  * taken as the reference file, that has precedence, when there is
10  * something to be decided upon.
11  *
12  * Arguments:
13  * -h = prints this usage information
14  *
15  * Shell Return Values
16  * 0 = Comparison was successful
17  * 1 = One or more nodal values failed the comparison
18  * 2 = One or more of the header values failed the comparison
19  * 3 = Apples to oranges, the files can not even be compared against
20  * one another.
21  */
22 
23 #include <math.h>
24 #include <float.h>
25 #include <algorithm>
26 #include <cstdlib>
27 #include "cantera/base/config.h"
28 #ifndef _MSC_VER
29 #include <unistd.h>
30 #else
31 #include <string>
32 #endif
33 #include <vector>
34 #include <algorithm>
35 using namespace std;
36 
37 #if defined(__CYGWIN__)
38 #include <getopt.h>
39 #endif
40 
41 #include "mdp_allo.h"
42 //#include "cantera/base/mdp_allo.h"
43 #include "tok_input_util.h"
44 
45 int Debug_Flag = true;
46 double grtol = 1.0E-3;
47 double gatol = 1.0E-9;
48 
49 #define RT_PASSED 0
50 #define RT_FAILED_COL 1
51 #define RT_FAILED_HDR 2
52 #define RT_FAILED_OTHER 3
53 
54 /*
55  * First iteration towards getting this variable
56  */
57 int Max_Input_Str_Ln = MAX_INPUT_STR_LN;
58 /*****************************************************************************/
59 /*****************************************************************************/
60 /*****************************************************************************/
61 
62 #ifdef _MSC_VER
63 /*
64  * Windows doesn't have getopt(). This is an incomplete version that
65  * does enough to handle required functionality.
66  */
67 int optind = -1;
68 char* optarg = 0;
69 
70 int getopt(int argc, char** argv, const char*)
71 {
72  static int currArg = 1;
73  static int currOptInd = 1;
74  string tok;
75  static int charPos = 0;
76  int rc = -1;
77  if (currArg >= argc) {
78  optarg = 0;
79  return -rc;
80  }
81  tok = string(argv[currArg]);
82  currOptInd = currArg+1;
83  if (currOptInd > argc - 1) {
84  currOptInd = -1;
85  optarg = 0;
86  } else {
87  optarg = argv[currArg+1];
88  }
89  size_t len = strlen(tok.c_str());
90  if (charPos == 0) {
91  bool found = false;
92  do {
93  tok = string(argv[currArg]);
94  len = strlen(tok.c_str());
95  if (len > 1 && tok[0] == '-') {
96  found = true;
97  charPos = 1;
98  if (len > 2 && tok[1] == '-') {
99  charPos = 2;
100  }
101  } else {
102  if (optind == -1) {
103  optind = currArg;
104  }
105  }
106  if (!found) {
107  if (currArg < (argc-1)) {
108  currArg++;
109  } else {
110  optarg = 0;
111  return -1;
112  }
113  }
114  } while (!found);
115  }
116 
117  rc = tok[charPos];
118  if (charPos < static_cast<int>(len - 1)) {
119  charPos++;
120  } else {
121  charPos = 0;
122  }
123  return rc;
124 }
125 
126 #endif
127 
128 /*****************************************************************************/
129 /*****************************************************************************/
130 /*****************************************************************************/
131 
/*
 * diff_double():
 *    Fuzzy inequality test for two doubles. The allowed gap is
 *    atol plus rtol times the mean magnitude of the two values.
 *
 * Return:
 *    1 if the values differ by more than the allowed gap, 0 otherwise.
 */
static int diff_double(double d1, double d2, double rtol, double atol)
{
    const double gap = fabs(d1-d2);
    const double allowed = atol + rtol * 0.5 * (fabs(d1) + fabs(d2));
    return (gap > allowed) ? 1 : 0;
}
144 
/*
 * diff_double_slope():
 *    Same fuzzy inequality test as diff_double(), with the allowed gap
 *    widened by xtol times the summed magnitudes of the two slopes.
 *
 * Return:
 *    1 if the values differ by more than the allowed gap, 0 otherwise.
 */
static int diff_double_slope(double d1, double d2, double rtol,
                             double atol, double xtol, double slope1, double slope2)
{
    const double slopeAllowance = xtol*(fabs(slope1) + fabs(slope2));
    const double allowed = atol + slopeAllowance + rtol * 0.5 * (fabs(d1) + fabs(d2));
    const double gap = fabs(d1-d2);
    return (gap > allowed) ? 1 : 0;
}
159 
160 /*****************************************************************************/
161 /*****************************************************************************/
162 /*****************************************************************************/
163 
/*
 * calc_rdiff():
 *    Ratio of the absolute difference of d1 and d2 to the fuzzy
 *    tolerance atol + rtol * (mean magnitude of d1 and d2).
 *    No guard is applied when that tolerance evaluates to zero.
 */
static double calc_rdiff(double d1, double d2, double rtol, double atol)
{
    const double tolerance = atol + rtol * 0.5 * (fabs(d1) + fabs(d2));
    const double gap = fabs(d1-d2);
    return gap/tolerance;
}
176 
177 /*****************************************************************************/
/*
 * breakStrCommas():
 *    This routine will break a character string into stringlets according
 *    to the placement of commas. The commas are replaced by null
 *    characters.
 *
 * Argument:
 *    str       => original string. On exit, this string will have been altered.
 *    strlets   -> Vector of pointers to char *. The vector must have room for
 *                 at least max(maxPieces, 1) entries.
 *    maxPieces -> largest number of pieces to divide the string into. Once
 *                 that many pieces exist, the remaining commas are left in
 *                 place inside the last piece.
 *
 * Return:
 *    This returns the number of pieces that the string is actually
 *    broken up into (always at least 1, even for a null str or strlets).
 */
static int breakStrCommas(char* str, char** strlets, int maxPieces)
{
    int numbreaks = 0;
    if (strlets) {
        strlets[0] = str;
        if (str) {
            char* cptr = str;
            /*
             * Test the piece limit BEFORE splitting. The earlier do/while
             * form always split once, so maxPieces <= 1 wrote strlets[1].
             */
            while (numbreaks < (maxPieces - 1)) {
                char* cetn = strchr(cptr, (int) ',');
                if (!cetn) {
                    break;
                }
                *cetn = '\0';
                cptr = cetn + 1;
                numbreaks++;
                strlets[numbreaks] = cptr;
            }
        }
    }
    return numbreaks + 1;
}
216 
217 /**************************************************************************************************************************/
218 /**************************************************************************************************************************/
219 /*
220  * Here, we ensure consistency of the file
221  * ntitleLines of any content
222  * nColTitleLines of nCol columns. Each are separated by column.
223  * nDataRows of nCol columns. Each are separated by columns
224  * Each column is identified as either a text or double. Each double entry must be able to be read
225  * as a double.
226  */
227 static void check_consistency(FILE* fp, const char *fileName, const int nTitleLines, const int nColTitleLines,
228  const int nCol, const int nDataRows, const std::vector<int>& ColIsFloat)
229 {
230  int retn, ncolsFound;
231  bool rerr;
232  TOKEN fieldToken;
233  char* scanLine = mdp_alloc_char_1(MAX_INPUT_STR_LN+1, '\0');
234  char** strlets = (char**) mdp_alloc_ptr_1(nCol+200);
235  /*
236  * Rewind the file
237  */
238  rewind(fp);
239 
240  for (int i = 0; i < nTitleLines; i++) {
241  retn = read_line(fp, scanLine, 0);
242  if (retn == -1) {
243  fprintf(stderr, "check_consistency() error for file %s, Line %d couldn't be read\n",
244  fileName, i);
245  exit(-1);
246  }
247  }
248  if (nColTitleLines == 0) {
249  if (nTitleLines > 0) {
250  fprintf(stderr, "check_consistency() error for file %s, number column title lines are zero but number title lines are greater than 0",
251  fileName);
252  exit(-1);
253  }
254  }
255  for (int i = 0; i < nColTitleLines; i++) {
256  retn = read_line(fp, scanLine, 0);
257  if (retn == -1) {
258  fprintf(stderr, "check_consistency() error for file %s, Line %d couldn't be read\n",
259  fileName, i);
260  exit(-1);
261  }
262  ncolsFound = breakStrCommas(scanLine, strlets, nCol);
263  if (ncolsFound != (nCol)) {
264  fprintf(stderr, "check_consistency() error for file %s, Line %d of "
265  "ColTitleLines didn't have correct commas: %d vs %d\n", fileName, i, ncolsFound, nCol);
266  fprintf(stderr, " %s\n", scanLine);
267  }
268  }
269  for (int i = 0; i < nDataRows; i++) {
270  retn = read_line(fp, scanLine, 0);
271  ncolsFound = breakStrCommas(scanLine, strlets, nCol);
272  if (retn == -1) {
273  fprintf(stderr, "check_consistency() error for file %s, Line %d couldn't be read\n",
274  fileName, i);
275  exit(-1);
276  }
277  if (ncolsFound != (nCol)) {
278  fprintf(stderr, "check_consistency() error for file %s, Line %d of DataLines didn't have correct commas: %d vs %d\n",
279  fileName, i, ncolsFound, nCol);
280  fprintf(stderr," %s\n", scanLine);
281  exit(-1);
282  }
283  for (int j = 0; j < ncolsFound; j++) {
284  char* fieldStr = strlets[j];
285  fillTokStruct(&fieldToken, fieldStr);
286  if (ColIsFloat[j] > 0) {
287  (void) tok_to_double(&fieldToken, DBL_MAX, -DBL_MAX, 0.0, &rerr);
288  if (rerr) {
289  fprintf(stderr, "check_consistency() error for file %s, Line %d of DataLines, col %d, "
290  "couldn't be converted to a double\n",
291  fileName, i, j);
292  fprintf(stderr," %s\n", scanLine);
293  exit(-1);
294  }
295  }
296  }
297  }
298  mdp_safe_free((void**) &strlets);
299  mdp_safe_free((void**) &scanLine);
300 }
301 /**************************************************************************************************************************/
302 #define LT_NULLLINE 0
303 #define LT_TITLELINE 1
304 #define LT_COLTITLE 2
305 #define LT_DATALINE 3
306 /*
307  * get_sizes()
308  *
309  * This routine obtains the sizes of the various elements of the file
310  * by parsing the file.
311  * (HKM: Note, this file could use some work. However, it's always
312  * going to be heuristic)
313  *
314  * Arguments:
315  *
316  * fp = File pointer
317  * nTitleLines = Number of title lines
318  * nColTitleLines = Number of column title lines
319  * nCol = Number of columns -> basically equal to the
320  * number of variables
321  * nDataRows = Number of rows of data in the file
322  *
323  */
324 
325 static void get_sizes(FILE* fp, int& nTitleLines, int& nColTitleLines,
326  int& nCol, int& nDataRows, std::vector<int>& ColIsFloat)
327 {
328  int nScanLinesMAX = 100;
329  int nScanLines = nScanLinesMAX;
330  int retn, i, j;
331  int maxCommas = 0;
332  TOKEN fieldToken;
333  char* scanLine = mdp_alloc_char_1(MAX_INPUT_STR_LN+1, '\0');
334  int* numCommas = mdp_alloc_int_1(nScanLinesMAX, -1);
335 
336  /*
337  * Rewind the file
338  */
339  rewind(fp);
340  /*
341  * Read the scan lines
342  */
343  for (i = 0; i < nScanLinesMAX; i++) {
344  retn = read_line(fp, scanLine, 0);
345  if (retn == -1) {
346  nScanLines = i;
347  break;
348  }
349  /*
350  * Strip a trailing comma from the scanline -
351  * -> These are not significant
352  */
353  int ccount = static_cast<int>(strlen(scanLine));
354  if (ccount > 0) {
355  if (scanLine[ccount-1] == ',') {
356  scanLine[ccount-1] = '\0';
357  }
358  }
359  /*
360  * Count the number of commas in the line
361  */
362  char* cptr = scanLine;
363  char* cetn = NULL;
364  numCommas[i] = 0;
365  do {
366  cetn = strchr(cptr, (int) ',');
367  if (cetn) {
368  numCommas[i]++;
369  cptr = cetn + 1;
370  }
371  } while (cetn);
372  if (i > 1) {
373  if (maxCommas < numCommas[i]) {
374  maxCommas = numCommas[i];
375  }
376  } else {
377  maxCommas = numCommas[0];
378  }
379  }
380  /*
381  * set a preliminary value of nCol
382  */
383  nCol = maxCommas + 1;
384  if (nScanLines == 0) {
385  nCol = 0;
386  }
387  char** strlets = (char**) mdp_alloc_ptr_1(maxCommas+1);
388 
389  if (ColIsFloat.size() < maxCommas+1) {
390  ColIsFloat.resize(maxCommas+1);
391  }
392 
393  /*
394  * Figure out if each column is a text or float
395  */
396  rewind(fp);
397  ColIsFloat.assign(ColIsFloat.size(), 0);
398 
399  for (i = 0; i < nScanLines; i++) {
400  retn = read_line(fp, scanLine, 0);
401  int ncolsFound = breakStrCommas(scanLine, strlets, nCol);
402  if (ncolsFound == (maxCommas + 1)) {
403  for (j = 0; j < ncolsFound; j++) {
404  char* fieldStr = strlets[j];
405  fillTokStruct(&fieldToken, fieldStr);
406  if (fieldToken.ntokes != 1) {
407  break;
408  }
409  bool rerr = false;
410  (void) tok_to_double(&fieldToken, DBL_MAX, -DBL_MAX, 0.0, &rerr);
411  if (!rerr) {
412  ColIsFloat[j]++;
413  }
414  }
415  }
416  }
417  std::vector<int>::iterator it;
418  it = std::max_element(ColIsFloat.begin(), ColIsFloat.end());
419  int maxFloats = *it;
420  it = std::min_element(ColIsFloat.begin(), ColIsFloat.end());
421  int minFloats = *it;
422  // if maxFloats == 0, we're done. No column is float
423  if (maxFloats > 0) {
424  for (j = 0; j < maxCommas + 1; j++) {
425  if (ColIsFloat[j] != maxFloats) {
426  if (ColIsFloat[j] == minFloats) {
427  // hook for debugger
428  if (ColIsFloat[j] > 0) {
429  ColIsFloat[j] = 0;
430  }
431  } else {
432  printf("WARNING: type of column %d couldn't be uniquely determined, assuming text\n", j);
433  ColIsFloat[j] = 0;
434  }
435  }
436  }
437  }
438 
439 
440  int doingLineType = LT_TITLELINE;
441  if (nScanLines == 2) {
442  nTitleLines = 0;
443  doingLineType = LT_COLTITLE;
444  }
445 
446 
447  rewind(fp);
448  for (i = 0; i < nScanLines; i++) {
449  retn = read_line(fp, scanLine, 0);
450  /*
451  * Strip a trailing comma from the scanline -
452  * -> These are not significant
453  */
454  int ccount = static_cast<int>(strlen(scanLine));
455  if (ccount > 0) {
456  if (scanLine[ccount-1] == ',') {
457  scanLine[ccount-1] = '\0';
458  }
459  }
460  int ncolsFound = breakStrCommas(scanLine, strlets, nCol);
461 
462  if (doingLineType == LT_TITLELINE) {
463  if (numCommas[i] == maxCommas) {
464  doingLineType = LT_COLTITLE;
465  nTitleLines = i;
466  }
467  }
468 
469  if (doingLineType == LT_COLTITLE) {
470  bool goodDataLine = true;
471  bool rerr = false;
472  for (j = 0; j < ncolsFound; j++) {
473  char* fieldStr = strlets[j];
474  fillTokStruct(&fieldToken, fieldStr);
475  if (fieldToken.ntokes != 1) {
476  goodDataLine = false;
477  break;
478  }
479  if ((ColIsFloat[j]) > 0) {
480  (void) tok_to_double(&fieldToken, DBL_MAX, -DBL_MAX, 0.0, &rerr);
481  if (rerr) {
482  goodDataLine = false;
483  break;
484  }
485  }
486  }
487  if (goodDataLine) {
488  doingLineType = LT_DATALINE;
489  }
490  nColTitleLines = i - nTitleLines;
491  }
492  if (doingLineType == LT_DATALINE) {
493  break;
494  }
495  }
496 
497 
498  /*
499  * Count the total number of lines in the file
500  */
501  if (doingLineType == LT_DATALINE) {
502  for (i = nColTitleLines + nTitleLines; ; i++) {
503  retn = read_line(fp, scanLine, 0);
504  if (retn == -1) {
505  nDataRows = i - nColTitleLines - nTitleLines + 1;
506  break;
507  }
508  /*
509  * Strip a trailing comma from the scanline -
510  * -> These are not significant
511  */
512  int ccount = static_cast<int>(strlen(scanLine));
513  if (ccount > 0) {
514  if (scanLine[ccount-1] == ',') {
515  scanLine[ccount-1] = '\0';
516  }
517  }
518  int ncolsFound = breakStrCommas(scanLine, strlets, nCol);
519  bool goodDataLine = true;
520  bool rerr = false;
521  for (j = 0; j < ncolsFound; j++) {
522  char* fieldStr = strlets[j];
523  fillTokStruct(&fieldToken, fieldStr);
524  if (fieldToken.ntokes != 1) {
525  goodDataLine = false;
526  break;
527  }
528  if (ColIsFloat[j] > 0) {
529  (void) tok_to_double(&fieldToken, DBL_MAX,
530  -DBL_MAX, 0.0, &rerr);
531  if (rerr) {
532  goodDataLine = false;
533  break;
534  }
535  }
536  }
537  if (! goodDataLine) {
538  doingLineType = LT_NULLLINE;
539  nDataRows = i - nColTitleLines - nTitleLines + 1;
540  break;
541  }
542  }
543  }
544  mdp_safe_free((void**) &strlets);
545  mdp_safe_free((void**) &scanLine);
546  mdp_safe_free((void**) &numCommas);
547  return;
548 }
549 
550 /*****************************************************************************/
551 /*****************************************************************************/
552 /*****************************************************************************/
553 
554 static void
555 read_title(FILE* fp, char** *title, int nTitleLines)
556 {
557  int retn;
558  *title = (char**) mdp_alloc_ptr_1(nTitleLines);
559  char* scanLine = mdp_alloc_char_1(Max_Input_Str_Ln + 1, '\0');
560  for (int i = 0; i < nTitleLines ; i++) {
561  retn = read_line(fp, scanLine, 0);
562  if (retn >= 0) {
563  /*
564  * Strip a trailing comma from the scanline -
565  * -> These are not significant
566  */
567  int ccount = static_cast<int>(strlen(scanLine));
568  if (ccount > 0) {
569  if (scanLine[ccount-1] == ',') {
570  scanLine[ccount-1] = '\0';
571  }
572  }
573  (*title)[i] = mdp_copy_string(scanLine);
574  }
575  }
576  mdp_safe_free((void**) &scanLine);
577 }
578 
579 /*****************************************************************************/
580 /*****************************************************************************/
581 /*****************************************************************************/
582 
583 static void
584 read_colTitle(FILE* fp, char**** ColMLNames_ptr, int nColTitleLines, int nCol)
585 {
586  int retn, j;
587  *ColMLNames_ptr = (char***) mdp_alloc_ptr_1(nCol);
588  char** *ColMLNames = *ColMLNames_ptr;
589  char* scanLine = mdp_alloc_char_1(Max_Input_Str_Ln + 1, '\0');
590  char** strlets = (char**) mdp_alloc_ptr_1(nCol+1);
591  if (nColTitleLines > 0) {
592  for (int i = 0; i < nColTitleLines ; i++) {
593  retn = read_line(fp, scanLine, 0);
594  if (retn >= 0) {
595  /*
596  * Strip a trailing comma from the scanline -
597  * -> These are not significant
598  */
599  int ccount = static_cast<int>(strlen(scanLine));
600  if (ccount > 0) {
601  if (scanLine[ccount-1] == ',') {
602  scanLine[ccount-1] = '\0';
603  }
604  }
605  int ncolsFound = breakStrCommas(scanLine, strlets, nCol);
606  ColMLNames[i] = mdp_alloc_VecFixedStrings(nCol, MAX_TOKEN_STR_LN+1);
607  for (j = 0; j < ncolsFound; j++) {
608  strip(strlets[j]);
609  strcpy(ColMLNames[i][j], strlets[j]);
610  }
611  }
612  }
613  } else {
614  ColMLNames[0] = mdp_alloc_VecFixedStrings(nCol, MAX_TOKEN_STR_LN+1);
615  for (j = 0; j < nCol; j++) {
616  char cbuff[256];
617  sprintf(cbuff, "Col_%d", j+1);
618  strcpy(ColMLNames[0][j], cbuff);
619  }
620  }
621  mdp_safe_free((void**) &scanLine);
622  mdp_safe_free((void**) &strlets);
623 }
624 
625 /*****************************************************************************/
626 /*****************************************************************************/
627 /*****************************************************************************/
628 
/*
 * get_atol():
 *    Scales an absolute tolerance by the size of a column of values:
 *        (rms(values) + 1) * atol
 *
 * Arguments:
 *    values -> column values
 *    nvals  -> number of entries in values
 *    atol   -> base absolute tolerance
 *
 * Return:
 *    The scaled tolerance. With no values to look at, the base tolerance
 *    atol itself is returned. (It used to be the global gatol, which
 *    ignored the argument.)
 */
static double get_atol(const double* values, const int nvals,
                       const double atol)
{
    if (nvals <= 0 || !values) {
        return atol;
    }
    double sum = 0.0;
    for (int i = 0; i < nvals; i++) {
        sum += values[i] * values[i];
    }
    sum /= nvals;
    return (sqrt(sum) + 1.0) * atol;
}
645 
646 /*****************************************************************************/
647 /*****************************************************************************/
648 /*****************************************************************************/
649 
650 static void
651 read_values(FILE* fp, double** NVValues, char** *NSValues, int nCol, int nDataRows,
652  std::vector<int>& ColIsFloat)
653 {
654  char** strlets = (char**) mdp_alloc_ptr_1(nCol+1);
655  char* scanLine = mdp_alloc_char_1(Max_Input_Str_Ln + 1, '\0');
656  TOKEN fieldToken;
657  double value;
658  int retn, j;
659  for (int i = 0; i < nDataRows; i++) {
660  retn = read_line(fp, scanLine, 0);
661  if (retn == -1) {
662  break;
663  }
664  /*
665  * Strip a trailing comma from the scanline -
666  * -> These are not significant
667  */
668  int ccount = static_cast<int>(strlen(scanLine));
669  if (ccount > 0) {
670  if (scanLine[ccount-1] == ',') {
671  scanLine[ccount-1] = '\0';
672  }
673  }
674  int ncolsFound = breakStrCommas(scanLine, strlets, nCol);
675  bool goodDataLine = true;
676  bool rerr = false;
677  for (j = 0; j < ncolsFound; j++) {
678  char* fieldStr = strlets[j];
679  NSValues[j][i] = mdp_copy_string(strlets[j]);
680  fillTokStruct(&fieldToken, fieldStr);
681  if (fieldToken.ntokes != 1) {
682  goodDataLine = false;
683  break;
684  }
685  if (ColIsFloat[j]) {
686  value = tok_to_double(&fieldToken, DBL_MAX,
687  -DBL_MAX, 0.0, &rerr);
688  if (rerr) {
689  goodDataLine = false;
690  break;
691  }
692  NVValues[j][i] = value;
693  }
694  }
695  if (! goodDataLine) {
696  break;
697  }
698  }
699  mdp_safe_free((void**) &strlets);
700  mdp_safe_free((void**) &scanLine);
701 }
702 /*****************************************************************************/
703 /*****************************************************************************/
704 /*****************************************************************************/
705 
/*
 * print_usage():
 *    Writes the command line synopsis, the option list and the shell
 *    return values to stdout.
 */
static void print_usage()
{
    static const char* const usageLines[] = {
        "\t\n",
        " csvdiff [-h] [-a atol] [-r rtol] File1.csv File2.csv\n",
        "\t\n",
        "\tCompares the variable values in two Excel formatted "
        "comma separated files.\n",
        "\tThe comparison is done using a weighted norm basis.\n",
        "\t\n",
        "\tThe two files should be basically equal. However, File1.csv is\n",
        "\ttaken as the reference file that has precedence, when there is\n",
        "\tsomething to be decided upon.\n",
        "\t\n",
        "\t Arguments:\n",
        "\t -h = Usage info\n",
        "\t -a atol = Set absolute tolerance parameter - default = 1.0E-9\n",
        "\t -r rtol = Set relative tolerance parameter - default = 1.0E-3\n",
        "\t\n",
        "\t Shell Return Values:\n",
        "\t 0 = Comparison was successful\n",
        "\t 1 = One or more nodal values failed the comparison\n",
        "\t 2 = One or more header values failed the comparison\n",
        "\t 3 = Apples to oranges, the files can not even be compared against\n",
        "\t one another.\n",
        "\t\n"
    };
    const size_t nLines = sizeof(usageLines) / sizeof(usageLines[0]);
    for (size_t iLine = 0; iLine < nLines; ++iLine) {
        fputs(usageLines[iLine], stdout);
    }
}
732 /*****************************************************************************/
733 /*****************************************************************************/
734 /*****************************************************************************/
735 
736 int main(int argc, char* argv[])
737 
738 /*
739  * main driver for csvdiff.
740  */
741 {
742  int opt_let;
743  char* fileName1=NULL, *fileName2=NULL; /* Names of the csv files */
744  FILE* fp1=NULL, *fp2=NULL;
745  int nTitleLines1 = 0, nTitleLines2 = 0;
746  int nColTitleLines1 = 0, nColTitleLines2 = 0;
747  int nCol1 = 0, nCol2 = 0, nColMAX = 0, nColcomparisons = 0;
748  int nDataRows1 = 0, nDataRows2 = 0;
749  char** title1 = 0, **title2 = 0;
750  int** compColList = NULL;
751  char** *ColMLNames1 = NULL, *** ColMLNames2 = NULL;
752  char** ColNames1 = NULL, **ColNames2 = NULL;
753  double** NVValues1 = NULL, **NVValues2 = NULL;
754  char** *NSValues1 = NULL, *** NSValues2 = NULL;
755  std::vector<int> ColIsFloat1;
756  std::vector<int> ColIsFloat2;
757  double* curVarValues1 = NULL, *curVarValues2 = NULL;
758  char** curStringValues1 = NULL, **curStringValues2 = NULL;
759  int i, j, ndiff, jmax=0, i1, i2, k;
760  bool found;
761  double max_diff, rel_diff;
762  int testPassed = RT_PASSED;
763  double atol_j, atol_arg = 0.0, rtol_arg = 0.0;
764 
765  /********************** BEGIN EXECUTION ************************************/
766  int id = 0;
767  int id2 = 0;
768  char* ggg = 0;
769  char* rrr = 0;
770  /*
771  * Interpret command line arguments
772  */
773  /* Loop over each command line option */
774  while ((opt_let = getopt(argc, argv, "ha:r:")) != EOF) {
775 
776  /* case over the option letter */
777  switch (opt_let) {
778 
779  case 'h':
780  /* Usage info was requested */
781  print_usage();
782  exit(0);
783 
784  case 'a':
785  /* atol parameter */
786 
787  ggg = optarg;
788  //printf("a = %s\n", ggg);
789  id = sscanf(ggg,"%lg", &atol_arg);
790  if (id != 1) {
791  printf(" atol param bad: %s\n", ggg);
792  exit(-1);
793  }
794  gatol = atol_arg;
795  break;
796 
797  case 'r':
798  /* rtol parameter */
799 
800  rrr = optarg;
801  //printf("r = %s\n", ggg);
802  id2 = sscanf(rrr,"%lg", &rtol_arg);
803  if (id2 != 1) {
804  printf(" rtol param bad: %s\n", rrr);
805  exit(-1);
806  }
807  grtol = rtol_arg;
808  break;
809 
810 
811  default:
812  /* Default case. Error on unknown argument. */
813  printf("default called opt_let = %c\n", opt_let);
814  fprintf(stderr, "ERROR in command line usuage:\n");
815  print_usage();
816  return 0;
817  } /* End "switch(opt_let)" */
818 
819  } /* End "while((opt_let=getopt(argc, argv, "i")) != EOF)" */
820 
821  if (optind != argc-2) {
822  print_usage();
823  exit(-1);
824  } else {
825  fileName1 = argv[argc-2];
826  fileName2 = argv[argc-1];
827  }
828 
829  /*
830  * Print Out Header
831  */
832  printf("\n");
833  printf("----------------------------------------------------------\n");
834  printf("csvdiff: CSVFile comparison utility program\n");
835  printf(" Harry K. Moffat Div. 9114 Sandia National Labs\n");
836  printf(" \n");
837  printf(" First CSV File = %s\n", fileName1);
838  printf(" Second CSV file = %s\n", fileName2);
839  printf("\n");
840  printf(" Absolute tol = %g\n", gatol);
841  printf(" Relative tol = %g\n", grtol);
842  printf("----------------------------------------------------------\n");
843  printf("\n");
844 
845  /*
846  * Open up the two ascii Files #1 and #2
847  */
848  if (!(fp1 = fopen(fileName1, "r"))) {
849  fprintf(stderr,"Error opening up file1, %s\n", fileName1);
850  exit(-1);
851  }
852  if (!(fp2 = fopen(fileName2, "r"))) {
853  fprintf(stderr, "Error opening up file2, %s\n", fileName2);
854  exit(-1);
855  }
856 
857  ColIsFloat1.resize(200, 0);
858  ColIsFloat2.resize(200, 0);
859 
860  /*
861  * Obtain the size of the problem information: Compare between files.
862  */
863 
864  get_sizes(fp1, nTitleLines1, nColTitleLines1, nCol1, nDataRows1, ColIsFloat1);
865  if (nCol1 == 0) {
866  printf("Number of columns in file %s is zero\n", fileName1);
867  testPassed = RT_FAILED_OTHER;
868  exit(RT_FAILED_OTHER);
869  }
870  if (nDataRows1 == 0) {
871  printf("Number of data rows in file %s is zero\n", fileName1);
872  testPassed = RT_FAILED_OTHER;
873  exit(RT_FAILED_OTHER);
874  }
875 
876 
877  check_consistency(fp1, fileName1, nTitleLines1, nColTitleLines1, nCol1, nDataRows1, ColIsFloat1);
878 
879  get_sizes(fp2, nTitleLines2, nColTitleLines2, nCol2, nDataRows2, ColIsFloat2);
880  if (nCol2 == 0) {
881  printf("Number of columns in file %s is zero\n", fileName2);
882  testPassed = RT_FAILED_OTHER;
883  exit(RT_FAILED_OTHER);
884  }
885  if (nDataRows2 == 0) {
886  printf("Number of data rows in file %s is zero\n", fileName2);
887  testPassed = RT_FAILED_OTHER;
888  exit(RT_FAILED_OTHER);
889  }
890 
891  if (nTitleLines1 != nTitleLines2) {
892  printf("Number of Title Lines differ:, %d %d\n",nTitleLines1, nTitleLines2);
893  testPassed = RT_FAILED_OTHER;
894  } else if (Debug_Flag) {
895  printf("Number of Title Lines in each file = %d\n", nTitleLines1);
896  }
897  if (nColTitleLines1 != nColTitleLines2) {
898  printf("Number of Column title lines differ:, %d %d\n", nColTitleLines1,
899  nColTitleLines2);
900  testPassed = RT_FAILED_OTHER;
901  } else if (Debug_Flag) {
902  printf("Number of column title lines in each file = %d\n", nColTitleLines1);
903  }
904 
905  check_consistency(fp2, fileName2, nTitleLines2, nColTitleLines2, nCol2, nDataRows2, ColIsFloat2);
906 
907  /*
908  * Right now, if the number of data rows differ, we will punt.
909  * Maybe later we can do something more significant
910  */
911  int nDataRowsMIN = min(nDataRows1, nDataRows2);
912  int nDataRowsMAX = max(nDataRows1, nDataRows2);
913  if (nDataRows1 != nDataRows2) {
914  printf("Number of Data rows in file1, %d, is different than file2, %d\n",
915  nDataRows1, nDataRows2);
916  } else {
917  printf("Number of Data rows in both files = %d\n", nDataRowsMIN);
918  }
919 
920  rewind(fp1);
921  rewind(fp2);
922  read_title(fp1, &title1, nTitleLines1);
923  read_title(fp2, &title2, nTitleLines2);
924 
925  if (nTitleLines1 > 0 || nTitleLines2 > 0) {
926  int n = min(nTitleLines1, nTitleLines2);
927  for (i = 0; i < n; i++) {
928  if (strcmp(title1[i], title2[i]) != 0) {
929  printf("Title Line %d differ:\n\t\"%s\"\n\t\"%s\"\n", i, title1[i], title2[i]);
930  testPassed = RT_FAILED_HDR;
931  } else if (Debug_Flag) {
932  printf("Title Line %d for each file: \"%s\"\n", i, title1[i]);
933  }
934  }
935  if (nTitleLines1 != nTitleLines2) {
936  printf("Number of Title Lines differ: %d %d\n", nTitleLines1, nTitleLines2);
937  testPassed = RT_FAILED_HDR;
938  }
939  } else {
940  if (nTitleLines1 != nTitleLines2) {
941  if (nTitleLines1) {
942  printf("Titles differ: title for first file: \"%s\"\n",
943  title1[0]);
944  testPassed = RT_FAILED_HDR;
945  }
946  if (nTitleLines2) {
947  printf("Titles differ: title for second file: \"%s\"\n",
948  title2[0]);
949  }
950  testPassed = RT_FAILED_HDR;
951  }
952  }
953 
954  /*
955  * Get the number of column variables in each file
956  */
957 
958  if (nCol1 != nCol2) {
959  printf("Number of column variables differ:, %d %d\n",
960  nCol1, nCol2);
961  testPassed = RT_FAILED_OTHER;
962  } else if (Debug_Flag) {
963  printf("Number of column variables in both files = %d\n",
964  nCol1);
965  }
966 
967  /*
968  * Read the names of the column variables
969  */
970  read_colTitle(fp1, &ColMLNames1, nColTitleLines1, nCol1);
971  read_colTitle(fp2, &ColMLNames2, nColTitleLines2, nCol2);
972  ColNames1 = ColMLNames1[0];
973  ColNames2 = ColMLNames2[0];
974 
975  /*
976  * Do a Comparison of the names to find the maximum number
977  * of matches.
978  */
979  nColMAX = max(nCol1, nCol2);
980 
981  compColList = mdp_alloc_int_2(nColMAX, 2, -1);
982  nColcomparisons = 0;
983  for (i = 0; i < nCol1; i++) {
984  found = false;
985  for (j = 0; j < nCol2; j++) {
986  if (!strcmp(ColNames1[i], ColNames2[j])) {
987  compColList[nColcomparisons][0] = i;
988  compColList[nColcomparisons][1] = j;
989  nColcomparisons++;
990  found = true;
991  break;
992  }
993  }
994  if (!found) {
995  printf("csvdiff WARNING Variable %s (%d) in first file not found"
996  " in second file\n", ColNames1[i], i);
997  testPassed = RT_FAILED_OTHER;
998  }
999  }
1000  for (j = 0; j < nCol2; j++) {
1001  found = false;
1002  for (i = 0; i < nColcomparisons; i++) {
1003  if (compColList[i][1] == j) {
1004  found = true;
1005  }
1006  }
1007  if (! found) {
1008  printf("csvdiff WARNING Variable %s (%d) in second file "
1009  "not found in first file\n",
1010  ColNames2[j], j);
1011  testPassed = RT_FAILED_OTHER;
1012  }
1013  }
1014 
1015  /*
1016  * Allocate storage for the column variables
1017  */
1018  NVValues1 = mdp_alloc_dbl_2(nCol1, nDataRowsMAX, 0.0);
1019  NVValues2 = mdp_alloc_dbl_2(nCol2, nDataRowsMAX, 0.0);
1020 
1021  /*
1022  * Allocate storage for the column variables
1023  */
1024  NSValues1 = (char***) mdp_alloc_ptr_2(nCol1, nDataRowsMAX);
1025  NSValues2 = (char***) mdp_alloc_ptr_2(nCol2, nDataRowsMAX);
1026 
1027  /*
1028  * Read in the values to the arrays
1029  */
1030  read_values(fp1, NVValues1, NSValues1, nCol1, nDataRows1, ColIsFloat1);
1031  read_values(fp2, NVValues2, NSValues2, nCol2, nDataRows2, ColIsFloat2);
1032 
1033  /*
1034  * Compare the solutions in each file
1035  */
1036  int method = 1;
1037  double slope1, slope2, xatol;
1038  int notOK;
1039  for (k = 0; k < nColcomparisons; k++) {
1040 
1041  i1 = compColList[k][0];
1042  i2 = compColList[k][1];
1043  bool doFltComparison = true;
1044  if (!ColIsFloat1[i1]) {
1045  doFltComparison = false;
1046  jmax = -1;
1047  }
1048  if (!ColIsFloat2[i2]) {
1049  doFltComparison = false;
1050  jmax = -1;
1051  }
1052  curStringValues1 = NSValues1[i1];
1053  curStringValues2 = NSValues2[i2];
1054  max_diff = 0.0;
1055  ndiff = 0;
1056  if (doFltComparison) {
1057  curVarValues1 = NVValues1[i1];
1058  curVarValues2 = NVValues2[i2];
1059  atol_j = get_atol(curVarValues1, nDataRows1, gatol);
1060  atol_j = min(atol_j, get_atol(curVarValues2, nDataRows2, gatol));
1061  for (j = 0; j < nDataRowsMIN; j++) {
1062 
1063  slope1 = 0.0;
1064  slope2 = 0.0;
1065  xatol = fabs(grtol * (NVValues1[0][j] - NVValues1[0][j-1]));
1066  if (j > 0 && k > 0) {
1067  slope1 = (curVarValues1[j] - curVarValues1[j-1])/
1068  (NVValues1[0][j] - NVValues1[0][j-1]);
1069  slope2 = (curVarValues2[j] - curVarValues2[j-1])/
1070  (NVValues2[0][j] - NVValues2[0][j-1]);
1071  }
1072  if (method) {
1073  notOK = diff_double_slope(curVarValues1[j], curVarValues2[j],
1074  grtol, atol_j, xatol, slope1, slope2);
1075  } else {
1076  notOK = diff_double(curVarValues1[j], curVarValues2[j],
1077  grtol, atol_j);
1078  }
1079  if (notOK) {
1080  ndiff++;
1081  rel_diff = calc_rdiff((double) curVarValues1[j],
1082  (double) curVarValues2[j], grtol, atol_j);
1083  if (rel_diff > max_diff) {
1084  jmax = j;
1085  max_diff = rel_diff;
1086  }
1087  if (ndiff < 10) {
1088  printf("\tColumn variable %s at data row %d ", ColNames1[i1], j + 1);
1089  printf(" differ: %g %g\n", curVarValues1[j],
1090  curVarValues2[j]);
1091  }
1092  }
1093  }
1094  } else {
1095  for (j = 0; j < nDataRowsMIN; j++) {
1096  strip(curStringValues1[j]);
1097  strip(curStringValues2[j]);
1098  notOK = false;
1099  if (strcmp(curStringValues1[j], curStringValues2[j])) {
1100  notOK = true;
1101  ndiff++;
1102  if (ndiff < 10) {
1103  printf("\tColumn String variable %s at data row %d ", ColNames1[i1], j + 1);
1104  printf(" differ: %s %s\n", curStringValues1[j],
1105  curStringValues2[j]);
1106  }
1107  }
1108  }
1109  }
1110 
1111  if (nDataRowsMIN != nDataRowsMAX) {
1112  ndiff += nDataRowsMAX - nDataRowsMIN;
1113  if (ndiff < 10) {
1114  if (nDataRows1 > nDataRows2) {
1115  for (j = nDataRowsMIN; j < nDataRowsMAX; j++) {
1116  printf("\tColumn variable %s at data row %d ", ColNames1[i1], j + 1);
1117  printf(" differ: %g NA\n", curVarValues1[j]);
1118  }
1119  } else {
1120  for (j = nDataRowsMIN; j < nDataRowsMAX; j++) {
1121  printf("\tColumn variable %s at data row %d ", ColNames1[i1], j + 1);
1122  printf(" differ: NA %g \n", curVarValues2[j]);
1123  }
1124  }
1125  }
1126  }
1127 
1128  /*
1129  * Print out final results of nodal variable test
1130  */
1131 
1132  if (ndiff > 0) {
1133  printf(
1134  "Column variable %s failed comparison test for %d occurrences\n",
1135  ColNames1[i1], ndiff);
1136  if (jmax >= 0) {
1137  printf(" Largest difference was at data row %d ", jmax + 1);
1138  printf(": %g %g\n", curVarValues1[jmax], curVarValues2[jmax]);
1139  }
1140  testPassed = RT_FAILED_COL;
1141  } else if (Debug_Flag) {
1142  printf("Column variable %s passed\n", ColNames1[i1]);
1143  }
1144 
1145  }
1146 
1147  return testPassed;
1148 
1149 } /************END of main() *************************************************/
1150 /*****************************************************************************/