Cantera  2.1.2
csvdiff.cpp
1 /*
2  * csvdiff File1.csv File2.csv
3  *
4  * Compares the variable values in two Excel formatted
5  * comma separated files.
6  * The comparison is done using a weighted norm basis.
7  *
8  * The two files should be basically equal. However, File1.csv is
9  * taken as the reference file, that has precedence, when there is
10  * something to be decided upon.
11  *
12  * Arguments:
13  * -h = prints this usage information
14  *
15  * Shell Return Values
16  * 0 = Comparison was successful
17  * 1 = One or more nodal values failed the comparison
18  * 2 = One or more of the header values failed the comparison
19  * 3 = Apples to oranges, the files can not even be compared against
20  * one another.
21  */
22 
23 #include <math.h>
24 #include <float.h>
25 #include <algorithm>
26 #include <cstdlib>
27 #include "cantera/base/config.h"
28 #ifndef _MSC_VER
29 #include <unistd.h>
30 #else
31 #include <string>
32 #endif
33 using namespace std;
34 
35 #if defined(__CYGWIN__)
36 #include <getopt.h>
37 #endif
38 
39 #include "mdp_allo.h"
40 //#include "cantera/base/mdp_allo.h"
41 #include "tok_input_util.h"
42 
/* Nonzero makes main() also report items that compared equal (title lines, column counts, passing columns). */
43 int Debug_Flag = true;
/* Relative tolerance used for value comparisons; main() overwrites it from the -r option. */
44 double grtol = 1.0E-3;
/* Absolute tolerance scale passed to get_atol(); main() overwrites it from the -a option. */
45 double gatol = 1.0E-9;
46 
/* Values assigned to main()'s testPassed status, which becomes the process return value. */
47 #define RT_PASSED 0
48 #define RT_FAILED_COL 1
49 #define RT_FAILED_HDR 2
50 #define RT_FAILED_OTHER 3
51 
52 /*
53  * Line-buffer length (one extra byte is added for the terminator) used by
53  * read_title(), read_colTitle() and read_values(); initialized from
53  * MAX_INPUT_STR_LN.
54  */
55 int Max_Input_Str_Ln = MAX_INPUT_STR_LN;
56 /*****************************************************************************/
57 /*****************************************************************************/
56 /*****************************************************************************/
57 /*****************************************************************************/
58 /*****************************************************************************/
59 
60 #ifdef _MSC_VER
/*
 * Windows doesn't have getopt(). This is a small replacement that follows
 * the POSIX conventions closely enough for this program:
 *
 *   - optstring lists the accepted option letters; a letter followed by
 *     ':' takes an argument, given either attached ("-r1e-3") or as the
 *     next command line word ("-r 1e-3").
 *   - Several argument-less letters may be bundled in one word ("-hx").
 *   - Scanning stops at the first word that is not an option, at a lone
 *     "-", or after "--"; -1 is then returned and optind is the index of
 *     the first unprocessed word.
 *   - '?' is returned for an unknown letter or a missing option argument.
 *
 * Unlike GNU getopt, no permutation of argv is done, so options must
 * precede the file names.
 *
 * optind = index of the next argv word to be examined (starts at 1)
 * optarg = argument of the option just returned, or 0 if it has none
 */
int optind = 1;
char* optarg = 0;

int getopt(int argc, char** argv, const char* optstring)
{
    /* Position inside the current option word; 0 means "start a new word" */
    static int charPos = 0;
    char* tok;
    const char* spec = 0;
    const char* p;
    int letter;

    optarg = 0;
    if (optind < 1) {
        optind = 1;
    }
    if (charPos == 0) {
        if (optind >= argc) {
            return -1;
        }
        tok = argv[optind];
        /* A plain word or a lone "-" ends option processing */
        if (tok == 0 || tok[0] != '-' || tok[1] == '\0') {
            return -1;
        }
        /* "--" ends option processing and is itself consumed */
        if (tok[1] == '-' && tok[2] == '\0') {
            optind++;
            return -1;
        }
        charPos = 1;
    }

    tok = argv[optind];
    letter = (unsigned char) tok[charPos];
    charPos++;

    /* ':' is the argument marker in optstring, never a valid option letter */
    if (optstring != 0 && letter != ':') {
        for (p = optstring; *p != '\0'; p++) {
            if ((unsigned char) *p == letter) {
                spec = p;
                break;
            }
        }
    }

    if (spec != 0 && spec[1] == ':') {
        if (tok[charPos] != '\0') {
            /* Argument attached to the option letter */
            optarg = tok + charPos;
            optind++;
        } else if (optind + 1 < argc) {
            /* Argument is the following command line word */
            optarg = argv[optind + 1];
            optind += 2;
        } else {
            /* Required argument is missing */
            optind++;
            charPos = 0;
            return '?';
        }
        charPos = 0;
        return letter;
    }

    /* Move on to the next word once every letter of this one is used */
    if (tok[charPos] == '\0') {
        optind++;
        charPos = 0;
    }
    return (spec != 0) ? letter : '?';
}
123 
124 #endif
125 
126 /*****************************************************************************/
127 /*****************************************************************************/
128 /*****************************************************************************/
129 
/*
 * diff_double():
 *   Fuzzy inequality test for two doubles.
 *
 *   The allowed gap is atol + rtol * (|d1| + |d2|) / 2.
 *
 * Return:
 *   1 if |d1 - d2| is strictly larger than the allowed gap, 0 otherwise.
 */
static int diff_double(double d1, double d2, double rtol, double atol)
{
    const double gap = fabs(d1 - d2);
    const double allowed = atol + rtol * 0.5 * (fabs(d1) + fabs(d2));
    return (gap > allowed) ? 1 : 0;
}
142 
/*
 * diff_double_slope():
 *   Fuzzy inequality test for two doubles with an extra allowance that
 *   grows with the local slopes of the two curves.
 *
 *   The allowed gap is
 *       atol + xtol * (|slope1| + |slope2|) + rtol * (|d1| + |d2|) / 2.
 *
 * Return:
 *   1 if |d1 - d2| is strictly larger than the allowed gap, 0 otherwise.
 */
static int diff_double_slope(double d1, double d2, double rtol,
                             double atol, double xtol, double slope1, double slope2)
{
    const double slopeAllowance = xtol * (fabs(slope1) + fabs(slope2));
    const double relAllowance = rtol * 0.5 * (fabs(d1) + fabs(d2));
    const double allowed = atol + slopeAllowance + relAllowance;
    return (fabs(d1 - d2) > allowed) ? 1 : 0;
}
157 
158 /*****************************************************************************/
159 /*****************************************************************************/
160 /*****************************************************************************/
161 
/*
 * calc_rdiff():
 *   Measures how far apart two doubles are in units of the fuzzy
 *   tolerance atol + rtol * (|d1| + |d2|) / 2.
 *
 * Return:
 *   |d1 - d2| divided by that tolerance. No guard is applied when the
 *   tolerance evaluates to zero.
 */
static double calc_rdiff(double d1, double d2, double rtol, double atol)
{
    const double scale = atol + rtol * 0.5 * (fabs(d1) + fabs(d2));
    return fabs(d1 - d2) / scale;
}
174 
175 /*****************************************************************************/
/*
 * breakStrCommas():
 *   Breaks a character string into stringlets at its commas. Each comma
 *   that is used as a break is overwritten with a null character, and a
 *   pointer to the start of every piece is stored in strlets.
 *
 *   At most maxPieces pieces are produced and at most maxPieces entries
 *   of strlets are written. Once the limit is reached the remainder of
 *   the string, commas included, is left untouched in the last piece.
 *
 * Argument:
 *   str       => original string. On exit, this string will have been
 *                altered.
 *   strlets   -> Vector of pointers to char *. The vector must hold at
 *                least maxPieces entries.
 *   maxPieces -> largest number of pieces to divide the string into.
 *
 * Return:
 *   The number of pieces that the string was actually broken into.
 *   1 is returned when strlets or str is NULL (strlets[0] is set to NULL
 *   in the latter case). 0 is returned, and nothing is written, when
 *   strlets is non-NULL and maxPieces is less than 1.
 */
static int breakStrCommas(char* str, char** strlets, int maxPieces)
{
    if (!strlets) {
        return 1;
    }
    if (maxPieces < 1) {
        return 0;
    }
    int nPieces = 1;
    strlets[0] = str;
    if (str) {
        char* cursor = str;
        /* Check the limit BEFORE splitting so strlets is never overrun */
        while (nPieces < maxPieces) {
            char* comma = strchr(cursor, ',');
            if (!comma) {
                break;
            }
            *comma = '\0';
            cursor = comma + 1;
            strlets[nPieces] = cursor;
            nPieces++;
        }
    }
    return nPieces;
}
214 
215 /*****************************************************************************/
216 /*****************************************************************************/
217 /*****************************************************************************/
218 #define LT_NULLLINE 0
219 #define LT_TITLELINE 1
220 #define LT_COLTITLE 2
221 #define LT_DATALINE 3
222 /*
223  * get_sizes()
224  *
225  * This routine obtains the sizes of the various elements of the file
226  * by parsing the file.
227  * (HKM: Note, this file could use some work. However, it's always
228  * going to be heuristic)
229  *
230  * Arguments:
231  *
232  * fp = File pointer
233  * nTitleLines = Number of title lines
234  * nColTitleLines = Number of column title lines
235  * nCol = Number of columns -> basically equal to the
236  * number of variables
237  * nDataRows = Number of rows of data in the file
238  *
239  */
240 
241 static void get_sizes(FILE* fp, int& nTitleLines, int& nColTitleLines,
242  int& nCol, int& nDataRows, int** ColIsFloat_ptr)
243 {
244  int nScanLinesMAX = 100;
245  int nScanLines = nScanLinesMAX;
246  int retn, i, j;
247  int maxCommas = 0;
248  TOKEN fieldToken;
249  char* scanLine = mdp_alloc_char_1(MAX_INPUT_STR_LN+1, '\0');
250  int* numCommas = mdp_alloc_int_1(nScanLinesMAX, -1);
251  int* ColIsFloat = *ColIsFloat_ptr;
252 
253  /*
254  * Rewind the file
255  */
256  rewind(fp);
257  /*
258  * Read the scan lines
259  */
260  for (i = 0; i < nScanLinesMAX; i++) {
261  retn = read_line(fp, scanLine, 0);
262  if (retn == -1) {
263  nScanLines = i;
264  break;
265  }
266  /*
267  * Strip a trailing comma from the scanline -
268  * -> These are not significant
269  */
270  int ccount = static_cast<int>(strlen(scanLine));
271  if (ccount > 0) {
272  if (scanLine[ccount-1] == ',') {
273  scanLine[ccount-1] = '\0';
274  }
275  }
276  /*
277  * Count the number of commas in the line
278  */
279  char* cptr = scanLine;
280  char* cetn = NULL;
281  numCommas[i] = 0;
282  do {
283  cetn = strchr(cptr, (int) ',');
284  if (cetn) {
285  numCommas[i]++;
286  cptr = cetn + 1;
287  }
288  } while (cetn);
289  if (i > 1) {
290  if (maxCommas < numCommas[i]) {
291  maxCommas = numCommas[i];
292  }
293  }
294  }
295  /*
296  * set a preliminary value of nCol
297  */
298  nCol = maxCommas + 1;
299  if (nScanLines == 0) {
300  nCol = 0;
301  }
302  char** strlets = (char**) mdp_alloc_ptr_1(maxCommas+1);
303 
304  /*
305  * Figure out if each column is a text or float
306  */
307  rewind(fp);
308  for (i = 0; i < nScanLines; i++) {
309  retn = read_line(fp, scanLine, 0);
310  int ncolsFound = breakStrCommas(scanLine, strlets, nCol);
311  if (ncolsFound == (maxCommas + 1)) {
312  for (j = 0; j < ncolsFound; j++) {
313  char* fieldStr = strlets[j];
314  fillTokStruct(&fieldToken, fieldStr);
315  if (fieldToken.ntokes != 1) {
316  break;
317  }
318  bool rerr = false;
319  (void) tok_to_double(&fieldToken, DBL_MAX,
320  -DBL_MAX, 0.0, &rerr);
321  if (!rerr) {
322  ColIsFloat[j] = true;
323  }
324  }
325 
326  }
327  }
328 
329 
330 
331  int doingLineType = LT_TITLELINE;
332  rewind(fp);
333  for (i = 0; i < nScanLines; i++) {
334  retn = read_line(fp, scanLine, 0);
335  /*
336  * Strip a trailing comma from the scanline -
337  * -> These are not significant
338  */
339  int ccount = static_cast<int>(strlen(scanLine));
340  if (ccount > 0) {
341  if (scanLine[ccount-1] == ',') {
342  scanLine[ccount-1] = '\0';
343  }
344  }
345  int ncolsFound = breakStrCommas(scanLine, strlets, nCol);
346 
347  if (doingLineType == LT_TITLELINE) {
348  if (numCommas[i] == maxCommas) {
349  doingLineType = LT_COLTITLE;
350  nTitleLines = i;
351  }
352  }
353 
354  if (doingLineType == LT_COLTITLE) {
355  bool goodDataLine = true;
356  bool rerr = false;
357  for (j = 0; j < ncolsFound; j++) {
358  char* fieldStr = strlets[j];
359  fillTokStruct(&fieldToken, fieldStr);
360  if (fieldToken.ntokes != 1) {
361  goodDataLine = false;
362  break;
363  }
364  if ((ColIsFloat[j]) == 1) {
365  (void) tok_to_double(&fieldToken, DBL_MAX,
366  -DBL_MAX, 0.0, &rerr);
367  if (rerr) {
368  goodDataLine = false;
369  break;
370  }
371  }
372  }
373  if (goodDataLine) {
374  doingLineType = LT_DATALINE;
375  }
376  nColTitleLines = i - nTitleLines;
377  }
378  if (doingLineType == LT_DATALINE) {
379  break;
380  }
381  }
382 
383 
384  /*
385  * Count the total number of lines in the file
386  */
387  if (doingLineType == LT_DATALINE) {
388  for (i = nColTitleLines + nTitleLines; ; i++) {
389  retn = read_line(fp, scanLine, 0);
390  if (retn == -1) {
391  nDataRows = i - nColTitleLines - nTitleLines + 1;
392  break;
393  }
394  /*
395  * Strip a trailing comma from the scanline -
396  * -> These are not significant
397  */
398  int ccount = static_cast<int>(strlen(scanLine));
399  if (ccount > 0) {
400  if (scanLine[ccount-1] == ',') {
401  scanLine[ccount-1] = '\0';
402  }
403  }
404  int ncolsFound = breakStrCommas(scanLine, strlets, nCol);
405  bool goodDataLine = true;
406  bool rerr = false;
407  for (j = 0; j < ncolsFound; j++) {
408  char* fieldStr = strlets[j];
409  fillTokStruct(&fieldToken, fieldStr);
410  if (fieldToken.ntokes != 1) {
411  goodDataLine = false;
412  break;
413  }
414  if (ColIsFloat[j] == 1) {
415  (void) tok_to_double(&fieldToken, DBL_MAX,
416  -DBL_MAX, 0.0, &rerr);
417  if (rerr) {
418  goodDataLine = false;
419  break;
420  }
421  }
422  }
423  if (! goodDataLine) {
424  doingLineType = LT_NULLLINE;
425  nDataRows = i - nColTitleLines - nTitleLines + 1;
426  break;
427  }
428  }
429  }
430  mdp_safe_free((void**) &strlets);
431  mdp_safe_free((void**) &scanLine);
432  mdp_safe_free((void**) &numCommas);
433  return;
434 }
435 
436 /*****************************************************************************/
437 /*****************************************************************************/
438 /*****************************************************************************/
439 
440 static void
441 read_title(FILE* fp, char** *title, int nTitleLines)
442 {
443  int retn;
444  *title = (char**) mdp_alloc_ptr_1(nTitleLines);
445  char* scanLine = mdp_alloc_char_1(Max_Input_Str_Ln + 1, '\0');
446  for (int i = 0; i < nTitleLines ; i++) {
447  retn = read_line(fp, scanLine, 0);
448  if (retn >= 0) {
449  /*
450  * Strip a trailing comma from the scanline -
451  * -> These are not significant
452  */
453  int ccount = static_cast<int>(strlen(scanLine));
454  if (ccount > 0) {
455  if (scanLine[ccount-1] == ',') {
456  scanLine[ccount-1] = '\0';
457  }
458  }
459  (*title)[i] = mdp_copy_string(scanLine);
460  }
461  }
462  mdp_safe_free((void**) &scanLine);
463 }
464 
465 /*****************************************************************************/
466 /*****************************************************************************/
467 /*****************************************************************************/
468 
469 static void
470 read_colTitle(FILE* fp, char**** ColMLNames_ptr, int nColTitleLines, int nCol)
471 {
472  int retn, j;
473  *ColMLNames_ptr = (char***) mdp_alloc_ptr_1(nCol);
474  char** *ColMLNames = *ColMLNames_ptr;
475  char* scanLine = mdp_alloc_char_1(Max_Input_Str_Ln + 1, '\0');
476  char** strlets = (char**) mdp_alloc_ptr_1(nCol+1);
477  if (nColTitleLines > 0) {
478  for (int i = 0; i < nColTitleLines ; i++) {
479  retn = read_line(fp, scanLine, 0);
480  if (retn >= 0) {
481  /*
482  * Strip a trailing comma from the scanline -
483  * -> These are not significant
484  */
485  int ccount = static_cast<int>(strlen(scanLine));
486  if (ccount > 0) {
487  if (scanLine[ccount-1] == ',') {
488  scanLine[ccount-1] = '\0';
489  }
490  }
491  int ncolsFound = breakStrCommas(scanLine, strlets, nCol);
492  ColMLNames[i] = mdp_alloc_VecFixedStrings(nCol, MAX_TOKEN_STR_LN+1);
493  for (j = 0; j < ncolsFound; j++) {
494  strip(strlets[j]);
495  strcpy(ColMLNames[i][j], strlets[j]);
496  }
497  }
498  }
499  } else {
500  ColMLNames[0] = mdp_alloc_VecFixedStrings(nCol, MAX_TOKEN_STR_LN+1);
501  for (j = 0; j < nCol; j++) {
502  char cbuff[256];
503  sprintf(cbuff, "Col_%d", j+1);
504  strcpy(ColMLNames[0][j], cbuff);
505  }
506  }
507  mdp_safe_free((void**) &scanLine);
508  mdp_safe_free((void**) &strlets);
509 }
510 
511 /*****************************************************************************/
512 /*****************************************************************************/
513 /*****************************************************************************/
514 
/*
 * get_atol():
 *   Scales an absolute tolerance by the size of the data it will be
 *   applied to.
 *
 * Arguments:
 *   values = Vector of nvals data values
 *   nvals  = Number of entries in values
 *   atol   = Base absolute tolerance
 *
 * Return:
 *   (rms(values) + 1) * atol, where rms is the root mean square of the
 *   values. For nvals <= 0 the rms is taken as zero, so atol itself is
 *   returned; the result therefore depends only on the arguments.
 */
static double get_atol(const double* values, const int nvals,
                       const double atol)
{
    if (nvals <= 0) {
        return atol;
    }
    double sumSq = 0.0;
    for (int i = 0; i < nvals; i++) {
        sumSq += values[i] * values[i];
    }
    const double rms = sqrt(sumSq / nvals);
    return (rms + 1.0) * atol;
}
531 
532 /*****************************************************************************/
533 /*****************************************************************************/
534 /*****************************************************************************/
535 
536 static void
537 read_values(FILE* fp, double** NVValues, char** *NSValues, int nCol, int nDataRows,
538  int* ColIsFloat)
539 {
540  char** strlets = (char**) mdp_alloc_ptr_1(nCol+1);
541  char* scanLine = mdp_alloc_char_1(Max_Input_Str_Ln + 1, '\0');
542  TOKEN fieldToken;
543  double value;
544  int retn, j;
545  for (int i = 0; i < nDataRows; i++) {
546  retn = read_line(fp, scanLine, 0);
547  if (retn == -1) {
548  break;
549  }
550  /*
551  * Strip a trailing comma from the scanline -
552  * -> These are not significant
553  */
554  int ccount = static_cast<int>(strlen(scanLine));
555  if (ccount > 0) {
556  if (scanLine[ccount-1] == ',') {
557  scanLine[ccount-1] = '\0';
558  }
559  }
560  int ncolsFound = breakStrCommas(scanLine, strlets, nCol);
561  bool goodDataLine = true;
562  bool rerr = false;
563  for (j = 0; j < ncolsFound; j++) {
564  char* fieldStr = strlets[j];
565  NSValues[j][i] = mdp_copy_string(strlets[j]);
566  fillTokStruct(&fieldToken, fieldStr);
567  if (fieldToken.ntokes != 1) {
568  goodDataLine = false;
569  break;
570  }
571  if (ColIsFloat[j]) {
572  value = tok_to_double(&fieldToken, DBL_MAX,
573  -DBL_MAX, 0.0, &rerr);
574  if (rerr) {
575  goodDataLine = false;
576  break;
577  }
578  NVValues[j][i] = value;
579  }
580  }
581  if (! goodDataLine) {
582  break;
583  }
584  }
585  mdp_safe_free((void**) &strlets);
586  mdp_safe_free((void**) &scanLine);
587 }
588 /*****************************************************************************/
589 /*****************************************************************************/
590 /*****************************************************************************/
591 
/*
 * print_usage():
 *   Writes the command line synopsis, the option descriptions and the
 *   meaning of the shell return values to standard output.
 */
static void print_usage()
{
    static const char* const usageText[] = {
        "\t\n",
        " csvdiff [-h] [-a atol] [-r rtol] File1.csv File2.csv\n",
        "\t\n",
        "\tCompares the variable values in two Excel formatted "
        "comma separated files.\n",
        "\tThe comparison is done using a weighted norm basis.\n",
        "\t\n",
        "\tThe two files should be basically equal. However, File1.csv is\n",
        "\ttaken as the reference file that has precedence, when there is\n",
        "\tsomething to be decided upon.\n",
        "\t\n",
        "\t Arguments:\n",
        "\t -h = Usage info\n",
        "\t -a atol = Set absolute tolerance parameter - default = 1.0E-9\n",
        "\t -r rtol = Set relative tolerance parameter - default = 1.0E-3\n",
        "\t\n",
        "\t Shell Return Values:\n",
        "\t 0 = Comparison was successful\n",
        "\t 1 = One or more nodal values failed the comparison\n",
        "\t 2 = One or more header values failed the comparison\n",
        "\t 3 = Apples to oranges, the files can not even be compared against\n",
        "\t one another.\n",
        "\t\n"
    };
    const int nLines = static_cast<int>(sizeof(usageText) / sizeof(usageText[0]));
    for (int n = 0; n < nLines; n++) {
        printf("%s", usageText[n]);
    }
}
618 /*****************************************************************************/
619 /*****************************************************************************/
620 /*****************************************************************************/
621 
622 int main(int argc, char* argv[])
623 
624 /*
625  * main driver for csvdiff.
626  */
627 {
628  int opt_let;
629  char* fileName1=NULL, *fileName2=NULL; /* Names of the csv files */
630  FILE* fp1=NULL, *fp2=NULL;
631  int nTitleLines1 = 0, nTitleLines2 = 0;
632  int nColTitleLines1 = 0, nColTitleLines2 = 0;
633  int nCol1 = 0, nCol2 = 0, nColMAX = 0, nColcomparisons = 0;
634  int nDataRows1 = 0, nDataRows2 = 0;
635  char** title1 = 0, **title2 = 0;
636  int** compColList = NULL;
637  char** *ColMLNames1 = NULL, *** ColMLNames2 = NULL;
638  char** ColNames1 = NULL, **ColNames2 = NULL;
639  double** NVValues1 = NULL, **NVValues2 = NULL;
640  char** *NSValues1 = NULL, *** NSValues2 = NULL;
641  int* ColIsFloat1 = NULL, *ColIsFloat2 = NULL;
642  double* curVarValues1 = NULL, *curVarValues2 = NULL;
643  char** curStringValues1 = NULL, **curStringValues2 = NULL;
644  int i, j, ndiff, jmax=0, i1, i2, k;
645  bool found;
646  double max_diff, rel_diff;
647  int testPassed = RT_PASSED;
648  double atol_j, atol_arg = 0.0, rtol_arg = 0.0;
649 
650  /********************** BEGIN EXECUTION ************************************/
651  int id = 0;
652  int id2 = 0;
653  char* ggg = 0;
654  char* rrr = 0;
655  /*
656  * Interpret command line arguments
657  */
658  /* Loop over each command line option */
659  while ((opt_let = getopt(argc, argv, "ha:r:")) != EOF) {
660 
661  /* case over the option letter */
662  switch (opt_let) {
663 
664  case 'h':
665  /* Usage info was requested */
666  print_usage();
667  exit(0);
668 
669  case 'a':
670  /* atol parameter */
671 
672  ggg = optarg;
673  //printf("a = %s\n", ggg);
674  id = sscanf(ggg,"%lg", &atol_arg);
675  if (id != 1) {
676  printf(" atol param bad: %s\n", ggg);
677  exit(-1);
678  }
679  gatol = atol_arg;
680  break;
681 
682  case 'r':
683  /* rtol parameter */
684 
685  rrr = optarg;
686  //printf("r = %s\n", ggg);
687  id2 = sscanf(rrr,"%lg", &rtol_arg);
688  if (id2 != 1) {
689  printf(" rtol param bad: %s\n", rrr);
690  exit(-1);
691  }
692  grtol = rtol_arg;
693  break;
694 
695 
696  default:
697  /* Default case. Error on unknown argument. */
698  printf("default called opt_let = %c\n", opt_let);
699  fprintf(stderr, "ERROR in command line usuage:\n");
700  print_usage();
701  return 0;
702  } /* End "switch(opt_let)" */
703 
704  } /* End "while((opt_let=getopt(argc, argv, "i")) != EOF)" */
705 
706  if (optind != argc-2) {
707  print_usage();
708  exit(-1);
709  } else {
710  fileName1 = argv[argc-2];
711  fileName2 = argv[argc-1];
712  }
713 
714  /*
715  * Print Out Header
716  */
717  printf("\n");
718  printf("----------------------------------------------------------\n");
719  printf("csvdiff: CSVFile comparison utility program\n");
720  printf(" Harry K. Moffat Div. 9114 Sandia National Labs\n");
721  printf(" \n");
722  printf(" First CSV File = %s\n", fileName1);
723  printf(" Second CSV file = %s\n", fileName2);
724  printf("\n");
725  printf(" Absolute tol = %g\n", gatol);
726  printf(" Relative tol = %g\n", grtol);
727  printf("----------------------------------------------------------\n");
728  printf("\n");
729 
730  /*
731  * Open up the two ascii Files #1 and #2
732  */
733  if (!(fp1 = fopen(fileName1, "r"))) {
734  fprintf(stderr,"Error opening up file1, %s\n", fileName1);
735  exit(-1);
736  }
737  if (!(fp2 = fopen(fileName2, "r"))) {
738  fprintf(stderr, "Error opening up file2, %s\n", fileName2);
739  exit(-1);
740  }
741 
742  ColIsFloat1 = mdp_alloc_int_1(200, 0);
743  ColIsFloat2 = mdp_alloc_int_1(200, 0);
744  /*
745  * Obtain the size of the problem information: Compare between files.
746  */
747 
748  get_sizes(fp1, nTitleLines1, nColTitleLines1, nCol1, nDataRows1, &ColIsFloat1);
749  if (nCol1 == 0) {
750  printf("Number of columns in file %s is zero\n", fileName1);
751  testPassed = RT_FAILED_OTHER;
752  exit(RT_FAILED_OTHER);
753  }
754  if (nDataRows1 == 0) {
755  printf("Number of data rows in file %s is zero\n", fileName1);
756  testPassed = RT_FAILED_OTHER;
757  exit(RT_FAILED_OTHER);
758  }
759 
760  get_sizes(fp2, nTitleLines2, nColTitleLines2, nCol2, nDataRows2, &ColIsFloat2);
761  if (nCol2 == 0) {
762  printf("Number of columns in file %s is zero\n", fileName2);
763  testPassed = RT_FAILED_OTHER;
764  exit(RT_FAILED_OTHER);
765  }
766  if (nDataRows2 == 0) {
767  printf("Number of data rows in file %s is zero\n", fileName2);
768  testPassed = RT_FAILED_OTHER;
769  exit(RT_FAILED_OTHER);
770  }
771 
772  if (nTitleLines1 != nTitleLines2) {
773  printf("Number of Title Lines differ:, %d %d\n",nTitleLines1, nTitleLines2);
774  testPassed = RT_FAILED_OTHER;
775  } else if (Debug_Flag) {
776  printf("Number of Title Lines in each file = %d\n", nTitleLines1);
777  }
778  if (nColTitleLines1 != nColTitleLines2) {
779  printf("Number of Column title lines differ:, %d %d\n", nColTitleLines1,
780  nColTitleLines2);
781  testPassed = RT_FAILED_OTHER;
782  } else if (Debug_Flag) {
783  printf("Number of column title lines in each file = %d\n", nColTitleLines1);
784  }
785 
786  /*
787  * Right now, if the number of data rows differ, we will punt.
788  * Maybe later we can do something more significant
789  */
790  int nDataRowsMIN = min(nDataRows1, nDataRows2);
791  int nDataRowsMAX = max(nDataRows1, nDataRows2);
792  if (nDataRows1 != nDataRows2) {
793  printf("Number of Data rows in file1, %d, is different than file2, %d\n",
794  nDataRows1, nDataRows2);
795  } else {
796  printf("Number of Data rows in both files = %d\n", nDataRowsMIN);
797  }
798 
799  rewind(fp1);
800  rewind(fp2);
801  read_title(fp1, &title1, nTitleLines1);
802  read_title(fp2, &title2, nTitleLines2);
803 
804  if (nTitleLines1 > 0 || nTitleLines2 > 0) {
805  int n = min(nTitleLines1, nTitleLines2);
806  for (i = 0; i < n; i++) {
807  if (strcmp(title1[i], title2[i]) != 0) {
808  printf("Title Line %d differ:\n\t\"%s\"\n\t\"%s\"\n", i, title1[i], title2[i]);
809  testPassed = RT_FAILED_HDR;
810  } else if (Debug_Flag) {
811  printf("Title Line %d for each file: \"%s\"\n", i, title1[i]);
812  }
813  }
814  if (nTitleLines1 != nTitleLines2) {
815  printf("Number of Title Lines differ: %d %d\n", nTitleLines1, nTitleLines2);
816  testPassed = RT_FAILED_HDR;
817  }
818  } else {
819  if (nTitleLines1 != nTitleLines2) {
820  if (nTitleLines1) {
821  printf("Titles differ: title for first file: \"%s\"\n",
822  title1[0]);
823  testPassed = RT_FAILED_HDR;
824  }
825  if (nTitleLines2) {
826  printf("Titles differ: title for second file: \"%s\"\n",
827  title2[0]);
828  }
829  testPassed = RT_FAILED_HDR;
830  }
831  }
832 
833  /*
834  * Get the number of column variables in each file
835  */
836 
837  if (nCol1 != nCol2) {
838  printf("Number of column variables differ:, %d %d\n",
839  nCol1, nCol2);
840  testPassed = RT_FAILED_OTHER;
841  } else if (Debug_Flag) {
842  printf("Number of column variables in both files = %d\n",
843  nCol1);
844  }
845 
846  /*
847  * Read the names of the column variables
848  */
849  read_colTitle(fp1, &ColMLNames1, nColTitleLines1, nCol1);
850  read_colTitle(fp2, &ColMLNames2, nColTitleLines2, nCol2);
851  ColNames1 = ColMLNames1[0];
852  ColNames2 = ColMLNames2[0];
853 
854  /*
855  * Do a Comparison of the names to find the maximum number
856  * of matches.
857  */
858  nColMAX = max(nCol1, nCol2);
859 
860  compColList = mdp_alloc_int_2(nColMAX, 2, -1);
861  nColcomparisons = 0;
862  for (i = 0; i < nCol1; i++) {
863  found = false;
864  for (j = 0; j < nCol2; j++) {
865  if (!strcmp(ColNames1[i], ColNames2[j])) {
866  compColList[nColcomparisons][0] = i;
867  compColList[nColcomparisons][1] = j;
868  nColcomparisons++;
869  found = true;
870  break;
871  }
872  }
873  if (!found) {
874  printf("csvdiff WARNING Variable %s (%d) in first file not found"
875  " in second file\n", ColNames1[i], i);
876  testPassed = RT_FAILED_OTHER;
877  }
878  }
879  for (j = 0; j < nCol2; j++) {
880  found = false;
881  for (i = 0; i < nColcomparisons; i++) {
882  if (compColList[i][1] == j) {
883  found = true;
884  }
885  }
886  if (! found) {
887  printf("csvdiff WARNING Variable %s (%d) in second file "
888  "not found in first file\n",
889  ColNames2[j], j);
890  testPassed = RT_FAILED_OTHER;
891  }
892  }
893 
894  /*
895  * Allocate storage for the column variables
896  */
897  NVValues1 = mdp_alloc_dbl_2(nCol1, nDataRowsMAX, 0.0);
898  NVValues2 = mdp_alloc_dbl_2(nCol2, nDataRowsMAX, 0.0);
899 
900  /*
901  * Allocate storage for the column variables
902  */
903  NSValues1 = (char***) mdp_alloc_ptr_2(nCol1, nDataRowsMAX);
904  NSValues2 = (char***) mdp_alloc_ptr_2(nCol2, nDataRowsMAX);
905 
906  /*
907  * Read in the values to the arrays
908  */
909  read_values(fp1, NVValues1, NSValues1, nCol1, nDataRows1, ColIsFloat1);
910  read_values(fp2, NVValues2, NSValues2, nCol2, nDataRows2, ColIsFloat2);
911 
912  /*
913  * Compare the solutions in each file
914  */
915  int method = 1;
916  double slope1, slope2, xatol;
917  int notOK;
918  for (k = 0; k < nColcomparisons; k++) {
919 
920  i1 = compColList[k][0];
921  i2 = compColList[k][1];
922  bool doFltComparison = true;
923  if (!ColIsFloat1[i1]) {
924  doFltComparison = false;
925  jmax = -1;
926  }
927  if (!ColIsFloat2[i2]) {
928  doFltComparison = false;
929  jmax = -1;
930  }
931  curStringValues1 = NSValues1[i1];
932  curStringValues2 = NSValues2[i2];
933  max_diff = 0.0;
934  ndiff = 0;
935  if (doFltComparison) {
936  curVarValues1 = NVValues1[i1];
937  curVarValues2 = NVValues2[i2];
938  atol_j = get_atol(curVarValues1, nDataRows1, gatol);
939  atol_j = min(atol_j, get_atol(curVarValues2, nDataRows2, gatol));
940  for (j = 0; j < nDataRowsMIN; j++) {
941 
942  slope1 = 0.0;
943  slope2 = 0.0;
944  xatol = fabs(grtol * (NVValues1[0][j] - NVValues1[0][j-1]));
945  if (j > 0 && k > 0) {
946  slope1 = (curVarValues1[j] - curVarValues1[j-1])/
947  (NVValues1[0][j] - NVValues1[0][j-1]);
948  slope2 = (curVarValues2[j] - curVarValues2[j-1])/
949  (NVValues2[0][j] - NVValues2[0][j-1]);
950  }
951  if (method) {
952  notOK = diff_double_slope(curVarValues1[j], curVarValues2[j],
953  grtol, atol_j, xatol, slope1, slope2);
954  } else {
955  notOK = diff_double(curVarValues1[j], curVarValues2[j],
956  grtol, atol_j);
957  }
958  if (notOK) {
959  ndiff++;
960  rel_diff = calc_rdiff((double) curVarValues1[j],
961  (double) curVarValues2[j], grtol, atol_j);
962  if (rel_diff > max_diff) {
963  jmax = j;
964  max_diff = rel_diff;
965  }
966  if (ndiff < 10) {
967  printf("\tColumn variable %s at data row %d ", ColNames1[i1], j + 1);
968  printf(" differ: %g %g\n", curVarValues1[j],
969  curVarValues2[j]);
970  }
971  }
972  }
973  } else {
974  for (j = 0; j < nDataRowsMIN; j++) {
975  strip(curStringValues1[j]);
976  strip(curStringValues2[j]);
977  notOK = false;
978  if (strcmp(curStringValues1[j], curStringValues2[j])) {
979  notOK = true;
980  ndiff++;
981  if (ndiff < 10) {
982  printf("\tColumn String variable %s at data row %d ", ColNames1[i1], j + 1);
983  printf(" differ: %s %s\n", curStringValues1[j],
984  curStringValues2[j]);
985  }
986  }
987  }
988  }
989 
990  if (nDataRowsMIN != nDataRowsMAX) {
991  ndiff += nDataRowsMAX - nDataRowsMIN;
992  if (ndiff < 10) {
993  if (nDataRows1 > nDataRows2) {
994  for (j = nDataRowsMIN; j < nDataRowsMAX; j++) {
995  printf("\tColumn variable %s at data row %d ", ColNames1[i1], j + 1);
996  printf(" differ: %g NA\n", curVarValues1[j]);
997  }
998  } else {
999  for (j = nDataRowsMIN; j < nDataRowsMAX; j++) {
1000  printf("\tColumn variable %s at data row %d ", ColNames1[i1], j + 1);
1001  printf(" differ: NA %g \n", curVarValues2[j]);
1002  }
1003  }
1004  }
1005  }
1006 
1007  /*
1008  * Print out final results of nodal variable test
1009  */
1010 
1011  if (ndiff > 0) {
1012  printf(
1013  "Column variable %s failed comparison test for %d occurrences\n",
1014  ColNames1[i1], ndiff);
1015  if (jmax >= 0) {
1016  printf(" Largest difference was at data row %d ", jmax + 1);
1017  printf(": %g %g\n", curVarValues1[jmax], curVarValues2[jmax]);
1018  }
1019  testPassed = RT_FAILED_COL;
1020  } else if (Debug_Flag) {
1021  printf("Column variable %s passed\n", ColNames1[i1]);
1022  }
1023 
1024  }
1025 
1026  return testPassed;
1027 
1028 } /************END of main() *************************************************/
1029 /*****************************************************************************/